aboutsummaryrefslogtreecommitdiff
path: root/externals/grill/flext
diff options
context:
space:
mode:
authorThomas Grill <xovo@users.sourceforge.net>2003-06-04 02:38:08 +0000
committerThomas Grill <xovo@users.sourceforge.net>2003-06-04 02:38:08 +0000
commita6d4aa2bcc49633db1d8464f7727cb73e2c0d052 (patch)
treedd3bc68f69d188f32d18022cf5d773a8f201079e /externals/grill/flext
parent3fb9132261bbc69087161c0f80a3be10a60f07a9 (diff)
""
svn path=/trunk/; revision=681
Diffstat (limited to 'externals/grill/flext')
-rw-r--r--externals/grill/flext/changes.txt3
-rw-r--r--externals/grill/flext/flext_sh.dsp8
-rw-r--r--externals/grill/flext/source/flext.h4
-rwxr-xr-xexternals/grill/flext/source/flsimd.cpp301
-rw-r--r--externals/grill/flext/source/flsupport.h5
5 files changed, 251 insertions, 70 deletions
diff --git a/externals/grill/flext/changes.txt b/externals/grill/flext/changes.txt
index 6ef61b98..4125fc05 100644
--- a/externals/grill/flext/changes.txt
+++ b/externals/grill/flext/changes.txt
@@ -13,6 +13,9 @@ Donations for further development of the package are highly appreciated.
Version history:
+0.5.0:
+- added some more SIMD functions (MulSamples, AddSamples, ScaleSamples)
+
0.4.4:
- fixed deadly bug for Max/MSP method-to-symbol-binding proxies
- some fixes for CodeWarrior Mach-O compilation
diff --git a/externals/grill/flext/flext_sh.dsp b/externals/grill/flext/flext_sh.dsp
index 35c604b6..dad184c8 100644
--- a/externals/grill/flext/flext_sh.dsp
+++ b/externals/grill/flext/flext_sh.dsp
@@ -40,7 +40,7 @@ RSC=rc.exe
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
-# PROP Output_Dir "pd-msvc/dr"
+# PROP Output_Dir "pd-msvc"
# PROP Intermediate_Dir "pd-msvc/dr"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
@@ -67,7 +67,7 @@ LINK32=link.exe
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
-# PROP Output_Dir "pd-msvc/dd"
+# PROP Output_Dir "pd-msvc"
# PROP Intermediate_Dir "pd-msvc/dd"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
@@ -94,7 +94,7 @@ LINK32=link.exe
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
-# PROP Output_Dir "pd-msvc\ddl"
+# PROP Output_Dir "pd-msvc"
# PROP Intermediate_Dir "pd-msvc\ddl"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
@@ -121,7 +121,7 @@ LINK32=link.exe
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
-# PROP Output_Dir "pd-msvc\drl"
+# PROP Output_Dir "pd-msvc"
# PROP Intermediate_Dir "pd-msvc\drl"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
diff --git a/externals/grill/flext/source/flext.h b/externals/grill/flext/source/flext.h
index 7472e4a1..1a87e2bf 100644
--- a/externals/grill/flext/source/flext.h
+++ b/externals/grill/flext/source/flext.h
@@ -23,10 +23,10 @@ WARRANTIES, see the file, "license.txt," in this distribution.
*/
//! \brief flext version number
-#define FLEXT_VERSION 404
+#define FLEXT_VERSION 500
//! \brief flext version string
-#define FLEXT_VERSTR "0.4.4"
+#define FLEXT_VERSTR "0.5.0pre"
//! @}
diff --git a/externals/grill/flext/source/flsimd.cpp b/externals/grill/flext/source/flsimd.cpp
index 9786e4e7..a491bd51 100755
--- a/externals/grill/flext/source/flsimd.cpp
+++ b/externals/grill/flext/source/flsimd.cpp
@@ -284,15 +284,14 @@ void flext::CopySamples(t_sample *dst,const t_sample *src,int cnt)
#else
#ifdef FLEXT_USE_SIMD
#ifdef _MSC_VER
-#if 1 // t_sample is float
if(GetSIMDCapabilities()&simd_sse) {
// single precision
int n = cnt>>4;
cnt -= n<<4;
- if((reinterpret_cast<unsigned long>(src)&(__alignof(t_sample)-1)) == 0
- && (reinterpret_cast<unsigned long>(dst)&(__alignof(t_sample)-1)) == 0
+ if((reinterpret_cast<unsigned long>(src)&(__alignof(__m128)-1)) == 0
+ && (reinterpret_cast<unsigned long>(dst)&(__alignof(__m128)-1)) == 0
) {
// aligned version
while(n--) {
@@ -316,41 +315,6 @@ void flext::CopySamples(t_sample *dst,const t_sample *src,int cnt)
while(cnt--) *(dst++) = *(src++);
}
else
-#elif 0 // t_sample is double
- if(GetSIMDCapabilities()&simd_sse2) {
- // double precision
-
- int n = cnt>>3;
- cnt -= n<<3;
-
- if((reinterpret_cast<unsigned long>(src)&(__alignof(t_sample)-1)) == 0
- && (reinterpret_cast<unsigned long>(dst)&(__alignof(t_sample)-1)) == 0
- ) {
- // aligned version
- while(n--) {
- _mm_store_pd(dst+0,_mm_load_pd(src+0));
- _mm_store_pd(dst+2,_mm_load_pd(src+2));
- _mm_store_pd(dst+4,_mm_load_pd(src+4));
- _mm_store_pd(dst+6,_mm_load_pd(src+6));
- src += 8,dst += 8;
- }
- }
- else {
- // unaligned version
- while(n--) {
- _mm_storeu_pd(dst+0,_mm_loadu_pd(src+0));
- _mm_storeu_pd(dst+2,_mm_loadu_pd(src+2));
- _mm_storeu_pd(dst+4,_mm_loadu_pd(src+4));
- _mm_storeu_pd(dst+6,_mm_loadu_pd(src+6));
- src += 8,dst += 8;
- }
- }
- while(cnt--) *(dst++) = *(src++);
- }
- else
-#else
- #error t_sample data type has illegal size
-#endif
#elif FLEXT_OS == FLEXT_OS_MAC && defined(__VEC__) && defined(__VECTOROPS__)
{
int n = cnt>>2,n4 = n<<2;
@@ -388,7 +352,6 @@ void flext::SetSamples(t_sample *dst,int cnt,t_sample s)
#else
#ifdef FLEXT_USE_SIMD
#ifdef _MSC_VER
-#if 1 // t_sample is float
if(GetSIMDCapabilities()&simd_sse) {
// single precision
@@ -396,7 +359,7 @@ void flext::SetSamples(t_sample *dst,int cnt,t_sample s)
int n = cnt>>4;
cnt -= n<<4;
- if((reinterpret_cast<unsigned long>(dst)&(__alignof(t_sample)-1)) == 0) {
+ if((reinterpret_cast<unsigned long>(dst)&(__alignof(__m128)-1)) == 0) {
// aligned version
while(n--) {
_mm_store_ps(dst+0,v);
@@ -419,51 +382,261 @@ void flext::SetSamples(t_sample *dst,int cnt,t_sample s)
while(cnt--) *(dst++) = s;
}
else
-#elif 0 // t_sample is double
- if(GetSIMDCapabilities()&simd_sse2) {
- // double precision
+#endif // _MSC_VER
+#endif // FLEXT_USE_SIMD
+ {
+ int n = cnt>>3;
+ cnt -= n<<3;
+ while(n--) {
+ dst[0] = dst[1] = dst[2] = dst[3] = dst[4] = dst[5] = dst[6] = dst[7] = s;
+ dst += 8;
+ }
+
+ while(cnt--) *(dst++) = s;
+ }
+#endif
+}
+
+
+void flext::MulSamples(t_sample *dst,const t_sample *src,t_sample mul,int cnt) // writes dst[i] = src[i]*mul for cnt samples
+{
+#ifdef FLEXT_USE_IPP // IPP build: copy src into dst, then multiply dst in place
+ if(sizeof(t_sample) == 4) { // 4-byte samples go to the 32f routines
+ ippsCopy_32f((const float *)src,(float *)dst,cnt);
+ ippsMulC_32f_I((float)mul,(float *)dst,cnt);
+ }
+ else if(sizeof(t_sample) == 8) { // 8-byte samples go to the 64f routines
+ ippsCopy_64f((const double *)src,(double *)dst,cnt);
+ ippsMulC_64f_I((double)mul,(double *)dst,cnt);
+ }
+ else
+ ERRINTERNAL(); // sample size is neither 4 nor 8 bytes
+#else // no IPP: optional SSE path, otherwise plain C++
+#ifdef FLEXT_USE_SIMD
+#ifdef _MSC_VER
+ if(GetSIMDCapabilities()&simd_sse) { // SSE availability is tested at run time
+ // single precision: the _ps intrinsics below operate on packed floats
+ __m128 a = _mm_load1_ps(&mul); // mul replicated into all four lanes
- __m128 v = _mm_load1_pd(&s);
- int n = cnt>>3;
- cnt -= n<<3;
+ int n = cnt>>4; // number of complete 16-sample blocks
+ cnt -= n<<4; // remainder is handled by the scalar loop below
- if((reinterpret_cast<unsigned long>(dst)&(__alignof(t_sample)-1)) == 0) {
+ if((reinterpret_cast<unsigned long>(src)&(__alignof(__m128)-1)) == 0 // aligned loads/stores are used only if src ...
+ && (reinterpret_cast<unsigned long>(dst)&(__alignof(__m128)-1)) == 0 // ... and dst both lie on an __m128 boundary
+ ) {
// aligned version
- while(n--) {
- _mm_store_pd(dst+0,v);
- _mm_store_pd(dst+2,v);
- _mm_store_pd(dst+4,v);
- _mm_store_pd(dst+8,v);
- dst += 8;
+ while(n--) { // four vectors of four floats per iteration
+ _mm_store_ps(dst+0,_mm_mul_ps(a,_mm_load_ps(src+0)));
+ _mm_store_ps(dst+4,_mm_mul_ps(a,_mm_load_ps(src+4)));
+ _mm_store_ps(dst+8,_mm_mul_ps(a,_mm_load_ps(src+8)));
+ _mm_store_ps(dst+12,_mm_mul_ps(a,_mm_load_ps(src+12)));
+ src += 16,dst += 16;
}
}
else {
// unaligned version
while(n--) {
- _mm_storeu_pd(dst+0,v);
- _mm_storeu_pd(dst+2,v);
- _mm_storeu_pd(dst+4,v);
- _mm_storeu_pd(dst+8,v);
- dst += 8;
+ _mm_storeu_ps(dst+0,_mm_mul_ps(a,_mm_loadu_ps(src+0))); // same arithmetic, unaligned load/store
+ _mm_storeu_ps(dst+4,_mm_mul_ps(a,_mm_loadu_ps(src+4)));
+ _mm_storeu_ps(dst+8,_mm_mul_ps(a,_mm_loadu_ps(src+8)));
+ _mm_storeu_ps(dst+12,_mm_mul_ps(a,_mm_loadu_ps(src+12)));
+ src += 16,dst += 16;
}
}
- while(cnt--) *(dst++) = s;
+ while(cnt--) *(dst++) = *(src++)*mul; // scalar tail: the last cnt%16 samples
}
else
+/* NOTE(review): disabled vecLib branch; as written it only copies (vScopy) and never multiplies
+#elif FLEXT_OS == FLEXT_OS_MAC && defined(__VEC__) && defined(__VECTOROPS__)
+ {
+ int n = cnt>>2,n4 = n<<2;
+ cnt -= n4;
+ vScopy(n4,src,dst);
+ src += n4,dst += n4;
+ while(cnt--) *(dst++) = *(src++);
+ }
+*/
+#endif // _MSC_VER
+#endif // FLEXT_USE_SIMD
+ { // portable path; also the else-branch when SSE is not reported
+ int n = cnt>>3; // complete 8-sample blocks, unrolled by hand
+ cnt -= n<<3; // leftover samples for the tail loop
+ while(n--) {
+ dst[0] = src[0]*mul;
+ dst[1] = src[1]*mul;
+ dst[2] = src[2]*mul;
+ dst[3] = src[3]*mul;
+ dst[4] = src[4]*mul;
+ dst[5] = src[5]*mul;
+ dst[6] = src[6]*mul;
+ dst[7] = src[7]*mul;
+ src += 8,dst += 8;
+ }
+ while(cnt--) *(dst++) = *(src++)*mul; // scalar tail: the last cnt%8 samples
+ }
+#endif // FLEXT_USE_IPP
+}
+
+
+void flext::AddSamples(t_sample *dst,const t_sample *src,t_sample add,int cnt) // writes dst[i] = src[i]+add for cnt samples
+{
+#ifdef FLEXT_USE_IPP // IPP build: copy src into dst, then add the constant in place
+ if(sizeof(t_sample) == 4) { // 4-byte samples go to the 32f routines
+ ippsCopy_32f((const float *)src,(float *)dst,cnt);
+ ippsAddC_32f_I((float)add,(float *)dst,cnt); // the offset is the 'add' parameter (there is no 'mul' in this function)
+ }
+ else if(sizeof(t_sample) == 8) { // 8-byte samples go to the 64f routines
+ ippsCopy_64f((const double *)src,(double *)dst,cnt);
+ ippsAddC_64f_I((double)add,(double *)dst,cnt); // the offset is the 'add' parameter (there is no 'mul' in this function)
+ }
+ else
+ ERRINTERNAL(); // sample size is neither 4 nor 8 bytes
#else
- #error t_sample data type has illegal size
+#ifdef FLEXT_USE_SIMD
+#ifdef _MSC_VER
+ if(GetSIMDCapabilities()&simd_sse) { // SSE availability is tested at run time
+ // single precision: the _ps intrinsics below operate on packed floats
+ __m128 a = _mm_load1_ps(&add); // add replicated into all four lanes
+
+ int n = cnt>>4; // number of complete 16-sample blocks
+ cnt -= n<<4; // remainder is handled by the scalar loop below
+
+ if((reinterpret_cast<unsigned long>(src)&(__alignof(__m128)-1)) == 0 // aligned loads/stores are used only if src ...
+ && (reinterpret_cast<unsigned long>(dst)&(__alignof(__m128)-1)) == 0 // ... and dst both lie on an __m128 boundary
+ ) {
+ // aligned version
+ while(n--) { // four vectors of four floats per iteration
+ _mm_store_ps(dst+0,_mm_add_ps(a,_mm_load_ps(src+0)));
+ _mm_store_ps(dst+4,_mm_add_ps(a,_mm_load_ps(src+4)));
+ _mm_store_ps(dst+8,_mm_add_ps(a,_mm_load_ps(src+8)));
+ _mm_store_ps(dst+12,_mm_add_ps(a,_mm_load_ps(src+12)));
+ src += 16,dst += 16;
+ }
+ }
+ else {
+ // unaligned version
+ while(n--) { // same arithmetic, unaligned load/store
+ _mm_storeu_ps(dst+0,_mm_add_ps(a,_mm_loadu_ps(src+0)));
+ _mm_storeu_ps(dst+4,_mm_add_ps(a,_mm_loadu_ps(src+4)));
+ _mm_storeu_ps(dst+8,_mm_add_ps(a,_mm_loadu_ps(src+8)));
+ _mm_storeu_ps(dst+12,_mm_add_ps(a,_mm_loadu_ps(src+12)));
+ src += 16,dst += 16;
+ }
+ }
+ while(cnt--) *(dst++) = *(src++)+add; // scalar tail: the last cnt%16 samples
+ }
+ else
+/* NOTE(review): disabled vecLib branch; as written it only copies (vScopy) and never adds the offset
+#elif FLEXT_OS == FLEXT_OS_MAC && defined(__VEC__) && defined(__VECTOROPS__)
+ {
+ int n = cnt>>2,n4 = n<<2;
+ cnt -= n4;
+ vScopy(n4,src,dst);
+ src += n4,dst += n4;
+ while(cnt--) *(dst++) = *(src++);
+ }
+*/
+#endif // _MSC_VER
+#endif // FLEXT_USE_SIMD
+ { // portable path; also the else-branch when SSE is not reported
+ int n = cnt>>3; // complete 8-sample blocks, unrolled by hand
+ cnt -= n<<3; // leftover samples for the tail loop
+ while(n--) {
+ dst[0] = src[0]+add;
+ dst[1] = src[1]+add;
+ dst[2] = src[2]+add;
+ dst[3] = src[3]+add;
+ dst[4] = src[4]+add;
+ dst[5] = src[5]+add;
+ dst[6] = src[6]+add;
+ dst[7] = src[7]+add;
+ src += 8,dst += 8;
+ }
+ while(cnt--) *(dst++) = *(src++)+add; // scalar tail: the last cnt%8 samples
+ }
#endif
+}
+
+
+void flext::ScaleSamples(t_sample *dst,const t_sample *src,t_sample mul,t_sample add,int cnt) // writes dst[i] = src[i]*mul+add for cnt samples
+{
+#ifdef FLEXT_USE_IPP // IPP build: copy src into dst, then multiply and add in place
+ if(sizeof(t_sample) == 4) { // 4-byte samples go to the 32f routines
+ ippsCopy_32f((const float *)src,(float *)dst,cnt);
+ ippsMulC_32f_I((float)mul,(float *)dst,cnt); // scale first ...
+ ippsAddC_32f_I((float)add,(float *)dst,cnt); // ... then offset
+ }
+ else if(sizeof(t_sample) == 8) { // 8-byte samples go to the 64f routines
+ ippsCopy_64f((const double *)src,(double *)dst,cnt);
+ ippsMulC_64f_I((double)mul,(double *)dst,cnt);
+ ippsAddC_64f_I((double)add,(double *)dst,cnt);
+ }
+ else
+ ERRINTERNAL(); // sample size is neither 4 nor 8 bytes
+#else // no IPP: optional SSE path, otherwise plain C++
+#ifdef FLEXT_USE_SIMD
+#ifdef _MSC_VER
+ if(GetSIMDCapabilities()&simd_sse) { // SSE availability is tested at run time
+ // single precision: the _ps intrinsics below operate on packed floats
+ __m128 a = _mm_load1_ps(&add); // offset replicated into all four lanes
+ __m128 m = _mm_load1_ps(&mul); // factor replicated into all four lanes
+
+ int n = cnt>>4; // number of complete 16-sample blocks
+ cnt -= n<<4; // remainder is handled by the scalar loop below
+
+ if((reinterpret_cast<unsigned long>(src)&(__alignof(__m128)-1)) == 0 // aligned loads/stores are used only if src ...
+ && (reinterpret_cast<unsigned long>(dst)&(__alignof(__m128)-1)) == 0 // ... and dst both lie on an __m128 boundary
+ ) {
+ // aligned version
+ while(n--) { // four vectors of four floats per iteration
+ _mm_store_ps(dst+0,_mm_add_ps(a,_mm_mul_ps(m,_mm_load_ps(src+0))));
+ _mm_store_ps(dst+4,_mm_add_ps(a,_mm_mul_ps(m,_mm_load_ps(src+4))));
+ _mm_store_ps(dst+8,_mm_add_ps(a,_mm_mul_ps(m,_mm_load_ps(src+8))));
+ _mm_store_ps(dst+12,_mm_add_ps(a,_mm_mul_ps(m,_mm_load_ps(src+12))));
+ src += 16,dst += 16;
+ }
+ }
+ else {
+ // unaligned version
+ while(n--) { // same arithmetic, unaligned load/store
+ _mm_storeu_ps(dst+0,_mm_add_ps(a,_mm_mul_ps(m,_mm_loadu_ps(src+0))));
+ _mm_storeu_ps(dst+4,_mm_add_ps(a,_mm_mul_ps(m,_mm_loadu_ps(src+4))));
+ _mm_storeu_ps(dst+8,_mm_add_ps(a,_mm_mul_ps(m,_mm_loadu_ps(src+8))));
+ _mm_storeu_ps(dst+12,_mm_add_ps(a,_mm_mul_ps(m,_mm_loadu_ps(src+12))));
+ src += 16,dst += 16;
+ }
+ }
+ while(cnt--) *(dst++) = *(src++)*mul+add; // scalar tail: the last cnt%16 samples
+ }
+ else
+/* NOTE(review): disabled vecLib branch; as written it only copies (vScopy) and applies neither mul nor add
+#elif FLEXT_OS == FLEXT_OS_MAC && defined(__VEC__) && defined(__VECTOROPS__)
+ {
+ int n = cnt>>2,n4 = n<<2;
+ cnt -= n4;
+ vScopy(n4,src,dst);
+ src += n4,dst += n4;
+ while(cnt--) *(dst++) = *(src++);
+ }
+*/
#endif // _MSC_VER
#endif // FLEXT_USE_SIMD
{
int n = cnt>>3;
cnt -= n<<3;
while(n--) {
- dst[0] = dst[1] = dst[2] = dst[3] = dst[4] = dst[5] = dst[6] = dst[7] = s;
- dst += 8;
+ dst[0] = src[0]*mul+add; // portable path: eight samples per iteration, unrolled by hand
+ dst[1] = src[1]*mul+add;
+ dst[2] = src[2]*mul+add;
+ dst[3] = src[3]*mul+add;
+ dst[4] = src[4]*mul+add;
+ dst[5] = src[5]*mul+add;
+ dst[6] = src[6]*mul+add;
+ dst[7] = src[7]*mul+add;
+ src += 8,dst += 8;
}
-
- while(cnt--) *(dst++) = s;
+ while(cnt--) *(dst++) = *(src++)*mul+add; // scalar tail: the last cnt%8 samples
}
#endif
}
+
diff --git a/externals/grill/flext/source/flsupport.h b/externals/grill/flext/source/flsupport.h
index 2235e854..fef7e16a 100644
--- a/externals/grill/flext/source/flsupport.h
+++ b/externals/grill/flext/source/flsupport.h
@@ -959,6 +959,11 @@ public:
static unsigned long GetSIMDCapabilities();
+
+ static void MulSamples(t_sample *dst,const t_sample *src,t_sample mul,int cnt); //!< dst[i] = src[i]*mul for cnt samples
+ static void AddSamples(t_sample *dst,const t_sample *src,t_sample add,int cnt); //!< dst[i] = src[i]+add for cnt samples
+ static void ScaleSamples(t_sample *dst,const t_sample *src,t_sample mul,t_sample add,int cnt); //!< dst[i] = src[i]*mul+add for cnt samples
+
//! @} FLEXT_S_SIMD