aboutsummaryrefslogtreecommitdiff
path: root/externals/grill/flext/source/flsimd.cpp
diff options
context:
space:
mode:
authorThomas Grill <xovo@users.sourceforge.net>2003-05-17 02:32:51 +0000
committerThomas Grill <xovo@users.sourceforge.net>2003-05-17 02:32:51 +0000
commitb84b298888b12a3c00ffd6cabd5b39a5937e1347 (patch)
tree5c6c26a16cff9f83619b8d3d1be6b2fd91b21d4d /externals/grill/flext/source/flsimd.cpp
parent8b042a66a4fd811589412747e38e8c7ee784db3c (diff)
""
svn path=/trunk/; revision=625
Diffstat (limited to 'externals/grill/flext/source/flsimd.cpp')
-rwxr-xr-xexternals/grill/flext/source/flsimd.cpp209
1 files changed, 185 insertions, 24 deletions
diff --git a/externals/grill/flext/source/flsimd.cpp b/externals/grill/flext/source/flsimd.cpp
index f5db9f5e..bebf3d97 100755
--- a/externals/grill/flext/source/flsimd.cpp
+++ b/externals/grill/flext/source/flsimd.cpp
@@ -9,7 +9,12 @@ WARRANTIES, see the file, "license.txt," in this distribution.
*/
/*! \file flsimd.cpp
- \brief flext SIMD support functions
+ \brief flext SIMD support functions
+
+ If FLEXT_USE_SIMD is defined at compilation, SIMD instructions are used wherever feasible.
+ If used with MSVC++ the "Processor Pack" must be installed.
+
+ If FLEXT_USE_IPP is defined the Intel Performance Package is used.
*/
#include "flext.h"
@@ -19,14 +24,22 @@ WARRANTIES, see the file, "license.txt," in this distribution.
#include <windows.h>
#endif
-#if FLEXT_CPU == FLEXT_CPU_PPC && defined(__MWERKS__)
-#include "Altivec.h"
-#endif
-
#ifdef FLEXT_USE_IPP
#include <ipps.h>
#endif
+#ifdef FLEXT_USE_SIMD
+ #ifdef _MSC_VER
+ // include MSVC SIMD header files
+ #include <mmintrin.h> // MMX
+ #include <xmmintrin.h> // SSE
+ #include <emmintrin.h> // SSE2
+ #include <mm3dnow.h> // 3DNow!
+ #elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__MWERKS__)
+ #include "Altivec.h"
+ #endif
+#endif // FLEXT_USE_SIMD
+
static unsigned long setsimdcaps();
/*! \brief Holds SIMD capability flags
@@ -37,6 +50,7 @@ unsigned long flext::simdcaps = setsimdcaps();
unsigned long flext::GetSIMDCapabilities() { return simdcaps; }
+#ifdef FLEXT_USE_SIMD
#if FLEXT_CPU == FLEXT_CPU_INTEL
@@ -247,6 +261,10 @@ static unsigned long setsimdcaps()
return simdflags;
}
+#else // FLEXT_USE_SIMD
+static unsigned long setsimdcaps() { return 0; }
+#endif // FLEXT_USE_SIMD
+
void flext::CopySamples(t_sample *dst,const t_sample *src,int cnt)
{
@@ -258,17 +276,89 @@ void flext::CopySamples(t_sample *dst,const t_sample *src,int cnt)
else
ERRINTERNAL();
#else
- int n = cnt>>3;
- cnt -= n<<3;
- while(n--) {
- dst[0] = src[0]; dst[1] = src[1];
- dst[2] = src[2]; dst[3] = src[3];
- dst[4] = src[4]; dst[5] = src[5];
- dst[6] = src[6]; dst[7] = src[7];
- src += 8,dst += 8;
- }
-
- while(cnt--) *(dst++) = *(src++);
+#ifdef FLEXT_USE_SIMD
+#ifdef _MSC_VER
+#if 1 // t_sample is float
+ if(GetSIMDCapabilities()&simd_sse) {
+ // single precision
+
+ int n = cnt>>4;
+ cnt -= n<<4;
+
+ if((reinterpret_cast<unsigned long>(src)&(__alignof(t_sample)-1)) == 0
+ && (reinterpret_cast<unsigned long>(dst)&(__alignof(t_sample)-1)) == 0
+ ) {
+ // aligned version
+ while(n--) {
+ _mm_store_ps(dst+0,_mm_load_ps(src+0));
+ _mm_store_ps(dst+4,_mm_load_ps(src+4));
+ _mm_store_ps(dst+8,_mm_load_ps(src+8));
+ _mm_store_ps(dst+12,_mm_load_ps(src+12));
+ src += 16,dst += 16;
+ }
+ }
+ else {
+ // unaligned version
+ while(n--) {
+ _mm_storeu_ps(dst+0,_mm_loadu_ps(src+0));
+ _mm_storeu_ps(dst+4,_mm_loadu_ps(src+4));
+ _mm_storeu_ps(dst+8,_mm_loadu_ps(src+8));
+ _mm_storeu_ps(dst+12,_mm_loadu_ps(src+12));
+ src += 16,dst += 16;
+ }
+ }
+ while(cnt--) *(dst++) = *(src++);
+ }
+ else
+#elif 0 // t_sample is double
+ if(GetSIMDCapabilities()&simd_sse2) {
+ // double precision
+
+ int n = cnt>>3;
+ cnt -= n<<3;
+
+ if((reinterpret_cast<unsigned long>(src)&(__alignof(t_sample)-1)) == 0
+ && (reinterpret_cast<unsigned long>(dst)&(__alignof(t_sample)-1)) == 0
+ ) {
+ // aligned version
+ while(n--) {
+ _mm_store_pd(dst+0,_mm_load_pd(src+0));
+ _mm_store_pd(dst+2,_mm_load_pd(src+2));
+ _mm_store_pd(dst+4,_mm_load_pd(src+4));
+ _mm_store_pd(dst+6,_mm_load_pd(src+6));
+ src += 8,dst += 8;
+ }
+ }
+ else {
+ // unaligned version
+ while(n--) {
+ _mm_storeu_pd(dst+0,_mm_loadu_pd(src+0));
+ _mm_storeu_pd(dst+2,_mm_loadu_pd(src+2));
+ _mm_storeu_pd(dst+4,_mm_loadu_pd(src+4));
+ _mm_storeu_pd(dst+6,_mm_loadu_pd(src+6));
+ src += 8,dst += 8;
+ }
+ }
+ while(cnt--) *(dst++) = *(src++);
+ }
+ else
+#else
+ #error t_sample data type has illegal size
+#endif
+#endif // _MSC_VER
+#endif // FLEXT_USE_SIMD
+ {
+ int n = cnt>>3;
+ cnt -= n<<3;
+ while(n--) {
+ dst[0] = src[0]; dst[1] = src[1];
+ dst[2] = src[2]; dst[3] = src[3];
+ dst[4] = src[4]; dst[5] = src[5];
+ dst[6] = src[6]; dst[7] = src[7];
+ src += 8,dst += 8;
+ }
+ while(cnt--) *(dst++) = *(src++);
+ }
#endif
}
@@ -282,13 +372,84 @@ void flext::SetSamples(t_sample *dst,int cnt,t_sample s)
else
ERRINTERNAL();
#else
- int n = cnt>>3;
- cnt -= n<<3;
- while(n--) {
- dst[0] = dst[1] = dst[2] = dst[3] = dst[4] = dst[5] = dst[6] = dst[7] = s;
- dst += 8;
- }
-
- while(cnt--) *(dst++) = s;
+#ifdef FLEXT_USE_SIMD
+#ifdef _MSC_VER
+#if 1 // t_sample is float
+ if(GetSIMDCapabilities()&simd_sse) {
+ // single precision
+
+ __m128 v = _mm_load1_ps(&s);
+ int n = cnt>>4;
+ cnt -= n<<4;
+
+ if((reinterpret_cast<unsigned long>(dst)&(__alignof(t_sample)-1)) == 0) {
+ // aligned version
+ while(n--) {
+ _mm_store_ps(dst+0,v);
+ _mm_store_ps(dst+4,v);
+ _mm_store_ps(dst+8,v);
+ _mm_store_ps(dst+12,v);
+ dst += 16;
+ }
+ }
+ else {
+ // unaligned version
+ while(n--) {
+ _mm_storeu_ps(dst+0,v);
+ _mm_storeu_ps(dst+4,v);
+ _mm_storeu_ps(dst+8,v);
+ _mm_storeu_ps(dst+12,v);
+ dst += 16;
+ }
+ }
+ while(cnt--) *(dst++) = s;
+ }
+ else
+#elif 0 // t_sample is double
+ if(GetSIMDCapabilities()&simd_sse2) {
+ // double precision
+
+ __m128 v = _mm_load1_pd(&s);
+ int n = cnt>>3;
+ cnt -= n<<3;
+
+ if((reinterpret_cast<unsigned long>(dst)&(__alignof(t_sample)-1)) == 0) {
+ // aligned version
+ while(n--) {
+ _mm_store_pd(dst+0,v);
+ _mm_store_pd(dst+2,v);
+ _mm_store_pd(dst+4,v);
+ _mm_store_pd(dst+8,v);
+ dst += 8;
+ }
+ }
+ else {
+ // unaligned version
+ while(n--) {
+ _mm_storeu_pd(dst+0,v);
+ _mm_storeu_pd(dst+2,v);
+ _mm_storeu_pd(dst+4,v);
+ _mm_storeu_pd(dst+8,v);
+ dst += 8;
+ }
+ }
+ while(cnt--) *(dst++) = s;
+ }
+ else
+#else
+ #error t_sample data type has illegal size
+#endif
+#endif // _MSC_VER
+#endif // FLEXT_USE_SIMD
+ {
+ int n = cnt>>3;
+ cnt -= n<<3;
+ while(n--) {
+ dst[0] = dst[1] = dst[2] = dst[3] = dst[4] = dst[5] = dst[6] = dst[7] = s;
+ dst += 8;
+ }
+
+ while(cnt--) *(dst++) = s;
+ }
#endif
}