diff options
author | Thomas Grill <xovo@users.sourceforge.net> | 2003-05-17 02:32:51 +0000 |
---|---|---|
committer | Thomas Grill <xovo@users.sourceforge.net> | 2003-05-17 02:32:51 +0000 |
commit | b84b298888b12a3c00ffd6cabd5b39a5937e1347 (patch) | |
tree | 5c6c26a16cff9f83619b8d3d1be6b2fd91b21d4d /externals/grill/flext/source | |
parent | 8b042a66a4fd811589412747e38e8c7ee784db3c (diff) |
""
svn path=/trunk/; revision=625
Diffstat (limited to 'externals/grill/flext/source')
-rwxr-xr-x | externals/grill/flext/source/flprefix.h | 7 | ||||
-rwxr-xr-x | externals/grill/flext/source/flsimd.cpp | 209 |
2 files changed, 190 insertions, 26 deletions
diff --git a/externals/grill/flext/source/flprefix.h b/externals/grill/flext/source/flprefix.h index 67046319..dae6822c 100755 --- a/externals/grill/flext/source/flprefix.h +++ b/externals/grill/flext/source/flprefix.h @@ -99,8 +99,9 @@ WARRANTIES, see the file, "license.txt," in this distribution. #endif // Definition of OS/CPU -#if defined(_MSC_VER) +#if defined(_MSC_VER) || (defined(__ICC) && (FLEXT_OS == FLEXT_OS_WIN || defined(_WIN32))) // Microsoft C++ + // and Intel C++ (as guessed) #ifndef FLEXT_CPU #if defined(_M_IX86) @@ -193,8 +194,10 @@ WARRANTIES, see the file, "license.txt," in this distribution. // This is important for everything #pragma bool on -#elif defined(__GNUG__) +#elif defined(__GNUG__) || (defined(__ICC) && (FLEXT_OS == FLEXT_OS_LINUX || defined(linux) || defined(__linux__))) + // GNU C++ + // and Intel (as suggested by Tim Blechmann) #ifndef FLEXT_CPU #if defined(_X86_) || defined(__i386__) || defined(__i586__) || defined(__i686__) diff --git a/externals/grill/flext/source/flsimd.cpp b/externals/grill/flext/source/flsimd.cpp index f5db9f5e..bebf3d97 100755 --- a/externals/grill/flext/source/flsimd.cpp +++ b/externals/grill/flext/source/flsimd.cpp @@ -9,7 +9,12 @@ WARRANTIES, see the file, "license.txt," in this distribution. */ /*! \file flsimd.cpp - \brief flext SIMD support functions + \brief flext SIMD support functions + + If FLEXT_USE_SIMD is defined at compilation, SIMD instructions are used wherever feasible. + If used with MSVC++ the "Processor Pack" must be installed. + + If FLEXT_USE_IPP is defined the Intel Performance Package is used. */ #include "flext.h" @@ -19,14 +24,22 @@ WARRANTIES, see the file, "license.txt," in this distribution. #include <windows.h> #endif -#if FLEXT_CPU == FLEXT_CPU_PPC && defined(__MWERKS__) -#include "Altivec.h" -#endif - #ifdef FLEXT_USE_IPP #include <ipps.h> #endif +#ifdef FLEXT_USE_SIMD + #ifdef _MSC_VER + // include MSVC SIMD header files + #include <mmintrin.h> // MMX + #include <xmmintrin.h> // SSE + #include <emmintrin.h> // SSE2 + #include <mm3dnow.h> // 3DNow! + #elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__MWERKS__) + #include "Altivec.h" + #endif +#endif // FLEXT_USE_SIMD + static unsigned long setsimdcaps(); /*! \brief Holds SIMD capability flags @@ -37,6 +50,7 @@ unsigned long flext::simdcaps = setsimdcaps(); unsigned long flext::GetSIMDCapabilities() { return simdcaps; } +#ifdef FLEXT_USE_SIMD #if FLEXT_CPU == FLEXT_CPU_INTEL @@ -247,6 +261,10 @@ static unsigned long setsimdcaps() return simdflags; } +#else // FLEXT_USE_SIMD +static unsigned long setsimdcaps() { return 0; } +#endif // FLEXT_USE_SIMD + void flext::CopySamples(t_sample *dst,const t_sample *src,int cnt) { @@ -258,17 +276,89 @@ void flext::CopySamples(t_sample *dst,const t_sample *src,int cnt) else ERRINTERNAL(); #else - int n = cnt>>3; - cnt -= n<<3; - while(n--) { - dst[0] = src[0]; dst[1] = src[1]; - dst[2] = src[2]; dst[3] = src[3]; - dst[4] = src[4]; dst[5] = src[5]; - dst[6] = src[6]; dst[7] = src[7]; - src += 8,dst += 8; - } - - while(cnt--) *(dst++) = *(src++); +#ifdef FLEXT_USE_SIMD +#ifdef _MSC_VER +#if 1 // t_sample is float + if(GetSIMDCapabilities()&simd_sse) { + // single precision + + int n = cnt>>4; + cnt -= n<<4; + + if((reinterpret_cast<unsigned long>(src)&(__alignof(t_sample)-1)) == 0 + && (reinterpret_cast<unsigned long>(dst)&(__alignof(t_sample)-1)) == 0 + ) { + // aligned version + while(n--) { + _mm_store_ps(dst+0,_mm_load_ps(src+0)); + _mm_store_ps(dst+4,_mm_load_ps(src+4)); + _mm_store_ps(dst+8,_mm_load_ps(src+8)); + _mm_store_ps(dst+12,_mm_load_ps(src+12)); + src += 16,dst += 16; + } + } + else { + // unaligned version + while(n--) { + _mm_storeu_ps(dst+0,_mm_loadu_ps(src+0)); + _mm_storeu_ps(dst+4,_mm_loadu_ps(src+4)); + _mm_storeu_ps(dst+8,_mm_loadu_ps(src+8)); + _mm_storeu_ps(dst+12,_mm_loadu_ps(src+12)); + src += 16,dst += 16; + } + } + while(cnt--) *(dst++) = *(src++); + } + else +#elif 0 // t_sample is double + if(GetSIMDCapabilities()&simd_sse2) { + // double precision + + int n = cnt>>3; + cnt -= n<<3; + + if((reinterpret_cast<unsigned long>(src)&(__alignof(t_sample)-1)) == 0 + && (reinterpret_cast<unsigned long>(dst)&(__alignof(t_sample)-1)) == 0 + ) { + // aligned version + while(n--) { + _mm_store_pd(dst+0,_mm_load_pd(src+0)); + _mm_store_pd(dst+2,_mm_load_pd(src+2)); + _mm_store_pd(dst+4,_mm_load_pd(src+4)); + _mm_store_pd(dst+6,_mm_load_pd(src+6)); + src += 8,dst += 8; + } + } + else { + // unaligned version + while(n--) { + _mm_storeu_pd(dst+0,_mm_loadu_pd(src+0)); + _mm_storeu_pd(dst+2,_mm_loadu_pd(src+2)); + _mm_storeu_pd(dst+4,_mm_loadu_pd(src+4)); + _mm_storeu_pd(dst+6,_mm_loadu_pd(src+6)); + src += 8,dst += 8; + } + } + while(cnt--) *(dst++) = *(src++); + } + else +#else + #error t_sample data type has illegal size +#endif +#endif // _MSC_VER +#endif // FLEXT_USE_SIMD + { + int n = cnt>>3; + cnt -= n<<3; + while(n--) { + dst[0] = src[0]; dst[1] = src[1]; + dst[2] = src[2]; dst[3] = src[3]; + dst[4] = src[4]; dst[5] = src[5]; + dst[6] = src[6]; dst[7] = src[7]; + src += 8,dst += 8; + } + while(cnt--) *(dst++) = *(src++); + } #endif } @@ -282,13 +372,84 @@ void flext::SetSamples(t_sample *dst,int cnt,t_sample s) else ERRINTERNAL(); #else - int n = cnt>>3; - cnt -= n<<3; - while(n--) { - dst[0] = dst[1] = dst[2] = dst[3] = dst[4] = dst[5] = dst[6] = dst[7] = s; - dst += 8; - } - - while(cnt--) *(dst++) = s; +#ifdef FLEXT_USE_SIMD +#ifdef _MSC_VER +#if 1 // t_sample is float + if(GetSIMDCapabilities()&simd_sse) { + // single precision + + __m128 v = _mm_load1_ps(&s); + int n = cnt>>4; + cnt -= n<<4; + + if((reinterpret_cast<unsigned long>(dst)&(__alignof(t_sample)-1)) == 0) { + // aligned version + while(n--) { + _mm_store_ps(dst+0,v); + _mm_store_ps(dst+4,v); + _mm_store_ps(dst+8,v); + _mm_store_ps(dst+12,v); + dst += 16; + } + } + else { + // unaligned version + while(n--) { + _mm_storeu_ps(dst+0,v); + _mm_storeu_ps(dst+4,v); + _mm_storeu_ps(dst+8,v); + _mm_storeu_ps(dst+12,v); + dst += 16; + } + } + while(cnt--) *(dst++) = s; + } + else +#elif 0 // t_sample is double + if(GetSIMDCapabilities()&simd_sse2) { + // double precision + + __m128 v = _mm_load1_pd(&s); + int n = cnt>>3; + cnt -= n<<3; + + if((reinterpret_cast<unsigned long>(dst)&(__alignof(t_sample)-1)) == 0) { + // aligned version + while(n--) { + _mm_store_pd(dst+0,v); + _mm_store_pd(dst+2,v); + _mm_store_pd(dst+4,v); + _mm_store_pd(dst+8,v); + dst += 8; + } + } + else { + // unaligned version + while(n--) { + _mm_storeu_pd(dst+0,v); + _mm_storeu_pd(dst+2,v); + _mm_storeu_pd(dst+4,v); + _mm_storeu_pd(dst+8,v); + dst += 8; + } + } + while(cnt--) *(dst++) = s; + } + else +#else + #error t_sample data type has illegal size +#endif +#endif // _MSC_VER +#endif // FLEXT_USE_SIMD + { + int n = cnt>>3; + cnt -= n<<3; + while(n--) { + dst[0] = dst[1] = dst[2] = dst[3] = dst[4] = dst[5] = dst[6] = dst[7] = s; + dst += 8; + } + + while(cnt--) *(dst++) = s; + } #endif } |