From b84b298888b12a3c00ffd6cabd5b39a5937e1347 Mon Sep 17 00:00:00 2001
From: Thomas Grill <xovo@users.sourceforge.net>
Date: Sat, 17 May 2003 02:32:51 +0000
Subject:  ""

svn path=/trunk/; revision=625
---
 externals/grill/flext/source/flprefix.h |   7 +-
 externals/grill/flext/source/flsimd.cpp | 209 ++++++++++++++++++++++++++++----
 2 files changed, 190 insertions(+), 26 deletions(-)

(limited to 'externals/grill/flext/source')

diff --git a/externals/grill/flext/source/flprefix.h b/externals/grill/flext/source/flprefix.h
index 67046319..dae6822c 100755
--- a/externals/grill/flext/source/flprefix.h
+++ b/externals/grill/flext/source/flprefix.h
@@ -99,8 +99,9 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 #endif
 
 // Definition of OS/CPU
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) || (defined(__ICC) && (FLEXT_OS == FLEXT_OS_WIN || defined(_WIN32)))
 	// Microsoft C++
+    // and Intel C++ (as guessed)
 	
 	#ifndef FLEXT_CPU
 		#if defined(_M_IX86)
@@ -193,8 +194,10 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 	// This is important for everything
 	#pragma bool on
 
-#elif defined(__GNUG__)
+#elif defined(__GNUG__) || (defined(__ICC) && (FLEXT_OS == FLEXT_OS_LINUX || defined(linux) || defined(__linux__)))
+
 	// GNU C++
+    // and Intel (as suggested by Tim Blechmann)
 
 	#ifndef FLEXT_CPU
 		#if defined(_X86_) || defined(__i386__) || defined(__i586__) || defined(__i686__)
diff --git a/externals/grill/flext/source/flsimd.cpp b/externals/grill/flext/source/flsimd.cpp
index f5db9f5e..bebf3d97 100755
--- a/externals/grill/flext/source/flsimd.cpp
+++ b/externals/grill/flext/source/flsimd.cpp
@@ -9,7 +9,12 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 */
 
 /*! \file flsimd.cpp
-	\brief flext SIMD support functions    
+	\brief flext SIMD support functions
+
+    If FLEXT_USE_SIMD is defined at compilation, SIMD instructions are used wherever feasible.
+    If used with MSVC++ the "Processor Pack" must be installed.
+
+    If FLEXT_USE_IPP is defined the Intel Performance Package is used.
 */
 
 #include "flext.h"
@@ -19,14 +24,22 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 #include <windows.h>
 #endif
 
-#if FLEXT_CPU == FLEXT_CPU_PPC && defined(__MWERKS__)
-#include "Altivec.h"
-#endif
-
 #ifdef FLEXT_USE_IPP
 #include <ipps.h>
 #endif
 
+#ifdef FLEXT_USE_SIMD
+    #ifdef _MSC_VER
+        // include MSVC SIMD header files
+        #include <mmintrin.h> // MMX
+        #include <xmmintrin.h> // SSE
+        #include <emmintrin.h> // SSE2
+        #include <mm3dnow.h> // 3DNow!
+    #elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__MWERKS__)
+        #include "Altivec.h"
+    #endif
+#endif // FLEXT_USE_SIMD
+
 static unsigned long setsimdcaps();
 
 /*! \brief Holds SIMD capability flags
@@ -37,6 +50,7 @@ unsigned long flext::simdcaps = setsimdcaps();
 unsigned long flext::GetSIMDCapabilities() { return simdcaps; }
 
 
+#ifdef FLEXT_USE_SIMD
 
 #if FLEXT_CPU == FLEXT_CPU_INTEL
 
@@ -247,6 +261,10 @@ static unsigned long setsimdcaps()
     return simdflags;
 }
 
+#else // FLEXT_USE_SIMD
+static unsigned long setsimdcaps() { return 0; }
+#endif // FLEXT_USE_SIMD
+
 
 void flext::CopySamples(t_sample *dst,const t_sample *src,int cnt) 
 {
@@ -258,17 +276,89 @@ void flext::CopySamples(t_sample *dst,const t_sample *src,int cnt)
     else
         ERRINTERNAL();
 #else
-	int n = cnt>>3;
-	cnt -= n<<3;
-	while(n--) {
-		dst[0] = src[0]; dst[1] = src[1];
-		dst[2] = src[2]; dst[3] = src[3];
-		dst[4] = src[4]; dst[5] = src[5];
-		dst[6] = src[6]; dst[7] = src[7];
-		src += 8,dst += 8;
-	}
-	
-	while(cnt--) *(dst++) = *(src++); 
+#ifdef FLEXT_USE_SIMD
+#ifdef _MSC_VER
+#if 1  // t_sample is float
+    if(GetSIMDCapabilities()&simd_sse) {
+        // single precision
+
+   	    int n = cnt>>4;
+        cnt -= n<<4;
+
+        if((reinterpret_cast<unsigned long>(src)&(__alignof(t_sample)-1)) == 0
+            && (reinterpret_cast<unsigned long>(dst)&(__alignof(t_sample)-1)) == 0
+        ) {
+            // aligned version
+    	    while(n--) {
+                _mm_store_ps(dst+0,_mm_load_ps(src+0));
+                _mm_store_ps(dst+4,_mm_load_ps(src+4));
+                _mm_store_ps(dst+8,_mm_load_ps(src+8));
+                _mm_store_ps(dst+12,_mm_load_ps(src+12));
+                src += 16,dst += 16;
+            }
+        }
+        else {
+            // unaligned version
+    	    while(n--) {
+                _mm_storeu_ps(dst+0,_mm_loadu_ps(src+0));
+                _mm_storeu_ps(dst+4,_mm_loadu_ps(src+4));
+                _mm_storeu_ps(dst+8,_mm_loadu_ps(src+8));
+                _mm_storeu_ps(dst+12,_mm_loadu_ps(src+12));
+                src += 16,dst += 16;
+            }
+        }
+       	while(cnt--) *(dst++) = *(src++); 
+    }
+    else
+#elif 0  // t_sample is double
+    if(GetSIMDCapabilities()&simd_sse2) {
+        // double precision
+
+   	    int n = cnt>>3;
+        cnt -= n<<3;
+
+        if((reinterpret_cast<unsigned long>(src)&(__alignof(t_sample)-1)) == 0
+            && (reinterpret_cast<unsigned long>(dst)&(__alignof(t_sample)-1)) == 0
+        ) {
+            // aligned version
+    	    while(n--) {
+                _mm_store_pd(dst+0,_mm_load_pd(src+0));
+                _mm_store_pd(dst+2,_mm_load_pd(src+2));
+                _mm_store_pd(dst+4,_mm_load_pd(src+4));
+                _mm_store_pd(dst+6,_mm_load_pd(src+6));
+                src += 8,dst += 8;
+            }
+        }
+        else {
+            // unaligned version
+    	    while(n--) {
+                _mm_storeu_pd(dst+0,_mm_loadu_pd(src+0));
+                _mm_storeu_pd(dst+2,_mm_loadu_pd(src+2));
+                _mm_storeu_pd(dst+4,_mm_loadu_pd(src+4));
+                _mm_storeu_pd(dst+6,_mm_loadu_pd(src+6));
+                src += 8,dst += 8;
+            }
+        }
+       	while(cnt--) *(dst++) = *(src++); 
+    }
+    else
+#else
+    #error t_sample data type has illegal size
+#endif
+#endif // _MSC_VER
+#endif // FLEXT_USE_SIMD
+    {
+	    int n = cnt>>3;
+	    cnt -= n<<3;
+	    while(n--) {
+		    dst[0] = src[0]; dst[1] = src[1];
+		    dst[2] = src[2]; dst[3] = src[3];
+		    dst[4] = src[4]; dst[5] = src[5];
+		    dst[6] = src[6]; dst[7] = src[7];
+		    src += 8,dst += 8;
+	    }
+    	while(cnt--) *(dst++) = *(src++); 
+    }
 #endif
 }
 
@@ -282,13 +372,84 @@ void flext::SetSamples(t_sample *dst,int cnt,t_sample s)
     else
         ERRINTERNAL();
 #else
-	int n = cnt>>3;
-	cnt -= n<<3;
-	while(n--) {
-		dst[0] = dst[1] = dst[2] = dst[3] = dst[4] = dst[5] = dst[6] = dst[7] = s;
-		dst += 8;
-	}
-	
-	while(cnt--) *(dst++) = s; 
+#ifdef FLEXT_USE_SIMD
+#ifdef _MSC_VER
+#if 1  // t_sample is float
+    if(GetSIMDCapabilities()&simd_sse) {
+        // single precision
+
+        __m128 v = _mm_load1_ps(&s);
+  	    int n = cnt>>4;
+        cnt -= n<<4;
+
+        if((reinterpret_cast<unsigned long>(dst)&(__alignof(t_sample)-1)) == 0) {
+            // aligned version
+    	    while(n--) {
+                _mm_store_ps(dst+0,v);
+                _mm_store_ps(dst+4,v);
+                _mm_store_ps(dst+8,v);
+                _mm_store_ps(dst+12,v);
+                dst += 16;
+            }
+        }
+        else {
+            // unaligned version
+    	    while(n--) {
+                _mm_storeu_ps(dst+0,v);
+                _mm_storeu_ps(dst+4,v);
+                _mm_storeu_ps(dst+8,v);
+                _mm_storeu_ps(dst+12,v);
+                dst += 16;
+            }
+        }
+      	while(cnt--) *(dst++) = s; 
+    }
+    else
+#elif 0  // t_sample is double
+    if(GetSIMDCapabilities()&simd_sse2) {
+        // double precision
+
+        __m128 v = _mm_load1_pd(&s);
+   	    int n = cnt>>3;
+        cnt -= n<<3;
+
+        if((reinterpret_cast<unsigned long>(dst)&(__alignof(t_sample)-1)) == 0) {
+            // aligned version
+            while(n--) {
+                _mm_store_pd(dst+0,v);
+                _mm_store_pd(dst+2,v);
+                _mm_store_pd(dst+4,v);
+                _mm_store_pd(dst+8,v);
+                dst += 8;
+            }
+        }
+        else {
+            // unaligned version
+    	    while(n--) {
+                _mm_storeu_pd(dst+0,v);
+                _mm_storeu_pd(dst+2,v);
+                _mm_storeu_pd(dst+4,v);
+                _mm_storeu_pd(dst+8,v);
+                dst += 8;
+            }
+        }
+       	while(cnt--) *(dst++) = s; 
+    }
+    else
+#else
+    #error t_sample data type has illegal size
+#endif
+#endif // _MSC_VER
+#endif // FLEXT_USE_SIMD
+    {
+	    int n = cnt>>3;
+	    cnt -= n<<3;
+	    while(n--) {
+		    dst[0] = dst[1] = dst[2] = dst[3] = dst[4] = dst[5] = dst[6] = dst[7] = s;
+		    dst += 8;
+	    }
+	    
+	    while(cnt--) *(dst++) = s; 
+    }
 #endif
 }
-- 
cgit v1.2.1