From a15b74b021809776079d06abdc35cd293e5fa698 Mon Sep 17 00:00:00 2001 From: Thomas Grill Date: Tue, 22 Jul 2003 02:37:23 +0000 Subject: "" svn path=/trunk/; revision=793 --- externals/grill/flext/config-pd-darwin.txt | 2 +- externals/grill/flext/config-pd-linux.txt | 2 +- externals/grill/flext/flext.cw | Bin 789029 -> 789029 bytes externals/grill/flext/source/flprefix.h | 10 +- externals/grill/flext/source/flsimd.cpp | 182 ++++++++++++++++++++++------- externals/grill/flext/source/flsupport.h | 1 + externals/grill/flext/source/fltimer.cpp | 6 +- externals/grill/flext/source/flutil.cpp | 2 +- externals/grill/xsample/source/groove.cpp | 9 ++ externals/grill/xsample/source/inter.h | 65 +++++++---- externals/grill/xsample/source/main.h | 24 +++- externals/grill/xsample/xsample.cw | Bin 279503 -> 279503 bytes 12 files changed, 227 insertions(+), 76 deletions(-) (limited to 'externals') diff --git a/externals/grill/flext/config-pd-darwin.txt b/externals/grill/flext/config-pd-darwin.txt index a50a2f28..7b1a0fb9 100644 --- a/externals/grill/flext/config-pd-darwin.txt +++ b/externals/grill/flext/config-pd-darwin.txt @@ -25,7 +25,7 @@ INSTDIR=/usr/local/pd/flext # additional compiler flags # (check if they fit for your system!) -UFLAGS=-DFLEXT_USE_SIMD -malign-power -maltivec +UFLAGS=-DFLEXT_USE_SIMD -malign-power -maltivec -faltivec # define to use old library naming for backwards compatibility # ("flext.a" instead of new "libflext.a" etc.) diff --git a/externals/grill/flext/config-pd-linux.txt b/externals/grill/flext/config-pd-linux.txt index beaa086a..97bbb655 100644 --- a/externals/grill/flext/config-pd-linux.txt +++ b/externals/grill/flext/config-pd-linux.txt @@ -16,7 +16,7 @@ SNDOBJ=/usr/local/include/SndObj # where is the STK include directory? # (leave blank or comment out to disable STK support) -# STK=/usr/src/stk-4.1.1/include +STK=/usr/src/stk-4.1.1/include # where should flext libraries be built? 
TARGDIR=./pd-linux diff --git a/externals/grill/flext/flext.cw b/externals/grill/flext/flext.cw index f952b7f9..b8c40866 100644 Binary files a/externals/grill/flext/flext.cw and b/externals/grill/flext/flext.cw differ diff --git a/externals/grill/flext/source/flprefix.h b/externals/grill/flext/source/flprefix.h index c27d1a67..e9e43e50 100755 --- a/externals/grill/flext/source/flprefix.h +++ b/externals/grill/flext/source/flprefix.h @@ -59,7 +59,7 @@ WARRANTIES, see the file, "license.txt," in this distribution. #define FLEXT_OSAPI_MAC_CLASSIC 2 #define FLEXT_OSAPI_MAC_CARBON 3 -#define FLEXT_OSAPI_MAC_OSX 4 +#define FLEXT_OSAPI_MAC_MACH 4 #define FLEXT_OSAPI_WIN_NATIVE 5 // WIN32 Platform #define FLEXT_OSAPI_WIN_POSIX 6 // POSIX API (e.g. cygwin) @@ -188,9 +188,9 @@ WARRANTIES, see the file, "license.txt," in this distribution. #endif #ifndef FLEXT_OSAPI - #if TARGET_API_MAC_OSX - // this has the precedence (OSX can also be Carbon, of course) - #define FLEXT_OSAPI FLEXT_OSAPI_MAC_OSX + #if TARGET_API_MAC_MACH + // this has the precedence (MACH also supports Carbon, of course) + #define FLEXT_OSAPI FLEXT_OSAPI_MAC_MACH #elif TARGET_API_MAC_CARBON #define FLEXT_OSAPI FLEXT_OSAPI_MAC_CARBON #else @@ -235,7 +235,7 @@ WARRANTIES, see the file, "license.txt," in this distribution. #ifndef FLEXT_OSAPI #if FLEXT_OS == FLEXT_OS_MAC - #define FLEXT_OSAPI FLEXT_OSAPI_MAC_OSX + #define FLEXT_OSAPI FLEXT_OSAPI_MAC_MACH #elif FLEXT_OS == FLEXT_OS_WIN #define FLEXT_OSAPI FLEXT_OSAPI_WIN_POSIX #elif FLEXT_OS == FLEXT_OS_LINUX || FLEXT_OS == FLEXT_OS_IRIX diff --git a/externals/grill/flext/source/flsimd.cpp b/externals/grill/flext/source/flsimd.cpp index 88cbdb89..4d92aa48 100755 --- a/externals/grill/flext/source/flsimd.cpp +++ b/externals/grill/flext/source/flsimd.cpp @@ -35,15 +35,24 @@ WARRANTIES, see the file, "license.txt," in this distribution. #include // SSE #include // SSE2 #include // 3DNow! 
-// #elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__MWERKS__) -// #include - #elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__MWERKS__) - #include + #elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__MWERKS__) && defined(__ALTIVEC__) + #if FLEXT_OSAPI == FLEXT_OSAPI_MAC_MACH + #include + #else + #include + #endif + + #pragma altivec_model on + + #include +// #include #include - #elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__GNUG__) - #include + #elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__GNUG__) && defined(__ALTIVEC__) + #include +// #include #include #endif + #endif // FLEXT_USE_SIMD static unsigned long setsimdcaps(); @@ -263,10 +272,85 @@ static unsigned long setsimdcaps() if(cpuinfo.os_support&_CPU_FEATURE_3DNOW) simdflags += flext::simd_3dnow; if(cpuinfo.os_support&_CPU_FEATURE_SSE) simdflags += flext::simd_sse; if(cpuinfo.os_support&_CPU_FEATURE_SSE2) simdflags += flext::simd_sse2; +#elif FLEXT_CPU == FLEXT_CPU_PPC + + #if FLEXT_OSAPI == FLEXT_OSAPI_MAC_MACH + + int selectors[2] = { CTL_HW, HW_VECTORUNIT }; + int hasVectorUnit = 0; + size_t length = sizeof(hasVectorUnit); + int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0); + + if(!error && hasVectorUnit != 0) simdflags += flext::simd_altivec; + + #else + + long cpuAttributes; + Boolean hasAltiVec = false; + OSErr err = Gestalt( gestaltPowerPCProcessorFeatures, &cpuAttributes ); + + if( noErr == err ) + if(( 1 << gestaltPowerPCHasVectorInstructions) & cpuAttributes) simdflags += flext::simd_altivec; + + #endif #endif return simdflags; } + +#if FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__) + +/* functions for misaligned vector data - taken from the Altivec tutorial of Ian Ollmann, Ph.D. */ + +//! 
Load a vector from an unaligned location in memory
+inline vector unsigned char LoadUnaligned( vector unsigned char *v )
+{
+    vector unsigned char permuteVector = vec_lvsl( 0, (int*) v );
+    vector unsigned char low = vec_ld( 0, v );
+    vector unsigned char high = vec_ld( 16, v );
+    return vec_perm( low, high, permuteVector );
+}
+
+//! Store a vector to an unaligned location in memory
+inline void StoreUnaligned( vector unsigned char v, vector unsigned char *where)
+{
+    // Load the surrounding area
+    vector unsigned char low = vec_ld( 0, where );
+    vector unsigned char high = vec_ld( 16, where );
+    // Prepare the constants that we need
+    vector unsigned char permuteVector = vec_lvsr( 0, (int*) where );
+
+    vector unsigned char oxFF = (vector unsigned char)vec_splat_s8( -1 );
+    vector unsigned char ox00 = (vector unsigned char)vec_splat_s8( 0 );
+    // Make a mask for which parts of the vectors to swap out (0x00 = leading bytes, 0xFF = the rest)
+    vector unsigned char mask = vec_perm( ox00, oxFF, permuteVector );
+    // Right rotate our input data
+    v = vec_perm( v, v, permuteVector );
+    // Insert our data into the low and high vectors (vec_sel takes its second operand where the mask is set)
+    low = vec_sel( low, v, mask );   // keep the old leading bytes, write the head of v behind them
+    high = vec_sel( v, high, mask ); // write the tail of v first, keep the old trailing bytes
+    // Store the two aligned result vectors
+    vec_st( low, 0, where );
+    vec_st( high, 16, where );
+}
+
+inline vector float LoadUnaligned(float *v ) // float overload, forwards to the byte-vector version
+{
+    return (vector float)LoadUnaligned((vector unsigned char *)v);
+}
+
+inline void StoreUnaligned( vector float v,float *where) // float overload, forwards to the byte-vector version
+{
+    return StoreUnaligned((vector unsigned char)v,(vector unsigned char *)where);
+}
+
+inline bool IsVectorAligned(const void *where) // true if the address is a multiple of sizeof(vector float)
+{
+    return (reinterpret_cast<size_t>(where)&(sizeof(vector float)-1)) == 0; // parentheses needed: == binds tighter than &
+}
+#endif
+
+
 #else // FLEXT_USE_SIMD
 static unsigned long setsimdcaps() { return 0; }
 #endif // FLEXT_USE_SIMD
@@ -395,11 +479,11 @@ loopuu:
         while(cnt--) *(dst++) = *(src++);
     }
     else
-#elif FLEXT_OS == FLEXT_OS_MAC && defined(__VEC__) && defined(__VECTOROPS__)
-    {
+#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__VEC__) && 
defined(__VECTOROPS__) + { int n = cnt>>2,n4 = n<<2; cnt -= n4; - vScopy(n4,src,dst); + vScopy(n4,(vector float *)src,(vector float *)dst); src += n4,dst += n4; while(cnt--) *(dst++) = *(src++); } @@ -474,7 +558,24 @@ loopu: while(cnt--) *(dst++) = s; } else -#endif // _MSC_VER +#elif 0 //FLEXT_CPU == FLEXT_CPU_PPC && defined(__VEC__) + if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(dst)) { + vector float svec = IsVectorAligned(&s)?vec_splat(vec_ld(0,(vector float *)&s),0):LoadUnaligned(&s); + int n = cnt>>4,n4 = n<<4; + cnt -= n4; + + while(n--) { + vec_st(svec,0,dst); + vec_st(svec,16,dst); + vec_st(svec,32,dst); + vec_st(svec,48,dst); + dst += 64; + } + + while(cnt--) *(dst++) = s; + } + else +#endif #endif // FLEXT_USE_SIMD { int n = cnt>>3; @@ -587,10 +688,24 @@ loopu: while(cnt--) *(dst++) = *(src++)*op; } else -#elif FLEXT_OS == FLEXT_OS_MAC && defined(__VEC__) && defined(__VECTOROPS__) - { - vsmul(src,1,&op,dst,1,cnt); +#elif 0 //FLEXT_CPU == FLEXT_CPU_PPC && defined(__VEC__) + if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(src) && IsVectorAligned(dst)) { + vector float opvec = IsVectorAligned(&op)?vec_splat(vec_ld(0,(vector float *)&op),0):LoadUnaligned(&op); + vector float addvec = (vector float)vec_splat_u32(0); + int n = cnt>>4,n4 = n<<4; + cnt -= n4; + + while(n--) { + vec_st(vec_madd(vec_ld( 0,src),opvec,addvec), 0,dst); + vec_st(vec_madd(vec_ld(16,src),opvec,addvec),16,dst); + vec_st(vec_madd(vec_ld(32,src),opvec,addvec),32,dst); + vec_st(vec_madd(vec_ld(48,src),opvec,addvec),48,dst); + src += 64,dst += 64; + } + + while(cnt--) *(dst++) = *(src++)*op; } + else #endif // _MSC_VER #endif // FLEXT_USE_SIMD { @@ -809,10 +924,10 @@ loopuu: while(cnt--) *(dst++) = *(src++) * *(op++); } else -#elif FLEXT_OS == FLEXT_OS_MAC && defined(__VEC__) && defined(__VECTOROPS__) - { - vsmul(src,1,&op,dst,1,cnt); +#elif 0 // FLEXT_CPU == FLEXT_CPU_PPC && defined(__VEC__) + if(GetSIMDCapabilities()&simd_sse) { } + else #endif // _MSC_VER #endif // 
FLEXT_USE_SIMD { @@ -939,16 +1054,10 @@ loopu: while(cnt--) *(dst++) = *(src++)+op; } else -/* -#elif FLEXT_OS == FLEXT_OS_MAC && defined(__VEC__) && defined(__VECTOROPS__) - { - int n = cnt>>2,n4 = n<<2; - cnt -= n4; - vScopy(n4,src,dst); - src += n4,dst += n4; - while(cnt--) *(dst++) = *(src++); +#elif 0 //FLEXT_CPU == FLEXT_CPU_PPC && defined(__VEC__) + if(GetSIMDCapabilities()&simd_altivec) { } -*/ + else #endif // _MSC_VER #endif // FLEXT_USE_SIMD { @@ -1168,16 +1277,9 @@ void flext::AddSamples(t_sample *dst,const t_sample *src,const t_sample *op,int while(cnt--) *(dst++) = *(src++) + *(op++); } else -/* -#elif FLEXT_OS == FLEXT_OS_MAC && defined(__VEC__) && defined(__VECTOROPS__) - { - int n = cnt>>2,n4 = n<<2; - cnt -= n4; - vScopy(n4,src,dst); - src += n4,dst += n4; - while(cnt--) *(dst++) = *(src++); +#elif 0 //FLEXT_CPU == FLEXT_CPU_PPC && defined(__VEC__) + { } -*/ #endif // _MSC_VER #endif // FLEXT_USE_SIMD { @@ -1316,16 +1418,10 @@ loopu: while(cnt--) *(dst++) = *(src++)*opmul+opadd; } else -/* -#elif FLEXT_OS == FLEXT_OS_MAC && defined(__VEC__) && defined(__VECTOROPS__) - { - int n = cnt>>2,n4 = n<<2; - cnt -= n4; - vScopy(n4,src,dst); - src += n4,dst += n4; - while(cnt--) *(dst++) = *(src++); +#elif 0 //FLEXT_CPU == FLEXT_CPU_PPC && defined(__VEC__) + if(GetSIMDCapabilities()&simd_altivec) { } -*/ + else #endif // _MSC_VER #endif // FLEXT_USE_SIMD { diff --git a/externals/grill/flext/source/flsupport.h b/externals/grill/flext/source/flsupport.h index 5c222205..6db7a7b0 100644 --- a/externals/grill/flext/source/flsupport.h +++ b/externals/grill/flext/source/flsupport.h @@ -969,6 +969,7 @@ public: simd_altivec = 0x10 }; + /*! 
Check for SIMD capabilities of the CPU */ static unsigned long GetSIMDCapabilities(); diff --git a/externals/grill/flext/source/fltimer.cpp b/externals/grill/flext/source/fltimer.cpp index 6767c186..dffe1eb6 100755 --- a/externals/grill/flext/source/fltimer.cpp +++ b/externals/grill/flext/source/fltimer.cpp @@ -16,7 +16,7 @@ WARRANTIES, see the file, "license.txt," in this distribution. #if FLEXT_OS == FLEXT_OS_WIN #include -#elif FLEXT_OS == FLEXT_OS_LINUX || FLEXT_OS == FLEXT_OS_IRIX || FLEXT_OSAPI == FLEXT_OSAPI_MAC_OSX +#elif FLEXT_OS == FLEXT_OS_LINUX || FLEXT_OS == FLEXT_OS_IRIX || FLEXT_OSAPI == FLEXT_OSAPI_MAC_MACH #include #include #elif FLEXT_OS == FLEXT_OS_MAC @@ -74,7 +74,7 @@ double flext::GetOSTime() SystemTimeToFileTime(&systm,&fltm); tm = (double)((LARGE_INTEGER *)&fltm)->QuadPart*0.001; } -#elif FLEXT_OS == FLEXT_OS_LINUX || FLEXT_OS == FLEXT_OS_IRIX || FLEXT_OSAPI == FLEXT_OSAPI_MAC_OSX // POSIX +#elif FLEXT_OS == FLEXT_OS_LINUX || FLEXT_OS == FLEXT_OS_IRIX || FLEXT_OSAPI == FLEXT_OSAPI_MAC_MACH // POSIX timeval tmv; gettimeofday(&tmv,NULL); tm = tmv.tv_sec+tmv.tv_usec*1.e-6; @@ -92,7 +92,7 @@ void flext::Sleep(double s) { #if FLEXT_OS == FLEXT_OS_WIN ::Sleep((long)(s*1000.)); -#elif FLEXT_OS == FLEXT_OS_LINUX || FLEXT_OS == FLEXT_OS_IRIX || FLEXT_OSAPI == FLEXT_OSAPI_MAC_OSX // POSIX +#elif FLEXT_OS == FLEXT_OS_LINUX || FLEXT_OS == FLEXT_OS_IRIX || FLEXT_OSAPI == FLEXT_OSAPI_MAC_MACH // POSIX usleep((long)(s*1000000.)); #elif FLEXT_OS == FLEXT_OS_MAC // that's just for OS9 & Carbon! UnsignedWide tick; diff --git a/externals/grill/flext/source/flutil.cpp b/externals/grill/flext/source/flutil.cpp index 5081c245..3c7c704d 100644 --- a/externals/grill/flext/source/flutil.cpp +++ b/externals/grill/flext/source/flutil.cpp @@ -18,7 +18,7 @@ WARRANTIES, see the file, "license.txt," in this distribution. 
#if FLEXT_OS == FLEXT_OS_WIN #include #elif FLEXT_OS == FLEXT_OS_MAC - #if FLEXT_OSAPI != FLEXT_OSAPI_MAC_OSX + #if FLEXT_OSAPI != FLEXT_OSAPI_MAC_MACH #include #else #include diff --git a/externals/grill/xsample/source/groove.cpp b/externals/grill/xsample/source/groove.cpp index 4759812c..f70846f7 100644 --- a/externals/grill/xsample/source/groove.cpp +++ b/externals/grill/xsample/source/groove.cpp @@ -498,6 +498,11 @@ V xgroove::s_pos_loop(I n,S *const *invecs,S *const *outvecs) S *pos = outvecs[outchns]; BL lpbang = false; +#ifdef __VEC__ + // prefetch cache + vec_dst(speed,GetPrefetchConstant(1,n>>2,0),0); +#endif + const D smin = curmin,smax = curmax,plen = smax-smin; //curlen; if(buf && plen > 0) { @@ -529,6 +534,10 @@ V xgroove::s_pos_loop(I n,S *const *invecs,S *const *outvecs) else s_pos_off(n,invecs,outvecs); +#ifdef __VEC__ + vec_dss(0); +#endif + if(lpbang) ToOutBang(outchns+3); } diff --git a/externals/grill/xsample/source/inter.h b/externals/grill/xsample/source/inter.h index 3f8ea5f8..d5f2b4ac 100755 --- a/externals/grill/xsample/source/inter.h +++ b/externals/grill/xsample/source/inter.h @@ -38,7 +38,7 @@ TMPLDEF V xinter::st_play1(const S *bdt,const I smin,const I smax,const I n,cons } else if(oint >= smax) { // position > last sample ... 
take only last sample - fp = bdt+(smax-1)*BCHNS; + fp = bdt+(smin == smax?smin:smax-1)*BCHNS; } else { // normal @@ -116,50 +116,73 @@ TMPLDEF V xinter::st_play4(const S *bdt,const I smin,const I smax,const I n,cons // position info are frame units const S *pos = invecs[0]; + +#ifdef __VEC__ + // prefetch cache + vec_dst(pos,GetPrefetchConstant(1,n>>2,0),0); + const int pf = GetPrefetchConstant(BCHNS,1,16*BCHNS); +#endif + S *const *sig = outvecs; register I si = 0; // 4-point interpolation // --------------------- const I maxo = smax-1; // last sample in play region + const S *maxp = bdt+maxo*BCHNS; // pointer to last sample for(I i = 0; i < n; ++i,++si) { F o = *(pos++); - register I oint = (I)o,ointm,oint1,oint2; + register I oint = (I)o; + register F frac; + register const S *fa,*fb,*fc,*fd; if(oint <= smin) { if(oint < smin) oint = smin,o = (float)smin; - // position is first simple - ointm = smin; // first sample - oint1 = oint+1; - oint2 = oint1+1; + + fa = bdt+smin*BCHNS; // position is first sample + fb = bdt+oint*BCHNS; + + frac = o-oint; + fc = fb+BCHNS; + fd = fc+BCHNS; } else if(oint >= maxo-2) { if(oint > maxo) oint = maxo,o = (float)smax; - ointm = oint-1; - oint1 = oint >= maxo?maxo:oint+1; - oint2 = oint1 >= maxo?maxo:oint1+1; + frac = o-oint; + + fb = bdt+oint*BCHNS; + fa = fb-BCHNS; // CACHE! + + // \TODO what about wrap-around??? + fc = fb >= maxp?maxp:fb+BCHNS; // ev. CACHE! + fd = fc >= maxp?maxp:fc+BCHNS; // ev. CACHE! 
} else { - ointm = oint-1; - oint1 = oint+1; - oint2 = oint1+1; + fa = bdt+oint*BCHNS-BCHNS; + frac = o-oint; + fb = fa+BCHNS; +#ifdef __VEC__ + vec_dst(fa,pf,0); +#endif + fc = fb+BCHNS; + fd = fc+BCHNS; } - register F frac = o-oint; + register F f1 = 0.5f*(frac-1.0f); + register F f3 = frac*3.0f-1.0f; - register const S *fa = bdt+ointm*BCHNS; - register const S *fb = bdt+oint*BCHNS; - register const S *fc = bdt+oint1*BCHNS; - register const S *fd = bdt+oint2*BCHNS; - for(I ci = 0; ci < OCHNS; ++ci) { + const F amdf = (fa[ci]-fd[ci])*frac; const F cmb = fc[ci]-fb[ci]; - sig[ci][si] = fb[ci] + frac*( - cmb - 0.5f*(frac-1.0f) * ((fa[ci]-fd[ci]+3.0f*cmb)*frac + (fb[ci]-fa[ci]-cmb)) - ); + const F bma = fb[ci]-fa[ci]; + sig[ci][si] = fb[ci] + frac*( cmb - f1 * ( amdf+bma+cmb*f3 ) ); } } + +#ifdef __VEC__ + vec_dss(0); +#endif // clear rest of output channels (if buffer has less channels) for(I ci = OCHNS; ci < outchns; ++ci) ZeroSamples(sig[ci],n); diff --git a/externals/grill/xsample/source/main.h b/externals/grill/xsample/source/main.h index b3b8a499..61c0bf4d 100644 --- a/externals/grill/xsample/source/main.h +++ b/externals/grill/xsample/source/main.h @@ -12,7 +12,7 @@ WARRANTIES, see the file, "license.txt," in this distribution. #define __XSAMPLE_H -#define XSAMPLE_VERSION "0.3.0pre12" +#define XSAMPLE_VERSION "0.3.0pre13" #define FLEXT_ATTRIBUTES 1 @@ -72,6 +72,28 @@ WARRANTIES, see the file, "license.txt," in this distribution. #define STD #endif +#ifdef __ALTIVEC__ +#if FLEXT_CPU == FLEXT_CPU_PPC && defined(__MWERKS__) + #pragma altivec_model on + #include + #include +#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__GNUG__) + #include + #include +#endif + + // Initialize a prefetch constant for use with vec_dst(), vec_dstt(), vec_dstst or vec_dststt + // Taken from the "AltiVec tutorial" by Ian Ollmann, Ph.D. 
+    inline UInt32 GetPrefetchConstant( int blockSizeInVectors,int blockCount,int blockStride )
+    {
+//      FLEXT_ASSERT( blockSizeInVectors > 0 && blockSizeInVectors <= 32 );
+//      FLEXT_ASSERT( blockCount > 0 && blockCount <= 256 );
+//      FLEXT_ASSERT( blockStride > MIN_SHRT && blockStride <= MAX_SHRT );
+        return ((blockSizeInVectors << 24) & 0x1F000000) | // bits 24-28: block size in vectors
+            ((blockCount << 16) & 0x00FF0000) |            // bits 16-23: block count (bitwise &, a logical && would yield only 0 or 1)
+            (blockStride & 0xFFFF);                        // bits 0-15: block stride
+    }
+#endif
 
 class xsample:
     public flext_dsp
diff --git a/externals/grill/xsample/xsample.cw b/externals/grill/xsample/xsample.cw
index 8065e314..2a4c0938 100755
Binary files a/externals/grill/xsample/xsample.cw and b/externals/grill/xsample/xsample.cw differ
-- 
cgit v1.2.1