From 9afde55c5e20b9f2c4e9d644277f9706f519342c Mon Sep 17 00:00:00 2001
From: Thomas Grill
Date: Tue, 30 Dec 2003 03:37:34 +0000
Subject: ""

svn path=/trunk/; revision=1244
---
 externals/grill/flext/source/flsimd.cpp | 183 ++++++++++++++++++++++++++------
 1 file changed, 149 insertions(+), 34 deletions(-)

diff --git a/externals/grill/flext/source/flsimd.cpp b/externals/grill/flext/source/flsimd.cpp
index 4d92aa48..fdb544c5 100755
--- a/externals/grill/flext/source/flsimd.cpp
+++ b/externals/grill/flext/source/flsimd.cpp
@@ -37,20 +37,19 @@ WARRANTIES, see the file, "license.txt," in this distribution.
         #include     // 3DNow!
 #elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__MWERKS__) && defined(__ALTIVEC__)
     #if FLEXT_OSAPI == FLEXT_OSAPI_MAC_MACH
-        #include 
+        #include 
+        #include 
     #else
-        #include 
+        #include 
     #endif
     #pragma altivec_model on
     #include 
-//  #include 
     #include 
 #elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__GNUG__) && defined(__ALTIVEC__)
     #include 
-//  #include 
-    #include 
+    #include 
 #endif
 
 #endif // FLEXT_USE_SIMD
@@ -334,7 +333,7 @@ inline void StoreUnaligned( vector unsigned char v, vector unsigned char *where)
     vec_st( high, 16, where );
 }
 
-inline vector float LoadUnaligned(float *v )
+inline vector float LoadUnaligned(const float *v )
 {
     return (vector float)LoadUnaligned((vector unsigned char *)v);
 }
@@ -348,6 +347,11 @@ inline bool IsVectorAligned(const void *where)
 {
     return reinterpret_cast(where)&(sizeof(vector float)-1) == 0;
 }
+
+inline vector float LoadValue(const float &f)
+{
+    return IsVectorAligned(&f)?vec_splat(vec_ld(0,(vector float *)&f),0):LoadUnaligned(&f);
+}
 
 #endif
@@ -479,14 +483,14 @@ loopuu:
         while(cnt--) *(dst++) = *(src++);
     }
     else
-#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__VEC__) && defined(__VECTOROPS__)
-    {
+#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__VECTOROPS__)
+    if(true) {
         int n = cnt>>2,n4 = n<<2;
-        cnt -= n4;
         vScopy(n4,(vector float *)src,(vector float *)dst);
-        src += n4,dst += n4;
+        cnt -= n4,src += n4,dst += n4;
         while(cnt--) *(dst++) = *(src++);
     }
+    else
 #endif // _MSC_VER
 #endif // FLEXT_USE_SIMD
     {
@@ -558,18 +562,18 @@ loopu:
         while(cnt--) *(dst++) = s;
     }
     else
-#elif 0 //FLEXT_CPU == FLEXT_CPU_PPC && defined(__VEC__)
+#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
     if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(dst)) {
-        vector float svec = IsVectorAligned(&s)?vec_splat(vec_ld(0,(vector float *)&s),0):LoadUnaligned(&s);
-        int n = cnt>>4,n4 = n<<4;
-        cnt -= n4;
+        vector float svec = LoadValue(s);
+        int n = cnt>>4;
+        cnt -= n<<4;
         while(n--) {
             vec_st(svec,0,dst);
             vec_st(svec,16,dst);
             vec_st(svec,32,dst);
             vec_st(svec,48,dst);
-            dst += 64;
+            dst += 16;
         }
         while(cnt--) *(dst++) = s;
     }
@@ -688,19 +692,33 @@ loopu:
         while(cnt--) *(dst++) = *(src++)*op;
     }
     else
-#elif 0 //FLEXT_CPU == FLEXT_CPU_PPC && defined(__VEC__)
+#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__VDSP__)
+    if(true) {
+        vsmul(src,1,&op,dst,1,cnt);
+    }
+    else
+#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
     if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(src) && IsVectorAligned(dst)) {
-        vector float opvec = IsVectorAligned(&op)?vec_splat(vec_ld(0,(vector float *)&op),0):LoadUnaligned(&op);
-        vector float addvec = (vector float)vec_splat_u32(0);
-        int n = cnt>>4,n4 = n<<4;
-        cnt -= n4;
+        const vector float arg = LoadValue(op);
+        const vector float zero = (vector float)(0);
+        int n = cnt>>4;
+        cnt -= n<<4;
-        while(n--) {
-            vec_st(vec_madd(vec_ld( 0,src),opvec,addvec), 0,dst);
-            vec_st(vec_madd(vec_ld(16,src),opvec,addvec),16,dst);
-            vec_st(vec_madd(vec_ld(32,src),opvec,addvec),32,dst);
-            vec_st(vec_madd(vec_ld(48,src),opvec,addvec),48,dst);
-            src += 64,dst += 64;
+        for(; n--; src += 16,dst += 16) {
+            vector float a1 = vec_ld( 0,src);
+            vector float a2 = vec_ld(16,src);
+            vector float a3 = vec_ld(32,src);
+            vector float a4 = vec_ld(48,src);
+
+            a1 = vec_madd(a1,arg,zero);
+            a2 = vec_madd(a2,arg,zero);
+            a3 = vec_madd(a3,arg,zero);
+            a4 = vec_madd(a4,arg,zero);
+
+            vec_st(a1, 0,dst);
+            vec_st(a2,16,dst);
+            vec_st(a3,32,dst);
+            vec_st(a4,48,dst);
         }
         while(cnt--) *(dst++) = *(src++)*op;
     }
     else
@@ -924,8 +942,34 @@ loopuu:
         while(cnt--) *(dst++) = *(src++) * *(op++);
     }
     else
-#elif 0 // FLEXT_CPU == FLEXT_CPU_PPC && defined(__VEC__)
-    if(GetSIMDCapabilities()&simd_sse) {
+#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__VDSP__)
+    if(true) {
+        vmul(src,1,op,1,dst,1,cnt);
+    }
+    else
+#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
+    if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(src) && IsVectorAligned(op) && IsVectorAligned(dst)) {
+        const vector float zero = (vector float)(0);
+        int n = cnt>>4;
+        cnt -= n<<4;
+
+        for(; n--; src += 16,op += 16,dst += 16) {
+            vector float a1 = vec_ld( 0,src),b1 = vec_ld( 0,op);
+            vector float a2 = vec_ld(16,src),b2 = vec_ld(16,op);
+            vector float a3 = vec_ld(32,src),b3 = vec_ld(32,op);
+            vector float a4 = vec_ld(48,src),b4 = vec_ld(48,op);
+
+            a1 = vec_madd(a1,b1,zero);
+            a2 = vec_madd(a2,b2,zero);
+            a3 = vec_madd(a3,b3,zero);
+            a4 = vec_madd(a4,b4,zero);
+
+            vec_st(a1, 0,dst);
+            vec_st(a2,16,dst);
+            vec_st(a3,32,dst);
+            vec_st(a4,48,dst);
+        }
+        while(cnt--) *(dst++) = *(src++) * *(op++);
     }
     else
 #endif // _MSC_VER
@@ -1054,8 +1098,30 @@ loopu:
         while(cnt--) *(dst++) = *(src++)+op;
     }
     else
-#elif 0 //FLEXT_CPU == FLEXT_CPU_PPC && defined(__VEC__)
-    if(GetSIMDCapabilities()&simd_altivec) {
+#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
+    if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(src) && IsVectorAligned(dst)) {
+        const vector float arg = LoadValue(op);
+        int n = cnt>>4;
+        cnt -= n<<4;
+
+        for(; n--; src += 16,dst += 16) {
+            vector float a1 = vec_ld( 0,src);
+            vector float a2 = vec_ld(16,src);
+            vector float a3 = vec_ld(32,src);
+            vector float a4 = vec_ld(48,src);
+
+            a1 = vec_add(a1,arg);
+            a2 = vec_add(a2,arg);
+            a3 = vec_add(a3,arg);
+            a4 = vec_add(a4,arg);
+
+            vec_st(a1, 0,dst);
+            vec_st(a2,16,dst);
+            vec_st(a3,32,dst);
+            vec_st(a4,48,dst);
+        }
+
+        while(cnt--) *(dst++) = *(src++)+op;
     }
     else
 #endif // _MSC_VER
@@ -1277,9 +1343,35 @@ void flext::AddSamples(t_sample *dst,const t_sample *src,const t_sample *op,int cnt)
         while(cnt--) *(dst++) = *(src++) + *(op++);
     }
     else
-#elif 0 //FLEXT_CPU == FLEXT_CPU_PPC && defined(__VEC__)
-    {
+#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__VDSP__)
+    if(true) {
+        vadd(src,1,op,1,dst,1,cnt);
+    }
+    else
+#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
+    if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(src) && IsVectorAligned(op) && IsVectorAligned(dst)) {
+        int n = cnt>>4;
+        cnt -= n<<4;
+
+        for(; n--; src += 16,op += 16,dst += 16) {
+            vector float a1 = vec_ld( 0,src),b1 = vec_ld( 0,op);
+            vector float a2 = vec_ld(16,src),b2 = vec_ld(16,op);
+            vector float a3 = vec_ld(32,src),b3 = vec_ld(32,op);
+            vector float a4 = vec_ld(48,src),b4 = vec_ld(48,op);
+
+            a1 = vec_add(a1,b1);
+            a2 = vec_add(a2,b2);
+            a3 = vec_add(a3,b3);
+            a4 = vec_add(a4,b4);
+
+            vec_st(a1, 0,dst);
+            vec_st(a2,16,dst);
+            vec_st(a3,32,dst);
+            vec_st(a4,48,dst);
+        }
+        while(cnt--) *(dst++) = *(src++) + *(op++);
     }
+    else
 #endif // _MSC_VER
 #endif // FLEXT_USE_SIMD
     {
@@ -1418,8 +1510,31 @@ loopu:
         while(cnt--) *(dst++) = *(src++)*opmul+opadd;
     }
     else
-#elif 0 //FLEXT_CPU == FLEXT_CPU_PPC && defined(__VEC__)
-    if(GetSIMDCapabilities()&simd_altivec) {
+#elif FLEXT_CPU == FLEXT_CPU_PPC && defined(__ALTIVEC__)
+    if(GetSIMDCapabilities()&simd_altivec && IsVectorAligned(src) && IsVectorAligned(dst)) {
+        const vector float argmul = LoadValue(opmul);
+        const vector float argadd = LoadValue(opadd);
+        int n = cnt>>4;
+        cnt -= n<<4;
+
+        for(; n--; src += 16,dst += 16) {
+            vector float a1 = vec_ld( 0,src);
+            vector float a2 = vec_ld(16,src);
+            vector float a3 = vec_ld(32,src);
+            vector float a4 = vec_ld(48,src);
+
+            a1 = vec_madd(a1,argmul,argadd);
+            a2 = vec_madd(a2,argmul,argadd);
+            a3 = vec_madd(a3,argmul,argadd);
+            a4 = vec_madd(a4,argmul,argadd);
+
+            vec_st(a1, 0,dst);
+            vec_st(a2,16,dst);
+            vec_st(a3,32,dst);
+            vec_st(a4,48,dst);
+        }
+
+        while(cnt--) *(dst++) = *(src++)*opmul+opadd;
     }
     else
 #endif // _MSC_VER
--
cgit v1.2.1