From 44d97258734b898c3cb0f8cdfd199352de2bfc2d Mon Sep 17 00:00:00 2001 From: Thomas Grill Date: Mon, 21 Jul 2003 02:37:30 +0000 Subject: "" svn path=/trunk/; revision=782 --- externals/grill/flext/source/flattr_ed.cpp | 57 +--- externals/grill/flext/source/flbase.h | 7 + externals/grill/flext/source/flprefix.h | 1 - externals/grill/flext/source/flsimd.cpp | 471 +++++++++++++++++++++-------- externals/grill/vasp/vasp.vcproj | 2 +- externals/grill/xsample/source/groove.cpp | 99 +++++- externals/grill/xsample/source/inter.h | 4 +- externals/grill/xsample/source/main.h | 2 +- 8 files changed, 463 insertions(+), 180 deletions(-) (limited to 'externals/grill') diff --git a/externals/grill/flext/source/flattr_ed.cpp b/externals/grill/flext/source/flattr_ed.cpp index ec096ef5..d138a97b 100644 --- a/externals/grill/flext/source/flattr_ed.cpp +++ b/externals/grill/flext/source/flattr_ed.cpp @@ -12,7 +12,7 @@ WARRANTIES, see the file, "license.txt," in this distribution. \brief Attribute editor (property dialog) for PD */ -#include "flprefix.h" +#include "flext.h" #if FLEXT_SYS == FLEXT_SYS_PD && !defined(FLEXT_NOATTREDIT) @@ -20,64 +20,13 @@ WARRANTIES, see the file, "license.txt," in this distribution. #pragma warning( disable : 4091 ) #endif +// This is problematic... non-public headers! #include -#include "flext.h" +#include #include #include -#ifdef __MWERKS__ -#define STD std -#else -#define STD -#endif - - -#if !defined(PD_VERSION_MAJOR) - /* PD version 0.36 or below */ - - /* Call this to get a gobj's bounding rectangle in pixels */ - typedef void (*t_getrectfn)(t_gobj *x, struct _glist *glist, - int *x1, int *y1, int *x2, int *y2); - /* and this to displace a gobj: */ - typedef void (*t_displacefn)(t_gobj *x, struct _glist *glist, int dx, int dy); - /* change color to show selection: */ - typedef void (*t_selectfn)(t_gobj *x, struct _glist *glist, int state); - /* change appearance to show activation/deactivation: */ - typedef void (*t_activatefn)(t_gobj *x, struct _glist *glist, int state); - /* warn a gobj it's about to be deleted */ - typedef void (*t_deletefn)(t_gobj *x, struct _glist *glist); - /* making visible or invisible */ - typedef void (*t_visfn)(t_gobj *x, struct _glist *glist, int flag); - /* field a mouse click (when not in "edit" mode) */ - typedef int (*t_clickfn)(t_gobj *x, struct _glist *glist, - int xpix, int ypix, int shift, int alt, int dbl, int doit); - /* save to a binbuf */ - typedef void (*t_savefn)(t_gobj *x, t_binbuf *b); - /* open properties dialog */ - typedef void (*t_propertiesfn)(t_gobj *x, struct _glist *glist); - /* ... and later, resizing; getting/setting font or color... */ - - struct _widgetbehavior - { - t_getrectfn w_getrectfn; - t_displacefn w_displacefn; - t_selectfn w_selectfn; - t_activatefn w_activatefn; - t_deletefn w_deletefn; - t_visfn w_visfn; - t_clickfn w_clickfn; - t_savefn w_savefn; - t_propertiesfn w_propertiesfn; - }; - -#elif !defined(PD_VERSION_MINOR) - #error Flext cannot be compiled with this version! -#else - #include -#endif - - static t_widgetbehavior widgetbehavior; static void (*ori_vis)(t_gobj *c, t_glist *, int vis) = NULL; diff --git a/externals/grill/flext/source/flbase.h b/externals/grill/flext/source/flbase.h index 795c0673..eec2e520 100644 --- a/externals/grill/flext/source/flbase.h +++ b/externals/grill/flext/source/flbase.h @@ -21,6 +21,13 @@ WARRANTIES, see the file, "license.txt," in this distribution. #include "flsupport.h" +// ----- disable attribute editor for PD version < devel_0_36 or 0.37 +#ifndef PD_MAJOR_VERSION +#undef FLEXT_NOATTREDIT +#define FLEXT_NOATTREDIT +#endif + + class FLEXT_SHARE FLEXT_CLASSDEF(flext_obj); typedef class FLEXT_CLASSDEF(flext_obj) flext_obj; diff --git a/externals/grill/flext/source/flprefix.h b/externals/grill/flext/source/flprefix.h index 7ab517df..c27d1a67 100755 --- a/externals/grill/flext/source/flprefix.h +++ b/externals/grill/flext/source/flprefix.h @@ -371,7 +371,6 @@ WARRANTIES, see the file, "license.txt," in this distribution. #define FLEXT_CLASSDEF(CL) CL##_single #endif - // std namespace #ifdef __MWERKS__ #define STD std diff --git a/externals/grill/flext/source/flsimd.cpp b/externals/grill/flext/source/flsimd.cpp index 3dcee887..88cbdb89 100755 --- a/externals/grill/flext/source/flsimd.cpp +++ b/externals/grill/flext/source/flsimd.cpp @@ -287,6 +287,12 @@ void flext::CopySamples(t_sample *dst,const t_sample *src,int cnt) if(GetSIMDCapabilities()&simd_sse) { // single precision + __asm { + mov eax,dword ptr [src] + prefetcht0 [eax+0] + prefetcht0 [eax+32] + } + int n = cnt>>4; cnt -= n<<4; @@ -295,10 +301,11 @@ void flext::CopySamples(t_sample *dst,const t_sample *src,int cnt) // aligned src, aligned dst __asm { mov eax,dword ptr [src] - prefetcht0 [eax] mov edx,dword ptr [dst] mov ecx,[n] - loopaa: +loopaa: + prefetcht0 [eax+64] + prefetcht0 [eax+96] movaps xmm0,xmmword ptr[eax] movaps xmmword ptr[edx],xmm0 movaps xmm1,xmmword ptr[eax+4*4] @@ -317,10 +324,11 @@ void flext::CopySamples(t_sample *dst,const t_sample *src,int cnt) // aligned src, unaligned dst __asm { mov eax,dword ptr [src] - prefetcht0 [eax] mov edx,dword ptr [dst] mov ecx,[n] - loopau: +loopau: + prefetcht0 [eax+64] + prefetcht0 [eax+96] movaps xmm0,xmmword ptr[eax] movups xmmword ptr[edx],xmm0 movaps xmm1,xmmword ptr[eax+4*4] @@ -341,10 +349,11 @@ void flext::CopySamples(t_sample *dst,const t_sample *src,int cnt) // unaligned src, aligned dst __asm { mov eax,dword ptr [src] - prefetcht0 [eax] mov edx,dword ptr [dst] mov ecx,[n] loopua: + prefetcht0 [eax+64] + prefetcht0 [eax+96] movups xmm0,xmmword ptr[eax] movaps xmmword ptr[edx],xmm0 movups xmm1,xmmword ptr[eax+4*4] @@ -363,10 +372,11 @@ loopua: // unaligned src, unaligned dst __asm { mov eax,dword ptr [src] - prefetcht0 [eax] mov edx,dword ptr [dst] mov ecx,[n] loopuu: + prefetcht0 [eax+64] + prefetcht0 [eax+96] movups xmm0,xmmword ptr[eax] movups xmmword ptr[edx],xmm0 movups xmm1,xmmword ptr[eax+4*4] @@ -502,6 +512,10 @@ void flext::MulSamples(t_sample *dst,const t_sample *src,t_sample op,int cnt) cnt -= n<<4; __asm { + mov eax,dword ptr [src] + prefetcht0 [eax+0] + prefetcht0 [eax+32] + movss xmm0,xmmword ptr [op] shufps xmm0,xmm0,0 } @@ -515,6 +529,9 @@ void flext::MulSamples(t_sample *dst,const t_sample *src,t_sample op,int cnt) mov eax,dword ptr [src] mov edx,dword ptr [dst] loopa: + prefetcht0 [eax+64] + prefetcht0 [eax+96] + movaps xmm1,xmmword ptr[eax] mulps xmm1,xmm0 movaps xmmword ptr[edx],xmm1 @@ -543,6 +560,9 @@ loopa: mov eax,dword ptr [src] mov edx,dword ptr [dst] loopu: + prefetcht0 [eax+64] + prefetcht0 [eax+96] + movups xmm1,xmmword ptr[eax] mulps xmm1,xmm0 movups xmmword ptr[edx],xmm1 @@ -619,75 +639,171 @@ void flext::MulSamples(t_sample *dst,const t_sample *src,const t_sample *op,int int n = cnt>>4; cnt -= n<<4; + __asm { + mov eax,[src] + mov ebx,[op] + prefetcht0 [eax+0] + prefetcht0 [ebx+0] + prefetcht0 [eax+32] + prefetcht0 [ebx+32] + } + if((reinterpret_cast(src)&(__alignof(__m128)-1)) == 0 && (reinterpret_cast(dst)&(__alignof(__m128)-1)) == 0 - && (reinterpret_cast(op)&(__alignof(__m128)-1)) == 0 - ) { - // aligned version - __asm { - mov ecx,[n] - mov eax,dword ptr [src] - mov edx,dword ptr [dst] - mov ebx,dword ptr [op] -loopa: - movaps xmm0,xmmword ptr[eax] - movaps xmm1,xmmword ptr[ebx] - mulps xmm0,xmm1 - movaps xmmword ptr[edx],xmm0 + ) { + if((reinterpret_cast(op)&(__alignof(__m128)-1)) == 0) { + __asm { + mov ecx,[n] + mov eax,dword ptr [src] + mov edx,dword ptr [dst] + mov ebx,dword ptr [op] + loopaa: + prefetcht0 [eax+64] + prefetcht0 [ebx+64] + prefetcht0 [eax+96] + prefetcht0 [ebx+96] - movaps xmm2,xmmword ptr[eax+4*4] - movaps xmm3,xmmword ptr[ebx+4*4] - mulps xmm2,xmm3 - movaps xmmword ptr[edx+4*4],xmm2 + movaps xmm0,xmmword ptr[eax] + movaps xmm1,xmmword ptr[ebx] + mulps xmm0,xmm1 + movaps xmmword ptr[edx],xmm0 - movaps xmm4,xmmword ptr[eax+8*4] - movaps xmm5,xmmword ptr[ebx+8*4] - mulps xmm4,xmm5 - movaps xmmword ptr[edx+8*4],xmm4 + movaps xmm2,xmmword ptr[eax+4*4] + movaps xmm3,xmmword ptr[ebx+4*4] + mulps xmm2,xmm3 + movaps xmmword ptr[edx+4*4],xmm2 - movaps xmm6,xmmword ptr[eax+12*4] - movaps xmm7,xmmword ptr[ebx+12*4] - mulps xmm6,xmm7 - movaps xmmword ptr[edx+12*4],xmm6 + movaps xmm4,xmmword ptr[eax+8*4] + movaps xmm5,xmmword ptr[ebx+8*4] + mulps xmm4,xmm5 + movaps xmmword ptr[edx+8*4],xmm4 - add eax,16*4 - add ebx,16*4 - add edx,16*4 - loop loopa + movaps xmm6,xmmword ptr[eax+12*4] + movaps xmm7,xmmword ptr[ebx+12*4] + mulps xmm6,xmm7 + movaps xmmword ptr[edx+12*4],xmm6 + + add eax,16*4 + add ebx,16*4 + add edx,16*4 + loop loopaa + } } - } + else { + __asm { + mov ecx,[n] + mov eax,dword ptr [src] + mov edx,dword ptr [dst] + mov ebx,dword ptr [op] + loopau: + prefetcht0 [eax+64] + prefetcht0 [ebx+64] + prefetcht0 [eax+96] + prefetcht0 [ebx+96] + + movaps xmm0,xmmword ptr[eax] + movups xmm1,xmmword ptr[ebx] + mulps xmm0,xmm1 + movaps xmmword ptr[edx],xmm0 + + movaps xmm2,xmmword ptr[eax+4*4] + movups xmm3,xmmword ptr[ebx+4*4] + mulps xmm2,xmm3 + movaps xmmword ptr[edx+4*4],xmm2 + + movaps xmm4,xmmword ptr[eax+8*4] + movups xmm5,xmmword ptr[ebx+8*4] + mulps xmm4,xmm5 + movaps xmmword ptr[edx+8*4],xmm4 + + movaps xmm6,xmmword ptr[eax+12*4] + movups xmm7,xmmword ptr[ebx+12*4] + mulps xmm6,xmm7 + movaps xmmword ptr[edx+12*4],xmm6 + + add eax,16*4 + add ebx,16*4 + add edx,16*4 + loop loopau + } + } + } else { - // unaligned version - __asm { - mov ecx,[n] - mov eax,dword ptr [src] - mov edx,dword ptr [dst] - mov ebx,dword ptr [op] -loopu: - movups xmm0,xmmword ptr[eax] - movups xmm1,xmmword ptr[ebx] - mulps xmm0,xmm1 - movups xmmword ptr[edx],xmm0 + if((reinterpret_cast(op)&(__alignof(__m128)-1)) == 0) { + __asm { + mov ecx,[n] + mov eax,dword ptr [src] + mov edx,dword ptr [dst] + mov ebx,dword ptr [op] + loopua: + prefetcht0 [eax+64] + prefetcht0 [ebx+64] + prefetcht0 [eax+96] + prefetcht0 [ebx+96] - movups xmm2,xmmword ptr[eax+4*4] - movups xmm3,xmmword ptr[ebx+4*4] - mulps xmm2,xmm3 - movups xmmword ptr[edx+4*4],xmm2 + movups xmm0,xmmword ptr[eax] + movaps xmm1,xmmword ptr[ebx] + mulps xmm0,xmm1 + movups xmmword ptr[edx],xmm0 - movups xmm4,xmmword ptr[eax+8*4] - movups xmm5,xmmword ptr[ebx+8*4] - mulps xmm4,xmm5 - movups xmmword ptr[edx+8*4],xmm4 + movups xmm2,xmmword ptr[eax+4*4] + movaps xmm3,xmmword ptr[ebx+4*4] + mulps xmm2,xmm3 + movups xmmword ptr[edx+4*4],xmm2 - movups xmm6,xmmword ptr[eax+12*4] - movups xmm7,xmmword ptr[ebx+12*4] - mulps xmm6,xmm7 - movups xmmword ptr[edx+12*4],xmm6 + movups xmm4,xmmword ptr[eax+8*4] + movaps xmm5,xmmword ptr[ebx+8*4] + mulps xmm4,xmm5 + movups xmmword ptr[edx+8*4],xmm4 - add eax,16*4 - add ebx,16*4 - add edx,16*4 - loop loopu + movups xmm6,xmmword ptr[eax+12*4] + movaps xmm7,xmmword ptr[ebx+12*4] + mulps xmm6,xmm7 + movups xmmword ptr[edx+12*4],xmm6 + + add eax,16*4 + add ebx,16*4 + add edx,16*4 + loop loopua + } + } + else { + __asm { + mov ecx,[n] + mov eax,dword ptr [src] + mov edx,dword ptr [dst] + mov ebx,dword ptr [op] +loopuu: + prefetcht0 [eax+64] + prefetcht0 [ebx+64] + prefetcht0 [eax+96] + prefetcht0 [ebx+96] + + movups xmm0,xmmword ptr[eax] + movups xmm1,xmmword ptr[ebx] + mulps xmm0,xmm1 + movups xmmword ptr[edx],xmm0 + + movups xmm2,xmmword ptr[eax+4*4] + movups xmm3,xmmword ptr[ebx+4*4] + mulps xmm2,xmm3 + movups xmmword ptr[edx+4*4],xmm2 + + movups xmm4,xmmword ptr[eax+8*4] + movups xmm5,xmmword ptr[ebx+8*4] + mulps xmm4,xmm5 + movups xmmword ptr[edx+8*4],xmm4 + + movups xmm6,xmmword ptr[eax+12*4] + movups xmm7,xmmword ptr[ebx+12*4] + mulps xmm6,xmm7 + movups xmmword ptr[edx+12*4],xmm6 + + add eax,16*4 + add ebx,16*4 + add edx,16*4 + loop loopuu + } } } while(cnt--) *(dst++) = *(src++) * *(op++); @@ -748,6 +864,10 @@ void flext::AddSamples(t_sample *dst,const t_sample *src,t_sample op,int cnt) cnt -= n<<4; __asm { + mov eax,[src] + prefetcht0 [eax+0] + prefetcht0 [eax+32] + movss xmm0,xmmword ptr [op] shufps xmm0,xmm0,0 } @@ -761,6 +881,9 @@ void flext::AddSamples(t_sample *dst,const t_sample *src,t_sample op,int cnt) mov eax,dword ptr [src] mov edx,dword ptr [dst] loopa: + prefetcht0 [eax+64] + prefetcht0 [eax+96] + movaps xmm1,xmmword ptr[eax] addps xmm1,xmm0 movaps xmmword ptr[edx],xmm1 @@ -789,6 +912,9 @@ loopa: mov eax,dword ptr [src] mov edx,dword ptr [dst] loopu: + prefetcht0 [eax+64] + prefetcht0 [eax+96] + movups xmm1,xmmword ptr[eax] addps xmm1,xmm0 movups xmmword ptr[edx],xmm1 @@ -867,79 +993,176 @@ void flext::AddSamples(t_sample *dst,const t_sample *src,const t_sample *op,int #ifdef FLEXT_USE_SIMD #ifdef _MSC_VER if(GetSIMDCapabilities()&simd_sse) { + // Prefetch cache + __asm { + mov eax,dword ptr [src] + mov ebx,dword ptr [op] + prefetcht0 [eax] + prefetcht0 [ebx] + prefetcht0 [eax+32] + prefetcht0 [ebx+32] + } + // single precision int n = cnt>>4; cnt -= n<<4; if((reinterpret_cast(src)&(__alignof(__m128)-1)) == 0 && (reinterpret_cast(dst)&(__alignof(__m128)-1)) == 0 - && (reinterpret_cast(op)&(__alignof(__m128)-1)) == 0 - ) { - // aligned version - __asm { - mov ecx,dword ptr [n] - mov eax,dword ptr [src] - mov edx,dword ptr [dst] - mov ebx,dword ptr [op] -loopa: - movaps xmm0,xmmword ptr[eax] - movaps xmm1,xmmword ptr[ebx] - addps xmm0,xmm1 - movaps xmmword ptr[edx],xmm0 + ) { + if((reinterpret_cast(op)&(__alignof(__m128)-1)) == 0) { + __asm { + mov ecx,dword ptr [n] + mov eax,dword ptr [src] + mov edx,dword ptr [dst] + mov ebx,dword ptr [op] + loopaa: + prefetcht0 [eax+64] + prefetcht0 [ebx+64] + prefetcht0 [eax+96] + prefetcht0 [ebx+96] - movaps xmm2,xmmword ptr[eax+4*4] - movaps xmm3,xmmword ptr[ebx+4*4] - addps xmm2,xmm3 - movaps xmmword ptr[edx+4*4],xmm2 + movaps xmm0,xmmword ptr[eax] + movaps xmm1,xmmword ptr[ebx] + addps xmm0,xmm1 + movaps xmmword ptr[edx],xmm0 - movaps xmm4,xmmword ptr[eax+8*4] - movaps xmm5,xmmword ptr[ebx+8*4] - addps xmm4,xmm5 - movaps xmmword ptr[edx+8*4],xmm4 + movaps xmm2,xmmword ptr[eax+4*4] + movaps xmm3,xmmword ptr[ebx+4*4] + addps xmm2,xmm3 + movaps xmmword ptr[edx+4*4],xmm2 - movaps xmm6,xmmword ptr[eax+12*4] - movaps xmm7,xmmword ptr[ebx+12*4] - addps xmm6,xmm7 - movaps xmmword ptr[edx+12*4],xmm6 + movaps xmm4,xmmword ptr[eax+8*4] + movaps xmm5,xmmword ptr[ebx+8*4] + addps xmm4,xmm5 + movaps xmmword ptr[edx+8*4],xmm4 - add eax,16*4 - add ebx,16*4 - add edx,16*4 - loop loopa + movaps xmm6,xmmword ptr[eax+12*4] + movaps xmm7,xmmword ptr[ebx+12*4] + addps xmm6,xmm7 + movaps xmmword ptr[edx+12*4],xmm6 + + add eax,16*4 + add ebx,16*4 + add edx,16*4 + loop loopaa + } + } + else { + __asm { + mov ecx,dword ptr [n] + mov eax,dword ptr [src] + mov edx,dword ptr [dst] + mov ebx,dword ptr [op] + loopau: + prefetcht0 [eax+64] + prefetcht0 [ebx+64] + prefetcht0 [eax+96] + prefetcht0 [ebx+96] + + movaps xmm0,xmmword ptr[eax] + movups xmm1,xmmword ptr[ebx] + addps xmm0,xmm1 + movaps xmmword ptr[edx],xmm0 + + movaps xmm2,xmmword ptr[eax+4*4] + movups xmm3,xmmword ptr[ebx+4*4] + addps xmm2,xmm3 + movaps xmmword ptr[edx+4*4],xmm2 + + movaps xmm4,xmmword ptr[eax+8*4] + movups xmm5,xmmword ptr[ebx+8*4] + addps xmm4,xmm5 + movaps xmmword ptr[edx+8*4],xmm4 + + movaps xmm6,xmmword ptr[eax+12*4] + movups xmm7,xmmword ptr[ebx+12*4] + addps xmm6,xmm7 + movaps xmmword ptr[edx+12*4],xmm6 + + add eax,16*4 + add ebx,16*4 + add edx,16*4 + loop loopau + } } } else { - // unaligned version - __asm { - mov ecx,dword ptr [n] - mov eax,dword ptr [src] - mov edx,dword ptr [dst] - mov ebx,dword ptr [op] -loopu: - movups xmm0,xmmword ptr[eax] - movups xmm1,xmmword ptr[ebx] - addps xmm0,xmm1 - movups xmmword ptr[edx],xmm0 + if((reinterpret_cast(op)&(__alignof(__m128)-1)) == 0) { + __asm { + mov ecx,dword ptr [n] + mov eax,dword ptr [src] + mov edx,dword ptr [dst] + mov ebx,dword ptr [op] + loopua: + prefetcht0 [eax+64] + prefetcht0 [ebx+64] + prefetcht0 [eax+96] + prefetcht0 [ebx+96] - movups xmm2,xmmword ptr[eax+4*4] - movups xmm3,xmmword ptr[ebx+4*4] - addps xmm2,xmm3 - movups xmmword ptr[edx+4*4],xmm2 + movups xmm0,xmmword ptr[eax] + movaps xmm1,xmmword ptr[ebx] + addps xmm0,xmm1 + movups xmmword ptr[edx],xmm0 - movups xmm4,xmmword ptr[eax+8*4] - movups xmm5,xmmword ptr[ebx+8*4] - addps xmm4,xmm5 - movups xmmword ptr[edx+8*4],xmm4 + movups xmm2,xmmword ptr[eax+4*4] + movaps xmm3,xmmword ptr[ebx+4*4] + addps xmm2,xmm3 + movups xmmword ptr[edx+4*4],xmm2 - movups xmm6,xmmword ptr[eax+12*4] - movups xmm7,xmmword ptr[ebx+12*4] - addps xmm6,xmm7 - movups xmmword ptr[edx+12*4],xmm6 + movups xmm4,xmmword ptr[eax+8*4] + movaps xmm5,xmmword ptr[ebx+8*4] + addps xmm4,xmm5 + movups xmmword ptr[edx+8*4],xmm4 - add eax,16*4 - add ebx,16*4 - add edx,16*4 - loop loopu + movups xmm6,xmmword ptr[eax+12*4] + movaps xmm7,xmmword ptr[ebx+12*4] + addps xmm6,xmm7 + movups xmmword ptr[edx+12*4],xmm6 + + add eax,16*4 + add ebx,16*4 + add edx,16*4 + loop loopua + } + } + else { + __asm { + mov ecx,dword ptr [n] + mov eax,dword ptr [src] + mov edx,dword ptr [dst] + mov ebx,dword ptr [op] + loopuu: + prefetcht0 [eax+64] + prefetcht0 [ebx+64] + prefetcht0 [eax+96] + prefetcht0 [ebx+96] + + movups xmm0,xmmword ptr[eax] + movups xmm1,xmmword ptr[ebx] + addps xmm0,xmm1 + movups xmmword ptr[edx],xmm0 + + movups xmm2,xmmword ptr[eax+4*4] + movups xmm3,xmmword ptr[ebx+4*4] + addps xmm2,xmm3 + movups xmmword ptr[edx+4*4],xmm2 + + movups xmm4,xmmword ptr[eax+8*4] + movups xmm5,xmmword ptr[ebx+8*4] + addps xmm4,xmm5 + movups xmmword ptr[edx+8*4],xmm4 + + movups xmm6,xmmword ptr[eax+12*4] + movups xmm7,xmmword ptr[ebx+12*4] + addps xmm6,xmm7 + movups xmmword ptr[edx+12*4],xmm6 + + add eax,16*4 + add ebx,16*4 + add edx,16*4 + loop loopuu + } } } while(cnt--) *(dst++) = *(src++) + *(op++); @@ -1008,6 +1231,10 @@ void flext::ScaleSamples(t_sample *dst,const t_sample *src,t_sample opmul,t_samp cnt -= n<<4; __asm { + mov eax,dword ptr [src] + prefetcht0 [eax+0] + prefetcht0 [eax+32] + movss xmm0,xmmword ptr [opadd] shufps xmm0,xmm0,0 movss xmm1,xmmword ptr [opmul] @@ -1023,6 +1250,9 @@ void flext::ScaleSamples(t_sample *dst,const t_sample *src,t_sample opmul,t_samp mov eax,dword ptr [src] mov edx,dword ptr [dst] loopa: + prefetcht0 [eax+64] + prefetcht0 [eax+96] + movaps xmm2,xmmword ptr[eax] mulps xmm2,xmm1 addps xmm2,xmm0 @@ -1055,6 +1285,9 @@ loopa: mov eax,dword ptr [src] mov edx,dword ptr [dst] loopu: + prefetcht0 [eax+64] + prefetcht0 [eax+96] + movups xmm2,xmmword ptr[eax] mulps xmm2,xmm1 addps xmm2,xmm0 diff --git a/externals/grill/vasp/vasp.vcproj b/externals/grill/vasp/vasp.vcproj index f6b7ac0d..0723fa7f 100644 --- a/externals/grill/vasp/vasp.vcproj +++ b/externals/grill/vasp/vasp.vcproj @@ -23,7 +23,7 @@ 0) { register D o = curpos; @@ -446,6 +450,48 @@ V xgroove::s_pos_once(I n,S *const *invecs,S *const *outvecs) if(lpbang) ToOutBang(outchns+3); } +// \TODO optimize that for spd = const! +V xgroove::s_pos_c_once(I n,S *const *invecs,S *const *outvecs) +{ + const S spd = *invecs[0]; + S *pos = outvecs[outchns]; + BL lpbang = false; + + const D smin = curmin,smax = curmax,plen = smax-smin; //curlen; + + if(buf && plen > 0) { + register D o = curpos; + + for(I i = 0; i < n; ++i) { + if(o >= smax) { o = smax; lpbang = true; } + else if(o < smin) { o = smin; lpbang = true; } + + pos[i] = o; + o += spd; + } + // normalize and store current playing position + setpos(o); + + playfun(n,&pos,outvecs); + + arrscale(n,pos,pos); + } + else + s_pos_off(n,invecs,outvecs); + + if(lpbang) ToOutBang(outchns+3); +} + +V xgroove::s_pos_a_once(I n,S *const *invecs,S *const *outvecs) +{ + const S *speed = invecs[0]; + if(speed[0] == speed[n-1]) + // assume constant speed + s_pos_c_once(n,invecs,outvecs); + else + s_pos_once(n,invecs,outvecs); +} + V xgroove::s_pos_loop(I n,S *const *invecs,S *const *outvecs) { const S *speed = invecs[0]; @@ -486,6 +532,55 @@ V xgroove::s_pos_loop(I n,S *const *invecs,S *const *outvecs) if(lpbang) ToOutBang(outchns+3); } +// \TODO optimize that for spd = const! +V xgroove::s_pos_c_loop(I n,S *const *invecs,S *const *outvecs) +{ + const S spd = *invecs[0]; + S *pos = outvecs[outchns]; + BL lpbang = false; + + const D smin = curmin,smax = curmax,plen = smax-smin; //curlen; + + if(buf && plen > 0) { + register D o = curpos; + + for(I i = 0; i < n; ++i) { + // normalize offset + if(!(o < smax)) { // faster than o >= smax + o = fmod(o-smin,plen)+smin; + lpbang = true; + } + else if(o < smin) { + o = fmod(o-smin,plen)+smax; + lpbang = true; + } + + pos[i] = o; + o += spd; + } + // normalize and store current playing position + setpos(o); + + playfun(n,&pos,outvecs); + + arrscale(n,pos,pos); + } + else + s_pos_off(n,invecs,outvecs); + + if(lpbang) ToOutBang(outchns+3); +} + +V xgroove::s_pos_a_loop(I n,S *const *invecs,S *const *outvecs) +{ + const S *speed = invecs[0]; + if(speed[0] == speed[n-1]) + // assume constant speed + s_pos_c_loop(n,invecs,outvecs); + else + s_pos_loop(n,invecs,outvecs); +} + V xgroove::s_pos_loopzn(I n,S *const *invecs,S *const *outvecs) { const S *speed = invecs[0]; diff --git a/externals/grill/xsample/source/inter.h b/externals/grill/xsample/source/inter.h index 602bfd49..3f8ea5f8 100755 --- a/externals/grill/xsample/source/inter.h +++ b/externals/grill/xsample/source/inter.h @@ -128,14 +128,14 @@ TMPLDEF V xinter::st_play4(const S *bdt,const I smin,const I smax,const I n,cons register I oint = (I)o,ointm,oint1,oint2; if(oint <= smin) { - if(oint < smin) oint = smin,o = smin; + if(oint < smin) oint = smin,o = (float)smin; // position is first simple ointm = smin; // first sample oint1 = oint+1; oint2 = oint1+1; } else if(oint >= maxo-2) { - if(oint > maxo) oint = maxo,o = smax; + if(oint > maxo) oint = maxo,o = (float)smax; ointm = oint-1; oint1 = oint >= maxo?maxo:oint+1; oint2 = oint1 >= maxo?maxo:oint1+1; diff --git a/externals/grill/xsample/source/main.h b/externals/grill/xsample/source/main.h index 6dce2711..b3b8a499 100644 --- a/externals/grill/xsample/source/main.h +++ b/externals/grill/xsample/source/main.h @@ -12,7 +12,7 @@ WARRANTIES, see the file, "license.txt," in this distribution. #define __XSAMPLE_H -#define XSAMPLE_VERSION "0.3.0pre11" +#define XSAMPLE_VERSION "0.3.0pre12" #define FLEXT_ATTRIBUTES 1 -- cgit v1.2.1