aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--externals/grill/flext/source/flattr_ed.cpp57
-rw-r--r--externals/grill/flext/source/flbase.h7
-rwxr-xr-xexternals/grill/flext/source/flprefix.h1
-rwxr-xr-xexternals/grill/flext/source/flsimd.cpp471
-rw-r--r--externals/grill/vasp/vasp.vcproj2
-rw-r--r--externals/grill/xsample/source/groove.cpp99
-rwxr-xr-xexternals/grill/xsample/source/inter.h4
-rw-r--r--externals/grill/xsample/source/main.h2
8 files changed, 463 insertions, 180 deletions
diff --git a/externals/grill/flext/source/flattr_ed.cpp b/externals/grill/flext/source/flattr_ed.cpp
index ec096ef5..d138a97b 100644
--- a/externals/grill/flext/source/flattr_ed.cpp
+++ b/externals/grill/flext/source/flattr_ed.cpp
@@ -12,7 +12,7 @@ WARRANTIES, see the file, "license.txt," in this distribution.
\brief Attribute editor (property dialog) for PD
*/
-#include "flprefix.h"
+#include "flext.h"
#if FLEXT_SYS == FLEXT_SYS_PD && !defined(FLEXT_NOATTREDIT)
@@ -20,64 +20,13 @@ WARRANTIES, see the file, "license.txt," in this distribution.
#pragma warning( disable : 4091 )
#endif
+// This is problematic... non-public headers!
#include <m_imp.h>
-#include "flext.h"
+#include <g_canvas.h>
#include <string.h>
#include <stdio.h>
-#ifdef __MWERKS__
-#define STD std
-#else
-#define STD
-#endif
-
-
-#if !defined(PD_VERSION_MAJOR)
- /* PD version 0.36 or below */
-
- /* Call this to get a gobj's bounding rectangle in pixels */
- typedef void (*t_getrectfn)(t_gobj *x, struct _glist *glist,
- int *x1, int *y1, int *x2, int *y2);
- /* and this to displace a gobj: */
- typedef void (*t_displacefn)(t_gobj *x, struct _glist *glist, int dx, int dy);
- /* change color to show selection: */
- typedef void (*t_selectfn)(t_gobj *x, struct _glist *glist, int state);
- /* change appearance to show activation/deactivation: */
- typedef void (*t_activatefn)(t_gobj *x, struct _glist *glist, int state);
- /* warn a gobj it's about to be deleted */
- typedef void (*t_deletefn)(t_gobj *x, struct _glist *glist);
- /* making visible or invisible */
- typedef void (*t_visfn)(t_gobj *x, struct _glist *glist, int flag);
- /* field a mouse click (when not in "edit" mode) */
- typedef int (*t_clickfn)(t_gobj *x, struct _glist *glist,
- int xpix, int ypix, int shift, int alt, int dbl, int doit);
- /* save to a binbuf */
- typedef void (*t_savefn)(t_gobj *x, t_binbuf *b);
- /* open properties dialog */
- typedef void (*t_propertiesfn)(t_gobj *x, struct _glist *glist);
- /* ... and later, resizing; getting/setting font or color... */
-
- struct _widgetbehavior
- {
- t_getrectfn w_getrectfn;
- t_displacefn w_displacefn;
- t_selectfn w_selectfn;
- t_activatefn w_activatefn;
- t_deletefn w_deletefn;
- t_visfn w_visfn;
- t_clickfn w_clickfn;
- t_savefn w_savefn;
- t_propertiesfn w_propertiesfn;
- };
-
-#elif !defined(PD_VERSION_MINOR)
- #error Flext cannot be compiled with this version!
-#else
- #include <g_canvas.h>
-#endif
-
-
static t_widgetbehavior widgetbehavior;
static void (*ori_vis)(t_gobj *c, t_glist *, int vis) = NULL;
diff --git a/externals/grill/flext/source/flbase.h b/externals/grill/flext/source/flbase.h
index 795c0673..eec2e520 100644
--- a/externals/grill/flext/source/flbase.h
+++ b/externals/grill/flext/source/flbase.h
@@ -21,6 +21,13 @@ WARRANTIES, see the file, "license.txt," in this distribution.
#include "flsupport.h"
+// ----- disable attribute editor for PD version < devel_0_36 or 0.37
+#ifndef PD_MAJOR_VERSION
+#undef FLEXT_NOATTREDIT
+#define FLEXT_NOATTREDIT
+#endif
+
+
class FLEXT_SHARE FLEXT_CLASSDEF(flext_obj);
typedef class FLEXT_CLASSDEF(flext_obj) flext_obj;
diff --git a/externals/grill/flext/source/flprefix.h b/externals/grill/flext/source/flprefix.h
index 7ab517df..c27d1a67 100755
--- a/externals/grill/flext/source/flprefix.h
+++ b/externals/grill/flext/source/flprefix.h
@@ -371,7 +371,6 @@ WARRANTIES, see the file, "license.txt," in this distribution.
#define FLEXT_CLASSDEF(CL) CL##_single
#endif
-
// std namespace
#ifdef __MWERKS__
#define STD std
diff --git a/externals/grill/flext/source/flsimd.cpp b/externals/grill/flext/source/flsimd.cpp
index 3dcee887..88cbdb89 100755
--- a/externals/grill/flext/source/flsimd.cpp
+++ b/externals/grill/flext/source/flsimd.cpp
@@ -287,6 +287,12 @@ void flext::CopySamples(t_sample *dst,const t_sample *src,int cnt)
if(GetSIMDCapabilities()&simd_sse) {
// single precision
+ __asm {
+ mov eax,dword ptr [src]
+ prefetcht0 [eax+0]
+ prefetcht0 [eax+32]
+ }
+
int n = cnt>>4;
cnt -= n<<4;
@@ -295,10 +301,11 @@ void flext::CopySamples(t_sample *dst,const t_sample *src,int cnt)
// aligned src, aligned dst
__asm {
mov eax,dword ptr [src]
- prefetcht0 [eax]
mov edx,dword ptr [dst]
mov ecx,[n]
- loopaa:
+loopaa:
+ prefetcht0 [eax+64]
+ prefetcht0 [eax+96]
movaps xmm0,xmmword ptr[eax]
movaps xmmword ptr[edx],xmm0
movaps xmm1,xmmword ptr[eax+4*4]
@@ -317,10 +324,11 @@ void flext::CopySamples(t_sample *dst,const t_sample *src,int cnt)
// aligned src, unaligned dst
__asm {
mov eax,dword ptr [src]
- prefetcht0 [eax]
mov edx,dword ptr [dst]
mov ecx,[n]
- loopau:
+loopau:
+ prefetcht0 [eax+64]
+ prefetcht0 [eax+96]
movaps xmm0,xmmword ptr[eax]
movups xmmword ptr[edx],xmm0
movaps xmm1,xmmword ptr[eax+4*4]
@@ -341,10 +349,11 @@ void flext::CopySamples(t_sample *dst,const t_sample *src,int cnt)
// unaligned src, aligned dst
__asm {
mov eax,dword ptr [src]
- prefetcht0 [eax]
mov edx,dword ptr [dst]
mov ecx,[n]
loopua:
+ prefetcht0 [eax+64]
+ prefetcht0 [eax+96]
movups xmm0,xmmword ptr[eax]
movaps xmmword ptr[edx],xmm0
movups xmm1,xmmword ptr[eax+4*4]
@@ -363,10 +372,11 @@ loopua:
// unaligned src, unaligned dst
__asm {
mov eax,dword ptr [src]
- prefetcht0 [eax]
mov edx,dword ptr [dst]
mov ecx,[n]
loopuu:
+ prefetcht0 [eax+64]
+ prefetcht0 [eax+96]
movups xmm0,xmmword ptr[eax]
movups xmmword ptr[edx],xmm0
movups xmm1,xmmword ptr[eax+4*4]
@@ -502,6 +512,10 @@ void flext::MulSamples(t_sample *dst,const t_sample *src,t_sample op,int cnt)
cnt -= n<<4;
__asm {
+ mov eax,dword ptr [src]
+ prefetcht0 [eax+0]
+ prefetcht0 [eax+32]
+
movss xmm0,xmmword ptr [op]
shufps xmm0,xmm0,0
}
@@ -515,6 +529,9 @@ void flext::MulSamples(t_sample *dst,const t_sample *src,t_sample op,int cnt)
mov eax,dword ptr [src]
mov edx,dword ptr [dst]
loopa:
+ prefetcht0 [eax+64]
+ prefetcht0 [eax+96]
+
movaps xmm1,xmmword ptr[eax]
mulps xmm1,xmm0
movaps xmmword ptr[edx],xmm1
@@ -543,6 +560,9 @@ loopa:
mov eax,dword ptr [src]
mov edx,dword ptr [dst]
loopu:
+ prefetcht0 [eax+64]
+ prefetcht0 [eax+96]
+
movups xmm1,xmmword ptr[eax]
mulps xmm1,xmm0
movups xmmword ptr[edx],xmm1
@@ -619,75 +639,171 @@ void flext::MulSamples(t_sample *dst,const t_sample *src,const t_sample *op,int
int n = cnt>>4;
cnt -= n<<4;
+ __asm {
+ mov eax,[src]
+ mov ebx,[op]
+ prefetcht0 [eax+0]
+ prefetcht0 [ebx+0]
+ prefetcht0 [eax+32]
+ prefetcht0 [ebx+32]
+ }
+
if((reinterpret_cast<unsigned long>(src)&(__alignof(__m128)-1)) == 0
&& (reinterpret_cast<unsigned long>(dst)&(__alignof(__m128)-1)) == 0
- && (reinterpret_cast<unsigned long>(op)&(__alignof(__m128)-1)) == 0
- ) {
- // aligned version
- __asm {
- mov ecx,[n]
- mov eax,dword ptr [src]
- mov edx,dword ptr [dst]
- mov ebx,dword ptr [op]
-loopa:
- movaps xmm0,xmmword ptr[eax]
- movaps xmm1,xmmword ptr[ebx]
- mulps xmm0,xmm1
- movaps xmmword ptr[edx],xmm0
+ ) {
+ if((reinterpret_cast<unsigned long>(op)&(__alignof(__m128)-1)) == 0) {
+ __asm {
+ mov ecx,[n]
+ mov eax,dword ptr [src]
+ mov edx,dword ptr [dst]
+ mov ebx,dword ptr [op]
+ loopaa:
+ prefetcht0 [eax+64]
+ prefetcht0 [ebx+64]
+ prefetcht0 [eax+96]
+ prefetcht0 [ebx+96]
- movaps xmm2,xmmword ptr[eax+4*4]
- movaps xmm3,xmmword ptr[ebx+4*4]
- mulps xmm2,xmm3
- movaps xmmword ptr[edx+4*4],xmm2
+ movaps xmm0,xmmword ptr[eax]
+ movaps xmm1,xmmword ptr[ebx]
+ mulps xmm0,xmm1
+ movaps xmmword ptr[edx],xmm0
- movaps xmm4,xmmword ptr[eax+8*4]
- movaps xmm5,xmmword ptr[ebx+8*4]
- mulps xmm4,xmm5
- movaps xmmword ptr[edx+8*4],xmm4
+ movaps xmm2,xmmword ptr[eax+4*4]
+ movaps xmm3,xmmword ptr[ebx+4*4]
+ mulps xmm2,xmm3
+ movaps xmmword ptr[edx+4*4],xmm2
- movaps xmm6,xmmword ptr[eax+12*4]
- movaps xmm7,xmmword ptr[ebx+12*4]
- mulps xmm6,xmm7
- movaps xmmword ptr[edx+12*4],xmm6
+ movaps xmm4,xmmword ptr[eax+8*4]
+ movaps xmm5,xmmword ptr[ebx+8*4]
+ mulps xmm4,xmm5
+ movaps xmmword ptr[edx+8*4],xmm4
- add eax,16*4
- add ebx,16*4
- add edx,16*4
- loop loopa
+ movaps xmm6,xmmword ptr[eax+12*4]
+ movaps xmm7,xmmword ptr[ebx+12*4]
+ mulps xmm6,xmm7
+ movaps xmmword ptr[edx+12*4],xmm6
+
+ add eax,16*4
+ add ebx,16*4
+ add edx,16*4
+ loop loopaa
+ }
}
- }
+ else {
+ __asm {
+ mov ecx,[n]
+ mov eax,dword ptr [src]
+ mov edx,dword ptr [dst]
+ mov ebx,dword ptr [op]
+ loopau:
+ prefetcht0 [eax+64]
+ prefetcht0 [ebx+64]
+ prefetcht0 [eax+96]
+ prefetcht0 [ebx+96]
+
+ movaps xmm0,xmmword ptr[eax]
+ movups xmm1,xmmword ptr[ebx]
+ mulps xmm0,xmm1
+ movaps xmmword ptr[edx],xmm0
+
+ movaps xmm2,xmmword ptr[eax+4*4]
+ movups xmm3,xmmword ptr[ebx+4*4]
+ mulps xmm2,xmm3
+ movaps xmmword ptr[edx+4*4],xmm2
+
+ movaps xmm4,xmmword ptr[eax+8*4]
+ movups xmm5,xmmword ptr[ebx+8*4]
+ mulps xmm4,xmm5
+ movaps xmmword ptr[edx+8*4],xmm4
+
+ movaps xmm6,xmmword ptr[eax+12*4]
+ movups xmm7,xmmword ptr[ebx+12*4]
+ mulps xmm6,xmm7
+ movaps xmmword ptr[edx+12*4],xmm6
+
+ add eax,16*4
+ add ebx,16*4
+ add edx,16*4
+ loop loopau
+ }
+ }
+ }
else {
- // unaligned version
- __asm {
- mov ecx,[n]
- mov eax,dword ptr [src]
- mov edx,dword ptr [dst]
- mov ebx,dword ptr [op]
-loopu:
- movups xmm0,xmmword ptr[eax]
- movups xmm1,xmmword ptr[ebx]
- mulps xmm0,xmm1
- movups xmmword ptr[edx],xmm0
+ if((reinterpret_cast<unsigned long>(op)&(__alignof(__m128)-1)) == 0) {
+ __asm {
+ mov ecx,[n]
+ mov eax,dword ptr [src]
+ mov edx,dword ptr [dst]
+ mov ebx,dword ptr [op]
+ loopua:
+ prefetcht0 [eax+64]
+ prefetcht0 [ebx+64]
+ prefetcht0 [eax+96]
+ prefetcht0 [ebx+96]
- movups xmm2,xmmword ptr[eax+4*4]
- movups xmm3,xmmword ptr[ebx+4*4]
- mulps xmm2,xmm3
- movups xmmword ptr[edx+4*4],xmm2
+ movups xmm0,xmmword ptr[eax]
+ movaps xmm1,xmmword ptr[ebx]
+ mulps xmm0,xmm1
+ movups xmmword ptr[edx],xmm0
- movups xmm4,xmmword ptr[eax+8*4]
- movups xmm5,xmmword ptr[ebx+8*4]
- mulps xmm4,xmm5
- movups xmmword ptr[edx+8*4],xmm4
+ movups xmm2,xmmword ptr[eax+4*4]
+ movaps xmm3,xmmword ptr[ebx+4*4]
+ mulps xmm2,xmm3
+ movups xmmword ptr[edx+4*4],xmm2
- movups xmm6,xmmword ptr[eax+12*4]
- movups xmm7,xmmword ptr[ebx+12*4]
- mulps xmm6,xmm7
- movups xmmword ptr[edx+12*4],xmm6
+ movups xmm4,xmmword ptr[eax+8*4]
+ movaps xmm5,xmmword ptr[ebx+8*4]
+ mulps xmm4,xmm5
+ movups xmmword ptr[edx+8*4],xmm4
- add eax,16*4
- add ebx,16*4
- add edx,16*4
- loop loopu
+ movups xmm6,xmmword ptr[eax+12*4]
+ movaps xmm7,xmmword ptr[ebx+12*4]
+ mulps xmm6,xmm7
+ movups xmmword ptr[edx+12*4],xmm6
+
+ add eax,16*4
+ add ebx,16*4
+ add edx,16*4
+ loop loopua
+ }
+ }
+ else {
+ __asm {
+ mov ecx,[n]
+ mov eax,dword ptr [src]
+ mov edx,dword ptr [dst]
+ mov ebx,dword ptr [op]
+loopuu:
+ prefetcht0 [eax+64]
+ prefetcht0 [ebx+64]
+ prefetcht0 [eax+96]
+ prefetcht0 [ebx+96]
+
+ movups xmm0,xmmword ptr[eax]
+ movups xmm1,xmmword ptr[ebx]
+ mulps xmm0,xmm1
+ movups xmmword ptr[edx],xmm0
+
+ movups xmm2,xmmword ptr[eax+4*4]
+ movups xmm3,xmmword ptr[ebx+4*4]
+ mulps xmm2,xmm3
+ movups xmmword ptr[edx+4*4],xmm2
+
+ movups xmm4,xmmword ptr[eax+8*4]
+ movups xmm5,xmmword ptr[ebx+8*4]
+ mulps xmm4,xmm5
+ movups xmmword ptr[edx+8*4],xmm4
+
+ movups xmm6,xmmword ptr[eax+12*4]
+ movups xmm7,xmmword ptr[ebx+12*4]
+ mulps xmm6,xmm7
+ movups xmmword ptr[edx+12*4],xmm6
+
+ add eax,16*4
+ add ebx,16*4
+ add edx,16*4
+ loop loopuu
+ }
}
}
while(cnt--) *(dst++) = *(src++) * *(op++);
@@ -748,6 +864,10 @@ void flext::AddSamples(t_sample *dst,const t_sample *src,t_sample op,int cnt)
cnt -= n<<4;
__asm {
+ mov eax,[src]
+ prefetcht0 [eax+0]
+ prefetcht0 [eax+32]
+
movss xmm0,xmmword ptr [op]
shufps xmm0,xmm0,0
}
@@ -761,6 +881,9 @@ void flext::AddSamples(t_sample *dst,const t_sample *src,t_sample op,int cnt)
mov eax,dword ptr [src]
mov edx,dword ptr [dst]
loopa:
+ prefetcht0 [eax+64]
+ prefetcht0 [eax+96]
+
movaps xmm1,xmmword ptr[eax]
addps xmm1,xmm0
movaps xmmword ptr[edx],xmm1
@@ -789,6 +912,9 @@ loopa:
mov eax,dword ptr [src]
mov edx,dword ptr [dst]
loopu:
+ prefetcht0 [eax+64]
+ prefetcht0 [eax+96]
+
movups xmm1,xmmword ptr[eax]
addps xmm1,xmm0
movups xmmword ptr[edx],xmm1
@@ -867,79 +993,176 @@ void flext::AddSamples(t_sample *dst,const t_sample *src,const t_sample *op,int
#ifdef FLEXT_USE_SIMD
#ifdef _MSC_VER
if(GetSIMDCapabilities()&simd_sse) {
+ // Prefetch cache
+ __asm {
+ mov eax,dword ptr [src]
+ mov ebx,dword ptr [op]
+ prefetcht0 [eax]
+ prefetcht0 [ebx]
+ prefetcht0 [eax+32]
+ prefetcht0 [ebx+32]
+ }
+
// single precision
int n = cnt>>4;
cnt -= n<<4;
if((reinterpret_cast<unsigned long>(src)&(__alignof(__m128)-1)) == 0
&& (reinterpret_cast<unsigned long>(dst)&(__alignof(__m128)-1)) == 0
- && (reinterpret_cast<unsigned long>(op)&(__alignof(__m128)-1)) == 0
- ) {
- // aligned version
- __asm {
- mov ecx,dword ptr [n]
- mov eax,dword ptr [src]
- mov edx,dword ptr [dst]
- mov ebx,dword ptr [op]
-loopa:
- movaps xmm0,xmmword ptr[eax]
- movaps xmm1,xmmword ptr[ebx]
- addps xmm0,xmm1
- movaps xmmword ptr[edx],xmm0
+ ) {
+ if((reinterpret_cast<unsigned long>(op)&(__alignof(__m128)-1)) == 0) {
+ __asm {
+ mov ecx,dword ptr [n]
+ mov eax,dword ptr [src]
+ mov edx,dword ptr [dst]
+ mov ebx,dword ptr [op]
+ loopaa:
+ prefetcht0 [eax+64]
+ prefetcht0 [ebx+64]
+ prefetcht0 [eax+96]
+ prefetcht0 [ebx+96]
- movaps xmm2,xmmword ptr[eax+4*4]
- movaps xmm3,xmmword ptr[ebx+4*4]
- addps xmm2,xmm3
- movaps xmmword ptr[edx+4*4],xmm2
+ movaps xmm0,xmmword ptr[eax]
+ movaps xmm1,xmmword ptr[ebx]
+ addps xmm0,xmm1
+ movaps xmmword ptr[edx],xmm0
- movaps xmm4,xmmword ptr[eax+8*4]
- movaps xmm5,xmmword ptr[ebx+8*4]
- addps xmm4,xmm5
- movaps xmmword ptr[edx+8*4],xmm4
+ movaps xmm2,xmmword ptr[eax+4*4]
+ movaps xmm3,xmmword ptr[ebx+4*4]
+ addps xmm2,xmm3
+ movaps xmmword ptr[edx+4*4],xmm2
- movaps xmm6,xmmword ptr[eax+12*4]
- movaps xmm7,xmmword ptr[ebx+12*4]
- addps xmm6,xmm7
- movaps xmmword ptr[edx+12*4],xmm6
+ movaps xmm4,xmmword ptr[eax+8*4]
+ movaps xmm5,xmmword ptr[ebx+8*4]
+ addps xmm4,xmm5
+ movaps xmmword ptr[edx+8*4],xmm4
- add eax,16*4
- add ebx,16*4
- add edx,16*4
- loop loopa
+ movaps xmm6,xmmword ptr[eax+12*4]
+ movaps xmm7,xmmword ptr[ebx+12*4]
+ addps xmm6,xmm7
+ movaps xmmword ptr[edx+12*4],xmm6
+
+ add eax,16*4
+ add ebx,16*4
+ add edx,16*4
+ loop loopaa
+ }
+ }
+ else {
+ __asm {
+ mov ecx,dword ptr [n]
+ mov eax,dword ptr [src]
+ mov edx,dword ptr [dst]
+ mov ebx,dword ptr [op]
+ loopau:
+ prefetcht0 [eax+64]
+ prefetcht0 [ebx+64]
+ prefetcht0 [eax+96]
+ prefetcht0 [ebx+96]
+
+ movaps xmm0,xmmword ptr[eax]
+ movups xmm1,xmmword ptr[ebx]
+ addps xmm0,xmm1
+ movaps xmmword ptr[edx],xmm0
+
+ movaps xmm2,xmmword ptr[eax+4*4]
+ movups xmm3,xmmword ptr[ebx+4*4]
+ addps xmm2,xmm3
+ movaps xmmword ptr[edx+4*4],xmm2
+
+ movaps xmm4,xmmword ptr[eax+8*4]
+ movups xmm5,xmmword ptr[ebx+8*4]
+ addps xmm4,xmm5
+ movaps xmmword ptr[edx+8*4],xmm4
+
+ movaps xmm6,xmmword ptr[eax+12*4]
+ movups xmm7,xmmword ptr[ebx+12*4]
+ addps xmm6,xmm7
+ movaps xmmword ptr[edx+12*4],xmm6
+
+ add eax,16*4
+ add ebx,16*4
+ add edx,16*4
+ loop loopau
+ }
}
}
else {
- // unaligned version
- __asm {
- mov ecx,dword ptr [n]
- mov eax,dword ptr [src]
- mov edx,dword ptr [dst]
- mov ebx,dword ptr [op]
-loopu:
- movups xmm0,xmmword ptr[eax]
- movups xmm1,xmmword ptr[ebx]
- addps xmm0,xmm1
- movups xmmword ptr[edx],xmm0
+ if((reinterpret_cast<unsigned long>(op)&(__alignof(__m128)-1)) == 0) {
+ __asm {
+ mov ecx,dword ptr [n]
+ mov eax,dword ptr [src]
+ mov edx,dword ptr [dst]
+ mov ebx,dword ptr [op]
+ loopua:
+ prefetcht0 [eax+64]
+ prefetcht0 [ebx+64]
+ prefetcht0 [eax+96]
+ prefetcht0 [ebx+96]
- movups xmm2,xmmword ptr[eax+4*4]
- movups xmm3,xmmword ptr[ebx+4*4]
- addps xmm2,xmm3
- movups xmmword ptr[edx+4*4],xmm2
+ movups xmm0,xmmword ptr[eax]
+ movaps xmm1,xmmword ptr[ebx]
+ addps xmm0,xmm1
+ movups xmmword ptr[edx],xmm0
- movups xmm4,xmmword ptr[eax+8*4]
- movups xmm5,xmmword ptr[ebx+8*4]
- addps xmm4,xmm5
- movups xmmword ptr[edx+8*4],xmm4
+ movups xmm2,xmmword ptr[eax+4*4]
+ movaps xmm3,xmmword ptr[ebx+4*4]
+ addps xmm2,xmm3
+ movups xmmword ptr[edx+4*4],xmm2
- movups xmm6,xmmword ptr[eax+12*4]
- movups xmm7,xmmword ptr[ebx+12*4]
- addps xmm6,xmm7
- movups xmmword ptr[edx+12*4],xmm6
+ movups xmm4,xmmword ptr[eax+8*4]
+ movaps xmm5,xmmword ptr[ebx+8*4]
+ addps xmm4,xmm5
+ movups xmmword ptr[edx+8*4],xmm4
- add eax,16*4
- add ebx,16*4
- add edx,16*4
- loop loopu
+ movups xmm6,xmmword ptr[eax+12*4]
+ movaps xmm7,xmmword ptr[ebx+12*4]
+ addps xmm6,xmm7
+ movups xmmword ptr[edx+12*4],xmm6
+
+ add eax,16*4
+ add ebx,16*4
+ add edx,16*4
+ loop loopua
+ }
+ }
+ else {
+ __asm {
+ mov ecx,dword ptr [n]
+ mov eax,dword ptr [src]
+ mov edx,dword ptr [dst]
+ mov ebx,dword ptr [op]
+ loopuu:
+ prefetcht0 [eax+64]
+ prefetcht0 [ebx+64]
+ prefetcht0 [eax+96]
+ prefetcht0 [ebx+96]
+
+ movups xmm0,xmmword ptr[eax]
+ movups xmm1,xmmword ptr[ebx]
+ addps xmm0,xmm1
+ movups xmmword ptr[edx],xmm0
+
+ movups xmm2,xmmword ptr[eax+4*4]
+ movups xmm3,xmmword ptr[ebx+4*4]
+ addps xmm2,xmm3
+ movups xmmword ptr[edx+4*4],xmm2
+
+ movups xmm4,xmmword ptr[eax+8*4]
+ movups xmm5,xmmword ptr[ebx+8*4]
+ addps xmm4,xmm5
+ movups xmmword ptr[edx+8*4],xmm4
+
+ movups xmm6,xmmword ptr[eax+12*4]
+ movups xmm7,xmmword ptr[ebx+12*4]
+ addps xmm6,xmm7
+ movups xmmword ptr[edx+12*4],xmm6
+
+ add eax,16*4
+ add ebx,16*4
+ add edx,16*4
+ loop loopuu
+ }
}
}
while(cnt--) *(dst++) = *(src++) + *(op++);
@@ -1008,6 +1231,10 @@ void flext::ScaleSamples(t_sample *dst,const t_sample *src,t_sample opmul,t_samp
cnt -= n<<4;
__asm {
+ mov eax,dword ptr [src]
+ prefetcht0 [eax+0]
+ prefetcht0 [eax+32]
+
movss xmm0,xmmword ptr [opadd]
shufps xmm0,xmm0,0
movss xmm1,xmmword ptr [opmul]
@@ -1023,6 +1250,9 @@ void flext::ScaleSamples(t_sample *dst,const t_sample *src,t_sample opmul,t_samp
mov eax,dword ptr [src]
mov edx,dword ptr [dst]
loopa:
+ prefetcht0 [eax+64]
+ prefetcht0 [eax+96]
+
movaps xmm2,xmmword ptr[eax]
mulps xmm2,xmm1
addps xmm2,xmm0
@@ -1055,6 +1285,9 @@ loopa:
mov eax,dword ptr [src]
mov edx,dword ptr [dst]
loopu:
+ prefetcht0 [eax+64]
+ prefetcht0 [eax+96]
+
movups xmm2,xmmword ptr[eax]
mulps xmm2,xmm1
addps xmm2,xmm0
diff --git a/externals/grill/vasp/vasp.vcproj b/externals/grill/vasp/vasp.vcproj
index f6b7ac0d..0723fa7f 100644
--- a/externals/grill/vasp/vasp.vcproj
+++ b/externals/grill/vasp/vasp.vcproj
@@ -23,7 +23,7 @@
<Tool
Name="VCCLCompilerTool"
Optimization="0"
- AdditionalIncludeDirectories="c:\programme\audio\pd\src,f:\prog\max\flext\source"
+ AdditionalIncludeDirectories="f:\prog\pd\pd-cvs\src,f:\prog\max\flext\source"
PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;FLEXT_SYS=2;VASP_COMPACT"
BasicRuntimeChecks="3"
RuntimeLibrary="5"
diff --git a/externals/grill/xsample/source/groove.cpp b/externals/grill/xsample/source/groove.cpp
index 41f2248b..4759812c 100644
--- a/externals/grill/xsample/source/groove.cpp
+++ b/externals/grill/xsample/source/groove.cpp
@@ -93,7 +93,11 @@ private:
DEFSIGFUN(s_pos_off);
DEFSIGFUN(s_pos_once);
+ DEFSIGFUN(s_pos_c_once);
+ DEFSIGFUN(s_pos_a_once);
DEFSIGFUN(s_pos_loop);
+ DEFSIGFUN(s_pos_c_loop);
+ DEFSIGFUN(s_pos_a_loop);
DEFSIGFUN(s_pos_loopzn);
DEFSIGFUN(s_pos_bidir);
@@ -132,7 +136,7 @@ private:
};
-FLEXT_LIB_DSP_V("xgroove~",xgroove)
+FLEXT_LIB_DSP_V("xgroove~",xgroove)
V xgroove::setup(t_classid c)
@@ -419,7 +423,7 @@ V xgroove::s_pos_once(I n,S *const *invecs,S *const *outvecs)
S *pos = outvecs[outchns];
BL lpbang = false;
- const I smin = curmin,smax = curmax,plen = smax-smin; //curlen;
+ const D smin = curmin,smax = curmax,plen = smax-smin; //curlen;
if(buf && plen > 0) {
register D o = curpos;
@@ -446,6 +450,48 @@ V xgroove::s_pos_once(I n,S *const *invecs,S *const *outvecs)
if(lpbang) ToOutBang(outchns+3);
}
+// Constant-speed variant of the "once" (non-looping) position generator.
+// The speed is read a single time from the first sample of invecs[0]. The
+// position is advanced by that amount per sample and pinned to the range
+// [curmin,curmax]; whenever it gets pinned a bang is sent on outlet outchns+3
+// after the block has been processed.
+// \TODO exploit the constant speed to optimize this loop
+V xgroove::s_pos_c_once(I n,S *const *invecs,S *const *outvecs)
+{
+	const S spd = *invecs[0];
+	S *pos = outvecs[outchns];
+
+	const D smin = curmin;
+	const D smax = curmax;
+	const D plen = smax-smin;
+
+	// no buffer or empty play region: delegate to the "off" handler and leave
+	if(!(buf && plen > 0)) {
+		s_pos_off(n,invecs,outvecs);
+		return;
+	}
+
+	BL lpbang = false;
+	D o = curpos;
+	S *wr = pos;
+
+	for(I left = n; left > 0; --left) {
+		// pin the position to the region borders and remember that a border was hit
+		if(o < smin) { o = smin; lpbang = true; }
+		else if(o >= smax) { o = smax; lpbang = true; }
+
+		*wr++ = o;
+		o += spd;
+	}
+
+	// normalize and store current playing position
+	setpos(o);
+
+	// pos serves as the position input of playfun and is rescaled in place afterwards
+	playfun(n,&pos,outvecs);
+	arrscale(n,pos,pos);
+
+	if(lpbang) ToOutBang(outchns+3);
+}
+
+// Dispatcher for "once" playback: forwards the block either to the
+// constant-speed routine (s_pos_c_once) or to the per-sample routine (s_pos_once).
+// Only the first and the last sample of the speed vector invecs[0] are compared,
+// so a vector whose end points match is treated as constant even if it varies
+// in between.
+V xgroove::s_pos_a_once(I n,S *const *invecs,S *const *outvecs)
+{
+	const S *speed = invecs[0];
+	// n > 0 keeps speed[n-1] inside the vector; an empty block takes the per-sample path
+	if(n > 0 && speed[0] == speed[n-1])
+		// assume constant speed
+		s_pos_c_once(n,invecs,outvecs);
+	else
+		s_pos_once(n,invecs,outvecs);
+}
+
V xgroove::s_pos_loop(I n,S *const *invecs,S *const *outvecs)
{
const S *speed = invecs[0];
@@ -486,6 +532,55 @@ V xgroove::s_pos_loop(I n,S *const *invecs,S *const *outvecs)
if(lpbang) ToOutBang(outchns+3);
}
+// Constant-speed variant of the looping position generator.
+// The speed is read a single time from the first sample of invecs[0]. The
+// position is advanced by that amount per sample and wrapped back into the
+// region between curmin and curmax whenever it leaves it; each wrap flags a
+// bang that is sent on outlet outchns+3 after the block has been processed.
+// \TODO exploit the constant speed to optimize this loop
+V xgroove::s_pos_c_loop(I n,S *const *invecs,S *const *outvecs)
+{
+	const S spd = *invecs[0];
+	S *pos = outvecs[outchns];
+
+	const D smin = curmin;
+	const D smax = curmax;
+	const D plen = smax-smin;
+
+	// no buffer or empty loop region: delegate to the "off" handler and leave
+	if(!(buf && plen > 0)) {
+		s_pos_off(n,invecs,outvecs);
+		return;
+	}
+
+	BL lpbang = false;
+	D o = curpos;
+	S *wr = pos;
+
+	for(I left = n; left > 0; --left) {
+		// upper border test is written as a negated "less than"
+		const BL over = !(o < smax);
+		if(over || o < smin) {
+			// fmod keeps the sign of its first argument, hence a position at or
+			// beyond smax is re-based on smin and one below smin on smax
+			o = fmod(o-smin,plen)+(over?smin:smax);
+			lpbang = true;
+		}
+
+		*wr++ = o;
+		o += spd;
+	}
+
+	// normalize and store current playing position
+	setpos(o);
+
+	// pos serves as the position input of playfun and is rescaled in place afterwards
+	playfun(n,&pos,outvecs);
+	arrscale(n,pos,pos);
+
+	if(lpbang) ToOutBang(outchns+3);
+}
+
+// Dispatcher for looped playback: forwards the block either to the
+// constant-speed routine (s_pos_c_loop) or to the per-sample routine (s_pos_loop).
+// Only the first and the last sample of the speed vector invecs[0] are compared,
+// so a vector whose end points match is treated as constant even if it varies
+// in between.
+V xgroove::s_pos_a_loop(I n,S *const *invecs,S *const *outvecs)
+{
+	const S *speed = invecs[0];
+	// n > 0 keeps speed[n-1] inside the vector; an empty block takes the per-sample path
+	if(n > 0 && speed[0] == speed[n-1])
+		// assume constant speed
+		s_pos_c_loop(n,invecs,outvecs);
+	else
+		s_pos_loop(n,invecs,outvecs);
+}
+
V xgroove::s_pos_loopzn(I n,S *const *invecs,S *const *outvecs)
{
const S *speed = invecs[0];
diff --git a/externals/grill/xsample/source/inter.h b/externals/grill/xsample/source/inter.h
index 602bfd49..3f8ea5f8 100755
--- a/externals/grill/xsample/source/inter.h
+++ b/externals/grill/xsample/source/inter.h
@@ -128,14 +128,14 @@ TMPLDEF V xinter::st_play4(const S *bdt,const I smin,const I smax,const I n,cons
register I oint = (I)o,ointm,oint1,oint2;
if(oint <= smin) {
- if(oint < smin) oint = smin,o = smin;
+ if(oint < smin) oint = smin,o = (float)smin;
// position is first simple
ointm = smin; // first sample
oint1 = oint+1;
oint2 = oint1+1;
}
else if(oint >= maxo-2) {
- if(oint > maxo) oint = maxo,o = smax;
+ if(oint > maxo) oint = maxo,o = (float)smax;
ointm = oint-1;
oint1 = oint >= maxo?maxo:oint+1;
oint2 = oint1 >= maxo?maxo:oint1+1;
diff --git a/externals/grill/xsample/source/main.h b/externals/grill/xsample/source/main.h
index 6dce2711..b3b8a499 100644
--- a/externals/grill/xsample/source/main.h
+++ b/externals/grill/xsample/source/main.h
@@ -12,7 +12,7 @@ WARRANTIES, see the file, "license.txt," in this distribution.
#define __XSAMPLE_H
-#define XSAMPLE_VERSION "0.3.0pre11"
+#define XSAMPLE_VERSION "0.3.0pre12"
#define FLEXT_ATTRIBUTES 1