aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--volctl~/volctl~.c71
1 files changed, 50 insertions, 21 deletions
diff --git a/volctl~/volctl~.c b/volctl~/volctl~.c
index 1a9e451..e49c267 100644
--- a/volctl~/volctl~.c
+++ b/volctl~/volctl~.c
@@ -53,7 +53,7 @@ typedef struct _volctl
void *volctl_new(t_symbol *s, int argc, t_atom *argv)
{
- if (argc > 2) post("volctl~: extra arguments ignored");
+ if (argc > 3) post("volctl~: extra arguments ignored");
t_volctl *x = (t_volctl *)pd_new(volctl_class);
inlet_new(&x->x_obj, &x->x_obj.ob_pd, &s_float, gensym("f1"));
@@ -171,6 +171,7 @@ static t_int *volctl_perf8(t_int *w)
return (w+5);
}
+
static t_int *volctl_perf_simd(t_int *w)
{
t_volctl * x = (t_volctl *)(w[1]);
@@ -229,7 +230,53 @@ static t_int *volctl_perf_simd(t_int *w)
}
else
{
- if(x->x_target)
+ switch(x->x_target)
+ {
+ case 0:
+ asm(
+ ".set T_FLOAT,4 \n"
+
+ "xorps %%xmm0, %%xmm0 \n"
+ "shrl $4, %1 \n"
+
+ "1: \n"
+ "movaps %%xmm0, (%0) \n"
+ "movaps %%xmm0, 4*T_FLOAT(%0) \n"
+ "movaps %%xmm0, 8*T_FLOAT(%0) \n"
+ "movaps %%xmm0, 12*T_FLOAT(%0) \n"
+ "addl $16*T_FLOAT, %0 \n"
+ "loop 1b \n"
+ :
+ :"r"(out),
+ "c"(w[4])
+ : "%xmm0");
+ break;
+
+ case 1:
+ asm(
+ ".set T_FLOAT,4 \n"
+
+ "shrl $4, %1 \n"
+
+ "1: \n"
+ "movaps (%1), %%xmm0 \n"
+ "movaps 4*T_FLOAT(%1), %%xmm1 \n"
+ "movaps 8*T_FLOAT(%1), %%xmm2 \n"
+ "movaps 12*T_FLOAT(%1), %%xmm3 \n"
+ "movaps %%xmm0, (%2) \n"
+ "movaps %%xmm1, 4*T_FLOAT(%2) \n"
+ "movaps %%xmm2, 8*T_FLOAT(%2) \n"
+ "movaps %%xmm3, 12*T_FLOAT(%2) \n"
+
+ "addl $16*T_FLOAT,%1 \n"
+ "addl $16*T_FLOAT,%2 \n"
+ "loop 1b \n"
+ :
+ :"c"(w[4]),"r"(in),"r"(out)
+ :"%xmm0","%xmm1","%xmm2","%xmm3");
+ break;
+
+ default:
asm(
".set T_FLOAT,4 \n"
@@ -257,25 +304,7 @@ static t_int *volctl_perf_simd(t_int *w)
: "r"(in), "r"(out),
"c"(w[4]),"r"(&(t_float)(x->x_target))
: "%xmm0", "%xmm1","%xmm2","%xmm3","%xmm4");
- else
- asm(
- ".set T_FLOAT,4 \n"
-
- "xorps %%xmm0, %%xmm0 \n"
- "shrl $4, %1 \n"
-
- "1: \n"
- "movaps %%xmm0, (%0) \n"
- "movaps %%xmm0, 4*T_FLOAT(%0) \n"
- "movaps %%xmm0, 8*T_FLOAT(%0) \n"
- "movaps %%xmm0, 12*T_FLOAT(%0) \n"
- "addl $16*T_FLOAT, %0 \n"
- "loop 1b \n"
- :
- :"r"(out),
- "c"(w[4])
- : "%xmm0");
-
+ }
}
return (w+5);
}