aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tim Blechmann <timblech@users.sourceforge.net> 2004-12-28 15:48:19 +0000
committer IOhannes m zmölnig <zmoelnig@iem.at> 2015-10-14 15:11:58 +0200
commit 2f98df88850ab893c7acf8ea2b9000c03c2e17da (patch)
tree e5e2ed18e2aca6b09f979899a3c1e3cc7d7a9f94
parent 43dd4efedf1ecfe721cde5830bdcee67ffa48907 (diff)
simd-optimized ramp
svn path=/trunk/externals/tb/; revision=2435
-rw-r--r-- volctl~/makefile 2
-rw-r--r-- volctl~/readme.txt 3
-rw-r--r-- volctl~/volctl~.c 381
3 files changed, 229 insertions, 157 deletions
diff --git a/volctl~/makefile b/volctl~/makefile
index 9b4cc04..3457263 100644
--- a/volctl~/makefile
+++ b/volctl~/makefile
@@ -62,7 +62,7 @@ pd_linux: $(NAME).pd_linux
LINUXCFLAGS = -DPD -O3 -fPIC -funroll-loops -fomit-frame-pointer \
-Wall -W -Wshadow -Wstrict-prototypes -Werror \
- -Wno-unused -Wno-parentheses -Wno-switch
+ -Wno-unused -Wno-parentheses -Wno-switch
LINUXINCLUDE = -I../../src
diff --git a/volctl~/readme.txt b/volctl~/readme.txt
index 5707467..4efff34 100644
--- a/volctl~/readme.txt
+++ b/volctl~/readme.txt
@@ -8,7 +8,7 @@ volctl~ is doing more or less the same as
|line~ 0 10
| | | | |
|*~ 0| = |volctl 0 10|
-| |
+| |
except that it is faster
@@ -20,5 +20,4 @@ volctl~ will only probably only compile against pd>=devel_0_37 with gcc.
i'm not planing to do a port to win/osx or any pd without aligned dsp blocks...
todo:
-- write complete volctl_perform_simd function in assembler
- check icc's segfault \ No newline at end of file
diff --git a/volctl~/volctl~.c b/volctl~/volctl~.c
index 9ef96de..60abfac 100644
--- a/volctl~/volctl~.c
+++ b/volctl~/volctl~.c
@@ -1,31 +1,30 @@
/* Copyright (c) 2004 Tim Blechmann.
- *For information on usage and redistribution, and for a DISCLAIMER OF ALL
- *WARRANTIES, see the file, "gpl.txt" in this distribution.
+ * For information on usage and redistribution, and for a DISCLAIMER OF ALL
+ * WARRANTIES, see the file, "gpl.txt" in this distribution.
*
- *This program is free software; you can redistribute it and/or
- *modify it under the terms of the GNU General Public License
- *as published by the Free Software Foundation; either version 2
- *of the License, or (at your option) any later version.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
*
- *See file LICENSE for further informations on licensing terms.
+ * See file LICENSE for further informations on licensing terms.
*
- *This program is distributed in the hope that it will be useful,
- *but WITHOUT ANY WARRANTY; without even the implied warranty of
- *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- *GNU General Public License for more details.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
*
- *You should have received a copy of the GNU General Public License
- *along with this program; if not, write to the Free Software
- *Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
- *Based on PureData by Miller Puckette and others.
+ * Based on PureData by Miller Puckette and others.
*
- * coded while listening to: Julien Ottavi: Nervure Magnetique
+ * coded while listening to: Julien Ottavi: Nervure Magnetique
* */
#include "m_pd.h"
-
#include "m_simd.h"
@@ -36,15 +35,16 @@ static t_class *volctl_class;
typedef struct _volctl
{
t_object x_obj;
- float x_f;
+ t_float x_f;
- float x_h; //interpolation time
- float x_value; //current factor
+ t_float x_h; //interpolation time
+ t_float x_value; //current factor
int x_ticksleft; //ticks to go
- float x_mspersample; //ms per sample
- float x_slope; //slope
-
+ t_float x_samples_per_ms; //samples per ms
+ t_float x_slope; //slope
+ t_float * x_slopes; //slopes for simd
+ t_float x_slope_step;
int x_line;
} t_volctl;
@@ -54,7 +54,7 @@ void *volctl_new(t_symbol *s, int argc, t_atom *argv)
if (argc > 2) post("volctl~: extra arguments ignored");
t_volctl *x = (t_volctl *)pd_new(volctl_class);
- inlet_new(&x->x_obj, &x->x_obj.ob_pd, gensym("float"), gensym("f1"));
+ inlet_new(&x->x_obj, &x->x_obj.ob_pd, &s_float, gensym("f1"));
inlet_settip(x->x_obj.ob_inlet,gensym("factor"));
x->x_value = atom_getfloatarg(0, argc, argv);
@@ -62,13 +62,22 @@ void *volctl_new(t_symbol *s, int argc, t_atom *argv)
inlet_settip(time,gensym("interpolation_time"));
x->x_h = atom_getfloatarg(1, argc, argv);
- x->x_mspersample = 1000.f / 44100; // assume default samplerate
+ x->x_samples_per_ms = 44100.f / 1000.f; // assume default samplerate
outlet_new(&x->x_obj, &s_signal);
x->x_f = 0;
+
+ x->x_slopes = getalignedbytes(4*sizeof(t_float));
+
return (x);
}
+static void volctl_free(t_volctl *x)
+{
+ freealignedbytes(x->x_slopes, 4*sizeof(t_float));
+}
+
+
t_int *volctl_perform(t_int *w)
{
t_volctl * x = (t_volctl *)(w[1]);
@@ -76,40 +85,40 @@ t_int *volctl_perform(t_int *w)
t_float *out = (t_float *)(w[3]);
int n = (int)(w[4]);
- float f = x->x_value;
+ t_float f = x->x_value;
if (x->x_ticksleft)
{
- float x_slope = x->x_slope;
- if (x->x_ticksleft < n)
- {
- int remain = x->x_ticksleft;
- n-=remain;
- while (remain--)
- {
- f+=x_slope;
- *out++ = *in++ * f;
- }
- while (n--)
- {
- *out++ = *in++ * f;
- }
- x->x_value = f;
- x->x_ticksleft = 0;
- }
- else
- {
- x->x_ticksleft -=n;
- while (n--)
- {
- f+=x_slope;
- *out++ = *in++ * f;
- }
- x->x_value = f;
- }
+ t_float x_slope = x->x_slope;
+ if (x->x_ticksleft < n)
+ {
+ int remain = x->x_ticksleft;
+ n-=remain;
+ while (remain--)
+ {
+ f+=x_slope;
+ *out++ = *in++ * f;
+ }
+ while (n--)
+ {
+ *out++ = *in++ * f;
+ }
+ x->x_value = f;
+ x->x_ticksleft = 0;
+ }
+ else
+ {
+ x->x_ticksleft -=n;
+ while (n--)
+ {
+ f+=x_slope;
+ *out++ = *in++ * f;
+ }
+ x->x_value = f;
+ }
}
else
- while (n--) *out++ = *in++ * f;
+ while (n--) *out++ = *in++ * f;
return (w+5);
}
@@ -122,48 +131,63 @@ t_int *volctl_perf8(t_int *w)
t_float *out = (t_float *)(w[3]);
int n = (int)(w[4]);
- float f = x->x_value;
+ t_float f = x->x_value;
if (x->x_ticksleft)
{
- float x_slope = x->x_slope;
- if (x->x_ticksleft < n)
- {
- int remain = x->x_ticksleft;
- n-=remain;
- while (remain--)
- {
- *out++ = *in++ * f;
- f+=x_slope;
- }
- while (n--)
- {
- *out++ = *in++ * f;
- }
- x->x_value = f;
- x->x_ticksleft = 0;
- }
- else
- {
- x->x_ticksleft -= n;
- while (n--)
- {
- *out++ = *in++ * f;
- f+=x_slope;
- }
- x->x_value = f;
- }
+ t_float x_slope = x->x_slope;
+ if (x->x_ticksleft < n)
+ {
+ int remain = x->x_ticksleft;
+ n-=remain;
+ while (remain--)
+ {
+ *out++ = *in++ * f;
+ f+=x_slope;
+ }
+ while (n--)
+ {
+ *out++ = *in++ * f;
+ }
+ x->x_value = f;
+ x->x_ticksleft = 0;
+ }
+ else
+ {
+ x->x_ticksleft -= n;
+ n = n>>3;
+ while (n--)
+ {
+ *out++ = *in++ * f;
+ f+=x_slope;
+ *out++ = *in++ * f;
+ f+=x_slope;
+ *out++ = *in++ * f;
+ f+=x_slope;
+ *out++ = *in++ * f;
+ f+=x_slope;
+ *out++ = *in++ * f;
+ f+=x_slope;
+ *out++ = *in++ * f;
+ f+=x_slope;
+ *out++ = *in++ * f;
+ f+=x_slope;
+ *out++ = *in++ * f;
+ f+=x_slope;
+ }
+ x->x_value = f;
+ }
}
else
{
- for (; n; n -= 8, in += 8, out += 8)
- {
- float f0 = in[0], f1 = in[1], f2 = in[2], f3 = in[3];
- float f4 = in[4], f5 = in[5], f6 = in[6], f7 = in[7];
+ for (; n; n -= 8, in += 8, out += 8)
+ {
+ t_float f0 = in[0], f1 = in[1], f2 = in[2], f3 = in[3];
+ t_float f4 = in[4], f5 = in[5], f6 = in[6], f7 = in[7];
- out[0] = f0 * f; out[1] = f1 * f; out[2] = f2 * f; out[3] = f3 * f;
- out[4] = f4 * f; out[5] = f5 * f; out[6] = f6 * f; out[7] = f7 * f;
- }
+ out[0] = f0 * f; out[1] = f1 * f; out[2] = f2 * f; out[3] = f3 * f;
+ out[4] = f4 * f; out[5] = f5 * f; out[6] = f6 * f; out[7] = f7 * f;
+ }
}
return (w+5);
}
@@ -176,67 +200,106 @@ t_int *volctl_perf_simd(t_int *w)
if (x->x_ticksleft)
{
- int n = (int)(w[4]);
+ int n = (int)(w[4]);
- float f = x->x_value;
+ t_float x_slope = x->x_slope;
+ if (x->x_ticksleft < n)
+ {
+ t_float f = x->x_value;
+
+ int remain = x->x_ticksleft;
+ n-=remain;
+ while (remain--)
+ {
+ *out++ = *in++ * f;
+ f+=x_slope;
+ }
+ while (n--)
+ {
+ *out++ = *in++ * f;
+ }
+ x->x_value = f;
+ x->x_ticksleft = 0;
+ }
+ else
+ {
+ x->x_ticksleft -= n;
+
+ asm(
+ ".set T_FLOAT,4 \n"
+ "movss (%3),%%xmm0 \n" /* value */
+ "shufps $0, %%xmm0, %%xmm0 \n"
+ "movaps (%4), %%xmm1 \n" /* x_slopes */
+ "addps %%xmm0, %%xmm1 \n"
- float x_slope = x->x_slope;
- if (x->x_ticksleft < n)
- {
- int remain = x->x_ticksleft;
- n-=remain;
- while (remain--)
- {
- *out++ = *in++ * f;
- f+=x_slope;
- }
- while (n--)
- {
- *out++ = *in++ * f;
- }
- x->x_value = f;
- x->x_ticksleft = 0;
- }
- else
- {
- x->x_ticksleft -= n;
- while (n--)
- {
- *out++ = *in++ * f;
- f+=x_slope;
- }
- x->x_value = f;
- }
+ "movss (%5), %%xmm0 \n"
+ "shufps $0, %%xmm0, %%xmm0 \n" /* x_slope_step */
+
+ "shrl $4, %2 \n" /* n>>4 */
+
+ "1: \n"
+ "movaps (%0), %%xmm2 \n"
+ "mulps %%xmm1, %%xmm2 \n"
+ "movaps %%xmm2, (%1) \n"
+ "addps %%xmm0, %%xmm1 \n"
+
+ "movaps 4*T_FLOAT(%0), %%xmm2 \n"
+ "mulps %%xmm1, %%xmm2 \n"
+ "movaps %%xmm2, 4*T_FLOAT(%1) \n"
+ "addps %%xmm0, %%xmm1 \n"
+
+ "movaps 8*T_FLOAT(%0), %%xmm2 \n"
+ "mulps %%xmm1, %%xmm2 \n"
+ "movaps %%xmm2, 8*T_FLOAT(%1) \n"
+ "addps %%xmm0, %%xmm1 \n"
+
+ "movaps 12*T_FLOAT(%0), %%xmm2 \n"
+ "mulps %%xmm1, %%xmm2 \n"
+ "movaps %%xmm2, 12*T_FLOAT(%1) \n"
+ "addps %%xmm0, %%xmm1 \n" /* one instr. obsolete */
+
+ "addl $16*T_FLOAT, %0 \n"
+ "addl $16*T_FLOAT, %1 \n"
+ "loop 1b \n"
+
+ :
+ :"r"(in), "r"(out), "c"(n), "r"(&(t_float)(x->x_value)),
+ "r"((t_float*)x->x_slopes), "r"(&(t_float)(x->x_slope_step))
+ :"%xmm0", "%xmm1", "%xmm2");
+
+/* post("value %f", x->x_value); */
+ x->x_value += n*(x->x_slope);
+ }
}
else
{
- asm(
- ".set T_FLOAT,4 \n"
-
- "movss (%3), %%xmm0 \n"
- "shufps $0, %%xmm0, %%xmm0 \n"
- "shrl $4, %2 \n"
-
- "volctl_loop: \n"
- "movaps (%0), %%xmm1 \n"
- "mulps %%xmm0, %%xmm1 \n"
- "movaps %%xmm1, (%1) \n"
- "movaps 4*T_FLOAT(%0), %%xmm2 \n"
- "mulps %%xmm0, %%xmm2 \n"
- "movaps %%xmm2, 4*T_FLOAT(%1) \n"
- "movaps 8*T_FLOAT(%0), %%xmm3 \n"
- "mulps %%xmm0, %%xmm3 \n"
- "movaps %%xmm3, 8*T_FLOAT(%1) \n"
- "movaps 12*T_FLOAT(%0), %%xmm4 \n"
- "mulps %%xmm0, %%xmm4 \n"
- "movaps %%xmm4, 12*T_FLOAT(%1) \n"
- "addl $64, %0 \n"
- "addl $64, %1 \n"
- "loop volctl_loop \n"
- :
- : "r"(in), "r"(out),
- "a"(w[4]),"r"(&(x->x_value))
- : "%xmm0","%xmm1","%xmm2","%xmm3","%xmm4");
+ asm(
+ ".set T_FLOAT,4 \n"
+
+ "movss (%3), %%xmm0 \n"
+ "shufps $0, %%xmm0, %%xmm0 \n"
+ "shrl $4, %2 \n"
+
+ "volctl_loop: \n"
+ "movaps (%0), %%xmm1 \n"
+ "mulps %%xmm0, %%xmm1 \n"
+ "movaps %%xmm1, (%1) \n"
+ "movaps 4*T_FLOAT(%0), %%xmm2 \n"
+ "mulps %%xmm0, %%xmm2 \n"
+ "movaps %%xmm2, 4*T_FLOAT(%1) \n"
+ "movaps 8*T_FLOAT(%0), %%xmm3 \n"
+ "mulps %%xmm0, %%xmm3 \n"
+ "movaps %%xmm3, 8*T_FLOAT(%1) \n"
+ "movaps 12*T_FLOAT(%0), %%xmm4 \n"
+ "mulps %%xmm0, %%xmm4 \n"
+ "movaps %%xmm4, 12*T_FLOAT(%1) \n"
+ "addl $16*T_FLOAT, %0 \n"
+ "addl $16*T_FLOAT, %1 \n"
+ "loop volctl_loop \n"
+ :
+ : "r"(in), "r"(out),
+ "c"(w[4]),"r"(&(t_float)(x->x_value))
+ : "%xmm0", "%xmm1","%xmm2","%xmm3","%xmm4");
}
return (w+5);
}
@@ -244,8 +307,18 @@ t_int *volctl_perf_simd(t_int *w)
void volctl_set(t_volctl *x, t_float f)
{
- x->x_ticksleft = x->x_h / x->x_mspersample;
- x->x_slope = (f - x->x_value) / x->x_ticksleft;
+ t_float slope;
+ int i;
+
+ x->x_ticksleft = x->x_h * x->x_samples_per_ms;
+ slope = (f - x->x_value) / x->x_ticksleft;
+ x->x_slope = slope;
+
+ for (i = 0; i != 4; ++i)
+ {
+ x->x_slopes[i] = i*slope;
+ }
+ x->x_slope_step = 4*slope;
}
void volctl_dsp(t_volctl *x, t_signal **sp)
@@ -255,18 +328,18 @@ void volctl_dsp(t_volctl *x, t_signal **sp)
dsp_add(volctl_perform, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
else
{
- if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
- dsp_add(volctl_perf_simd, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
- else
- dsp_add(volctl_perf8, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
+ if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
+ dsp_add(volctl_perf_simd, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
+ else
+ dsp_add(volctl_perf8, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
}
- x->x_mspersample = 1000.f / sp[0]->s_sr;
+ x->x_samples_per_ms = sp[0]->s_sr / 1000.f;
}
void volctl_tilde_setup(void)
{
- volctl_class = class_new(gensym("volctl~"), (t_newmethod)volctl_new, 0,
- sizeof(t_volctl), 0, A_GIMME, 0);
+ volctl_class = class_new(gensym("volctl~"), (t_newmethod)volctl_new,
+ (t_method)volctl_free, sizeof(t_volctl), 0, A_GIMME, 0);
CLASS_MAINSIGNALIN(volctl_class, t_volctl, x_f);
class_addmethod(volctl_class, (t_method)volctl_dsp, gensym("dsp"), 0);
class_addmethod(volctl_class, (t_method)volctl_set, gensym("f1"),A_FLOAT,0);