From a0e797de4a660aa2af4b309fe258fb287e3d38e1 Mon Sep 17 00:00:00 2001 From: Tim Blechmann Date: Sat, 8 Jan 2005 22:13:53 +0000 Subject: improvement for zero factor ... x * 0 = 0 svn path=/trunk/externals/tb/; revision=2482 --- volctl~/volctl~.c | 319 ++++++++++++++++++++++++++---------------------------- 1 file changed, 151 insertions(+), 168 deletions(-) diff --git a/volctl~/volctl~.c b/volctl~/volctl~.c index 60abfac..1a9e451 100644 --- a/volctl~/volctl~.c +++ b/volctl~/volctl~.c @@ -39,14 +39,16 @@ typedef struct _volctl t_float x_h; //interpolation time t_float x_value; //current factor + t_float x_target; //target factor - int x_ticksleft; //ticks to go + int x_ticksleft; //dsp ticks to go t_float x_samples_per_ms; //ms per sample t_float x_slope; //slope t_float * x_slopes; //slopes for simd t_float x_slope_step; int x_line; - + int x_blocksize; + t_float x_1overblocksize; } t_volctl; void *volctl_new(t_symbol *s, int argc, t_atom *argv) @@ -63,6 +65,8 @@ void *volctl_new(t_symbol *s, int argc, t_atom *argv) x->x_h = atom_getfloatarg(1, argc, argv); x->x_samples_per_ms = 44100.f / 1000.f; // assume default samplerate + x->x_blocksize = 64; + x->x_1overblocksize = 1.f/64.f; outlet_new(&x->x_obj, &s_signal); x->x_f = 0; @@ -77,122 +81,97 @@ static void volctl_free(t_volctl *x) freealignedbytes(x->x_slopes, 4*sizeof(t_float)); } - -t_int *volctl_perform(t_int *w) +static t_int *volctl_perform(t_int *w) { t_volctl * x = (t_volctl *)(w[1]); t_float *in = (t_float *)(w[2]); t_float *out = (t_float *)(w[3]); int n = (int)(w[4]); - t_float f = x->x_value; if (x->x_ticksleft) { + t_float f = x->x_value; t_float x_slope = x->x_slope; - if (x->x_ticksleft < n) - { - int remain = x->x_ticksleft; - n-=remain; - while (remain--) - { - f+=x_slope; - *out++ = *in++ * f; - } - while (n--) - { - *out++ = *in++ * f; - } - x->x_value = f; - x->x_ticksleft = 0; - } - else + + x->x_ticksleft--; + while (n--) { - x->x_ticksleft -=n; - while (n--) - { - f+=x_slope; - *out++ = *in++ * f; - } - x->x_value = f; + f+=x_slope; + *out++ = *in++ * f; } + x->x_value = f; } else + { + t_float f = x->x_target; while (n--) *out++ = *in++ * f; + } return (w+5); } -t_int *volctl_perf8(t_int *w) +static t_int *volctl_perf8(t_int *w) { t_volctl * x = (t_volctl *)(w[1]); t_float *in = (t_float *)(w[2]); t_float *out = (t_float *)(w[3]); int n = (int)(w[4]); - t_float f = x->x_value; - if (x->x_ticksleft) { + t_float f = x->x_value; + t_float x_slope = x->x_slope; - if (x->x_ticksleft < n) + x->x_ticksleft--; + n = n>>3; + while (n--) { - int remain = x->x_ticksleft; - n-=remain; - while (remain--) - { - *out++ = *in++ * f; - f+=x_slope; - } - while (n--) + *out++ = *in++ * f; + f+=x_slope; + *out++ = *in++ * f; + f+=x_slope; + *out++ = *in++ * f; + f+=x_slope; + *out++ = *in++ * f; + f+=x_slope; + *out++ = *in++ * f; + f+=x_slope; + *out++ = *in++ * f; + f+=x_slope; + *out++ = *in++ * f; + f+=x_slope; + *out++ = *in++ * f; + f+=x_slope; + } + x->x_value = f; + } + else + { + t_float f = x->x_target; + + if (f) + for (; n; n -= 8, in += 8, out += 8) { - *out++ = *in++ * f; + t_float f0 = in[0], f1 = in[1], f2 = in[2], f3 = in[3]; + t_float f4 = in[4], f5 = in[5], f6 = in[6], f7 = in[7]; + + out[0] = f0 * f; out[1] = f1 * f; out[2] = f2 * f; out[3] = f3 * f; + out[4] = f4 * f; out[5] = f5 * f; out[6] = f6 * f; out[7] = f7 * f; } - x->x_value = f; - x->x_ticksleft = 0; - } else - { - x->x_ticksleft -= n; - n = n>>3; - while (n--) + for (; n; n -= 8, in += 8, out += 8) { - *out++ = *in++ * f; - f+=x_slope; - *out++ = *in++ * f; - f+=x_slope; - *out++ = *in++ * f; - f+=x_slope; - *out++ = *in++ * f; - f+=x_slope; - *out++ = *in++ * f; - f+=x_slope; - *out++ = *in++ * f; - f+=x_slope; - *out++ = *in++ * f; - f+=x_slope; - *out++ = *in++ * f; - f+=x_slope; + out[0] = 0; out[1] = 0; out[2] = 0; out[3] = 0; + out[4] = 0; out[5] = 0; out[6] = 0; out[7] = 0; } - x->x_value = f; - } - } - else - { - for (; n; n -= 8, in += 8, out += 8) - { - t_float f0 = in[0], f1 = in[1], f2 = in[2], f3 = in[3]; - t_float f4 = in[4], f5 = in[5], f6 = in[6], f7 = in[7]; - - out[0] = f0 * f; out[1] = f1 * f; out[2] = f2 * f; out[3] = f3 * f; - out[4] = f4 * f; out[5] = f5 * f; out[6] = f6 * f; out[7] = f7 * f; - } + } return (w+5); } -t_int *volctl_perf_simd(t_int *w) +static t_int *volctl_perf_simd(t_int *w) { t_volctl * x = (t_volctl *)(w[1]); t_float *in = (t_float *)(w[2]); @@ -202,116 +181,115 @@ t_int *volctl_perf_simd(t_int *w) { int n = (int)(w[4]); - t_float x_slope = x->x_slope; - if (x->x_ticksleft < n) - { - t_float f = x->x_value; + x->x_ticksleft--; + + asm( + ".set T_FLOAT,4 \n" + "movss (%3),%%xmm0 \n" /* value */ + "shufps $0, %%xmm0, %%xmm0 \n" + "movaps (%4), %%xmm1 \n" /* x_slopes */ + "addps %%xmm0, %%xmm1 \n" - int remain = x->x_ticksleft; - n-=remain; - while (remain--) - { - *out++ = *in++ * f; - f+=x_slope; - } - while (n--) - { - *out++ = *in++ * f; - } - x->x_value = f; - x->x_ticksleft = 0; - } - else - { - x->x_ticksleft -= n; + "movss (%5), %%xmm0 \n" + "shufps $0, %%xmm0, %%xmm0 \n" /* x_slope_step */ + + "shrl $4, %2 \n" /* n>>4 */ + + "1: \n" + "movaps (%0), %%xmm2 \n" + "mulps %%xmm1, %%xmm2 \n" + "movaps %%xmm2, (%1) \n" + "addps %%xmm0, %%xmm1 \n" + "movaps 4*T_FLOAT(%0), %%xmm2 \n" + "mulps %%xmm1, %%xmm2 \n" + "movaps %%xmm2, 4*T_FLOAT(%1) \n" + "addps %%xmm0, %%xmm1 \n" + + "movaps 8*T_FLOAT(%0), %%xmm2 \n" + "mulps %%xmm1, %%xmm2 \n" + "movaps %%xmm2, 8*T_FLOAT(%1) \n" + "addps %%xmm0, %%xmm1 \n" + + "movaps 12*T_FLOAT(%0), %%xmm2 \n" + "mulps %%xmm1, %%xmm2 \n" + "movaps %%xmm2, 12*T_FLOAT(%1) \n" + "addps %%xmm0, %%xmm1 \n" /* one instr. obsolete */ + + "addl $16*T_FLOAT, %0 \n" + "addl $16*T_FLOAT, %1 \n" + "loop 1b \n" + + : + :"r"(in), "r"(out), "c"(n), "r"(&(t_float)(x->x_value)), + "r"((t_float*)x->x_slopes), "r"(&(t_float)(x->x_slope_step)) + :"%xmm0", "%xmm1", "%xmm2"); + + x->x_value += n*(x->x_slope); + } + else + { + if(x->x_target) asm( ".set T_FLOAT,4 \n" - "movss (%3),%%xmm0 \n" /* value */ + + "movss (%3), %%xmm0 \n" "shufps $0, %%xmm0, %%xmm0 \n" - "movaps (%4), %%xmm1 \n" /* x_slopes */ - "addps %%xmm0, %%xmm1 \n" - - "movss (%5), %%xmm0 \n" - "shufps $0, %%xmm0, %%xmm0 \n" /* x_slope_step */ - - "shrl $4, %2 \n" /* n>>4 */ - + "shrl $4, %2 \n" + "1: \n" - "movaps (%0), %%xmm2 \n" - "mulps %%xmm1, %%xmm2 \n" - "movaps %%xmm2, (%1) \n" - "addps %%xmm0, %%xmm1 \n" - + "movaps (%0), %%xmm1 \n" + "mulps %%xmm0, %%xmm1 \n" + "movaps %%xmm1, (%1) \n" "movaps 4*T_FLOAT(%0), %%xmm2 \n" - "mulps %%xmm1, %%xmm2 \n" + "mulps %%xmm0, %%xmm2 \n" "movaps %%xmm2, 4*T_FLOAT(%1) \n" - "addps %%xmm0, %%xmm1 \n" - - "movaps 8*T_FLOAT(%0), %%xmm2 \n" - "mulps %%xmm1, %%xmm2 \n" - "movaps %%xmm2, 8*T_FLOAT(%1) \n" - "addps %%xmm0, %%xmm1 \n" - - "movaps 12*T_FLOAT(%0), %%xmm2 \n" - "mulps %%xmm1, %%xmm2 \n" - "movaps %%xmm2, 12*T_FLOAT(%1) \n" - "addps %%xmm0, %%xmm1 \n" /* one instr. obsolete */ - + "movaps 8*T_FLOAT(%0), %%xmm3 \n" + "mulps %%xmm0, %%xmm3 \n" + "movaps %%xmm3, 8*T_FLOAT(%1) \n" + "movaps 12*T_FLOAT(%0), %%xmm4 \n" + "mulps %%xmm0, %%xmm4 \n" + "movaps %%xmm4, 12*T_FLOAT(%1) \n" "addl $16*T_FLOAT, %0 \n" "addl $16*T_FLOAT, %1 \n" "loop 1b \n" - : - :"r"(in), "r"(out), "c"(n), "r"(&(t_float)(x->x_value)), - "r"((t_float*)x->x_slopes), "r"(&(t_float)(x->x_slope_step)) - :"%xmm0", "%xmm1", "%xmm2"); + : "r"(in), "r"(out), + "c"(w[4]),"r"(&(t_float)(x->x_target)) + : "%xmm0", "%xmm1","%xmm2","%xmm3","%xmm4"); + else + asm( + ".set T_FLOAT,4 \n" -/* post("value %f", x->x_value); */ - x->x_value += n*(x->x_slope); - } - } - else - { - asm( - ".set T_FLOAT,4 \n" + "xorps %%xmm0, %%xmm0 \n" + "shrl $4, %1 \n" - "movss (%3), %%xmm0 \n" - "shufps $0, %%xmm0, %%xmm0 \n" - "shrl $4, %2 \n" + "1: \n" + "movaps %%xmm0, (%0) \n" + "movaps %%xmm0, 4*T_FLOAT(%0) \n" + "movaps %%xmm0, 8*T_FLOAT(%0) \n" + "movaps %%xmm0, 12*T_FLOAT(%0) \n" + "addl $16*T_FLOAT, %0 \n" + "loop 1b \n" + : + :"r"(out), + "c"(w[4]) + : "%xmm0"); - "volctl_loop: \n" - "movaps (%0), %%xmm1 \n" - "mulps %%xmm0, %%xmm1 \n" - "movaps %%xmm1, (%1) \n" - "movaps 4*T_FLOAT(%0), %%xmm2 \n" - "mulps %%xmm0, %%xmm2 \n" - "movaps %%xmm2, 4*T_FLOAT(%1) \n" - "movaps 8*T_FLOAT(%0), %%xmm3 \n" - "mulps %%xmm0, %%xmm3 \n" - "movaps %%xmm3, 8*T_FLOAT(%1) \n" - "movaps 12*T_FLOAT(%0), %%xmm4 \n" - "mulps %%xmm0, %%xmm4 \n" - "movaps %%xmm4, 12*T_FLOAT(%1) \n" - "addl $16*T_FLOAT, %0 \n" - "addl $16*T_FLOAT, %1 \n" - "loop volctl_loop \n" - : - : "r"(in), "r"(out), - "c"(w[4]),"r"(&(t_float)(x->x_value)) - : "%xmm0", "%xmm1","%xmm2","%xmm3","%xmm4"); } return (w+5); } -void volctl_set(t_volctl *x, t_float f) +static void volctl_set(t_volctl *x, t_float f) { t_float slope; int i; - - x->x_ticksleft = x->x_h * x->x_samples_per_ms; - slope = (f - x->x_value) / x->x_ticksleft; + int samplesleft = x->x_h * x->x_samples_per_ms; + samplesleft += x->x_blocksize - ( samplesleft & (x->x_blocksize - 1)); + x->x_ticksleft = (int) (t_float)samplesleft * x->x_1overblocksize; + + slope = (f - x->x_value) / samplesleft; x->x_slope = slope; for (i = 0; i != 4; ++i) @@ -319,9 +297,11 @@ void volctl_set(t_volctl *x, t_float f) x->x_slopes[i] = i*slope; } x->x_slope_step = 4*slope; + + x->x_target = f; } -void volctl_dsp(t_volctl *x, t_signal **sp) +static void volctl_dsp(t_volctl *x, t_signal **sp) { const int n = sp[0]->s_n; if (n&7) @@ -333,7 +313,10 @@ void volctl_dsp(t_volctl *x, t_signal **sp) else dsp_add(volctl_perf8, 4, x, sp[0]->s_vec, sp[1]->s_vec, n); } - x->x_samples_per_ms = sp[0]->s_sr / 1000.f; + + x->x_blocksize = n; + x->x_1overblocksize = 1./n; + x->x_samples_per_ms = sp[0]->s_sr / 1000.f; } void volctl_tilde_setup(void) -- cgit v1.2.1