/* Copyright (c) 2004 Tim Blechmann.
 * For information on usage and redistribution, and for a DISCLAIMER OF ALL
 * WARRANTIES, see the file, "gpl.txt" in this distribution.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * See file LICENSE for further informations on licensing terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Based on PureData by Miller Puckette and others.
 *
 * coded while listening to: Julien Ottavi: Nervure Magnetique
 *
 */

#include "m_pd.h"
#include "m_simd.h"

/* ----------------------------- volctl ----------------------------- */
/* volctl~: multiplies a signal by a factor; a new factor is approached
 * with a linear ramp over the configured interpolation time. */

static t_class *volctl_class;

/* number of per-lane ramp offsets kept in x_slopes (one SSE vector) */
enum { VOLCTL_SLOPES = 4 };

typedef struct _volctl
{
    t_object x_obj;
    t_float x_f;                /* field registered with CLASS_MAINSIGNALIN */
    t_float x_h;                /* interpolation time (scaled by
                                 * x_samples_per_ms, i.e. milliseconds) */
    t_float x_value;            /* current factor */
    t_float x_target;           /* target factor */
    int x_ticksleft;            /* dsp ticks the running ramp still needs */
    t_float x_samples_per_ms;   /* samples per millisecond */
    t_float x_slope;            /* factor increment per sample */
    t_float *x_slopes;          /* {0, 1, 2, 3} * slope, for the simd code */
    t_float x_slope_step;       /* 4 * slope: increment per simd vector */
    int x_line;                 /* not referenced anywhere in this file */
    int x_blocksize;            /* block size seen by the last dsp call */
    t_float x_1overblocksize;   /* 1 / x_blocksize */
} t_volctl;

/* Creates a volctl~ object.
 * Creation arguments: [0] initial factor, [1] interpolation time.
 * NOTE(review): the warning below fires for more than three arguments
 * although only two are read -- confirm whether a third one is reserved. */
void *volctl_new(t_symbol *s, int argc, t_atom *argv)
{
    t_volctl *x;
    t_inlet *time;

    if (argc > 3)
        post("volctl~: extra arguments ignored");

    x = (t_volctl *)pd_new(volctl_class);

    inlet_new(&x->x_obj, &x->x_obj.ob_pd, &s_float, gensym("f1"));
    inlet_settip(x->x_obj.ob_inlet, gensym("factor"));
    x->x_value = atom_getfloatarg(0, argc, argv);
    /* the steady-state dsp path multiplies by x_target, so it has to start
     * out equal to the initial factor or the creation argument is ignored */
    x->x_target = x->x_value;
    x->x_ticksleft = 0;
    x->x_slope = 0;
    x->x_slope_step = 0;

    time = floatinlet_new(&x->x_obj, &x->x_h);
    inlet_settip(time, gensym("interpolation_time"));
    x->x_h = atom_getfloatarg(1, argc, argv);

    x->x_samples_per_ms = 44100.f / 1000.f; /* assume default samplerate */
    x->x_blocksize = 64;
    x->x_1overblocksize = 1.f / 64.f;

    outlet_new(&x->x_obj, &s_signal);
    x->x_f = 0;

    /* only read by the simd ramp, which runs after volctl_set filled it */
    x->x_slopes =
        (t_float *)getalignedbytes(VOLCTL_SLOPES * sizeof(t_float));
    return (x);
}

/* Releases the slope vector allocated in volctl_new. */
static void volctl_free(t_volctl *x)
{
    freealignedbytes(x->x_slopes, VOLCTL_SLOPES * sizeof(t_float));
}

/* Bookkeeping after one ramped block: counts the tick down and stores the
 * factor reached.  When the ramp is over the stored factor is snapped to the
 * target, so the next ramp starts from the value that is actually applied
 * rather than from an accumulated float sum. */
static void volctl_finish_tick(t_volctl *x, t_float reached)
{
    x->x_ticksleft--;
    x->x_value = x->x_ticksleft ? reached : x->x_target;
}

/* Scalar perform routine, used when the block size is not a multiple of 8.
 * w[1] = object, w[2] = input vector, w[3] = output vector, w[4] = length. */
static t_int *volctl_perform(t_int *w)
{
    t_volctl *x = (t_volctl *)(w[1]);
    t_float *in = (t_float *)(w[2]);
    t_float *out = (t_float *)(w[3]);
    int n = (int)(w[4]);

    if (x->x_ticksleft)
    {
        /* ramping: the factor is advanced before each sample is scaled */
        t_float f = x->x_value;
        const t_float slope = x->x_slope;

        while (n--)
        {
            f += slope;
            *out++ = *in++ * f;
        }
        volctl_finish_tick(x, f);
    }
    else
    {
        const t_float f = x->x_target;

        while (n--)
            *out++ = *in++ * f;
    }
    return (w + 5);
}

/* Perform routine unrolled by eight, for block sizes that are a multiple
 * of 8.  Same argument layout as volctl_perform. */
static t_int *volctl_perf8(t_int *w)
{
    t_volctl *x = (t_volctl *)(w[1]);
    t_float *in = (t_float *)(w[2]);
    t_float *out = (t_float *)(w[3]);
    int n = (int)(w[4]);

    if (x->x_ticksleft)
    {
        /* ramping: each sample is scaled first, then the factor advances */
        t_float f = x->x_value;
        const t_float slope = x->x_slope;
        int i;

        for (i = n >> 3; i; --i)
        {
            *out++ = *in++ * f; f += slope;
            *out++ = *in++ * f; f += slope;
            *out++ = *in++ * f; f += slope;
            *out++ = *in++ * f; f += slope;
            *out++ = *in++ * f; f += slope;
            *out++ = *in++ * f; f += slope;
            *out++ = *in++ * f; f += slope;
            *out++ = *in++ * f; f += slope;
        }
        volctl_finish_tick(x, f);
    }
    else
    {
        const t_float f = x->x_target;

        if (f)
        {
            for (; n; n -= 8, in += 8, out += 8)
            {
                /* read all inputs before writing: in and out may alias */
                t_float f0 = in[0], f1 = in[1], f2 = in[2], f3 = in[3];
                t_float f4 = in[4], f5 = in[5], f6 = in[6], f7 = in[7];

                out[0] = f0 * f; out[1] = f1 * f;
                out[2] = f2 * f; out[3] = f3 * f;
                out[4] = f4 * f; out[5] = f5 * f;
                out[6] = f6 * f; out[7] = f7 * f;
            }
        }
        else
        {
            /* factor is exactly zero: just clear the output */
            for (; n; n -= 8, out += 8)
            {
                out[0] = 0; out[1] = 0; out[2] = 0; out[3] = 0;
                out[4] = 0; out[5] = 0; out[6] = 0; out[7] = 0;
            }
        }
    }
    return (w + 5);
}

/* SIMD perform routine, selected by volctl_dsp when SIMD_CHECK2 accepts the
 * vectors.  Same argument layout as volctl_perform. */
static t_int *volctl_perf_simd(t_int *w)
{
    t_volctl *x = (t_volctl *)(w[1]);
    t_float *in = (t_float *)(w[2]);
    t_float *out = (t_float *)(w[3]);
    int n = (int)(w[4]);

    if (x->x_ticksleft)
    {
#if defined(__GNUC__) && (defined(_X86_) || defined(__i386__) || defined(__i586__) || defined(__i686__))
        /* The asm advances the pointers and the counter, so it works on
         * local copies declared as read-write operands; `in`, `out` and `n`
         * stay intact for the bookkeeping below.
         * 16 samples are handled per loop iteration and n>>4 iterations are
         * run -- assumes n is a non-zero multiple of 16 (presumably ensured
         * by SIMD_CHECK2; TODO confirm). */
        t_float *src = in;
        t_float *dst = out;
        int count = n;

        __asm__ __volatile__(
            ".set T_FLOAT,4                         \n"

            "movss      (%3),%%xmm0                 \n" /* value */
            "shufps     $0, %%xmm0, %%xmm0          \n"
            "movaps     (%4), %%xmm1                \n" /* x_slopes */
            "addps      %%xmm0, %%xmm1              \n"

            "movss      (%5), %%xmm0                \n"
            "shufps     $0, %%xmm0, %%xmm0          \n" /* x_slope_step */

            "shrl       $4, %2                      \n" /* n>>4 */

            "1:                                     \n"
            "movaps     (%0), %%xmm2                \n"
            "mulps      %%xmm1, %%xmm2              \n"
            "movaps     %%xmm2, (%1)                \n"
            "addps      %%xmm0, %%xmm1              \n"

            "movaps     4*T_FLOAT(%0), %%xmm2       \n"
            "mulps      %%xmm1, %%xmm2              \n"
            "movaps     %%xmm2, 4*T_FLOAT(%1)       \n"
            "addps      %%xmm0, %%xmm1              \n"

            "movaps     8*T_FLOAT(%0), %%xmm2       \n"
            "mulps      %%xmm1, %%xmm2              \n"
            "movaps     %%xmm2, 8*T_FLOAT(%1)       \n"
            "addps      %%xmm0, %%xmm1              \n"

            "movaps     12*T_FLOAT(%0), %%xmm2      \n"
            "mulps      %%xmm1, %%xmm2              \n"
            "movaps     %%xmm2, 12*T_FLOAT(%1)      \n"
            "addps      %%xmm0, %%xmm1              \n" /* one instr. obsolete */

            "addl       $16*T_FLOAT, %0             \n"
            "addl       $16*T_FLOAT, %1             \n"
            "loop       1b                          \n"
            : "+r"(src), "+r"(dst), "+c"(count)
            : "r"(&x->x_value), "r"(x->x_slopes), "r"(&x->x_slope_step)
            : "%xmm0", "%xmm1", "%xmm2", "memory", "cc");
#elif defined(NT) && defined(_MSC_VER)
        /* NOTE(review): this branch looks untested.  Only the `prt` -> `ptr`
         * misspellings were corrected; the struct-member operands
         * ([x->x_value] etc.) and the xmmword size on movss still need to be
         * checked against the MSVC inline assembler. */
        __asm {
            mov     ecx,n
            mov     ebx,in
            mov     edx,out

            movss   xmm0,xmmword ptr [x->x_value]
            shufps  xmm0,xmm0,0
            movaps  xmm1,xmmword ptr [x->x_slopes]
            addps   xmm1,xmm0

            movss   xmm0,xmmword ptr [x->x_slope_step]
            shufps  xmm0,xmm0,0

            shr     ecx,4

        loopa:
            movaps  xmm2,xmmword ptr[ebx]
            mulps   xmm2,xmm1
            movaps  xmmword ptr[edx],xmm2
            addps   xmm1,xmm0

            movaps  xmm2,xmmword ptr[ebx+4*TYPE t_float]
            mulps   xmm2,xmm1
            movaps  xmmword ptr[edx+4*TYPE t_float],xmm2
            addps   xmm1,xmm0

            movaps  xmm2,xmmword ptr[ebx+8*TYPE t_float]
            mulps   xmm2,xmm1
            movaps  xmmword ptr[edx+8*TYPE t_float],xmm2
            addps   xmm1,xmm0

            movaps  xmm2,xmmword ptr[ebx+12*TYPE t_float]
            mulps   xmm2,xmm1
            movaps  xmmword ptr[edx+12*TYPE t_float],xmm2
            addps   xmm1,xmm0

            add     ebx,16*TYPE t_float
            add     edx,16*TYPE t_float
            loop    loopa
        }
#else
        /* not yet implemented ... portable fallback, same as volctl_perf8.
         * A separate counter is used so that n is still the block length
         * when the stored factor is advanced below. */
        {
            t_float f = x->x_value;
            const t_float slope = x->x_slope;
            int i;

            for (i = n >> 3; i; --i)
            {
                *out++ = *in++ * f; f += slope;
                *out++ = *in++ * f; f += slope;
                *out++ = *in++ * f; f += slope;
                *out++ = *in++ * f; f += slope;
                *out++ = *in++ * f; f += slope;
                *out++ = *in++ * f; f += slope;
                *out++ = *in++ * f; f += slope;
                *out++ = *in++ * f; f += slope;
            }
        }
#endif
        /* none of the branches above writes x_value back, so advance it by
         * one block worth of slope here */
        volctl_finish_tick(x, x->x_value + n * (x->x_slope));
    }
    else
    {
        if (x->x_target == 0.f)
            zerovec_simd(out, n);
        else if (x->x_target == 1.f)
        {
            /* unity gain: nothing to do when processing in place */
            if (in != out)
                copyvec_simd(out, in, n);
        }
        else
        {
            /* argument vector in the layout scalartimes_perf_simd reads:
             * slots 1..4 = input, pointer to the scalar, output, length */
            t_int args[6] = {0, (t_int)in, (t_int)&x->x_target,
                             (t_int)out, n, 0};
            scalartimes_perf_simd(args);
        }
    }
    return (w + 5);
}

/* Handler for the "f1" message: starts a ramp from the current factor to f.
 * The ramp length is the interpolation time rounded up to whole blocks (at
 * least one block). */
static void volctl_set(t_volctl *x, t_float f)
{
    t_float slope;
    int i;
    int samplesleft = 0;

    /* a negative (or NaN) time would produce a negative tick count and a
     * ramp that never terminates, so treat it as zero */
    if (x->x_h > 0)
        samplesleft = (int)(x->x_h * x->x_samples_per_ms);

    /* round up to the next multiple of the block size; the mask assumes the
     * block size is a power of two */
    samplesleft += x->x_blocksize - (samplesleft & (x->x_blocksize - 1));

    x->x_ticksleft = (int)((t_float)samplesleft * x->x_1overblocksize);

    slope = (f - x->x_value) / samplesleft;

    x->x_slope = slope;
    for (i = 0; i != VOLCTL_SLOPES; ++i)
        x->x_slopes[i] = i * slope;
    x->x_slope_step = VOLCTL_SLOPES * slope;

    x->x_target = f;
}

/* "dsp" method: picks a perform routine for the block size and caches the
 * block size and sample rate used by volctl_set.
 * sp[0] is passed as input vector and sp[1] as output vector. */
static void volctl_dsp(t_volctl *x, t_signal **sp)
{
    const int n = sp[0]->s_n;

    if (n & 7)
        dsp_add(volctl_perform, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
    else if (SIMD_CHECK2(n, sp[0]->s_vec, sp[1]->s_vec))
        dsp_add(volctl_perf_simd, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
    else
        dsp_add(volctl_perf8, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);

    x->x_blocksize = n;
    x->x_1overblocksize = 1. / n;
    x->x_samples_per_ms = sp[0]->s_sr / 1000.f;
}

/* Registers the volctl~ class and its methods. */
void volctl_tilde_setup(void)
{
    volctl_class = class_new(gensym("volctl~"), (t_newmethod)volctl_new,
                             (t_method)volctl_free, sizeof(t_volctl), 0,
                             A_GIMME, 0);
    CLASS_MAINSIGNALIN(volctl_class, t_volctl, x_f);
    class_addmethod(volctl_class, (t_method)volctl_dsp, gensym("dsp"), 0);
    class_addmethod(volctl_class, (t_method)volctl_set, gensym("f1"),
                    A_FLOAT, 0);
    class_settip(volctl_class, gensym("signal"));
}