/****************************************************** * * zexy - implementation file * * copyleft (c) IOhannes m zmölnig * * 1999:forum::für::umläute:2005 * * institute of electronic music and acoustics (iem) * ****************************************************** * * license: GNU General Public License v.2 * ******************************************************/ /* abs~: absolute value of signal 2112:forum::für::umläute:2005 */ #include "zexy.h" typedef struct _abs { t_object x_obj; float x_f; } t_abs; /* ------------------------ sigABS~ ----------------------------- */ static t_class *sigABS_class; static t_int *sigABS_perform(t_int *w) { t_float *in = (t_float *)(w[1]); t_float *out = (t_float *)(w[2]); int n = (int)(w[3]); while (n--) *out++ = fabsf(*in++); return (w+4); } #ifdef __SSE__ static long l_bitmask[]={0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; static t_int *sigABS_performSSE(t_int *w) { __m128 *in = (__m128 *)(w[1]); __m128 *out = (__m128 *)(w[2]); int n = (int)(w[3])>>4; __m128 bitmask= _mm_loadu_ps((float*)l_bitmask); while (n--) { out[0] = _mm_and_ps(in[0] , bitmask); out[1] = _mm_and_ps(in[1] , bitmask); out[2] = _mm_and_ps(in[2] , bitmask); out[3] = _mm_and_ps(in[3] , bitmask); in +=4; out+=4; } #if 0 /* * handwritten SSE-code by tim blechmann * * JMZ: the above (using intrinsics) is a little bit slower * but still about 4* as fast as the generic code * i prefer using intrinsics as i don't have to learn how to * assemble */ asm( ".section .rodata \n" ".align 16 \n" "2: \n" ".long 2147483647 \n" /* bitmask */ ".long 2147483647 \n" /* 0x7fffffff */ ".long 2147483647 \n" ".long 2147483647 \n" ".text \n" "movaps (2b), %%xmm0 \n" /* xmm0 = bitmask */ "shrl $4, %2 \n" /* loop: *dest = abs(*src) */ "1: \n" "movaps (%0,%3), %%xmm1 \n" "andps %%xmm0, %%xmm1 \n" "movaps %%xmm1, (%1,%3) \n" "movaps 16(%0,%3), %%xmm2 \n" "andps %%xmm0, %%xmm2 \n" "movaps %%xmm2, 16(%1,%3) \n" "movaps 32(%0,%3), %%xmm3 \n" "andps %%xmm0, %%xmm3 \n" "movaps %%xmm3, 32(%1,%3) \n" "movaps 48(%0,%3), %%xmm4 \n" "andps %%xmm0, %%xmm4 \n" "movaps %%xmm4, 48(%1,%3) \n" "addl $64, %3 \n" "loop 1b \n" : :"r"(in), "r"(out), "c"(n), "r"(0) :"%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4" ); #endif /*0*/ return (w+4); } #endif /* __SSE__ */ static void sigABS_dsp(t_abs *x, t_signal **sp) { ZEXY_USEVAR(x); #ifdef __SSE__ if( Z_SIMD_CHKBLOCKSIZE(sp[0]->s_n)&& Z_SIMD_CHKALIGN(sp[0]->s_vec)&& Z_SIMD_CHKALIGN(sp[1]->s_vec)) { dsp_add(sigABS_performSSE, 3, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n); } else #endif { dsp_add(sigABS_perform, 3, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n); } } static void sigABS_helper(void) { post("\n%c abs~ \t\t:: absolute value of a signal", HEARTSYMBOL); } static void *sigABS_new(void) { t_abs *x = (t_abs *)pd_new(sigABS_class); x->x_f=0.f; outlet_new(&x->x_obj, gensym("signal")); return (x); } void abs_tilde_setup(void) { sigABS_class = class_new(gensym("abs~"), (t_newmethod)sigABS_new, 0, sizeof(t_abs), 0, A_DEFFLOAT, 0); CLASS_MAINSIGNALIN(sigABS_class, t_abs, x_f); class_addmethod(sigABS_class, (t_method)sigABS_dsp, gensym("dsp"), 0); class_addmethod(sigABS_class, (t_method)sigABS_helper, gensym("help"), 0); class_sethelpsymbol(sigABS_class, gensym("zigbinops")); zexy_register("abs~"); }