diff options
author | IOhannes m zmölnig <zmoelnig@users.sourceforge.net> | 2005-12-21 14:56:16 +0000 |
---|---|---|
committer | IOhannes m zmölnig <zmoelnig@users.sourceforge.net> | 2005-12-21 14:56:16 +0000 |
commit | d590f5e6e1fdaf2f1c5a229a51e345e60b9656ef (patch) | |
tree | 3eb94c5336fbad4a1142fdd447b6fc8adc8766c1 | |
parent | b7d51f55367bebf34b83553675d4c37bc7c32e9b (diff) |
moved code for [abs~] into separate file
use sse-code (intrinsics) for [abs~]
svn path=/trunk/externals/zexy/; revision=4274
-rw-r--r-- | src/abs~.c | 166 | ||||
-rw-r--r-- | src/z_sigbin.c | 48 | ||||
-rw-r--r-- | src/z_zexy.c | 1 | ||||
-rw-r--r-- | src/z_zexy.h | 1 |
4 files changed, 169 insertions, 47 deletions
diff --git a/src/abs~.c b/src/abs~.c
new file mode 100644
index 0000000..7eaf5a7
--- /dev/null
+++ b/src/abs~.c
@@ -0,0 +1,166 @@
+/******************************************************
+ *
+ * zexy - implementation file
+ *
+ * copyleft (c) IOhannes m zmölnig
+ *
+ *   1999:forum::für::umläute:2005
+ *
+ *   institute of electronic music and acoustics (iem)
+ *
+ ******************************************************
+ *
+ * license: GNU General Public License v.2
+ *
+ ******************************************************/
+
+/*
+  abs~: absolute value of signal
+
+  2112:forum::für::umläute:2005
+*/
+
+#include "zexy.h"
+
+typedef struct _abs
+{
+  t_object x_obj;
+  float x_f;
+} t_abs;
+
+
+/* ------------------------ sigABS~ ----------------------------- */
+
+static t_class *sigABS_class;
+
+static t_int *sigABS_perform(t_int *w) /* scalar routine: w[1]=in, w[2]=out, w[3]=number of samples */
+{
+  t_float *in = (t_float *)(w[1]);
+  t_float *out = (t_float *)(w[2]);
+  int n = (int)(w[3]);
+
+  while (n--) *out++ = fabsf(*in++);
+
+  return (w+4);
+}
+
+#ifdef __SSE__
+static const int l_bitmask[4]={0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; /* one 32-bit mask per float lane; a 'long' array is 64 bit per element on LP64 and would zero every 2nd sample */
+static t_int *sigABS_performSSE(t_int *w) /* SSE routine: same arguments as sigABS_perform */
+{
+  __m128 *in = (__m128 *)(w[1]);
+  __m128 *out = (__m128 *)(w[2]);
+  int n = (int)(w[3])>>4; /* each pass handles 4 vectors = 16 samples */
+
+  __m128 bitmask= _mm_loadu_ps((const float*)l_bitmask); /* unaligned load: the mask array carries no alignment guarantee */
+
+  while (n--) {
+    out[0] = _mm_and_ps(in[0] , bitmask); /* AND with 0x7fffffff clears the sign bit of every lane */
+    out[1] = _mm_and_ps(in[1] , bitmask);
+    out[2] = _mm_and_ps(in[2] , bitmask);
+    out[3] = _mm_and_ps(in[3] , bitmask);
+
+    in +=4;
+    out+=4;
+
+  }
+#if 0
+  /*
+   * handwritten SSE-code by tim blechmann
+   *
+   * JMZ: the above (using intrinsics) is a little bit slower
+   * but still about 4* as fast as the generic code
+   * i prefer using intrinsics as i don't have to learn how to
+   * assemble
+   */
+  asm(
+      ".section .rodata \n"
+      ".align 16 \n"
+      "2: \n"
+      ".long 2147483647 \n" /* bitmask */
+      ".long 2147483647 \n" /* 0x7fffffff */
+      ".long 2147483647 \n"
+      ".long 2147483647 \n"
+
+      ".text \n"
+
+      "movaps (2b), %%xmm0 \n" /* xmm0 = bitmask */
+      "shrl $4, %2 \n"
+
+      /* loop: *dest = abs(*src) */
+      "1: \n"
+      "movaps (%0,%3), %%xmm1 \n"
+      "andps %%xmm0, %%xmm1 \n"
+      "movaps %%xmm1, (%1,%3) \n"
+
+      "movaps 16(%0,%3), %%xmm2 \n"
+      "andps %%xmm0, %%xmm2 \n"
+      "movaps %%xmm2, 16(%1,%3) \n"
+
+      "movaps 32(%0,%3), %%xmm3 \n"
+      "andps %%xmm0, %%xmm3 \n"
+      "movaps %%xmm3, 32(%1,%3) \n"
+
+      "movaps 48(%0,%3), %%xmm4 \n"
+      "andps %%xmm0, %%xmm4 \n"
+      "movaps %%xmm4, 48(%1,%3) \n"
+
+      "addl $64, %3 \n"
+      "loop 1b \n"
+      :
+      :"r"(in), "r"(out), "c"(n), "r"(0)
+      :"%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4"
+      );
+#endif /*0*/
+
+  return (w+4);
+}
+#endif /* __SSE__ */
+
+static void sigABS_dsp(t_abs *x, t_signal **sp) /* registers the SSE routine only if the Z_SIMD_CHK* tests on block size and both vectors pass */
+{
+#ifdef __SSE__
+  if(
+     Z_SIMD_CHKBLOCKSIZE(sp[0]->s_n)&&
+     Z_SIMD_CHKALIGN(sp[0]->s_vec)&&
+     Z_SIMD_CHKALIGN(sp[1]->s_vec))
+    {
+      dsp_add(sigABS_performSSE, 3, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n);
+    } else
+#endif
+    {
+      dsp_add(sigABS_perform, 3, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n);
+    }
+}
+
+static void sigABS_helper(void)
+{
+  post("\n%c abs~ \t\t:: absolute value of a signal", HEARTSYMBOL);
+}
+
+static void *sigABS_new(void)
+{
+  t_abs *x = (t_abs *)pd_new(sigABS_class);
+  x->x_f=0.f;
+  outlet_new(&x->x_obj, gensym("signal"));
+
+  return (x);
+}
+
+void abs_tilde_setup(void)
+{
+  sigABS_class = class_new(gensym("abs~"), (t_newmethod)sigABS_new, 0,
+                           sizeof(t_abs), 0, A_DEFFLOAT, 0);
+  CLASS_MAINSIGNALIN(sigABS_class, t_abs, x_f);
+  class_addmethod(sigABS_class, (t_method)sigABS_dsp, gensym("dsp"), 0);
+
+  class_addmethod(sigABS_class, (t_method)sigABS_helper, gensym("help"), 0);
+  class_sethelpsymbol(sigABS_class, gensym("zexy/sigbinops+"));
+
+  zexy_register("abs~");
+}
+
+void z_abs__setup(void)
+{
+  abs_tilde_setup();
+}
diff --git a/src/z_sigbin.c b/src/z_sigbin.c
index 61f67d6..77de462 100644
--- a/src/z_sigbin.c
+++ b/src/z_sigbin.c
@@ -15,7 +15,7 @@
  ******************************************************/
 
 /*
-  finally :: some of the missing binops for signals :: abs~, sgn~, >~, <~, ==~, &&~, ||~
+  finally :: some of the missing binops for signals :: sgn~, >~, <~, ==~, &&~, ||~
 
   1302:forum::für::umläute:2000
 */
@@ -28,51 +28,6 @@ typedef struct _misc
 } t_misc;
 
 
-/* ------------------------ sigABS~ ----------------------------- */
-
-static t_class *sigABS_class;
-
-static t_int *sigABS_perform(t_int *w)
-{
-  t_float *in = (t_float *)(w[1]);
-  t_float *out = (t_float *)(w[2]);
-  int n = (int)(w[3]);
-
-  while (n--) *out++ = fabsf(*in++);
-
-  return (w+4);
-}
-
-static void sigABS_dsp(t_misc *x, t_signal **sp)
-{
-  dsp_add(sigABS_perform, 3, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n);
-}
-
-static void sigABS_helper(void)
-{
-  post("\n%c abs~ \t\t:: absolute value of a signal", HEARTSYMBOL);
-}
-
-static void *sigABS_new(void)
-{
-  t_misc *x = (t_misc *)pd_new(sigABS_class);
-  outlet_new(&x->x_obj, gensym("signal"));
-
-  return (x);
-}
-
-static void sigABS_setup(void)
-{
-  sigABS_class = class_new(gensym("abs~"), (t_newmethod)sigABS_new, 0,
-                           sizeof(t_misc), 0, A_DEFFLOAT, 0);
-  class_addmethod(sigABS_class, nullfn, gensym("signal"), 0);
-  class_addmethod(sigABS_class, (t_method)sigABS_dsp, gensym("dsp"), 0);
-
-  class_addmethod(sigABS_class, (t_method)sigABS_helper, gensym("help"), 0);
-
-  class_sethelpsymbol(sigABS_class, gensym("zexy/sigbinops+"));
-}
-
 /* ------------------------ sgn~ ----------------------------- */
 
 static t_class *sigSGN_class;
@@ -800,7 +755,6 @@ static void sigOR_setup(void)
 
 void z_sigbin_setup(void)
 {
-  sigABS_setup();
   sigSGN_setup();
   sigGRT_setup();
   sigLESS_setup();
diff --git a/src/z_zexy.c b/src/z_zexy.c
index d561b5f..2eeae8e 100644
--- a/src/z_zexy.c
+++ b/src/z_zexy.c
@@ -8,6 +8,7 @@ void z_zexy_setup(void)
 
 {
   z_a2l_setup(); /* a2l.c */
+  z_abs__setup(); /* abs~.c */
   z_atoi_setup(); /* atoi.c */
   z_avg__setup(); /* avg~.c */
   z_blockmirror__setup(); /* blockmirror~.c */
diff --git a/src/z_zexy.h b/src/z_zexy.h
index bf51257..8278e73 100644
--- a/src/z_zexy.h
+++ b/src/z_zexy.h
@@ -6,6 +6,7 @@
 #ifndef Z_ZEXY_H__
 #define Z_ZEXY_H__
 void z_a2l_setup(void); /* a2l.c */
+void z_abs__setup(void); /* abs~.c */
 void z_atoi_setup(void); /* atoi.c */
 void z_avg__setup(void); /* avg~.c */
 void z_blockmirror__setup(void); /* blockmirror~.c */