From f0f2e78feae74ba5ea2faf3516fdc0711cc7fe3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?IOhannes=20m=20zm=C3=B6lnig?=
Date: Wed, 21 Dec 2005 17:05:47 +0000
Subject: moved sgn~ into separate file initial SSE-support of sgn~

svn path=/trunk/externals/zexy/; revision=4275
---
 src/sgn~.c     | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/z_sigbin.c |  58 +-------------------
 src/z_zexy.c   |   1 +
 src/z_zexy.h   |   1 +
 4 files changed, 176 insertions(+), 57 deletions(-)
 create mode 100644 src/sgn~.c

(limited to 'src')

diff --git a/src/sgn~.c b/src/sgn~.c
new file mode 100644
index 0000000..b390144
--- /dev/null
+++ b/src/sgn~.c
@@ -0,0 +1,173 @@
+/******************************************************
+ *
+ * zexy - implementation file
+ *
+ * copyleft (c) IOhannes m zmölnig
+ *
+ *   2000:forum::für::umläute:2005
+ *
+ *   institute of electronic music and acoustics (iem)
+ *
+ ******************************************************
+ *
+ * license: GNU General Public License v.2
+ *
+ ******************************************************/
+
+/*
+  sgn~: sign of a signal
+
+  2112:forum::für::umläute:2005
+*/
+
+#include "zexy.h"
+
+typedef struct _sgnTilde
+{
+  t_object x_obj;
+} t_sgnTilde;
+
+
+/* ------------------------ sgn~ ----------------------------- */
+
+static t_class *sgnTilde_class;
+/* scalar fallback for any block size: w[1]=in, w[2]=out, w[3]=sample count; writes 1., -1. or 0. per sample */
+static t_int *sgnTilde_perform(t_int *w)
+{
+  t_float *in = (t_float *)(w[1]);
+  t_float *out = (t_float *)(w[2]);
+  int n = (int)(w[3]);
+  t_float x;
+  while (n--) {
+    if ((x=*in++)>0.) *out++=1.;
+    else if (x<0.) *out++=-1.;
+    else *out++=0.;
+  }
+
+  return (w+4);
+}
+static t_int *sgnTilde_perform8(t_int *w)
+{
+  t_float *in = (t_float *)(w[1]);
+  t_float *out = (t_float *)(w[2]);
+  int n = (int)(w[3])>>3;
+  t_float x;
+  /* 8x unrolled variant: n counts groups of 8 samples, so a remainder of w[3] modulo 8 would not be processed */
+  while(n--){
+    /* weirdly enough, the if/else/if/else is a lot faster than ()?:(()?:) */
+    if ((x=in[0])>0.) out[0]=1.; else if(x<0.) out[0]=-1.; else out[0]=0.;
+    if ((x=in[1])>0.) out[1]=1.; else if(x<0.) out[1]=-1.; else out[1]=0.;
+    if ((x=in[2])>0.) out[2]=1.; else if(x<0.) out[2]=-1.; else out[2]=0.;
+    if ((x=in[3])>0.) out[3]=1.; else if(x<0.) out[3]=-1.; else out[3]=0.;
+    if ((x=in[4])>0.) out[4]=1.; else if(x<0.) out[4]=-1.; else out[4]=0.;
+    if ((x=in[5])>0.) out[5]=1.; else if(x<0.) out[5]=-1.; else out[5]=0.;
+    if ((x=in[6])>0.) out[6]=1.; else if(x<0.) out[6]=-1.; else out[6]=0.;
+    if ((x=in[7])>0.) out[7]=1.; else if(x<0.) out[7]=-1.; else out[7]=0.;
+
+    in+=8;
+    out+=8;
+  }
+
+  return (w+4);
+}
+
+#ifdef __SSE__
+static const unsigned int l_bitmask[]={0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u}; /* sign bitmask: four 32-bit words overlaying one __m128 ('long' is 64 bit on LP64 and would leave every other lane without a mask) */
+static t_int *sgnTilde_performSSE(t_int *w)
+{
+  __m128 *in = (__m128 *)(w[1]);
+  __m128 *out = (__m128 *)(w[2]);
+  /* each loop iteration handles 4 vectors (16 floats): out = sign bit of in, OR-ed with 1.f where in != 0.f */
+  __m128 val;
+  int n = (int)(w[3])>>4; // yea, we do 16x loop-unrolling
+
+  const __m128 sgnmask= _mm_loadu_ps((const float*)l_bitmask);
+  const __m128 zero = _mm_setzero_ps();
+  const __m128 one = _mm_set_ps(1.f, 1.f, 1.f, 1.f);
+
+  __m128 xmm0, xmm1;
+
+  while (n--) {
+
+    val=in[0];
+    xmm0 = _mm_cmpneq_ps(val , zero);// mask for non-zeros
+    xmm1 = _mm_and_ps (val, sgnmask);// sign (without value)
+    xmm0 = _mm_and_ps (xmm0, one); // (abs) value: (val==0.f)?0.f:1.f
+    out[0]= _mm_or_ps (xmm1, xmm0);// merge sign and value
+
+    val=in[1];
+    xmm0 = _mm_cmpneq_ps(val , zero);
+    xmm1 = _mm_and_ps (val, sgnmask);
+    xmm0 = _mm_and_ps (xmm0, one);
+    out[1]= _mm_or_ps (xmm1, xmm0);
+
+    val=in[2];
+    xmm0 = _mm_cmpneq_ps(val , zero);
+    xmm1 = _mm_and_ps (val, sgnmask);
+    xmm0 = _mm_and_ps (xmm0, one);
+    out[2]= _mm_or_ps (xmm1, xmm0);
+
+    val=in[3];
+    xmm0 = _mm_cmpneq_ps(val , zero);
+    xmm1 = _mm_and_ps (val, sgnmask);
+    xmm0 = _mm_and_ps (xmm0, one);
+    out[3]= _mm_or_ps (xmm1, xmm0);
+
+    in +=4;
+    out+=4;
+
+  }
+  return (w+4);
+}
+#endif /* __SSE__ */
+/* "dsp" method: uses the SSE routine when the Z_SIMD_CHK* tests pass, else perform8 for block sizes divisible by 8, else the plain loop */
+static void sgnTilde_dsp(t_sgnTilde *x, t_signal **sp)
+{
+#ifdef __SSE__
+  if(
+     Z_SIMD_CHKBLOCKSIZE(sp[0]->s_n)&&
+     Z_SIMD_CHKALIGN(sp[0]->s_vec)&&
+     Z_SIMD_CHKALIGN(sp[1]->s_vec))
+    {
+      dsp_add(sgnTilde_performSSE, 3, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n);
+    } else
+#endif
+  if (sp[0]->s_n & 7) {
+    dsp_add(sgnTilde_perform , 3, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n);
+  } else {
+    dsp_add(sgnTilde_perform8, 3, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n);
+  }
+
+}
+/* "help" method: posts a one-line description of the object */
+static void sgnTilde_helper(void)
+{
+  post("\n%c sgn~ \t\t:: sign of a signal", HEARTSYMBOL);
+}
+/* constructor: creates the object and gives it a single signal outlet */
+static void *sgnTilde_new()
+{
+  t_sgnTilde *x = (t_sgnTilde *)pd_new(sgnTilde_class);
+  outlet_new(&x->x_obj, gensym("signal"));
+
+  return (x);
+}
+/* creates the "sgn~" class, binds its signal/dsp/help methods and registers it with zexy */
+static void sgn_tilde_setup(void)
+{
+  sgnTilde_class = class_new(gensym("sgn~"), (t_newmethod)sgnTilde_new, 0,
+                             sizeof(t_sgnTilde), 0, A_DEFFLOAT, 0);
+  class_addmethod(sgnTilde_class, nullfn, gensym("signal"), 0);
+  class_addmethod(sgnTilde_class, (t_method)sgnTilde_dsp, gensym("dsp"), 0);
+
+  class_addmethod(sgnTilde_class, (t_method)sgnTilde_helper, gensym("help"), 0);
+  class_sethelpsymbol(sgnTilde_class, gensym("zexy/sigbinops+"));
+  zexy_register("sgn~");
+}
+
+
+/* public entry point, called from z_zexy_setup() */
+void z_sgn__setup(void)
+{
+  sgn_tilde_setup();
+}
diff --git a/src/z_sigbin.c b/src/z_sigbin.c
index 77de462..50610f7 100644
--- a/src/z_sigbin.c
+++ b/src/z_sigbin.c
@@ -15,68 +15,13 @@
  ******************************************************/
 
 /*
-  finally :: some of the missing binops for signals :: sgn~, >~, <~, ==~, &&~, ||~
+  finally :: some of the missing binops for signals :: >~, <~, ==~, &&~, ||~
 
   1302:forum::für::umläute:2000
 */
 
 #include "zexy.h"
 
-typedef struct _misc
-{
-  t_object x_obj;
-} t_misc;
-
-
-/* ------------------------ sgn~ ----------------------------- */
-
-static t_class *sigSGN_class;
-
-static t_int *sigSGN_perform(t_int *w)
-{
-  t_float *in = (t_float *)(w[1]);
-  t_float *out = (t_float *)(w[2]);
-  int n = (int)(w[3]);
-  t_float x;
-
-  while (n--) {
-    if ((x=*in++)>0.) *out++=1.;
-    else if (x<0.) *out++=-1.;
-    else *out++=0.;
-  }
-
-  return (w+4);
-}
-
-static void sigSGN_dsp(t_misc *x, t_signal **sp)
-{
-  dsp_add(sigSGN_perform, 3, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n);
-}
-
-static void sigSGN_helper(void)
-{
-  post("\n%c sgn~ \t\t:: sign of a signal", HEARTSYMBOL);
-}
-
-static void *sigSGN_new()
-{
-  t_misc *x = (t_misc *)pd_new(sigSGN_class);
-  outlet_new(&x->x_obj, gensym("signal"));
-
-  return (x);
-}
-
-static void sigSGN_setup(void)
-{
-  sigSGN_class = class_new(gensym("sgn~"), (t_newmethod)sigSGN_new, 0,
-                           sizeof(t_misc), 0, A_DEFFLOAT, 0);
-  class_addmethod(sigSGN_class, nullfn, gensym("signal"), 0);
-  class_addmethod(sigSGN_class, (t_method)sigSGN_dsp, gensym("dsp"), 0);
-
-  class_addmethod(sigSGN_class, (t_method)sigSGN_helper, gensym("help"), 0);
-  class_sethelpsymbol(sigSGN_class, gensym("zexy/sigbinops+"));
-}
-
 /* ------------------------ relational~ ----------------------------- */
 
 /* ----------------------------- sigGRT ----------------------------- */
@@ -755,7 +700,6 @@ static void sigOR_setup(void)
 
 void z_sigbin_setup(void)
 {
-  sigSGN_setup();
   sigGRT_setup();
   sigLESS_setup();
   sigEQUAL_setup();
diff --git a/src/z_zexy.c b/src/z_zexy.c
index 2eeae8e..f7c1dc3 100644
--- a/src/z_zexy.c
+++ b/src/z_zexy.c
@@ -53,6 +53,7 @@ void z_zexy_setup(void)
   z_repeat_setup(); /* repeat.c */
   z_sfplay_setup(); /* sfplay.c */
   z_sfrecord_setup(); /* sfrecord.c */
+  z_sgn__setup(); /* sgn~.c */
   z_sigzero__setup(); /* sigzero~.c */
   z_sort_setup(); /* sort.c */
   z_step__setup(); /* step~.c */
diff --git a/src/z_zexy.h b/src/z_zexy.h
index 8278e73..6e09612 100644
--- a/src/z_zexy.h
+++ b/src/z_zexy.h
@@ -51,6 +51,7 @@ void z_repack_setup(void); /* repack.c */
 void z_repeat_setup(void); /* repeat.c */
 void z_sfplay_setup(void); /* sfplay.c */
 void z_sfrecord_setup(void); /* sfrecord.c */
+void z_sgn__setup(void); /* sgn~.c */
 void z_sigzero__setup(void); /* sigzero~.c */
 void z_sort_setup(void); /* sort.c */
 void z_step__setup(void); /* step~.c */
-- 
cgit v1.2.1