path: root/src
diff options
authorIOhannes m zmölnig <zmoelnig@users.sourceforge.net>2005-12-21 17:05:47 +0000
committerIOhannes m zmölnig <zmoelnig@users.sourceforge.net>2005-12-21 17:05:47 +0000
commitf0f2e78feae74ba5ea2faf3516fdc0711cc7fe3d (patch)
tree6532d4db5a757cf3e7ae9f359d1b91da90b4ab2a /src
parentd590f5e6e1fdaf2f1c5a229a51e345e60b9656ef (diff)
moved sgn~ into separate file
initial SSE-support of sgn~ svn path=/trunk/externals/zexy/; revision=4275
Diffstat (limited to 'src')
4 files changed, 176 insertions, 57 deletions
diff --git a/src/sgn~.c b/src/sgn~.c
new file mode 100644
index 0000000..b390144
--- /dev/null
+++ b/src/sgn~.c
@@ -0,0 +1,173 @@
+ *
+ * zexy - implementation file
+ *
+ * copyleft (c) IOhannes m zmölnig
+ *
+ * 2000:forum::für::umläute:2005
+ *
+ * institute of electronic music and acoustics (iem)
+ *
+ ******************************************************
+ *
+ * license: GNU General Public License v.2
+ *
+ ******************************************************/
+ sgn~: sign of a signal
+ 2112:forum::für::umläute:2005
+#include "zexy.h"
+/* Instance data of the sgn~ object: only the t_object header, no further state. */
+typedef struct _sgnTilde
+ t_object x_obj;
+} t_sgnTilde;
+/* ------------------------ sgn~ ----------------------------- */
+/* class pointer of sgn~; assigned from class_new() in sgn_tilde_setup() */
+static t_class *sgnTilde_class;
+/* Scalar perform routine: writes the sign of every input sample to the output.
+ *   w[1]: input sample vector
+ *   w[2]: output sample vector
+ *   w[3]: number of samples to process
+ * Each output sample is 1 for an input > 0, -1 for an input < 0, and 0 otherwise.
+ * Returns w+4, i.e. the position just past the three arguments read above.
+ */
+static t_int *sgnTilde_perform(t_int *w)
+ t_float *in = (t_float *)(w[1]);
+ t_float *out = (t_float *)(w[2]);
+ int n = (int)(w[3]);
+ t_float x;
+ while (n--) {
+ if ((x=*in++)>0.) *out++=1.;
+ else if (x<0.) *out++=-1.;
+ else *out++=0.;
+ }
+ return (w+4);
+/* 8x unrolled variant of sgnTilde_perform: same arguments (w[1] input vector,
+ * w[2] output vector, w[3] sample count) and same per-sample result (1, -1 or 0).
+ * The loop runs (w[3] >> 3) times and handles 8 samples per iteration, so any
+ * trailing (count % 8) samples are NOT processed; sgnTilde_dsp only selects
+ * this routine when the block size is a multiple of 8.
+ * Returns w+4.
+ */
+static t_int *sgnTilde_perform8(t_int *w)
+ t_float *in = (t_float *)(w[1]);
+ t_float *out = (t_float *)(w[2]);
+ int n = (int)(w[3])>>3;
+ t_float x;
+ while(n--){
+ /* weirdly enough, the if/else/if/else is a lot faster than ()?:(()?:) */
+ if ((x=in[0])>0.) out[0]=1.; else if(x<0.) out[0]=-1.; else out[0]=0.;
+ if ((x=in[1])>0.) out[1]=1.; else if(x<0.) out[1]=-1.; else out[1]=0.;
+ if ((x=in[2])>0.) out[2]=1.; else if(x<0.) out[2]=-1.; else out[2]=0.;
+ if ((x=in[3])>0.) out[3]=1.; else if(x<0.) out[3]=-1.; else out[3]=0.;
+ if ((x=in[4])>0.) out[4]=1.; else if(x<0.) out[4]=-1.; else out[4]=0.;
+ if ((x=in[5])>0.) out[5]=1.; else if(x<0.) out[5]=-1.; else out[5]=0.;
+ if ((x=in[6])>0.) out[6]=1.; else if(x<0.) out[6]=-1.; else out[6]=0.;
+ if ((x=in[7])>0.) out[7]=1.; else if(x<0.) out[7]=-1.; else out[7]=0.;
+ in+=8;
+ out+=8;
+ }
+ return (w+4);
+#ifdef __SSE__
+/* Sign-bit mask (0x80000000) for each of the four packed floats.
+ * The elements must be exactly 32 bits wide: with `long` (64 bits on LP64
+ * systems) the 16 bytes loaded by _mm_loadu_ps() would be
+ * {0x80000000, 0, 0x80000000, 0} and every second lane would lose its sign.
+ */
+static const unsigned int l_bitmask[]={0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u}; // sign bitmask
+/* SSE perform routine: computes the sign of the input signal, 4 floats at a time.
+ *   w[1]: input sample vector, accessed as __m128
+ *   w[2]: output sample vector, accessed as __m128
+ *   w[3]: number of samples; the loop runs (w[3] >> 4) times and handles
+ *         4 x __m128 = 16 samples per iteration, so trailing (count % 16)
+ *         samples are not processed
+ * Per lane: (val != 0) selects the magnitude 1.f (else 0.f), and the sign bit
+ * of val is OR-ed onto it.
+ * NOTE: unlike the scalar routines, a NaN input compares "not equal" to zero
+ * and therefore yields +/-1, and a -0.0 input yields -0.0.
+ * NOTE(review): in/out are dereferenced as __m128, which presumably requires
+ * 16-byte alignment -- sgnTilde_dsp guards this with Z_SIMD_CHKALIGN; confirm.
+ * Returns w+4.
+ */
+static t_int *sgnTilde_performSSE(t_int *w)
+ __m128 *in = (__m128 *)(w[1]);
+ __m128 *out = (__m128 *)(w[2]);
+ __m128 val;
+ int n = (int)(w[3])>>4; // yea, we do 16x loop-unrolling
+ const __m128 sgnmask= _mm_loadu_ps((const float*)l_bitmask);
+ const __m128 zero = _mm_setzero_ps();
+ const __m128 one = _mm_set_ps(1.f, 1.f, 1.f, 1.f);
+ __m128 xmm0, xmm1;
+ while (n--) {
+ val=in[0];
+ xmm0 = _mm_cmpneq_ps(val , zero);// mask for non-zeros
+ xmm1 = _mm_and_ps (val, sgnmask);// sign (without value)
+ xmm0 = _mm_and_ps (xmm0, one); // (abs) value: (val==0.f)?0.f:1.f
+ out[0]= _mm_or_ps (xmm1, xmm0);// merge sign and value
+ val=in[1];
+ xmm0 = _mm_cmpneq_ps(val , zero);
+ xmm1 = _mm_and_ps (val, sgnmask);
+ xmm0 = _mm_and_ps (xmm0, one);
+ out[1]= _mm_or_ps (xmm1, xmm0);
+ val=in[2];
+ xmm0 = _mm_cmpneq_ps(val , zero);
+ xmm1 = _mm_and_ps (val, sgnmask);
+ xmm0 = _mm_and_ps (xmm0, one);
+ out[2]= _mm_or_ps (xmm1, xmm0);
+ val=in[3];
+ xmm0 = _mm_cmpneq_ps(val , zero);
+ xmm1 = _mm_and_ps (val, sgnmask);
+ xmm0 = _mm_and_ps (xmm0, one);
+ out[3]= _mm_or_ps (xmm1, xmm0);
+ in +=4;
+ out+=4;
+ }
+ return (w+4);
+#endif /* __SSE__ */
+/* "dsp" method: adds one of the perform routines to the DSP chain via dsp_add(),
+ * always passing (input vector, output vector, block size) taken from
+ * sp[0]->s_vec, sp[1]->s_vec and sp[0]->s_n.
+ *   - when built with __SSE__ and the Z_SIMD_CHKBLOCKSIZE / Z_SIMD_CHKALIGN
+ *     checks succeed for the block size and both vectors: sgnTilde_performSSE
+ *     (the macros are defined elsewhere; presumably they test the block size
+ *     and the vector alignment -- confirm)
+ *   - otherwise, when the block size is not a multiple of 8: sgnTilde_perform
+ *   - otherwise: sgnTilde_perform8
+ * The object pointer x is not used.
+ * NOTE(review): no #endif matching the #ifdef __SSE__ below is visible in this
+ * view; confirm that one follows the "} else" line in the actual file.
+ */
+static void sgnTilde_dsp(t_sgnTilde *x, t_signal **sp)
+#ifdef __SSE__
+ if(
+ Z_SIMD_CHKBLOCKSIZE(sp[0]->s_n)&&
+ Z_SIMD_CHKALIGN(sp[0]->s_vec)&&
+ Z_SIMD_CHKALIGN(sp[1]->s_vec))
+ {
+ dsp_add(sgnTilde_performSSE, 3, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n);
+ } else
+ if (sp[0]->s_n & 7) {
+ dsp_add(sgnTilde_perform , 3, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n);
+ } else {
+ dsp_add(sgnTilde_perform8, 3, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n);
+ }
+/* "help" method: prints a one-line description of sgn~ via post().
+ * HEARTSYMBOL is defined outside this file and is printed as a character (%c).
+ */
+static void sgnTilde_helper(void)
+ post("\n%c sgn~ \t\t:: sign of a signal", HEARTSYMBOL);
+/* Constructor: creates a new sgn~ instance with pd_new(), adds one "signal"
+ * outlet to it and returns the instance.
+ */
+static void *sgnTilde_new()
+ t_sgnTilde *x = (t_sgnTilde *)pd_new(sgnTilde_class);
+ outlet_new(&x->x_obj, gensym("signal"));
+ return (x);
+/* Registers the "sgn~" class: creates it with class_new() (constructor
+ * sgnTilde_new, instance size sizeof(t_sgnTilde)), then adds the "signal"
+ * method (nullfn), the "dsp" method (sgnTilde_dsp) and the "help" method
+ * (sgnTilde_helper), sets the help symbol to "zexy/sigbinops+" and finally
+ * calls zexy_register("sgn~").
+ */
+static void sgn_tilde_setup(void)
+ sgnTilde_class = class_new(gensym("sgn~"), (t_newmethod)sgnTilde_new, 0,
+ sizeof(t_sgnTilde), 0, A_DEFFLOAT, 0);
+ class_addmethod(sgnTilde_class, nullfn, gensym("signal"), 0);
+ class_addmethod(sgnTilde_class, (t_method)sgnTilde_dsp, gensym("dsp"), 0);
+ class_addmethod(sgnTilde_class, (t_method)sgnTilde_helper, gensym("help"), 0);
+ class_sethelpsymbol(sgnTilde_class, gensym("zexy/sigbinops+"));
+ zexy_register("sgn~");
+/* Non-static setup entry point of this file; simply calls sgn_tilde_setup(). */
+void z_sgn__setup(void)
+ sgn_tilde_setup();
diff --git a/src/z_sigbin.c b/src/z_sigbin.c
index 77de462..50610f7 100644
--- a/src/z_sigbin.c
+++ b/src/z_sigbin.c
@@ -15,68 +15,13 @@
- finally :: some of the missing binops for signals :: sgn~, >~, <~, ==~, &&~, ||~
+ finally :: some of the missing binops for signals :: >~, <~, ==~, &&~, ||~
#include "zexy.h"
-typedef struct _misc
- t_object x_obj;
-} t_misc;
-/* ------------------------ sgn~ ----------------------------- */
-static t_class *sigSGN_class;
-static t_int *sigSGN_perform(t_int *w)
- t_float *in = (t_float *)(w[1]);
- t_float *out = (t_float *)(w[2]);
- int n = (int)(w[3]);
- t_float x;
- while (n--) {
- if ((x=*in++)>0.) *out++=1.;
- else if (x<0.) *out++=-1.;
- else *out++=0.;
- }
- return (w+4);
-static void sigSGN_dsp(t_misc *x, t_signal **sp)
- dsp_add(sigSGN_perform, 3, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n);
-static void sigSGN_helper(void)
- post("\n%c sgn~ \t\t:: sign of a signal", HEARTSYMBOL);
-static void *sigSGN_new()
- t_misc *x = (t_misc *)pd_new(sigSGN_class);
- outlet_new(&x->x_obj, gensym("signal"));
- return (x);
-static void sigSGN_setup(void)
- sigSGN_class = class_new(gensym("sgn~"), (t_newmethod)sigSGN_new, 0,
- sizeof(t_misc), 0, A_DEFFLOAT, 0);
- class_addmethod(sigSGN_class, nullfn, gensym("signal"), 0);
- class_addmethod(sigSGN_class, (t_method)sigSGN_dsp, gensym("dsp"), 0);
- class_addmethod(sigSGN_class, (t_method)sigSGN_helper, gensym("help"), 0);
- class_sethelpsymbol(sigSGN_class, gensym("zexy/sigbinops+"));
/* ------------------------ relational~ ----------------------------- */
/* ----------------------------- sigGRT ----------------------------- */
@@ -755,7 +700,6 @@ static void sigOR_setup(void)
void z_sigbin_setup(void)
- sigSGN_setup();
diff --git a/src/z_zexy.c b/src/z_zexy.c
index 2eeae8e..f7c1dc3 100644
--- a/src/z_zexy.c
+++ b/src/z_zexy.c
@@ -53,6 +53,7 @@ void z_zexy_setup(void)
z_repeat_setup(); /* repeat.c */
z_sfplay_setup(); /* sfplay.c */
z_sfrecord_setup(); /* sfrecord.c */
+ z_sgn__setup(); /* sgn~.c */
z_sigzero__setup(); /* sigzero~.c */
z_sort_setup(); /* sort.c */
z_step__setup(); /* step~.c */
diff --git a/src/z_zexy.h b/src/z_zexy.h
index 8278e73..6e09612 100644
--- a/src/z_zexy.h
+++ b/src/z_zexy.h
@@ -51,6 +51,7 @@ void z_repack_setup(void); /* repack.c */
void z_repeat_setup(void); /* repeat.c */
void z_sfplay_setup(void); /* sfplay.c */
void z_sfrecord_setup(void); /* sfrecord.c */
+void z_sgn__setup(void); /* sgn~.c */
void z_sigzero__setup(void); /* sigzero~.c */
void z_sort_setup(void); /* sort.c */
void z_step__setup(void); /* step~.c */