aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIOhannes m zmölnig <zmoelnig@users.sourceforge.net>2005-12-21 14:56:16 +0000
committerIOhannes m zmölnig <zmoelnig@users.sourceforge.net>2005-12-21 14:56:16 +0000
commitd590f5e6e1fdaf2f1c5a229a51e345e60b9656ef (patch)
tree3eb94c5336fbad4a1142fdd447b6fc8adc8766c1
parentb7d51f55367bebf34b83553675d4c37bc7c32e9b (diff)
moved code for [abs~] into separate file
use sse-code (intrinsics) for [abs~] svn path=/trunk/externals/zexy/; revision=4274
-rw-r--r--src/abs~.c166
-rw-r--r--src/z_sigbin.c48
-rw-r--r--src/z_zexy.c1
-rw-r--r--src/z_zexy.h1
4 files changed, 169 insertions, 47 deletions
diff --git a/src/abs~.c b/src/abs~.c
new file mode 100644
index 0000000..7eaf5a7
--- /dev/null
+++ b/src/abs~.c
@@ -0,0 +1,166 @@
+/******************************************************
+ *
+ * zexy - implementation file
+ *
+ * copyleft (c) IOhannes m zmölnig
+ *
+ * 1999:forum::für::umläute:2005
+ *
+ * institute of electronic music and acoustics (iem)
+ *
+ ******************************************************
+ *
+ * license: GNU General Public License v.2
+ *
+ ******************************************************/
+
+/*
+ abs~: absolute value of signal
+
+ 2112:forum::für::umläute:2005
+*/
+
+#include "zexy.h"
+
+typedef struct _abs /* per-instance data of one abs~ object; allocated with pd_new() in sigABS_new */
+{
+ t_object x_obj; /* its address is what sigABS_new hands to outlet_new() */
+ float x_f; /* field named in CLASS_MAINSIGNALIN (abs_tilde_setup); set to 0 in sigABS_new */
+} t_abs;
+
+
+/* ------------------------ sigABS~ ----------------------------- */
+
+static t_class *sigABS_class; /* assigned from class_new() in abs_tilde_setup; passed to pd_new() in sigABS_new */
+
+/*
+ * Generic (scalar) perform routine for abs~.
+ *
+ *   w[1]  input sample vector
+ *   w[2]  output sample vector
+ *   w[3]  number of samples to process
+ *
+ * Writes fabsf() of each input sample to the output vector and returns
+ * w+4, i.e. the position just past the four slots read here.
+ */
+static t_int *sigABS_perform(t_int *w)
+{
+  t_float *src = (t_float *)(w[1]);
+  t_float *dst = (t_float *)(w[2]);
+  int remaining = (int)(w[3]);
+
+  /* read first, then store: stays correct when src and dst are the same buffer */
+  for (; remaining; remaining--) {
+    *dst = fabsf(*src);
+    dst++;
+    src++;
+  }
+
+  return (w+4);
+}
+
+#ifdef __SSE__
+/*
+ * Mask that clears the IEEE-754 sign bit: one 32-bit word per float lane
+ * of an SSE register.
+ *
+ * The element type has to be exactly 32 bits wide.  This used to be an
+ * array of `long`, which is 64 bits on LP64 targets (e.g. x86-64 Linux);
+ * read as four floats, such an array yields the lanes
+ * {0x7fffffff, 0, 0x7fffffff, 0}, so every second output sample was
+ * forced to 0 instead of |x|.
+ */
+static const unsigned int l_bitmask[4]={0x7fffffffu, 0x7fffffffu, 0x7fffffffu, 0x7fffffffu};
+
+/* compile-time check: the mask must fill exactly one 128-bit register */
+typedef char l_bitmask_must_be_16_bytes[(sizeof(l_bitmask) == 16) ? 1 : -1];
+
+/*
+ * SSE perform routine for abs~.
+ *
+ *   w[1]  input sample vector, accessed as __m128 (needs 16-byte alignment)
+ *   w[2]  output sample vector, accessed as __m128 (needs 16-byte alignment)
+ *   w[3]  number of samples
+ *
+ * Each loop iteration handles 4 registers of 4 floats = 16 samples, hence
+ * the sample count is shifted right by 4; a remainder of fewer than 16
+ * samples would be left untouched.  sigABS_dsp only installs this routine
+ * after Z_SIMD_CHKBLOCKSIZE / Z_SIMD_CHKALIGN succeed (those macros are
+ * defined outside this file -- presumably they guarantee both
+ * preconditions; confirm there).
+ *
+ * Returns w+4, the position just past the four slots read here.
+ */
+static t_int *sigABS_performSSE(t_int *w)
+{
+  __m128 *in = (__m128 *)(w[1]);
+  __m128 *out = (__m128 *)(w[2]);
+  int n = (int)(w[3])>>4;
+
+  /* unaligned load: the mask array carries no alignment guarantee */
+  const __m128 bitmask = _mm_loadu_ps((const float *)l_bitmask);
+
+  while (n--) {
+    /* abs(x) == x with the sign bit cleared */
+    out[0] = _mm_and_ps(in[0], bitmask);
+    out[1] = _mm_and_ps(in[1], bitmask);
+    out[2] = _mm_and_ps(in[2], bitmask);
+    out[3] = _mm_and_ps(in[3], bitmask);
+
+    in += 4;
+    out += 4;
+  }
+#if 0
+ /*
+ * handwritten SSE-code by tim blechmann
+ *
+ * JMZ: the above (using intrinsics) is a little bit slower
+ * but still about 4* as fast as the generic code
+ * i prefer using intrinsics as i don't have to learn how to
+ * assemble
+ */
+ asm(
+ ".section .rodata \n"
+ ".align 16 \n"
+ "2: \n"
+ ".long 2147483647 \n" /* bitmask */
+ ".long 2147483647 \n" /* 0x7fffffff */
+ ".long 2147483647 \n"
+ ".long 2147483647 \n"
+
+ ".text \n"
+
+ "movaps (2b), %%xmm0 \n" /* xmm0 = bitmask */
+ "shrl $4, %2 \n"
+
+ /* loop: *dest = abs(*src) */
+ "1: \n"
+ "movaps (%0,%3), %%xmm1 \n"
+ "andps %%xmm0, %%xmm1 \n"
+ "movaps %%xmm1, (%1,%3) \n"
+
+ "movaps 16(%0,%3), %%xmm2 \n"
+ "andps %%xmm0, %%xmm2 \n"
+ "movaps %%xmm2, 16(%1,%3) \n"
+
+ "movaps 32(%0,%3), %%xmm3 \n"
+ "andps %%xmm0, %%xmm3 \n"
+ "movaps %%xmm3, 32(%1,%3) \n"
+
+ "movaps 48(%0,%3), %%xmm4 \n"
+ "andps %%xmm0, %%xmm4 \n"
+ "movaps %%xmm4, 48(%1,%3) \n"
+
+ "addl $64, %3 \n"
+ "loop 1b \n"
+ :
+ :"r"(in), "r"(out), "c"(n), "r"(0)
+ :"%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4"
+ );
+#endif /*0*/
+
+  return (w+4);
+}
+#endif /* __SSE__ */
+
+/*
+ * "dsp" method of abs~: puts one perform routine on the DSP chain.
+ *
+ *   x   the abs~ instance (not used here)
+ *   sp  signal array; sp[0] supplies the input vector and the block
+ *       size, sp[1] supplies the output vector
+ *
+ * sigABS_perform is the default.  When built with __SSE__ and the block
+ * size and both vectors pass the Z_SIMD_* checks, sigABS_performSSE is
+ * installed instead.  Either way the routine gets the same three
+ * arguments: input vector, output vector, block size.
+ */
+static void sigABS_dsp(t_abs *x, t_signal **sp)
+{
+  t_int *(*perform)(t_int *) = sigABS_perform;
+
+#ifdef __SSE__
+  if (Z_SIMD_CHKBLOCKSIZE(sp[0]->s_n) &&
+      Z_SIMD_CHKALIGN(sp[0]->s_vec) &&
+      Z_SIMD_CHKALIGN(sp[1]->s_vec)) {
+    perform = sigABS_performSSE;
+  }
+#endif
+
+  dsp_add(perform, 3, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n);
+}
+
+static void sigABS_helper(void) /* bound to the "help" selector in abs_tilde_setup */
+{
+ post("\n%c abs~ \t\t:: absolute value of a signal", HEARTSYMBOL); /* one-line description; HEARTSYMBOL is not defined in this file */
+}
+
+/*
+ * Constructor registered with class_new() in abs_tilde_setup.
+ *
+ * Allocates a t_abs through pd_new(), zeroes its x_f field, gives the
+ * object one "signal" outlet and returns the new instance.
+ */
+static void *sigABS_new(void)
+{
+  t_abs *self = (t_abs *)pd_new(sigABS_class);
+
+  self->x_f = 0.f;
+  outlet_new(&self->x_obj, gensym("signal"));
+
+  return self;
+}
+
+void abs_tilde_setup(void) /* creates the "abs~" class and attaches its methods; called from z_abs__setup */
+{
+ sigABS_class = class_new(gensym("abs~"), (t_newmethod)sigABS_new, 0,
+ sizeof(t_abs), 0, A_DEFFLOAT, 0); /* instances are sizeof(t_abs) bytes, built by sigABS_new. NOTE(review): A_DEFFLOAT is listed although sigABS_new takes no parameters */
+ CLASS_MAINSIGNALIN(sigABS_class, t_abs, x_f); /* Pd macro; names t_abs.x_f as the field for the main signal inlet (semantics per Pd's m_pd.h -- confirm) */
+ class_addmethod(sigABS_class, (t_method)sigABS_dsp, gensym("dsp"), 0); /* "dsp" selector -> sigABS_dsp */
+
+ class_addmethod(sigABS_class, (t_method)sigABS_helper, gensym("help"), 0); /* "help" selector -> sigABS_helper */
+ class_sethelpsymbol(sigABS_class, gensym("zexy/sigbinops+")); /* help symbol for this class is "zexy/sigbinops+" */
+
+ zexy_register("abs~"); /* zexy helper declared outside this file; its effect is not visible here */
+}
+
+void z_abs__setup(void) /* entry point declared in z_zexy.h and called from z_zexy_setup() in z_zexy.c */
+{
+ abs_tilde_setup(); /* only forwards to the class setup above */
+}
diff --git a/src/z_sigbin.c b/src/z_sigbin.c
index 61f67d6..77de462 100644
--- a/src/z_sigbin.c
+++ b/src/z_sigbin.c
@@ -15,7 +15,7 @@
******************************************************/
/*
- finally :: some of the missing binops for signals :: abs~, sgn~, >~, <~, ==~, &&~, ||~
+ finally :: some of the missing binops for signals :: sgn~, >~, <~, ==~, &&~, ||~
1302:forum::für::umläute:2000
*/
@@ -28,51 +28,6 @@ typedef struct _misc
} t_misc;
-/* ------------------------ sigABS~ ----------------------------- */
-
-static t_class *sigABS_class;
-
-static t_int *sigABS_perform(t_int *w)
-{
- t_float *in = (t_float *)(w[1]);
- t_float *out = (t_float *)(w[2]);
- int n = (int)(w[3]);
-
- while (n--) *out++ = fabsf(*in++);
-
- return (w+4);
-}
-
-static void sigABS_dsp(t_misc *x, t_signal **sp)
-{
- dsp_add(sigABS_perform, 3, sp[0]->s_vec, sp[1]->s_vec, sp[0]->s_n);
-}
-
-static void sigABS_helper(void)
-{
- post("\n%c abs~ \t\t:: absolute value of a signal", HEARTSYMBOL);
-}
-
-static void *sigABS_new(void)
-{
- t_misc *x = (t_misc *)pd_new(sigABS_class);
- outlet_new(&x->x_obj, gensym("signal"));
-
- return (x);
-}
-
-static void sigABS_setup(void)
-{
- sigABS_class = class_new(gensym("abs~"), (t_newmethod)sigABS_new, 0,
- sizeof(t_misc), 0, A_DEFFLOAT, 0);
- class_addmethod(sigABS_class, nullfn, gensym("signal"), 0);
- class_addmethod(sigABS_class, (t_method)sigABS_dsp, gensym("dsp"), 0);
-
- class_addmethod(sigABS_class, (t_method)sigABS_helper, gensym("help"), 0);
-
- class_sethelpsymbol(sigABS_class, gensym("zexy/sigbinops+"));
-}
-
/* ------------------------ sgn~ ----------------------------- */
static t_class *sigSGN_class;
@@ -800,7 +755,6 @@ static void sigOR_setup(void)
void z_sigbin_setup(void)
{
- sigABS_setup();
sigSGN_setup();
sigGRT_setup();
sigLESS_setup();
diff --git a/src/z_zexy.c b/src/z_zexy.c
index d561b5f..2eeae8e 100644
--- a/src/z_zexy.c
+++ b/src/z_zexy.c
@@ -8,6 +8,7 @@
void z_zexy_setup(void)
{
z_a2l_setup(); /* a2l.c */
+ z_abs__setup(); /* abs~.c */
z_atoi_setup(); /* atoi.c */
z_avg__setup(); /* avg~.c */
z_blockmirror__setup(); /* blockmirror~.c */
diff --git a/src/z_zexy.h b/src/z_zexy.h
index bf51257..8278e73 100644
--- a/src/z_zexy.h
+++ b/src/z_zexy.h
@@ -6,6 +6,7 @@
#ifndef Z_ZEXY_H__
#define Z_ZEXY_H__
void z_a2l_setup(void); /* a2l.c */
+void z_abs__setup(void); /* abs~.c */
void z_atoi_setup(void); /* atoi.c */
void z_avg__setup(void); /* avg~.c */
void z_blockmirror__setup(void); /* blockmirror~.c */