From 5c669899d3ccc8a60521e6bf8268f8f59fd3babb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?IOhannes=20m=20zm=C3=B6lnig?= Date: Thu, 5 Jan 2006 11:53:24 +0000 Subject: added [absgn~] as external (written by tim blechmann) and abstraction the main purpose of this external is speed svn path=/trunk/externals/zexy/; revision=4364 --- abs/absgn~-help.pd | 30 +++++++++++ abs/absgn~.pd | 17 ++++++ examples/sigbinops+.pd | 99 ++++++++++++++++++---------------- src/absgn~.c | 140 +++++++++++++++++++++++++++++++++++++++++++++++++ src/z_zexy.c | 9 ++-- src/z_zexy.h | 9 ++-- 6 files changed, 252 insertions(+), 52 deletions(-) create mode 100644 abs/absgn~-help.pd create mode 100644 abs/absgn~.pd create mode 100644 src/absgn~.c diff --git a/abs/absgn~-help.pd b/abs/absgn~-help.pd new file mode 100644 index 0000000..4f718fd --- /dev/null +++ b/abs/absgn~-help.pd @@ -0,0 +1,30 @@ +#N canvas 201 257 575 388 10; +#X obj 92 103 sig~; +#X text 272 20 part of zexy; +#X obj 93 176 snapshot~; +#X floatatom 93 204 5 0 0 0 - - -; +#X obj 146 153 metro 100; +#X obj 146 131 tgl 15 0 empty empty empty 0 -6 0 8 -262144 -1 -1 1 +1; +#X floatatom 92 81 5 0 0 0 - - -; +#X text 16 243 note:; +#X text 41 285 performance is worse than with the external version +; +#X text 45 315 if you want to use the external version of this object +\, you have to load zexy; +#X text 43 255 this is the abstraction version of this object \, which +is using [expr~]; +#X obj 92 152 absgn~; +#X text 349 61 updated for zexy-2.1; +#X obj 163 176 snapshot~; +#X floatatom 163 202 5 0 0 0 - - -; +#X text 65 46 absolute value and signum of a signal; +#X connect 0 0 11 0; +#X connect 2 0 3 0; +#X connect 4 0 2 0; +#X connect 4 0 13 0; +#X connect 5 0 4 0; +#X connect 6 0 0 0; +#X connect 11 0 2 0; +#X connect 11 1 13 0; +#X connect 13 0 14 0; diff --git a/abs/absgn~.pd b/abs/absgn~.pd new file mode 100644 index 0000000..873bbab --- /dev/null +++ b/abs/absgn~.pd @@ -0,0 +1,17 @@ +#N canvas 0 0 482 310 10; +#X obj 30 88 inlet~; +#X 
obj 30 136 outlet~; +#X text 319 21 part of zexy; +#X obj 187 137 outlet~; +#X obj 31 113 abs~; +#X obj 187 113 sgn~; +#X text 36 44 [absgn~]; +#X text 250 261 updated for zexy-2.1; +#X text 41 183 [absgn~] \, [abs~] and [sgn~] are provided both as externals +(fast!) and abstractions (slow!!) \; if you are using this abstraction +of [absgn~] \, chances are high \, that [abs~] and [sgn~] are abstractions +too.; +#X connect 0 0 4 0; +#X connect 0 0 5 0; +#X connect 4 0 1 0; +#X connect 5 0 3 0; diff --git a/examples/sigbinops+.pd b/examples/sigbinops+.pd index 1223ffc..93bbf8d 100644 --- a/examples/sigbinops+.pd +++ b/examples/sigbinops+.pd @@ -1,61 +1,67 @@ #N canvas 470 149 594 490 10; #X obj 62 193 abs~; #X obj 120 194 sgn~; -#X obj 262 191 <~; -#X obj 296 190 ==~; -#X obj 330 191 >~; -#X obj 388 190 &&~; -#X obj 423 190 ||~; -#X floatatom 423 238 0 0 0 0 - - -; -#X floatatom 388 238 0 0 0 0 - - -; -#X floatatom 330 239 0 0 0 0 - - -; -#X floatatom 296 239 0 0 0 0 - - -; +#X obj 342 191 <~; +#X obj 376 190 ==~; +#X obj 410 191 >~; +#X obj 468 190 &&~; +#X obj 503 190 ||~; +#X floatatom 503 238 0 0 0 0 - - -; +#X floatatom 468 238 0 0 0 0 - - -; +#X floatatom 410 239 0 0 0 0 - - -; +#X floatatom 376 239 0 0 0 0 - - -; #X floatatom 120 242 0 0 0 0 - - -; #X floatatom 62 243 0 0 0 0 - - -; #X obj 62 221 avg~; #X obj 120 220 avg~; -#X floatatom 262 239 0 0 0 0 - - -; -#X obj 262 217 avg~; -#X obj 296 216 avg~; -#X obj 330 216 avg~; -#X obj 388 216 avg~; -#X obj 423 216 avg~; +#X floatatom 342 239 0 0 0 0 - - -; +#X obj 342 217 avg~; +#X obj 376 216 avg~; +#X obj 410 216 avg~; +#X obj 468 216 avg~; +#X obj 503 216 avg~; #X obj 62 163 sig~; #X floatatom 62 141 0 0 0 0 - - -; -#X obj 330 126 sig~; -#X floatatom 330 104 0 0 0 0 - - -; -#X obj 404 126 sig~; -#X floatatom 404 104 0 0 0 0 - - -; -#X floatatom 440 448 0 0 0 0 - - -; -#X floatatom 400 448 0 0 0 0 - - -; -#X floatatom 349 448 0 0 0 0 - - -; -#X floatatom 308 449 0 0 0 0 - - -; -#X floatatom 274 449 0 0 0 0 - - 
-; -#X obj 274 427 avg~; -#X obj 308 426 avg~; -#X obj 349 425 avg~; -#X obj 400 426 avg~; -#X obj 440 426 avg~; -#X obj 349 337 sig~; -#X floatatom 349 315 0 0 0 0 - - -; -#X floatatom 430 316 0 0 0 0 - - -; -#X obj 274 401 <~ 2; -#X obj 308 400 ==~ 2; -#X obj 349 400 >~ 2; -#X obj 400 400 &&~ 2; -#X obj 440 400 ||~ 2; +#X obj 410 126 sig~; +#X floatatom 410 104 0 0 0 0 - - -; +#X obj 484 126 sig~; +#X floatatom 484 104 0 0 0 0 - - -; +#X floatatom 520 448 0 0 0 0 - - -; +#X floatatom 480 448 0 0 0 0 - - -; +#X floatatom 429 448 0 0 0 0 - - -; +#X floatatom 388 449 0 0 0 0 - - -; +#X floatatom 354 449 0 0 0 0 - - -; +#X obj 354 427 avg~; +#X obj 388 426 avg~; +#X obj 429 425 avg~; +#X obj 480 426 avg~; +#X obj 520 426 avg~; +#X obj 429 337 sig~; +#X floatatom 429 315 0 0 0 0 - - -; +#X floatatom 510 316 0 0 0 0 - - -; +#X obj 354 401 <~ 2; +#X obj 388 400 ==~ 2; +#X obj 429 400 >~ 2; +#X obj 480 400 &&~ 2; +#X obj 520 400 ||~ 2; #X text 83 30 more math and binary operations for signals; #X text 28 263 absolute; #X text 50 278 value; #X text 112 263 signum; -#X text 249 267 greater; -#X text 330 268 less; -#X text 296 284 equal; -#X text 393 261 logical; -#X text 386 274 AND; -#X text 431 274 OR; +#X text 329 267 greater; +#X text 410 268 less; +#X text 376 284 equal; +#X text 473 261 logical; +#X text 466 274 AND; +#X text 511 274 OR; #X text 430 16 part of zexy; -#X text 72 58 (this patch might be very CPU-consumptious \, because +#X floatatom 230 242 0 0 0 0 - - -; +#X floatatom 192 243 0 0 0 0 - - -; +#X obj 192 221 avg~; +#X obj 230 220 avg~; +#X obj 192 193 absgn~; +#X text 196 267 (both); +#X text 72 58 (this help-patch might be very CPU-consumptious \, because of the float-atoms....); #X connect 0 0 13 0; #X connect 1 0 14 0; @@ -73,6 +79,7 @@ of the float-atoms....); #X connect 20 0 7 0; #X connect 21 0 0 0; #X connect 21 0 1 0; +#X connect 21 0 60 0; #X connect 22 0 21 0; #X connect 23 0 2 0; #X connect 23 0 3 0; @@ -106,3 +113,7 @@ of the 
float-atoms....); #X connect 42 0 34 0; #X connect 43 0 35 0; #X connect 44 0 36 0; +#X connect 58 0 57 0; +#X connect 59 0 56 0; +#X connect 60 0 58 0; +#X connect 60 1 59 0; diff --git a/src/absgn~.c b/src/absgn~.c new file mode 100644 index 0000000..1d3d4bd --- /dev/null +++ b/src/absgn~.c @@ -0,0 +1,140 @@ +/****************************************************** + * + * zexy - implementation file + * + * (c) 2006 Tim Blechmann + * + * + ****************************************************** + * + * license: GNU General Public License v.2 + * + ******************************************************/ + +#include "zexy.h" +/* per-object state of [absgn~] */ +typedef struct _absgn +{ + t_object x_obj; + t_float x_f; /* dummy float slot named in CLASS_MAINSIGNALIN (Pd documents this member as t_float) */ +} t_absgn; + + +/* ------------------------ sigABSGN~ ----------------------------- */ + +static t_class *sigABSGN_class; +/* scalar perform routine: w[1]=input, w[2]=output receiving fabsf(input), w[3]=output receiving the sign (1, -1 or 0), w[4]=number of samples; returns w+5 */ +static t_int *sigABSGN_perform(t_int *w) +{ + t_float *in = (t_float *)(w[1]); + t_float *out = (t_float *)(w[2]); + t_float *out2 = (t_float *)(w[3]); + int n = (int)(w[4]); + + while (n--) + { + t_float val = *in++; + *out++ = fabsf(val); + + if (val>0.) *out2++=1.; + else if (val<0.) 
*out2++=-1.; + else *out2++=0.; + } + + + return (w+5); +} +/* SSE variant of the perform routine; the two tables hold one 32bit mask per float lane (all bits but the sign bit / only the sign bit), so their element type has to be 32 bits wide for four entries to fill exactly one __m128 */ +#ifdef __SSE__ +static const unsigned int l_bitmask[] ={0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; +static const unsigned int l_sgnbitmask[]={0x80000000, 0x80000000, 0x80000000, 0x80000000}; +static t_int *sigABSGN_performSSE(t_int *w) +{ + __m128 *in = (__m128 *)(w[1]); + __m128 *out1 = (__m128 *)(w[2]); + __m128 *out2 = (__m128 *)(w[3]); + int n = (int)(w[4])>>3; + /* every pass of the loop below handles two __m128, i.e. 8 samples - hence the >>3 */ + const __m128 bitmask= _mm_loadu_ps((const float*)l_bitmask); + const __m128 sgnmask= _mm_loadu_ps((const float*)l_sgnbitmask); + const __m128 zero = _mm_setzero_ps(); + const __m128 one = _mm_set1_ps(1.f); + + do { + __m128 val, val2, xmm0, xmm1, xmm2, xmm3; + val=in[0]; + xmm0 = _mm_cmpneq_ps(val, zero); /* mask for non-zeros */ + xmm1 = _mm_and_ps (val, sgnmask);/* sign (without value) */ + xmm0 = _mm_and_ps (xmm0, one); /* (abs) value: (val==0.f)?0.f:1.f */ + out1[0]= _mm_and_ps (val, bitmask);/* abs: set sign-bit to "+" */ + out2[0]= _mm_or_ps (xmm1, xmm0); /* merge sign and value */ + + val2=in[1]; + xmm2 = _mm_cmpneq_ps(val2, zero); /* mask for non-zeros */ + xmm3 = _mm_and_ps (val2, sgnmask);/* sign (without value) */ + xmm2 = _mm_and_ps (xmm2, one); /* (abs) value: (val==0.f)?0.f:1.f */ + out1[1]= _mm_and_ps (val2, bitmask);/* abs: set sign-bit to "+" */ + out2[1]= _mm_or_ps (xmm3, xmm2); /* merge sign and value */ + + in +=2; + out1+=2; + out2+=2; + } + while (--n); + + return (w+5); +} +#endif /* __SSE__ */ +/* dsp method: sp[0] is used as input, sp[1] and sp[2] as outputs; adds the SSE routine to the DSP chain when the Z_SIMD_CHK* tests pass (macros from outside this file, presumably checking block size and vector alignment), otherwise the scalar routine */ +static void sigABSGN_dsp(t_absgn *x, t_signal **sp) +{ +#ifdef __SSE__ + if( + Z_SIMD_CHKBLOCKSIZE(sp[0]->s_n)&& + Z_SIMD_CHKALIGN(sp[0]->s_vec)&& + Z_SIMD_CHKALIGN(sp[1]->s_vec)&& + Z_SIMD_CHKALIGN(sp[2]->s_vec) + ) + { + dsp_add(sigABSGN_performSSE, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n); + } else +#endif + { + dsp_add(sigABSGN_perform, 4, sp[0]->s_vec, sp[1]->s_vec, sp[2]->s_vec, sp[0]->s_n); + } +} +/* help method: prints a two-line description of the object via post() */ +static void sigABSGN_helper(void) +{ + post("\n%c absgn~ \t\t:: absolute value and sign of a signal", HEARTSYMBOL); + post(" \t\t 
copyright (c) Tim Blechmann 2006"); +} +/* constructor: allocates the object, clears x_f and creates the two signal outlets */ +static void *sigABSGN_new(void) +{ + t_absgn *x = (t_absgn *)pd_new(sigABSGN_class); + x->x_f=0.f; + + outlet_new(&x->x_obj, gensym("signal")); + outlet_new(&x->x_obj, gensym("signal")); + + return (x); +} +/* creates the absgn~ class, declares x_f as its main signal inlet slot, attaches the dsp and help methods and sets the help symbol */ +void absgn_tilde_setup(void) +{ + sigABSGN_class = class_new(gensym("absgn~"), (t_newmethod)sigABSGN_new, 0, + sizeof(t_absgn), 0, A_DEFFLOAT, 0); + CLASS_MAINSIGNALIN(sigABSGN_class, t_absgn, x_f); + class_addmethod(sigABSGN_class, (t_method)sigABSGN_dsp, gensym("dsp"), 0); + + class_addmethod(sigABSGN_class, (t_method)sigABSGN_helper, gensym("help"), 0); + class_sethelpsymbol(sigABSGN_class, gensym("zexy/sigbinops+")); + + zexy_register("absgn~"); +} +/* wrapper around absgn_tilde_setup(); this is the name called from z_zexy_setup() in z_zexy.c */ +void z_absgn__setup(void) +{ + absgn_tilde_setup(); +} diff --git a/src/z_zexy.c b/src/z_zexy.c index f7c1dc3..e1693c2 100644 --- a/src/z_zexy.c +++ b/src/z_zexy.c @@ -8,14 +8,15 @@ void z_zexy_setup(void) { z_a2l_setup(); /* a2l.c */ + z_absgn__setup(); /* absgn~.c */ z_abs__setup(); /* abs~.c */ z_atoi_setup(); /* atoi.c */ z_avg__setup(); /* avg~.c */ z_blockmirror__setup(); /* blockmirror~.c */ z_blockswap__setup(); /* blockswap~.c */ z_date_setup(); /* date.c */ - z_demultiplex__setup(); /* demultiplex~.c */ z_demultiplex_setup(); /* demultiplex.c */ + z_demultiplex__setup(); /* demultiplex~.c */ z_dfreq__setup(); /* dfreq~.c */ z_dirac__setup(); /* dirac~.c */ z_dot_setup(); /* dot.c */ @@ -36,14 +37,14 @@ void z_zexy_setup(void) z_minmax_setup(); /* minmax.c */ z_msgfile_setup(); /* msgfile.c */ z_multiline__setup(); /* multiline~.c */ - z_multiplex__setup(); /* multiplex~.c */ z_multiplex_setup(); /* multiplex.c */ + z_multiplex__setup(); /* multiplex~.c */ z_niagara_setup(); /* niagara.c */ z_noish__setup(); /* noish~.c */ z_noisi__setup(); /* noisi~.c */ z_operating_system_setup(); /* operating_system.c */ - z_pack__setup(); /* pack~.c */ z_packel_setup(); /* packel.c */ + z_pack__setup(); /* pack~.c */ z_pdf__setup(); /* pdf~.c */ z_prime_setup(); /* 
prime.c */ z_quantize__setup(); /* quantize~.c */ @@ -70,7 +71,7 @@ void z_zexy_setup(void) z_urn_setup(); /* urn.c */ z_winNT_portio_setup(); /* winNT_portio.c */ z_wrap_setup(); /* wrap.c */ - z_z__setup(); /* z~.c */ z_z_sigbin_setup(); /* z_sigbin.c */ + z_z__setup(); /* z~.c */ } diff --git a/src/z_zexy.h b/src/z_zexy.h index 6e09612..db1419d 100644 --- a/src/z_zexy.h +++ b/src/z_zexy.h @@ -6,14 +6,15 @@ #ifndef Z_ZEXY_H__ #define Z_ZEXY_H__ void z_a2l_setup(void); /* a2l.c */ +void z_absgn__setup(void); /* absgn~.c */ void z_abs__setup(void); /* abs~.c */ void z_atoi_setup(void); /* atoi.c */ void z_avg__setup(void); /* avg~.c */ void z_blockmirror__setup(void); /* blockmirror~.c */ void z_blockswap__setup(void); /* blockswap~.c */ void z_date_setup(void); /* date.c */ -void z_demultiplex__setup(void); /* demultiplex~.c */ void z_demultiplex_setup(void); /* demultiplex.c */ +void z_demultiplex__setup(void); /* demultiplex~.c */ void z_dfreq__setup(void); /* dfreq~.c */ void z_dirac__setup(void); /* dirac~.c */ void z_dot_setup(void); /* dot.c */ @@ -34,14 +35,14 @@ void z_mavg_setup(void); /* mavg.c */ void z_minmax_setup(void); /* minmax.c */ void z_msgfile_setup(void); /* msgfile.c */ void z_multiline__setup(void); /* multiline~.c */ -void z_multiplex__setup(void); /* multiplex~.c */ void z_multiplex_setup(void); /* multiplex.c */ +void z_multiplex__setup(void); /* multiplex~.c */ void z_niagara_setup(void); /* niagara.c */ void z_noish__setup(void); /* noish~.c */ void z_noisi__setup(void); /* noisi~.c */ void z_operating_system_setup(void); /* operating_system.c */ -void z_pack__setup(void); /* pack~.c */ void z_packel_setup(void); /* packel.c */ +void z_pack__setup(void); /* pack~.c */ void z_pdf__setup(void); /* pdf~.c */ void z_prime_setup(void); /* prime.c */ void z_quantize__setup(void); /* quantize~.c */ @@ -68,7 +69,7 @@ void z_unpack__setup(void); /* unpack~.c */ void z_urn_setup(void); /* urn.c */ void z_winNT_portio_setup(void); /* 
winNT_portio.c */ void z_wrap_setup(void); /* wrap.c */ -void z_z__setup(void); /* z~.c */ void z_z_sigbin_setup(void); /* z_sigbin.c */ +void z_z__setup(void); /* z~.c */ #endif /* Z_ZEXY_H__ */ -- cgit v1.2.1