aboutsummaryrefslogtreecommitdiff
path: root/src/sgn~.c
diff options
context:
space:
mode:
author: IOhannes m zmölnig <zmoelnig@users.sourceforge.net> 2005-12-21 17:15:41 +0000
committer: IOhannes m zmölnig <zmoelnig@users.sourceforge.net> 2005-12-21 17:15:41 +0000
commit: 9d7d02e7eb868ca1a054c0861c0cc7030553750d (patch)
tree: b2eb1ee84986445a9cee5bcda0b311da4133feea /src/sgn~.c
parent: f0f2e78feae74ba5ea2faf3516fdc0711cc7fe3d (diff)
only do 8x loop unrolling since it seems to be more efficient
(well, rather a joke: 4096 [abs~] need about 24% with 16x unrolling and about 23-24% with 8x unrolling)
svn path=/trunk/externals/zexy/; revision=4276
Diffstat (limited to 'src/sgn~.c')
-rw-r--r-- src/sgn~.c 18
1 file changed, 3 insertions, 15 deletions
diff --git a/src/sgn~.c b/src/sgn~.c
index b390144..a18c539 100644
--- a/src/sgn~.c
+++ b/src/sgn~.c
@@ -79,7 +79,7 @@ static t_int *sgnTilde_performSSE(t_int *w)
__m128 *out = (__m128 *)(w[2]);
__m128 val;
- int n = (int)(w[3])>>4; // yea, we do 16x loop-unrolling
+ int n = (int)(w[3])>>3; // we do 8x loop-unrolling
const __m128 sgnmask= _mm_loadu_ps((float*)l_bitmask);
const __m128 zero = _mm_setzero_ps();
@@ -101,20 +101,8 @@ static t_int *sgnTilde_performSSE(t_int *w)
xmm0 = _mm_and_ps (xmm0, one);
out[1]= _mm_or_ps (xmm1, xmm0);
- val=in[2];
- xmm0 = _mm_cmpneq_ps(val , zero);
- xmm1 = _mm_and_ps (val, sgnmask);
- xmm0 = _mm_and_ps (xmm0, one);
- out[2]= _mm_or_ps (xmm1, xmm0);
-
- val=in[3];
- xmm0 = _mm_cmpneq_ps(val , zero);
- xmm1 = _mm_and_ps (val, sgnmask);
- xmm0 = _mm_and_ps (xmm0, one);
- out[3]= _mm_or_ps (xmm1, xmm0);
-
- in +=4;
- out+=4;
+ in +=2;
+ out+=2;
}
return (w+4);