about summary refs log tree commit diff
path: root/src/zexySIMD.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/zexySIMD.h')
-rw-r--r-- src/zexySIMD.h 48
1 files changed, 32 insertions, 16 deletions
diff --git a/src/zexySIMD.h b/src/zexySIMD.h
index 893a857..a9048c0 100644
--- a/src/zexySIMD.h
+++ b/src/zexySIMD.h
@@ -17,10 +17,11 @@ typedef union {
* runs a check whether the SSE-optimized perform routine returns the same result as the generic routine
* if the results differ, the SSE-code is probably broken, so we should fallback to the generic code
*/
-static int zexy_testSSE(t_perfroutine genericperf, t_perfroutine sseperf, unsigned int numinchannels, unsigned int numoutchannels)
+static int zexy_testSSE(t_perfroutine genericperf, t_perfroutine sseperf,
+ unsigned int numinchannels, unsigned int numoutchannels)
{
-/* this currently only works with single input, single output */
-/* LATER make it work truely multichannel */
+ /* this currently only works with single input, single output */
+ /* LATER make it work truly multichannel */
if(1==numinchannels && 1==numoutchannels) {
t_int w1[4], w2[4];
t_sample4 in, in1[4], in2[4], out1[4], out2[4];
@@ -34,28 +35,43 @@ static int zexy_testSSE(t_perfroutine genericperf, t_perfroutine sseperf, unsign
in.f[1]=5.;
for(i=0; i<4; i++) {
- in1[i].f[0]=in.f[i]; in1[i].f[1]=in.f[i]; in1[i].f[3]=in.f[i]; in1[i].f[2]=in.f[i];
+ in1[i].f[0]=in.f[i];
+ in1[i].f[1]=in.f[i];
+ in1[i].f[3]=in.f[i];
+ in1[i].f[2]=in.f[i];
out1[i].f[0]=out1[i].f[1]=out1[i].f[2]=out1[i].f[3]=0.f;
- in2[i].f[0]=in.f[i]; in2[i].f[1]=in.f[i]; in2[i].f[3]=in.f[i]; in2[i].f[2]=in.f[i];
+ in2[i].f[0]=in.f[i];
+ in2[i].f[1]=in.f[i];
+ in2[i].f[3]=in.f[i];
+ in2[i].f[2]=in.f[i];
out2[i].f[0]=out2[i].f[1]=out2[i].f[2]=out2[i].f[3]=0.f;
}
- w1[0]=(t_int)0; w1[1]=(t_int)&in1; w1[2]=(t_int)&out1; w1[3]=(t_int)16; (*genericperf)(w1);
- w2[0]=(t_int)0; w2[1]=(t_int)&in2; w2[2]=(t_int)&out2; w2[3]=(t_int)16; (*sseperf)(w2);
+ w1[0]=(t_int)0;
+ w1[1]=(t_int)&in1;
+ w1[2]=(t_int)&out1;
+ w1[3]=(t_int)16;
+ (*genericperf)(w1);
+ w2[0]=(t_int)0;
+ w2[1]=(t_int)&in2;
+ w2[2]=(t_int)&out2;
+ w2[3]=(t_int)16;
+ (*sseperf)(w2);
for(i=0; i<4; i++) {
for(j=0; j<4; j++) {
- if(fabsf(out1[i].f[j]-out2[i].f[j])>1e-17) {
- z_verbose(2, "generic and optimized routines return different results: skipping optimization");
- z_verbose(2, "[%d,%d]: ((%f->%f)!=(%f->%f))",
- i, j,
- in1[i].f[j], out1[i].f[j],
- in2[i].f[j], out2[i].f[j]
- );
- return 0;
- }
+ if(fabsf(out1[i].f[j]-out2[i].f[j])>1e-17) {
+ z_verbose(2,
+ "generic and optimized routines return different results: skipping optimization");
+ z_verbose(2, "[%d,%d]: ((%f->%f)!=(%f->%f))",
+ i, j,
+ in1[i].f[j], out1[i].f[j],
+ in2[i].f[j], out2[i].f[j]
+ );
+ return 0;
+ }
}
}
} else {