From a0e797de4a660aa2af4b309fe258fb287e3d38e1 Mon Sep 17 00:00:00 2001
From: Tim Blechmann <timblech@users.sourceforge.net>
Date: Sat, 8 Jan 2005 22:13:53 +0000
Subject: volctl~: write zeros directly for a zero factor (x * 0 = 0); count ramps in dsp ticks

svn path=/trunk/externals/tb/; revision=2482
---
 volctl~/volctl~.c | 319 ++++++++++++++++++++++++++----------------------------
 1 file changed, 151 insertions(+), 168 deletions(-)

diff --git a/volctl~/volctl~.c b/volctl~/volctl~.c
index 60abfac..1a9e451 100644
--- a/volctl~/volctl~.c
+++ b/volctl~/volctl~.c
@@ -39,14 +39,16 @@ typedef struct _volctl
 
     t_float x_h; //interpolation time
     t_float x_value; //current factor
+    t_float x_target; //target factor
     
-    int x_ticksleft; //ticks to go
+    int x_ticksleft; //dsp ticks to go
     t_float x_samples_per_ms; //ms per sample
     t_float x_slope; //slope
 	t_float * x_slopes; //slopes for simd
 	t_float x_slope_step;
     int x_line; 
-
+	int x_blocksize;
+	t_float x_1overblocksize;
 } t_volctl;
 
 void *volctl_new(t_symbol *s, int argc, t_atom *argv)
@@ -63,6 +65,8 @@ void *volctl_new(t_symbol *s, int argc, t_atom *argv)
     x->x_h = atom_getfloatarg(1, argc, argv);
 
     x->x_samples_per_ms = 44100.f / 1000.f; // assume default samplerate
+	x->x_blocksize = 64;
+	x->x_1overblocksize = 1.f/64.f;
 
     outlet_new(&x->x_obj, &s_signal);
     x->x_f = 0;
@@ -77,122 +81,97 @@ static void volctl_free(t_volctl *x)
 	freealignedbytes(x->x_slopes, 4*sizeof(t_float));
 }
 
-
-t_int *volctl_perform(t_int *w)
+static t_int *volctl_perform(t_int *w) /* scalar perform routine: w[1] = object, w[2] = input, w[3] = output, w[4] = sample count */
 {
     t_volctl * x = (t_volctl *)(w[1]);
     t_float *in = (t_float *)(w[2]);
     t_float *out = (t_float *)(w[3]);
     int n = (int)(w[4]);
     
-    t_float f = x->x_value;
 
     if (x->x_ticksleft)
     {
+		t_float f = x->x_value; /* ramp pending: continue from the factor stored by the previous call */
 		t_float x_slope = x->x_slope;
-		if (x->x_ticksleft < n)
-		{
-			int remain = x->x_ticksleft;
-			n-=remain;
-			while (remain--)
-			{
-				f+=x_slope;
-				*out++ = *in++ * f;
-			}
-			while (n--)
-			{
-				*out++ = *in++ * f;
-			}
-			x->x_value = f;
-			x->x_ticksleft = 0;
-		}
-		else
+		/* x_ticksleft is decremented once per call, so it counts calls, not samples */
+		x->x_ticksleft--;
+		while (n--)
 		{
-			x->x_ticksleft -=n;
-			while (n--)
-			{
-				f+=x_slope;
-				*out++ = *in++ * f;
-			}
-			x->x_value = f;
+			f+=x_slope; /* step the factor before scaling the sample */
+			*out++ = *in++ * f;
 		}
+		x->x_value = f; /* remember where the ramp stopped */
     }
     else
+	{
+		t_float f = x->x_target; /* no ramp pending: hold the target factor; NOTE(review): the visible hunks assign x_target only in volctl_set -- confirm it is initialised at creation */
 		while (n--) *out++ = *in++ * f; 
+	}
 	
     return (w+5);
 }
     
 
-t_int *volctl_perf8(t_int *w)
+static t_int *volctl_perf8(t_int *w) /* perform routine unrolled by 8: w[1] = object, w[2] = input, w[3] = output, w[4] = sample count */
 {
     t_volctl * x = (t_volctl *)(w[1]);
     t_float *in = (t_float *)(w[2]);
     t_float *out = (t_float *)(w[3]);
     int n = (int)(w[4]);
 
-    t_float f = x->x_value;
-
     if (x->x_ticksleft)
     {
+		t_float f = x->x_value; /* ramp pending: continue from the stored factor */
+
 		t_float x_slope = x->x_slope;
-		if (x->x_ticksleft < n)
+		x->x_ticksleft--; /* one tick is consumed per call */
+		n = n>>3; /* eight samples are handled per loop pass */
+		while (n--)
 		{
-			int remain = x->x_ticksleft;
-			n-=remain;
-			while (remain--)
-			{
-				*out++ = *in++ * f;
-				f+=x_slope;
-			}
-			while (n--)
+			*out++ = *in++ * f; /* unlike volctl_perform, the sample is scaled before the factor is stepped */
+			f+=x_slope;
+			*out++ = *in++ * f;
+			f+=x_slope;
+			*out++ = *in++ * f;
+			f+=x_slope;
+			*out++ = *in++ * f;
+			f+=x_slope;
+			*out++ = *in++ * f;
+			f+=x_slope;
+			*out++ = *in++ * f;
+			f+=x_slope;
+			*out++ = *in++ * f;
+			f+=x_slope;
+			*out++ = *in++ * f;
+			f+=x_slope;
+		}
+		x->x_value = f; /* remember where the ramp stopped */
+    }
+    else
+    {
+		t_float f = x->x_target; /* no ramp pending: hold the target factor */
+
+		if (f) /* non-zero factor: plain multiply */
+			for (; n; n -= 8, in += 8, out += 8) /* ends only when n is a multiple of 8 -- presumably guaranteed by volctl_dsp; TODO confirm */
 			{
-				*out++ = *in++ * f;
+				t_float f0 = in[0], f1 = in[1], f2 = in[2], f3 = in[3];
+				t_float f4 = in[4], f5 = in[5], f6 = in[6], f7 = in[7];
+				
+				out[0] = f0 * f; out[1] = f1 * f; out[2] = f2 * f; out[3] = f3 * f;
+				out[4] = f4 * f; out[5] = f5 * f; out[6] = f6 * f; out[7] = f7 * f;
 			}
-			x->x_value = f;
-			x->x_ticksleft = 0;
-		}
 		else
-		{
-			x->x_ticksleft -= n;
-			n = n>>3;
-			while (n--)
+			for (; n; n -= 8, in += 8, out += 8) /* zero factor: write zeros, the input is never read */
 			{
-				*out++ = *in++ * f;
-				f+=x_slope;
-				*out++ = *in++ * f;
-				f+=x_slope;
-				*out++ = *in++ * f;
-				f+=x_slope;
-				*out++ = *in++ * f;
-				f+=x_slope;
-				*out++ = *in++ * f;
-				f+=x_slope;
-				*out++ = *in++ * f;
-				f+=x_slope;
-				*out++ = *in++ * f;
-				f+=x_slope;
-				*out++ = *in++ * f;
-				f+=x_slope;
+				out[0] = 0; out[1] = 0; out[2] = 0; out[3] = 0;
+				out[4] = 0; out[5] = 0; out[6] = 0; out[7] = 0;
 			}
-			x->x_value = f;
-		}
-    }
-    else
-    {
-		for (; n; n -= 8, in += 8, out += 8)
-		{
-			t_float f0 = in[0], f1 = in[1], f2 = in[2], f3 = in[3];
-			t_float f4 = in[4], f5 = in[5], f6 = in[6], f7 = in[7];
-	    
-			out[0] = f0 * f; out[1] = f1 * f; out[2] = f2 * f; out[3] = f3 * f;
-			out[4] = f4 * f; out[5] = f5 * f; out[6] = f6 * f; out[7] = f7 * f;
-		}
+
     }
     return (w+5);
 }
 
-t_int *volctl_perf_simd(t_int *w)
+static t_int *volctl_perf_simd(t_int *w) /* SSE inline-asm perform routine; each asm loop pass handles 16 samples */
 {
     t_volctl * x = (t_volctl *)(w[1]);
     t_float *in = (t_float *)(w[2]);
@@ -202,116 +181,115 @@ t_int *volctl_perf_simd(t_int *w)
     {
 		int n = (int)(w[4]);
 	
-		t_float x_slope = x->x_slope;
-		if (x->x_ticksleft < n)
-		{
-			t_float f = x->x_value;
+		x->x_ticksleft--; /* one tick is consumed per call */
+		/* NOTE(review): %0, %1 and %2 are listed as input-only operands yet addl/shrl/loop change them, and the stores through %1 have no "memory" clobber -- check against GCC's extended-asm rules */
+		asm(
+			".set T_FLOAT,4                          \n"
+			"movss     (%3),%%xmm0                   \n" /* value */
+			"shufps    $0, %%xmm0, %%xmm0            \n" /* copy value into all four lanes */
+			"movaps    (%4), %%xmm1                  \n" /* x_slopes */
+			"addps     %%xmm0, %%xmm1                \n" /* xmm1 = value + x_slopes[0..3] */
 			
-			int remain = x->x_ticksleft;
-			n-=remain;
-			while (remain--)
-			{
-				*out++ = *in++ * f;
-				f+=x_slope;
-			}
-			while (n--)
-			{
-				*out++ = *in++ * f;
-			}
-			x->x_value = f;
-			x->x_ticksleft = 0;
-		}
-		else
-		{
-			x->x_ticksleft -= n;
+			"movss     (%5), %%xmm0                  \n"
+			"shufps    $0, %%xmm0, %%xmm0            \n" /* x_slope_step */
+			
+			"shrl      $4, %2                        \n" /* n>>4 */
+			
+			"1:                                      \n" /* each pass scales 16 samples */
+			"movaps    (%0), %%xmm2                  \n"
+			"mulps     %%xmm1, %%xmm2                \n"
+			"movaps    %%xmm2, (%1)                  \n"
+			"addps     %%xmm0, %%xmm1                \n" /* advance the four factors by x_slope_step */
 			
+			"movaps    4*T_FLOAT(%0), %%xmm2         \n"
+			"mulps     %%xmm1, %%xmm2                \n"
+			"movaps    %%xmm2, 4*T_FLOAT(%1)         \n"
+			"addps     %%xmm0, %%xmm1                \n"
+			
+			"movaps    8*T_FLOAT(%0), %%xmm2         \n"
+			"mulps     %%xmm1, %%xmm2                \n"
+			"movaps    %%xmm2, 8*T_FLOAT(%1)         \n"
+			"addps     %%xmm0, %%xmm1                \n"
+			
+			"movaps    12*T_FLOAT(%0), %%xmm2        \n"
+			"mulps     %%xmm1, %%xmm2                \n"
+			"movaps    %%xmm2, 12*T_FLOAT(%1)        \n"
+			"addps     %%xmm0, %%xmm1                \n" /* one instr. obsolete */
+			
+			"addl      $16*T_FLOAT, %0               \n"
+			"addl      $16*T_FLOAT, %1               \n"
+			"loop      1b                            \n"
+			
+			:
+			:"r"(in), "r"(out), "c"(n), "r"(&(t_float)(x->x_value)),
+			"r"((t_float*)x->x_slopes), "r"(&(t_float)(x->x_slope_step))
+			:"%xmm0", "%xmm1", "%xmm2");
+		/* NOTE(review): n is read below although its register (ecx) was shifted and counted down inside the asm -- confirm it still holds w[4] here */
+		x->x_value += n*(x->x_slope); /* advance the stored factor by n slope steps */
+	}
+    else
+    {
+		if(x->x_target) /* no ramp pending, non-zero target: multiply by the constant factor */
 			asm(
 				".set T_FLOAT,4                          \n"
-				"movss     (%3),%%xmm0                   \n" /* value */
+			
+				"movss     (%3), %%xmm0                  \n" /* x_target */
 				"shufps    $0, %%xmm0, %%xmm0            \n"
-				"movaps    (%4), %%xmm1                  \n" /* x_slopes */
-				"addps     %%xmm0, %%xmm1                \n"
-
-				"movss     (%5), %%xmm0                  \n"
-				"shufps    $0, %%xmm0, %%xmm0            \n" /* x_slope_step */
-
-				"shrl      $4, %2                        \n" /* n>>4 */
-				
+				"shrl      $4, %2                        \n" /* n>>4 */
+			
 				"1:                                      \n"
-				"movaps    (%0), %%xmm2                  \n"
-				"mulps     %%xmm1, %%xmm2                \n"
-				"movaps    %%xmm2, (%1)                  \n"
-				"addps     %%xmm0, %%xmm1                \n"
-
+				"movaps    (%0), %%xmm1                  \n"
+				"mulps     %%xmm0, %%xmm1                \n"
+				"movaps    %%xmm1, (%1)                  \n" 
 				"movaps    4*T_FLOAT(%0), %%xmm2         \n"
-				"mulps     %%xmm1, %%xmm2                \n"
+				"mulps     %%xmm0, %%xmm2                \n"
 				"movaps    %%xmm2, 4*T_FLOAT(%1)         \n"
-				"addps     %%xmm0, %%xmm1                \n"
-
-				"movaps    8*T_FLOAT(%0), %%xmm2         \n"
-				"mulps     %%xmm1, %%xmm2                \n"
-				"movaps    %%xmm2, 8*T_FLOAT(%1)         \n"
-				"addps     %%xmm0, %%xmm1                \n"
-
-				"movaps    12*T_FLOAT(%0), %%xmm2        \n"
-				"mulps     %%xmm1, %%xmm2                \n"
-				"movaps    %%xmm2, 12*T_FLOAT(%1)        \n"
-				"addps     %%xmm0, %%xmm1                \n" /* one instr. obsolete */
-
+				"movaps    8*T_FLOAT(%0), %%xmm3         \n"
+				"mulps     %%xmm0, %%xmm3                \n"
+				"movaps    %%xmm3, 8*T_FLOAT(%1)         \n"
+				"movaps    12*T_FLOAT(%0), %%xmm4        \n"
+				"mulps     %%xmm0, %%xmm4                \n"
+				"movaps    %%xmm4, 12*T_FLOAT(%1)        \n"
 				"addl      $16*T_FLOAT, %0               \n"
 				"addl      $16*T_FLOAT, %1               \n"
 				"loop      1b                            \n"
-
 				:
-				:"r"(in), "r"(out), "c"(n), "r"(&(t_float)(x->x_value)),
-				"r"((t_float*)x->x_slopes), "r"(&(t_float)(x->x_slope_step))
-				:"%xmm0", "%xmm1", "%xmm2");
+				: "r"(in), "r"(out),
+				"c"(w[4]),"r"(&(t_float)(x->x_target))
+				: "%xmm0", "%xmm1","%xmm2","%xmm3","%xmm4");
+		else /* zero target: fill the output with zeros, the input is never read */
+			asm(
+				".set T_FLOAT,4                          \n"
 			
-/* 			post("value %f", x->x_value); */
-			x->x_value += n*(x->x_slope);
-		}
-    }
-    else
-    {
-		asm(
-			".set T_FLOAT,4                          \n"
+				"xorps     %%xmm0, %%xmm0                \n" /* xmm0 = 0 */
+				"shrl      $4, %1                        \n" /* n>>4 */
 			
-			"movss     (%3), %%xmm0                  \n"
-			"shufps    $0, %%xmm0, %%xmm0            \n"
-			"shrl      $4, %2                        \n"
+				"1:                                      \n"
+				"movaps    %%xmm0, (%0)                  \n" /* store 16 zero samples per pass */
+				"movaps    %%xmm0, 4*T_FLOAT(%0)         \n"
+				"movaps    %%xmm0, 8*T_FLOAT(%0)         \n"
+				"movaps    %%xmm0, 12*T_FLOAT(%0)        \n"
+				"addl      $16*T_FLOAT, %0               \n"
+				"loop      1b                            \n"
+				:
+				:"r"(out),
+				"c"(w[4])
+				: "%xmm0");
 			
-			"volctl_loop:                            \n"
-			"movaps    (%0), %%xmm1                  \n"
-			"mulps     %%xmm0, %%xmm1                \n"
-			"movaps    %%xmm1, (%1)                  \n" 
-			"movaps    4*T_FLOAT(%0), %%xmm2         \n"
-			"mulps     %%xmm0, %%xmm2                \n"
-			"movaps    %%xmm2, 4*T_FLOAT(%1)         \n"
-			"movaps    8*T_FLOAT(%0), %%xmm3         \n"
-			"mulps     %%xmm0, %%xmm3                \n"
-			"movaps    %%xmm3, 8*T_FLOAT(%1)         \n"
-			"movaps    12*T_FLOAT(%0), %%xmm4        \n"
-			"mulps     %%xmm0, %%xmm4                \n"
-			"movaps    %%xmm4, 12*T_FLOAT(%1)        \n"
-			"addl      $16*T_FLOAT, %0               \n"
-			"addl      $16*T_FLOAT, %1               \n"
-			"loop      volctl_loop                   \n"
-			:
-			: "r"(in), "r"(out),
-			"c"(w[4]),"r"(&(t_float)(x->x_value))
-			: "%xmm0", "%xmm1","%xmm2","%xmm3","%xmm4");
     }
     return (w+5);
 }
 
 
-void volctl_set(t_volctl *x, t_float f)
+static void volctl_set(t_volctl *x, t_float f) /* start a ramp from x_value to f lasting about x_h ms, rounded up to whole blocks */
 {
 	t_float slope;
 	int i;
-	
-    x->x_ticksleft = x->x_h * x->x_samples_per_ms;
-    slope = (f - x->x_value) / x->x_ticksleft;
+	int samplesleft = x->x_h > 0 ? (int)(x->x_h * x->x_samples_per_ms) : 0; /* a negative time is treated as 0 so the counts below never go negative */
+	samplesleft += x->x_blocksize - ( samplesleft & (x->x_blocksize - 1)); /* next block multiple above samplesleft (mask needs a power-of-two blocksize); never 0 */
+	x->x_ticksleft = samplesleft / x->x_blocksize; /* whole blocks the ramp lasts; integer division avoids the float round-trip */
+	/* per-sample increment that reaches f after samplesleft samples */
+    slope = (f - x->x_value) / samplesleft;
     x->x_slope = slope;
 	
 	for (i = 0; i != 4; ++i)
@@ -319,9 +297,11 @@ void volctl_set(t_volctl *x, t_float f)
 		x->x_slopes[i] = i*slope;
 	}
 	x->x_slope_step = 4*slope;
+	/* the perform routines hold this factor once x_ticksleft has reached 0 */
+	x->x_target = f;
 }
 
-void volctl_dsp(t_volctl *x, t_signal **sp)
+static void volctl_dsp(t_volctl *x, t_signal **sp) /* dsp method: schedules a perform routine and caches block size and sample rate */
 {
     const int n = sp[0]->s_n;
     if (n&7)
@@ -333,7 +313,10 @@ void volctl_dsp(t_volctl *x, t_signal **sp)
 		else
 			dsp_add(volctl_perf8, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
     }
-    x->x_samples_per_ms = sp[0]->s_sr / 1000.f;
+	/* cache the signal parameters that volctl_set uses to size a ramp */
+	x->x_blocksize = n; /* block size taken from the first signal */
+    x->x_1overblocksize = 1./n; /* reciprocal of the block size */
+	x->x_samples_per_ms = sp[0]->s_sr / 1000.f; /* sample rate divided by 1000 */
 }
 
 void volctl_tilde_setup(void)
-- 
cgit v1.2.1