From 08f5624a15d0064acc4ea985e6f9e5f8df845bce Mon Sep 17 00:00:00 2001
From: Tim Blechmann <timblech@users.sourceforge.net>
Date: Sat, 4 Sep 2004 21:27:12 +0000
Subject: This commit was generated by cvs2svn to compensate for changes in
 r2003, which included commits to RCS files with non-trunk default branches.

svn path=/trunk/externals/tb/; revision=2004
---
 volctl~/volctl~.c | 274 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 274 insertions(+)
 create mode 100644 volctl~/volctl~.c

(limited to 'volctl~/volctl~.c')

diff --git a/volctl~/volctl~.c b/volctl~/volctl~.c
new file mode 100644
index 0000000..9ef96de
--- /dev/null
+++ b/volctl~/volctl~.c
@@ -0,0 +1,274 @@
+/* Copyright (c) 2004 Tim Blechmann.
+ *For information on usage and redistribution, and for a DISCLAIMER OF ALL
+ *WARRANTIES, see the file, "gpl.txt" in this distribution.
+ *
+ *This program is free software; you can redistribute it and/or
+ *modify it under the terms of the GNU General Public License
+ *as published by the Free Software Foundation; either version 2
+ *of the License, or (at your option) any later version.
+ *
+ *See file LICENSE for further informations on licensing terms.
+ *
+ *This program is distributed in the hope that it will be useful,
+ *but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *GNU General Public License for more details.
+ *
+ *You should have received a copy of the GNU General Public License
+ *along with this program; if not, write to the Free Software
+ *Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ *Based on PureData by Miller Puckette and others.
+ *
+ *  coded while listening to: Julien Ottavi: Nervure Magnetique
+ *                                                                    */
+
+
+#include "m_pd.h"
+
+#include "m_simd.h"
+
+
+/* ----------------------------- volctl ----------------------------- */
+
+static t_class *volctl_class;
+
+typedef struct _volctl
+{
+    t_object x_obj;
+    float x_f;
+
+    float x_h; //interpolation time
+    float x_value; //current factor
+    
+    int x_ticksleft; //ticks to go
+    float x_mspersample; //ms per sample
+    float x_slope; //slope
+
+    int x_line; 
+
+} t_volctl;
+
+void *volctl_new(t_symbol *s, int argc, t_atom *argv)
+{
+    if (argc > 2) post("volctl~: extra arguments ignored");
+
+    t_volctl *x = (t_volctl *)pd_new(volctl_class);
+    inlet_new(&x->x_obj, &x->x_obj.ob_pd, gensym("float"), gensym("f1"));
+    inlet_settip(x->x_obj.ob_inlet,gensym("factor"));
+    x->x_value = atom_getfloatarg(0, argc, argv);
+    
+    t_inlet * time = floatinlet_new(&x->x_obj, &x->x_h);
+    inlet_settip(time,gensym("interpolation_time"));
+    x->x_h = atom_getfloatarg(1, argc, argv);
+
+    x->x_mspersample = 1000.f / 44100; // assume default samplerate
+
+    outlet_new(&x->x_obj, &s_signal);
+    x->x_f = 0;
+    return (x);
+}
+
+t_int *volctl_perform(t_int *w)
+{
+    t_volctl * x = (t_volctl *)(w[1]);
+    t_float *in = (t_float *)(w[2]);
+    t_float *out = (t_float *)(w[3]);
+    int n = (int)(w[4]);
+    
+    float f = x->x_value;
+
+    if (x->x_ticksleft)
+    {
+	float x_slope = x->x_slope;
+	if (x->x_ticksleft < n)
+	{
+	    int remain = x->x_ticksleft;
+	    n-=remain;
+	    while (remain--)
+	    {
+		f+=x_slope;
+		*out++ = *in++ * f;
+	    }
+	    while (n--)
+	    {
+		*out++ = *in++ * f;
+	    }
+	    x->x_value = f;
+	    x->x_ticksleft = 0;
+	}
+	else
+	{
+	    x->x_ticksleft -=n;
+	    while (n--)
+	    {
+		f+=x_slope;
+		*out++ = *in++ * f;
+	    }
+	    x->x_value = f;
+	}
+    }
+    else
+	while (n--) *out++ = *in++ * f; 
+	
+    return (w+5);
+}
+    
+
+t_int *volctl_perf8(t_int *w)
+{
+    t_volctl * x = (t_volctl *)(w[1]);
+    t_float *in = (t_float *)(w[2]);
+    t_float *out = (t_float *)(w[3]);
+    int n = (int)(w[4]);
+
+    float f = x->x_value;
+
+    if (x->x_ticksleft)
+    {
+	float x_slope = x->x_slope;
+	if (x->x_ticksleft < n)
+	{
+	    int remain = x->x_ticksleft;
+	    n-=remain;
+	    while (remain--)
+	    {
+		*out++ = *in++ * f;
+		f+=x_slope;
+	    }
+	    while (n--)
+	    {
+		*out++ = *in++ * f;
+	    }
+	    x->x_value = f;
+	    x->x_ticksleft = 0;
+	}
+	else
+	{
+	    x->x_ticksleft -= n;
+	    while (n--)
+	    {
+		*out++ = *in++ * f;
+		f+=x_slope;
+	    }
+	    x->x_value = f;
+	}
+    }
+    else
+    {
+	for (; n; n -= 8, in += 8, out += 8)
+	{
+	    float f0 = in[0], f1 = in[1], f2 = in[2], f3 = in[3];
+	    float f4 = in[4], f5 = in[5], f6 = in[6], f7 = in[7];
+	    
+	    out[0] = f0 * f; out[1] = f1 * f; out[2] = f2 * f; out[3] = f3 * f;
+	    out[4] = f4 * f; out[5] = f5 * f; out[6] = f6 * f; out[7] = f7 * f;
+	}
+    }
+    return (w+5);
+}
+
+t_int *volctl_perf_simd(t_int *w)
+{
+    t_volctl * x = (t_volctl *)(w[1]);
+    t_float *in = (t_float *)(w[2]);
+    t_float *out = (t_float *)(w[3]);
+
+    if (x->x_ticksleft)
+    {
+	int n = (int)(w[4]);
+	
+	float f = x->x_value;
+
+	float x_slope = x->x_slope;
+	if (x->x_ticksleft < n)
+	{
+	    int remain = x->x_ticksleft;
+	    n-=remain;
+	    while (remain--)
+	    {
+		*out++ = *in++ * f;
+		f+=x_slope;
+	    }
+	    while (n--)
+	    {
+		*out++ = *in++ * f;
+	    }
+	    x->x_value = f;
+	    x->x_ticksleft = 0;
+	}
+	else
+	{
+	    x->x_ticksleft -= n;
+	    while (n--)
+	    {
+		*out++ = *in++ * f;
+		f+=x_slope;
+	    }
+	    x->x_value = f;
+	}
+    }
+    else
+    {
+	asm(
+	    ".set T_FLOAT,4                            \n"
+
+	    "movss     (%3), %%xmm0                   \n"
+	    "shufps    $0, %%xmm0, %%xmm0                \n"
+	    "shrl      $4, %2                        \n"
+  
+	    "volctl_loop:                              \n"
+	    "movaps    (%0), %%xmm1                   \n"
+	    "mulps     %%xmm0, %%xmm1                    \n"
+	    "movaps    %%xmm1, (%1)                   \n" 
+	    "movaps    4*T_FLOAT(%0), %%xmm2          \n"
+	    "mulps     %%xmm0, %%xmm2                    \n"
+	    "movaps    %%xmm2, 4*T_FLOAT(%1)          \n"
+	    "movaps    8*T_FLOAT(%0), %%xmm3          \n"
+	    "mulps     %%xmm0, %%xmm3                    \n"
+	    "movaps    %%xmm3, 8*T_FLOAT(%1)          \n"
+	    "movaps    12*T_FLOAT(%0), %%xmm4         \n"
+	    "mulps     %%xmm0, %%xmm4                    \n"
+	    "movaps    %%xmm4, 12*T_FLOAT(%1)         \n"
+	    "addl      $64, %0                       \n"
+	    "addl      $64, %1                       \n"
+	    "loop      volctl_loop                     \n"
+	    :
+	    : "r"(in), "r"(out),
+	    "a"(w[4]),"r"(&(x->x_value))
+	    : "%xmm0","%xmm1","%xmm2","%xmm3","%xmm4");
+    }
+    return (w+5);
+}
+
+
+void volctl_set(t_volctl *x, t_float f)
+{
+    x->x_ticksleft = x->x_h / x->x_mspersample;
+    x->x_slope = (f - x->x_value) / x->x_ticksleft;
+}
+
+void volctl_dsp(t_volctl *x, t_signal **sp)
+{
+    const int n = sp[0]->s_n;
+    if (n&7)
+    	dsp_add(volctl_perform, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
+    else 
+    {
+	if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
+	    dsp_add(volctl_perf_simd, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
+	else
+	dsp_add(volctl_perf8, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
+    }
+    x->x_mspersample = 1000.f / sp[0]->s_sr;
+}
+
+void volctl_tilde_setup(void)
+{
+    volctl_class = class_new(gensym("volctl~"), (t_newmethod)volctl_new, 0,
+			     sizeof(t_volctl), 0, A_GIMME, 0);
+    CLASS_MAINSIGNALIN(volctl_class, t_volctl, x_f);
+    class_addmethod(volctl_class, (t_method)volctl_dsp, gensym("dsp"), 0);
+    class_addmethod(volctl_class, (t_method)volctl_set, gensym("f1"),A_FLOAT,0);
+    class_settip(volctl_class,gensym("signal"));
+}
-- 
cgit v1.2.1


From 2f98df88850ab893c7acf8ea2b9000c03c2e17da Mon Sep 17 00:00:00 2001
From: Tim Blechmann <timblech@users.sourceforge.net>
Date: Tue, 28 Dec 2004 15:48:19 +0000
Subject: simd-optimized ramp

svn path=/trunk/externals/tb/; revision=2435
---
 volctl~/volctl~.c | 381 ++++++++++++++++++++++++++++++++----------------------
 1 file changed, 227 insertions(+), 154 deletions(-)

(limited to 'volctl~/volctl~.c')

diff --git a/volctl~/volctl~.c b/volctl~/volctl~.c
index 9ef96de..60abfac 100644
--- a/volctl~/volctl~.c
+++ b/volctl~/volctl~.c
@@ -1,31 +1,30 @@
 /* Copyright (c) 2004 Tim Blechmann.
- *For information on usage and redistribution, and for a DISCLAIMER OF ALL
- *WARRANTIES, see the file, "gpl.txt" in this distribution.
+ * For information on usage and redistribution, and for a DISCLAIMER OF ALL
+ * WARRANTIES, see the file, "gpl.txt" in this distribution.
  *
- *This program is free software; you can redistribute it and/or
- *modify it under the terms of the GNU General Public License
- *as published by the Free Software Foundation; either version 2
- *of the License, or (at your option) any later version.
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
  *
- *See file LICENSE for further informations on licensing terms.
+ * See file LICENSE for further informations on licensing terms.
  *
- *This program is distributed in the hope that it will be useful,
- *but WITHOUT ANY WARRANTY; without even the implied warranty of
- *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *GNU General Public License for more details.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
  *
- *You should have received a copy of the GNU General Public License
- *along with this program; if not, write to the Free Software
- *Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  *
- *Based on PureData by Miller Puckette and others.
+ * Based on PureData by Miller Puckette and others.
  *
- *  coded while listening to: Julien Ottavi: Nervure Magnetique
+ * coded while listening to: Julien Ottavi: Nervure Magnetique
  *                                                                    */
 
 
 #include "m_pd.h"
-
 #include "m_simd.h"
 
 
@@ -36,15 +35,16 @@ static t_class *volctl_class;
 typedef struct _volctl
 {
     t_object x_obj;
-    float x_f;
+    t_float x_f;
 
-    float x_h; //interpolation time
-    float x_value; //current factor
+    t_float x_h; //interpolation time
+    t_float x_value; //current factor
     
     int x_ticksleft; //ticks to go
-    float x_mspersample; //ms per sample
-    float x_slope; //slope
-
+    t_float x_samples_per_ms; //ms per sample
+    t_float x_slope; //slope
+	t_float * x_slopes; //slopes for simd
+	t_float x_slope_step;
     int x_line; 
 
 } t_volctl;
@@ -54,7 +54,7 @@ void *volctl_new(t_symbol *s, int argc, t_atom *argv)
     if (argc > 2) post("volctl~: extra arguments ignored");
 
     t_volctl *x = (t_volctl *)pd_new(volctl_class);
-    inlet_new(&x->x_obj, &x->x_obj.ob_pd, gensym("float"), gensym("f1"));
+    inlet_new(&x->x_obj, &x->x_obj.ob_pd, &s_float, gensym("f1"));
     inlet_settip(x->x_obj.ob_inlet,gensym("factor"));
     x->x_value = atom_getfloatarg(0, argc, argv);
     
@@ -62,13 +62,22 @@ void *volctl_new(t_symbol *s, int argc, t_atom *argv)
     inlet_settip(time,gensym("interpolation_time"));
     x->x_h = atom_getfloatarg(1, argc, argv);
 
-    x->x_mspersample = 1000.f / 44100; // assume default samplerate
+    x->x_samples_per_ms = 44100.f / 1000.f; // assume default samplerate
 
     outlet_new(&x->x_obj, &s_signal);
     x->x_f = 0;
+	
+	x->x_slopes = getalignedbytes(4*sizeof(t_float));
+
     return (x);
 }
 
+static void volctl_free(t_volctl *x)
+{
+	freealignedbytes(x->x_slopes, 4*sizeof(t_float));
+}
+
+
 t_int *volctl_perform(t_int *w)
 {
     t_volctl * x = (t_volctl *)(w[1]);
@@ -76,40 +85,40 @@ t_int *volctl_perform(t_int *w)
     t_float *out = (t_float *)(w[3]);
     int n = (int)(w[4]);
     
-    float f = x->x_value;
+    t_float f = x->x_value;
 
     if (x->x_ticksleft)
     {
-	float x_slope = x->x_slope;
-	if (x->x_ticksleft < n)
-	{
-	    int remain = x->x_ticksleft;
-	    n-=remain;
-	    while (remain--)
-	    {
-		f+=x_slope;
-		*out++ = *in++ * f;
-	    }
-	    while (n--)
-	    {
-		*out++ = *in++ * f;
-	    }
-	    x->x_value = f;
-	    x->x_ticksleft = 0;
-	}
-	else
-	{
-	    x->x_ticksleft -=n;
-	    while (n--)
-	    {
-		f+=x_slope;
-		*out++ = *in++ * f;
-	    }
-	    x->x_value = f;
-	}
+		t_float x_slope = x->x_slope;
+		if (x->x_ticksleft < n)
+		{
+			int remain = x->x_ticksleft;
+			n-=remain;
+			while (remain--)
+			{
+				f+=x_slope;
+				*out++ = *in++ * f;
+			}
+			while (n--)
+			{
+				*out++ = *in++ * f;
+			}
+			x->x_value = f;
+			x->x_ticksleft = 0;
+		}
+		else
+		{
+			x->x_ticksleft -=n;
+			while (n--)
+			{
+				f+=x_slope;
+				*out++ = *in++ * f;
+			}
+			x->x_value = f;
+		}
     }
     else
-	while (n--) *out++ = *in++ * f; 
+		while (n--) *out++ = *in++ * f; 
 	
     return (w+5);
 }
@@ -122,48 +131,63 @@ t_int *volctl_perf8(t_int *w)
     t_float *out = (t_float *)(w[3]);
     int n = (int)(w[4]);
 
-    float f = x->x_value;
+    t_float f = x->x_value;
 
     if (x->x_ticksleft)
     {
-	float x_slope = x->x_slope;
-	if (x->x_ticksleft < n)
-	{
-	    int remain = x->x_ticksleft;
-	    n-=remain;
-	    while (remain--)
-	    {
-		*out++ = *in++ * f;
-		f+=x_slope;
-	    }
-	    while (n--)
-	    {
-		*out++ = *in++ * f;
-	    }
-	    x->x_value = f;
-	    x->x_ticksleft = 0;
-	}
-	else
-	{
-	    x->x_ticksleft -= n;
-	    while (n--)
-	    {
-		*out++ = *in++ * f;
-		f+=x_slope;
-	    }
-	    x->x_value = f;
-	}
+		t_float x_slope = x->x_slope;
+		if (x->x_ticksleft < n)
+		{
+			int remain = x->x_ticksleft;
+			n-=remain;
+			while (remain--)
+			{
+				*out++ = *in++ * f;
+				f+=x_slope;
+			}
+			while (n--)
+			{
+				*out++ = *in++ * f;
+			}
+			x->x_value = f;
+			x->x_ticksleft = 0;
+		}
+		else
+		{
+			x->x_ticksleft -= n;
+			n = n>>3;
+			while (n--)
+			{
+				*out++ = *in++ * f;
+				f+=x_slope;
+				*out++ = *in++ * f;
+				f+=x_slope;
+				*out++ = *in++ * f;
+				f+=x_slope;
+				*out++ = *in++ * f;
+				f+=x_slope;
+				*out++ = *in++ * f;
+				f+=x_slope;
+				*out++ = *in++ * f;
+				f+=x_slope;
+				*out++ = *in++ * f;
+				f+=x_slope;
+				*out++ = *in++ * f;
+				f+=x_slope;
+			}
+			x->x_value = f;
+		}
     }
     else
     {
-	for (; n; n -= 8, in += 8, out += 8)
-	{
-	    float f0 = in[0], f1 = in[1], f2 = in[2], f3 = in[3];
-	    float f4 = in[4], f5 = in[5], f6 = in[6], f7 = in[7];
+		for (; n; n -= 8, in += 8, out += 8)
+		{
+			t_float f0 = in[0], f1 = in[1], f2 = in[2], f3 = in[3];
+			t_float f4 = in[4], f5 = in[5], f6 = in[6], f7 = in[7];
 	    
-	    out[0] = f0 * f; out[1] = f1 * f; out[2] = f2 * f; out[3] = f3 * f;
-	    out[4] = f4 * f; out[5] = f5 * f; out[6] = f6 * f; out[7] = f7 * f;
-	}
+			out[0] = f0 * f; out[1] = f1 * f; out[2] = f2 * f; out[3] = f3 * f;
+			out[4] = f4 * f; out[5] = f5 * f; out[6] = f6 * f; out[7] = f7 * f;
+		}
     }
     return (w+5);
 }
@@ -176,67 +200,106 @@ t_int *volctl_perf_simd(t_int *w)
 
     if (x->x_ticksleft)
     {
-	int n = (int)(w[4]);
+		int n = (int)(w[4]);
 	
-	float f = x->x_value;
+		t_float x_slope = x->x_slope;
+		if (x->x_ticksleft < n)
+		{
+			t_float f = x->x_value;
+			
+			int remain = x->x_ticksleft;
+			n-=remain;
+			while (remain--)
+			{
+				*out++ = *in++ * f;
+				f+=x_slope;
+			}
+			while (n--)
+			{
+				*out++ = *in++ * f;
+			}
+			x->x_value = f;
+			x->x_ticksleft = 0;
+		}
+		else
+		{
+			x->x_ticksleft -= n;
+			
+			asm(
+				".set T_FLOAT,4                          \n"
+				"movss     (%3),%%xmm0                   \n" /* value */
+				"shufps    $0, %%xmm0, %%xmm0            \n"
+				"movaps    (%4), %%xmm1                  \n" /* x_slopes */
+				"addps     %%xmm0, %%xmm1                \n"
 
-	float x_slope = x->x_slope;
-	if (x->x_ticksleft < n)
-	{
-	    int remain = x->x_ticksleft;
-	    n-=remain;
-	    while (remain--)
-	    {
-		*out++ = *in++ * f;
-		f+=x_slope;
-	    }
-	    while (n--)
-	    {
-		*out++ = *in++ * f;
-	    }
-	    x->x_value = f;
-	    x->x_ticksleft = 0;
-	}
-	else
-	{
-	    x->x_ticksleft -= n;
-	    while (n--)
-	    {
-		*out++ = *in++ * f;
-		f+=x_slope;
-	    }
-	    x->x_value = f;
-	}
+				"movss     (%5), %%xmm0                  \n"
+				"shufps    $0, %%xmm0, %%xmm0            \n" /* x_slope_step */
+
+				"shrl      $4, %2                        \n" /* n>>4 */
+				
+				"1:                                      \n"
+				"movaps    (%0), %%xmm2                  \n"
+				"mulps     %%xmm1, %%xmm2                \n"
+				"movaps    %%xmm2, (%1)                  \n"
+				"addps     %%xmm0, %%xmm1                \n"
+
+				"movaps    4*T_FLOAT(%0), %%xmm2         \n"
+				"mulps     %%xmm1, %%xmm2                \n"
+				"movaps    %%xmm2, 4*T_FLOAT(%1)         \n"
+				"addps     %%xmm0, %%xmm1                \n"
+
+				"movaps    8*T_FLOAT(%0), %%xmm2         \n"
+				"mulps     %%xmm1, %%xmm2                \n"
+				"movaps    %%xmm2, 8*T_FLOAT(%1)         \n"
+				"addps     %%xmm0, %%xmm1                \n"
+
+				"movaps    12*T_FLOAT(%0), %%xmm2        \n"
+				"mulps     %%xmm1, %%xmm2                \n"
+				"movaps    %%xmm2, 12*T_FLOAT(%1)        \n"
+				"addps     %%xmm0, %%xmm1                \n" /* one instr. obsolete */
+
+				"addl      $16*T_FLOAT, %0               \n"
+				"addl      $16*T_FLOAT, %1               \n"
+				"loop      1b                            \n"
+
+				:
+				:"r"(in), "r"(out), "c"(n), "r"(&(t_float)(x->x_value)),
+				"r"((t_float*)x->x_slopes), "r"(&(t_float)(x->x_slope_step))
+				:"%xmm0", "%xmm1", "%xmm2");
+			
+/* 			post("value %f", x->x_value); */
+			x->x_value += n*(x->x_slope);
+		}
     }
     else
     {
-	asm(
-	    ".set T_FLOAT,4                            \n"
-
-	    "movss     (%3), %%xmm0                   \n"
-	    "shufps    $0, %%xmm0, %%xmm0                \n"
-	    "shrl      $4, %2                        \n"
-  
-	    "volctl_loop:                              \n"
-	    "movaps    (%0), %%xmm1                   \n"
-	    "mulps     %%xmm0, %%xmm1                    \n"
-	    "movaps    %%xmm1, (%1)                   \n" 
-	    "movaps    4*T_FLOAT(%0), %%xmm2          \n"
-	    "mulps     %%xmm0, %%xmm2                    \n"
-	    "movaps    %%xmm2, 4*T_FLOAT(%1)          \n"
-	    "movaps    8*T_FLOAT(%0), %%xmm3          \n"
-	    "mulps     %%xmm0, %%xmm3                    \n"
-	    "movaps    %%xmm3, 8*T_FLOAT(%1)          \n"
-	    "movaps    12*T_FLOAT(%0), %%xmm4         \n"
-	    "mulps     %%xmm0, %%xmm4                    \n"
-	    "movaps    %%xmm4, 12*T_FLOAT(%1)         \n"
-	    "addl      $64, %0                       \n"
-	    "addl      $64, %1                       \n"
-	    "loop      volctl_loop                     \n"
-	    :
-	    : "r"(in), "r"(out),
-	    "a"(w[4]),"r"(&(x->x_value))
-	    : "%xmm0","%xmm1","%xmm2","%xmm3","%xmm4");
+		asm(
+			".set T_FLOAT,4                          \n"
+			
+			"movss     (%3), %%xmm0                  \n"
+			"shufps    $0, %%xmm0, %%xmm0            \n"
+			"shrl      $4, %2                        \n"
+			
+			"volctl_loop:                            \n"
+			"movaps    (%0), %%xmm1                  \n"
+			"mulps     %%xmm0, %%xmm1                \n"
+			"movaps    %%xmm1, (%1)                  \n" 
+			"movaps    4*T_FLOAT(%0), %%xmm2         \n"
+			"mulps     %%xmm0, %%xmm2                \n"
+			"movaps    %%xmm2, 4*T_FLOAT(%1)         \n"
+			"movaps    8*T_FLOAT(%0), %%xmm3         \n"
+			"mulps     %%xmm0, %%xmm3                \n"
+			"movaps    %%xmm3, 8*T_FLOAT(%1)         \n"
+			"movaps    12*T_FLOAT(%0), %%xmm4        \n"
+			"mulps     %%xmm0, %%xmm4                \n"
+			"movaps    %%xmm4, 12*T_FLOAT(%1)        \n"
+			"addl      $16*T_FLOAT, %0               \n"
+			"addl      $16*T_FLOAT, %1               \n"
+			"loop      volctl_loop                   \n"
+			:
+			: "r"(in), "r"(out),
+			"c"(w[4]),"r"(&(t_float)(x->x_value))
+			: "%xmm0", "%xmm1","%xmm2","%xmm3","%xmm4");
     }
     return (w+5);
 }
@@ -244,8 +307,18 @@ t_int *volctl_perf_simd(t_int *w)
 
 void volctl_set(t_volctl *x, t_float f)
 {
-    x->x_ticksleft = x->x_h / x->x_mspersample;
-    x->x_slope = (f - x->x_value) / x->x_ticksleft;
+	t_float slope;
+	int i;
+	
+    x->x_ticksleft = x->x_h * x->x_samples_per_ms;
+    slope = (f - x->x_value) / x->x_ticksleft;
+    x->x_slope = slope;
+	
+	for (i = 0; i != 4; ++i)
+	{
+		x->x_slopes[i] = i*slope;
+	}
+	x->x_slope_step = 4*slope;
 }
 
 void volctl_dsp(t_volctl *x, t_signal **sp)
@@ -255,18 +328,18 @@ void volctl_dsp(t_volctl *x, t_signal **sp)
     	dsp_add(volctl_perform, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
     else 
     {
-	if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
-	    dsp_add(volctl_perf_simd, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
-	else
-	dsp_add(volctl_perf8, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
+		if(SIMD_CHECK2(n,sp[0]->s_vec,sp[1]->s_vec))
+			dsp_add(volctl_perf_simd, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
+		else
+			dsp_add(volctl_perf8, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
     }
-    x->x_mspersample = 1000.f / sp[0]->s_sr;
+    x->x_samples_per_ms = sp[0]->s_sr / 1000.f;
 }
 
 void volctl_tilde_setup(void)
 {
-    volctl_class = class_new(gensym("volctl~"), (t_newmethod)volctl_new, 0,
-			     sizeof(t_volctl), 0, A_GIMME, 0);
+    volctl_class = class_new(gensym("volctl~"), (t_newmethod)volctl_new, 
+							 (t_method)volctl_free, sizeof(t_volctl), 0, A_GIMME, 0);
     CLASS_MAINSIGNALIN(volctl_class, t_volctl, x_f);
     class_addmethod(volctl_class, (t_method)volctl_dsp, gensym("dsp"), 0);
     class_addmethod(volctl_class, (t_method)volctl_set, gensym("f1"),A_FLOAT,0);
-- 
cgit v1.2.1


From a0e797de4a660aa2af4b309fe258fb287e3d38e1 Mon Sep 17 00:00:00 2001
From: Tim Blechmann <timblech@users.sourceforge.net>
Date: Sat, 8 Jan 2005 22:13:53 +0000
Subject: improvement for zero factor ... x * 0 = 0

svn path=/trunk/externals/tb/; revision=2482
---
 volctl~/volctl~.c | 319 ++++++++++++++++++++++++++----------------------------
 1 file changed, 151 insertions(+), 168 deletions(-)

(limited to 'volctl~/volctl~.c')

diff --git a/volctl~/volctl~.c b/volctl~/volctl~.c
index 60abfac..1a9e451 100644
--- a/volctl~/volctl~.c
+++ b/volctl~/volctl~.c
@@ -39,14 +39,16 @@ typedef struct _volctl
 
     t_float x_h; //interpolation time
     t_float x_value; //current factor
+    t_float x_target; //target factor
     
-    int x_ticksleft; //ticks to go
+    int x_ticksleft; //dsp ticks to go
     t_float x_samples_per_ms; //ms per sample
     t_float x_slope; //slope
 	t_float * x_slopes; //slopes for simd
 	t_float x_slope_step;
     int x_line; 
-
+	int x_blocksize;
+	t_float x_1overblocksize;
 } t_volctl;
 
 void *volctl_new(t_symbol *s, int argc, t_atom *argv)
@@ -63,6 +65,8 @@ void *volctl_new(t_symbol *s, int argc, t_atom *argv)
     x->x_h = atom_getfloatarg(1, argc, argv);
 
     x->x_samples_per_ms = 44100.f / 1000.f; // assume default samplerate
+	x->x_blocksize = 64;
+	x->x_1overblocksize = 1.f/64.f;
 
     outlet_new(&x->x_obj, &s_signal);
     x->x_f = 0;
@@ -77,122 +81,97 @@ static void volctl_free(t_volctl *x)
 	freealignedbytes(x->x_slopes, 4*sizeof(t_float));
 }
 
-
-t_int *volctl_perform(t_int *w)
+static t_int *volctl_perform(t_int *w)
 {
     t_volctl * x = (t_volctl *)(w[1]);
     t_float *in = (t_float *)(w[2]);
     t_float *out = (t_float *)(w[3]);
     int n = (int)(w[4]);
     
-    t_float f = x->x_value;
 
     if (x->x_ticksleft)
     {
+		t_float f = x->x_value;
 		t_float x_slope = x->x_slope;
-		if (x->x_ticksleft < n)
-		{
-			int remain = x->x_ticksleft;
-			n-=remain;
-			while (remain--)
-			{
-				f+=x_slope;
-				*out++ = *in++ * f;
-			}
-			while (n--)
-			{
-				*out++ = *in++ * f;
-			}
-			x->x_value = f;
-			x->x_ticksleft = 0;
-		}
-		else
+		
+		x->x_ticksleft--;
+		while (n--)
 		{
-			x->x_ticksleft -=n;
-			while (n--)
-			{
-				f+=x_slope;
-				*out++ = *in++ * f;
-			}
-			x->x_value = f;
+			f+=x_slope;
+			*out++ = *in++ * f;
 		}
+		x->x_value = f;
     }
     else
+	{
+		t_float f = x->x_target;
 		while (n--) *out++ = *in++ * f; 
+	}
 	
     return (w+5);
 }
     
 
-t_int *volctl_perf8(t_int *w)
+static t_int *volctl_perf8(t_int *w)
 {
     t_volctl * x = (t_volctl *)(w[1]);
     t_float *in = (t_float *)(w[2]);
     t_float *out = (t_float *)(w[3]);
     int n = (int)(w[4]);
 
-    t_float f = x->x_value;
-
     if (x->x_ticksleft)
     {
+		t_float f = x->x_value;
+
 		t_float x_slope = x->x_slope;
-		if (x->x_ticksleft < n)
+		x->x_ticksleft--;
+		n = n>>3;
+		while (n--)
 		{
-			int remain = x->x_ticksleft;
-			n-=remain;
-			while (remain--)
-			{
-				*out++ = *in++ * f;
-				f+=x_slope;
-			}
-			while (n--)
+			*out++ = *in++ * f;
+			f+=x_slope;
+			*out++ = *in++ * f;
+			f+=x_slope;
+			*out++ = *in++ * f;
+			f+=x_slope;
+			*out++ = *in++ * f;
+			f+=x_slope;
+			*out++ = *in++ * f;
+			f+=x_slope;
+			*out++ = *in++ * f;
+			f+=x_slope;
+			*out++ = *in++ * f;
+			f+=x_slope;
+			*out++ = *in++ * f;
+			f+=x_slope;
+		}
+		x->x_value = f;
+    }
+    else
+    {
+		t_float f = x->x_target;
+
+		if (f)
+			for (; n; n -= 8, in += 8, out += 8)
 			{
-				*out++ = *in++ * f;
+				t_float f0 = in[0], f1 = in[1], f2 = in[2], f3 = in[3];
+				t_float f4 = in[4], f5 = in[5], f6 = in[6], f7 = in[7];
+				
+				out[0] = f0 * f; out[1] = f1 * f; out[2] = f2 * f; out[3] = f3 * f;
+				out[4] = f4 * f; out[5] = f5 * f; out[6] = f6 * f; out[7] = f7 * f;
 			}
-			x->x_value = f;
-			x->x_ticksleft = 0;
-		}
 		else
-		{
-			x->x_ticksleft -= n;
-			n = n>>3;
-			while (n--)
+			for (; n; n -= 8, in += 8, out += 8)
 			{
-				*out++ = *in++ * f;
-				f+=x_slope;
-				*out++ = *in++ * f;
-				f+=x_slope;
-				*out++ = *in++ * f;
-				f+=x_slope;
-				*out++ = *in++ * f;
-				f+=x_slope;
-				*out++ = *in++ * f;
-				f+=x_slope;
-				*out++ = *in++ * f;
-				f+=x_slope;
-				*out++ = *in++ * f;
-				f+=x_slope;
-				*out++ = *in++ * f;
-				f+=x_slope;
+				out[0] = 0; out[1] = 0; out[2] = 0; out[3] = 0;
+				out[4] = 0; out[5] = 0; out[6] = 0; out[7] = 0;
 			}
-			x->x_value = f;
-		}
-    }
-    else
-    {
-		for (; n; n -= 8, in += 8, out += 8)
-		{
-			t_float f0 = in[0], f1 = in[1], f2 = in[2], f3 = in[3];
-			t_float f4 = in[4], f5 = in[5], f6 = in[6], f7 = in[7];
-	    
-			out[0] = f0 * f; out[1] = f1 * f; out[2] = f2 * f; out[3] = f3 * f;
-			out[4] = f4 * f; out[5] = f5 * f; out[6] = f6 * f; out[7] = f7 * f;
-		}
+
     }
     return (w+5);
 }
 
-t_int *volctl_perf_simd(t_int *w)
+static t_int *volctl_perf_simd(t_int *w)
 {
     t_volctl * x = (t_volctl *)(w[1]);
     t_float *in = (t_float *)(w[2]);
@@ -202,116 +181,115 @@ t_int *volctl_perf_simd(t_int *w)
     {
 		int n = (int)(w[4]);
 	
-		t_float x_slope = x->x_slope;
-		if (x->x_ticksleft < n)
-		{
-			t_float f = x->x_value;
+		x->x_ticksleft--;
+		
+		asm(
+			".set T_FLOAT,4                          \n"
+			"movss     (%3),%%xmm0                   \n" /* value */
+			"shufps    $0, %%xmm0, %%xmm0            \n"
+			"movaps    (%4), %%xmm1                  \n" /* x_slopes */
+			"addps     %%xmm0, %%xmm1                \n"
 			
-			int remain = x->x_ticksleft;
-			n-=remain;
-			while (remain--)
-			{
-				*out++ = *in++ * f;
-				f+=x_slope;
-			}
-			while (n--)
-			{
-				*out++ = *in++ * f;
-			}
-			x->x_value = f;
-			x->x_ticksleft = 0;
-		}
-		else
-		{
-			x->x_ticksleft -= n;
+			"movss     (%5), %%xmm0                  \n"
+			"shufps    $0, %%xmm0, %%xmm0            \n" /* x_slope_step */
+			
+			"shrl      $4, %2                        \n" /* n>>4 */
+			
+			"1:                                      \n"
+			"movaps    (%0), %%xmm2                  \n"
+			"mulps     %%xmm1, %%xmm2                \n"
+			"movaps    %%xmm2, (%1)                  \n"
+			"addps     %%xmm0, %%xmm1                \n"
 			
+			"movaps    4*T_FLOAT(%0), %%xmm2         \n"
+			"mulps     %%xmm1, %%xmm2                \n"
+			"movaps    %%xmm2, 4*T_FLOAT(%1)         \n"
+			"addps     %%xmm0, %%xmm1                \n"
+			
+			"movaps    8*T_FLOAT(%0), %%xmm2         \n"
+			"mulps     %%xmm1, %%xmm2                \n"
+			"movaps    %%xmm2, 8*T_FLOAT(%1)         \n"
+			"addps     %%xmm0, %%xmm1                \n"
+			
+			"movaps    12*T_FLOAT(%0), %%xmm2        \n"
+			"mulps     %%xmm1, %%xmm2                \n"
+			"movaps    %%xmm2, 12*T_FLOAT(%1)        \n"
+			"addps     %%xmm0, %%xmm1                \n" /* one instr. obsolete */
+			
+			"addl      $16*T_FLOAT, %0               \n"
+			"addl      $16*T_FLOAT, %1               \n"
+			"loop      1b                            \n"
+			
+			:
+			:"r"(in), "r"(out), "c"(n), "r"(&(t_float)(x->x_value)),
+			"r"((t_float*)x->x_slopes), "r"(&(t_float)(x->x_slope_step))
+			:"%xmm0", "%xmm1", "%xmm2");
+		
+		x->x_value += n*(x->x_slope);
+	}
+    else
+    {
+		if(x->x_target)
 			asm(
 				".set T_FLOAT,4                          \n"
-				"movss     (%3),%%xmm0                   \n" /* value */
+			
+				"movss     (%3), %%xmm0                  \n"
 				"shufps    $0, %%xmm0, %%xmm0            \n"
-				"movaps    (%4), %%xmm1                  \n" /* x_slopes */
-				"addps     %%xmm0, %%xmm1                \n"
-
-				"movss     (%5), %%xmm0                  \n"
-				"shufps    $0, %%xmm0, %%xmm0            \n" /* x_slope_step */
-
-				"shrl      $4, %2                        \n" /* n>>4 */
-				
+				"shrl      $4, %2                        \n"
+			
 				"1:                                      \n"
-				"movaps    (%0), %%xmm2                  \n"
-				"mulps     %%xmm1, %%xmm2                \n"
-				"movaps    %%xmm2, (%1)                  \n"
-				"addps     %%xmm0, %%xmm1                \n"
-
+				"movaps    (%0), %%xmm1                  \n"
+				"mulps     %%xmm0, %%xmm1                \n"
+				"movaps    %%xmm1, (%1)                  \n" 
 				"movaps    4*T_FLOAT(%0), %%xmm2         \n"
-				"mulps     %%xmm1, %%xmm2                \n"
+				"mulps     %%xmm0, %%xmm2                \n"
 				"movaps    %%xmm2, 4*T_FLOAT(%1)         \n"
-				"addps     %%xmm0, %%xmm1                \n"
-
-				"movaps    8*T_FLOAT(%0), %%xmm2         \n"
-				"mulps     %%xmm1, %%xmm2                \n"
-				"movaps    %%xmm2, 8*T_FLOAT(%1)         \n"
-				"addps     %%xmm0, %%xmm1                \n"
-
-				"movaps    12*T_FLOAT(%0), %%xmm2        \n"
-				"mulps     %%xmm1, %%xmm2                \n"
-				"movaps    %%xmm2, 12*T_FLOAT(%1)        \n"
-				"addps     %%xmm0, %%xmm1                \n" /* one instr. obsolete */
-
+				"movaps    8*T_FLOAT(%0), %%xmm3         \n"
+				"mulps     %%xmm0, %%xmm3                \n"
+				"movaps    %%xmm3, 8*T_FLOAT(%1)         \n"
+				"movaps    12*T_FLOAT(%0), %%xmm4        \n"
+				"mulps     %%xmm0, %%xmm4                \n"
+				"movaps    %%xmm4, 12*T_FLOAT(%1)        \n"
 				"addl      $16*T_FLOAT, %0               \n"
 				"addl      $16*T_FLOAT, %1               \n"
 				"loop      1b                            \n"
-
 				:
-				:"r"(in), "r"(out), "c"(n), "r"(&(t_float)(x->x_value)),
-				"r"((t_float*)x->x_slopes), "r"(&(t_float)(x->x_slope_step))
-				:"%xmm0", "%xmm1", "%xmm2");
+				: "r"(in), "r"(out),
+				"c"(w[4]),"r"(&(t_float)(x->x_target))
+				: "%xmm0", "%xmm1","%xmm2","%xmm3","%xmm4");
+		else
+			asm(
+				".set T_FLOAT,4                          \n"
 			
-/* 			post("value %f", x->x_value); */
-			x->x_value += n*(x->x_slope);
-		}
-    }
-    else
-    {
-		asm(
-			".set T_FLOAT,4                          \n"
+				"xorps     %%xmm0, %%xmm0                \n"
+				"shrl      $4, %1                        \n"
 			
-			"movss     (%3), %%xmm0                  \n"
-			"shufps    $0, %%xmm0, %%xmm0            \n"
-			"shrl      $4, %2                        \n"
+				"1:                                      \n"
+				"movaps    %%xmm0, (%0)                  \n" 
+				"movaps    %%xmm0, 4*T_FLOAT(%0)         \n"
+				"movaps    %%xmm0, 8*T_FLOAT(%0)         \n"
+				"movaps    %%xmm0, 12*T_FLOAT(%0)        \n"
+				"addl      $16*T_FLOAT, %0               \n"
+				"loop      1b                            \n"
+				:
+				:"r"(out),
+				"c"(w[4])
+				: "%xmm0");
 			
-			"volctl_loop:                            \n"
-			"movaps    (%0), %%xmm1                  \n"
-			"mulps     %%xmm0, %%xmm1                \n"
-			"movaps    %%xmm1, (%1)                  \n" 
-			"movaps    4*T_FLOAT(%0), %%xmm2         \n"
-			"mulps     %%xmm0, %%xmm2                \n"
-			"movaps    %%xmm2, 4*T_FLOAT(%1)         \n"
-			"movaps    8*T_FLOAT(%0), %%xmm3         \n"
-			"mulps     %%xmm0, %%xmm3                \n"
-			"movaps    %%xmm3, 8*T_FLOAT(%1)         \n"
-			"movaps    12*T_FLOAT(%0), %%xmm4        \n"
-			"mulps     %%xmm0, %%xmm4                \n"
-			"movaps    %%xmm4, 12*T_FLOAT(%1)        \n"
-			"addl      $16*T_FLOAT, %0               \n"
-			"addl      $16*T_FLOAT, %1               \n"
-			"loop      volctl_loop                   \n"
-			:
-			: "r"(in), "r"(out),
-			"c"(w[4]),"r"(&(t_float)(x->x_value))
-			: "%xmm0", "%xmm1","%xmm2","%xmm3","%xmm4");
     }
     return (w+5);
 }
 
 
-void volctl_set(t_volctl *x, t_float f)
+static void volctl_set(t_volctl *x, t_float f)
 {
 	t_float slope;
 	int i;
-	
-    x->x_ticksleft = x->x_h * x->x_samples_per_ms;
-    slope = (f - x->x_value) / x->x_ticksleft;
+	int samplesleft = x->x_h * x->x_samples_per_ms;
+	samplesleft += x->x_blocksize - ( samplesleft & (x->x_blocksize - 1));
+	x->x_ticksleft = (int) (t_float)samplesleft * x->x_1overblocksize;
+
+    slope = (f - x->x_value) / samplesleft;
     x->x_slope = slope;
 	
 	for (i = 0; i != 4; ++i)
@@ -319,9 +297,11 @@ void volctl_set(t_volctl *x, t_float f)
 		x->x_slopes[i] = i*slope;
 	}
 	x->x_slope_step = 4*slope;
+
+	x->x_target = f;
 }
 
-void volctl_dsp(t_volctl *x, t_signal **sp)
+static void volctl_dsp(t_volctl *x, t_signal **sp)
 {
     const int n = sp[0]->s_n;
     if (n&7)
@@ -333,7 +313,10 @@ void volctl_dsp(t_volctl *x, t_signal **sp)
 		else
 			dsp_add(volctl_perf8, 4, x, sp[0]->s_vec, sp[1]->s_vec, n);
     }
-    x->x_samples_per_ms = sp[0]->s_sr / 1000.f;
+
+	x->x_blocksize = n;
+    x->x_1overblocksize = 1./n;
+	x->x_samples_per_ms = sp[0]->s_sr / 1000.f;
 }
 
 void volctl_tilde_setup(void)
-- 
cgit v1.2.1


From bee409f85b7facc33e9e2b9fb7920edb7c3520bd Mon Sep 17 00:00:00 2001
From: Tim Blechmann <timblech@users.sourceforge.net>
Date: Mon, 24 Jan 2005 21:22:31 +0000
Subject: improvement for factor 1

svn path=/trunk/externals/tb/; revision=2539
---
 volctl~/volctl~.c | 71 +++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 50 insertions(+), 21 deletions(-)

(limited to 'volctl~/volctl~.c')

diff --git a/volctl~/volctl~.c b/volctl~/volctl~.c
index 1a9e451..e49c267 100644
--- a/volctl~/volctl~.c
+++ b/volctl~/volctl~.c
@@ -53,7 +53,7 @@ typedef struct _volctl
 
 void *volctl_new(t_symbol *s, int argc, t_atom *argv)
 {
-    if (argc > 2) post("volctl~: extra arguments ignored");
+    if (argc > 3) post("volctl~: extra arguments ignored");
 
     t_volctl *x = (t_volctl *)pd_new(volctl_class);
     inlet_new(&x->x_obj, &x->x_obj.ob_pd, &s_float, gensym("f1"));
@@ -171,6 +171,7 @@ static t_int *volctl_perf8(t_int *w)
     return (w+5);
 }
 
+
 static t_int *volctl_perf_simd(t_int *w)
 {
     t_volctl * x = (t_volctl *)(w[1]);
@@ -229,7 +230,53 @@ static t_int *volctl_perf_simd(t_int *w)
 	}
     else
     {
-		if(x->x_target)
+		switch(x->x_target)
+		{
+		case 0:
+			asm(
+				".set T_FLOAT,4                          \n"
+			
+				"xorps     %%xmm0, %%xmm0                \n"
+				"shrl      $4, %1                        \n"
+			
+				"1:                                      \n"
+				"movaps    %%xmm0, (%0)                  \n" 
+				"movaps    %%xmm0, 4*T_FLOAT(%0)         \n"
+				"movaps    %%xmm0, 8*T_FLOAT(%0)         \n"
+				"movaps    %%xmm0, 12*T_FLOAT(%0)        \n"
+				"addl      $16*T_FLOAT, %0               \n"
+				"loop      1b                            \n"
+				:
+				:"r"(out),
+				"c"(w[4])
+				: "%xmm0");
+			break;
+
+		case 1:
+			asm(
+				".set T_FLOAT,4                          \n"
+			
+				"shrl      $4, %1                        \n"
+
+				"1:                                        \n"
+				"movaps    (%1), %%xmm0                    \n"
+				"movaps    4*T_FLOAT(%1), %%xmm1           \n"
+				"movaps    8*T_FLOAT(%1), %%xmm2           \n"
+				"movaps    12*T_FLOAT(%1), %%xmm3          \n"
+				"movaps    %%xmm0, (%2)                    \n"
+				"movaps    %%xmm1, 4*T_FLOAT(%2)           \n"
+				"movaps    %%xmm2, 8*T_FLOAT(%2)           \n"
+				"movaps    %%xmm3, 12*T_FLOAT(%2)          \n"
+				
+				"addl      $16*T_FLOAT,%1                  \n"
+				"addl      $16*T_FLOAT,%2                  \n"
+				"loop      1b                              \n"
+				:
+				:"c"(w[4]),"r"(in),"r"(out)
+				:"%xmm0","%xmm1","%xmm2","%xmm3");
+			break;
+
+		default:
 			asm(
 				".set T_FLOAT,4                          \n"
 			
@@ -257,25 +304,7 @@ static t_int *volctl_perf_simd(t_int *w)
 				: "r"(in), "r"(out),
 				"c"(w[4]),"r"(&(t_float)(x->x_target))
 				: "%xmm0", "%xmm1","%xmm2","%xmm3","%xmm4");
-		else
-			asm(
-				".set T_FLOAT,4                          \n"
-			
-				"xorps     %%xmm0, %%xmm0                \n"
-				"shrl      $4, %1                        \n"
-			
-				"1:                                      \n"
-				"movaps    %%xmm0, (%0)                  \n" 
-				"movaps    %%xmm0, 4*T_FLOAT(%0)         \n"
-				"movaps    %%xmm0, 8*T_FLOAT(%0)         \n"
-				"movaps    %%xmm0, 12*T_FLOAT(%0)        \n"
-				"addl      $16*T_FLOAT, %0               \n"
-				"loop      1b                            \n"
-				:
-				:"r"(out),
-				"c"(w[4])
-				: "%xmm0");
-			
+		}
     }
     return (w+5);
 }
-- 
cgit v1.2.1


From 002257b4186392ad2a70a9b8d55c757d26858cf9 Mon Sep 17 00:00:00 2001
From: Tim Blechmann <timblech@users.sourceforge.net>
Date: Fri, 29 Apr 2005 16:29:40 +0000
Subject: optimized for inplace operation

svn path=/trunk/externals/tb/; revision=2853
---
 volctl~/volctl~.c | Bin 10137 -> 10282 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

(limited to 'volctl~/volctl~.c')

diff --git a/volctl~/volctl~.c b/volctl~/volctl~.c
index e49c267..6a3466d 100644
Binary files a/volctl~/volctl~.c and b/volctl~/volctl~.c differ
-- 
cgit v1.2.1


From 26404fcb66045f5b08923d9be037fbd3f6f22ebf Mon Sep 17 00:00:00 2001
From: Tim Blechmann <timblech@users.sourceforge.net>
Date: Fri, 29 Apr 2005 19:04:37 +0000
Subject: simd code for msvc ...

svn path=/trunk/externals/tb/; revision=2855
---
 volctl~/volctl~.c | Bin 10282 -> 9347 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

(limited to 'volctl~/volctl~.c')

diff --git a/volctl~/volctl~.c b/volctl~/volctl~.c
index 6a3466d..946beb3 100644
Binary files a/volctl~/volctl~.c and b/volctl~/volctl~.c differ
-- 
cgit v1.2.1