aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Blechmann <timblech@users.sourceforge.net>2004-09-11 08:10:45 +0000
committerIOhannes m zmölnig <zmoelnig@iem.at>2015-10-14 15:11:57 +0200
commitba1866d2b465633b6d4b3ebbba2c6dc6c0e25d6b (patch)
tree5c2ad2ac635573f2390651d2022701fd955ac0f2
parent8e34dec617b67c4b8d8928188d175f850d6a69c4 (diff)
...
svn path=/trunk/externals/tb/; revision=2024
-rwxr-xr-xsc4pd/make-files.txt2
-rw-r--r--sc4pd/source/Convolution.cpp222
-rw-r--r--sc4pd/source/fftlib.c3306
-rw-r--r--sc4pd/source/fftlib.h62
-rw-r--r--sc4pd/source/main.cpp9
-rw-r--r--sc4pd/source/scmul.cpp (renamed from sc4pd/source/sc*.cpp)0
-rw-r--r--sc4pd/source/support.cpp74
-rw-r--r--sc4pd/source/support.hpp5
8 files changed, 3678 insertions, 2 deletions
diff --git a/sc4pd/make-files.txt b/sc4pd/make-files.txt
index ea7e551..486674f 100755
--- a/sc4pd/make-files.txt
+++ b/sc4pd/make-files.txt
@@ -16,4 +16,4 @@ SRCS= \
TwoPole.cpp TwoZero.cpp FOS.cpp SOS.cpp RLPF.cpp RHPF.cpp LPF.cpp \
HPF.cpp BPF.cpp BRF.cpp LPZ1.cpp HPZ1.cpp LPZ2.cpp HPZ2.cpp \
BPZ2.cpp BRZ2.cpp LFDNoise0.cpp LFDNoise1.cpp LFDNoise2.cpp \
- sc+.cpp sc-.cpp
+ sc+.cpp sc-.cpp scmul.cpp scdiv.cpp Convolution.cpp
diff --git a/sc4pd/source/Convolution.cpp b/sc4pd/source/Convolution.cpp
new file mode 100644
index 0000000..0026b01
--- /dev/null
+++ b/sc4pd/source/Convolution.cpp
@@ -0,0 +1,222 @@
+/* sc4pd
+ Convolution~
+
+ Copyright (c) 2004 Tim Blechmann.
+
+ This code is derived from:
+ SuperCollider real time audio synthesis system
+ Copyright (c) 2002 James McCartney. All rights reserved.
+ http://www.audiosynth.com
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ Based on:
+ PureData by Miller Puckette and others.
+ http://www.crca.ucsd.edu/~msp/software.html
+ FLEXT by Thomas Grill
+ http://www.parasitaere-kapazitaeten.net/ext
+ SuperCollider by James McCartney
+ http://www.audiosynth.com
+
+ Coded while listening to: Ambarchi/Muller/Voice Crack: Oystered
+
+*/
+
+#include "sc4pd.hpp"
+#include "fftlib.h"
+
+/* ------------------------ Convolution~ -------------------------------*/
+
+class Convolution_ar:
+ public sc4pd_dsp
+{
+ FLEXT_HEADER(Convolution_ar,sc4pd_dsp);
+
+public:
+ Convolution_ar(int argc, t_atom *argv);
+ ~Convolution_ar();
+
+protected:
+ virtual void m_signal(int n, t_sample *const *in, t_sample *const *out);
+ virtual void m_dsp(int n, t_sample *const *in, t_sample *const *out);
+
+private:
+ int m_pos, m_insize, m_fftsize,m_mask;
+ int m_log2n;
+
+ float *m_inbuf1,*m_inbuf2, *m_fftbuf1, *m_fftbuf2, *m_outbuf,*m_overlapbuf;
+
+};
+
+FLEXT_LIB_DSP_V("Convolution~",Convolution_ar);
+
+Convolution_ar::Convolution_ar(int argc, t_atom *argv)
+{
+
+ //parse arguments
+ AtomList Args(argc,argv);
+
+ m_insize = sc_getfloatarg(Args,0);
+
+ AddInSignal("signal");
+ AddInSignal("kernel");
+ AddOutSignal();
+
+
+ //require size N+M-1 to be a power of two
+
+ m_fftsize=2*(m_insize);
+
+ //just use memory for the input buffers and fft buffers
+ int insize = m_insize * sizeof(float);
+ int fftsize = m_fftsize * sizeof(float);
+
+ m_inbuf1 = new float[m_insize];
+ m_inbuf2 = new float[m_insize];
+
+ m_fftbuf1 = new float[m_fftsize];
+ m_fftbuf2 = new float[m_fftsize];
+
+ m_outbuf = new float[m_fftsize];
+ m_overlapbuf = new float[m_insize];
+
+ memset(m_outbuf, 0, fftsize);
+ memset(m_overlapbuf, 0, insize);
+
+ m_log2n = LOG2CEIL(m_fftsize);
+
+ //test for full input buffer
+ m_mask = m_insize;
+ m_pos = 0;
+}
+
+Convolution_ar::~Convolution_ar()
+{
+ delete m_inbuf1;
+ delete m_inbuf2;
+
+ delete m_fftbuf1;
+ delete m_fftbuf2;
+
+ delete m_outbuf;
+ delete m_overlapbuf;
+}
+
+void Convolution_ar::m_dsp(int n, t_sample *const *in,
+ t_sample *const *out)
+{
+
+}
+
+extern float* cosTable[32];
+
+void Convolution_ar::m_signal(int n, t_sample *const *in,
+ t_sample *const *out)
+{
+ float *in1 = in[0];
+ float *in2 = in[1];
+
+ float *out1 = m_inbuf1 + m_pos;
+ float *out2 = m_inbuf2 + m_pos;
+
+ int numSamples = 2*n; //??? mWorld->mFullRate.mBufLength;
+
+ // copy input
+ CopySamples(out1, in1, numSamples);
+ CopySamples(out2, in2, numSamples);
+
+ m_pos += numSamples;
+
+ if (m_pos & m_insize)
+ {
+
+ //have collected enough samples to transform next frame
+ m_pos = 0; //reset collection counter
+
+ // copy to fftbuf
+
+ uint32 insize=m_insize * sizeof(float);
+ memcpy(m_fftbuf1, m_inbuf1, insize);
+ memcpy(m_fftbuf2, m_inbuf2, insize);
+
+ //zero pad second part of buffer to allow for convolution
+ memset(m_fftbuf1+m_insize, 0, insize);
+ memset(m_fftbuf2+m_insize, 0, insize);
+
+ int log2n = m_log2n;
+
+
+ // do windowing
+ DoWindowing(log2n, m_fftbuf1, m_fftsize);
+ DoWindowing(log2n, m_fftbuf2, m_fftsize);
+
+ // do fft
+/* #if __VEC__
+ ctoz(m_fftbuf1, 2, outbuf1, 1, 1L<<log2n); ctoz(m_fftbuf2, 2, outbuf2, 1, 1L<<log2n);
+ #else */
+
+//in place transform for now
+ rffts(m_fftbuf1, log2n, 1, cosTable[log2n]);
+ rffts(m_fftbuf2, log2n, 1, cosTable[log2n]);
+//#endif
+
+//complex multiply time
+ int numbins = m_fftsize >> 1; //m_fftsize - 2 >> 1;
+
+ float * p1= m_fftbuf1;
+ float * p2= m_fftbuf2;
+
+ p1[0] *= p2[0];
+ p1[1] *= p2[1];
+
+ //complex multiply
+ for (int i=1; i<numbins; ++i) {
+ float real,imag;
+ int realind,imagind;
+ realind= 2*i; imagind= realind+1;
+ real= p1[realind]*p2[realind]- p1[imagind]*p2[imagind];
+ imag= p1[realind]*p2[imagind]+ p1[imagind]*p2[realind];
+ p1[realind] = real; //p2->bin[i];
+ p1[imagind]= imag;
+ }
+
+ //copy second part from before to overlap
+ memcpy(m_overlapbuf, m_outbuf+m_insize, m_insize * sizeof(float));
+
+ //inverse fft into outbuf
+ memcpy(m_outbuf, m_fftbuf1, m_fftsize * sizeof(float));
+
+ //in place
+ riffts(m_outbuf, log2n, 1, cosTable[log2n]);
+
+ DoWindowing(log2n, m_outbuf, m_fftsize);
+ }
+
+ //write out samples copied from outbuf, with overlap added in
+
+ float *output = out[0];
+ float *nout= m_outbuf+m_pos;
+ float *overlap= m_overlapbuf+m_pos;
+
+ for (int i=0; i<numSamples; ++i)
+ {
+ *++output = *++nout + *++overlap;
+ }
+
+}
+
+
+
+
diff --git a/sc4pd/source/fftlib.c b/sc4pd/source/fftlib.c
new file mode 100644
index 0000000..4bbdcb1
--- /dev/null
+++ b/sc4pd/source/fftlib.c
@@ -0,0 +1,3306 @@
+/* FFT library */
+/* Library of in-place fast fourier transforms */
+/* Forward and inverse complex transforms */
+/* and real forward transform */
+/* Optimized for RISC processors with many registers */
+/* Version 1.1 John Green NUWC New London CT January 96 */
+/* Version 1.1 renamed as fftlib from fftbig */
+/* Version 1.1 removed (float *)((char *) ptr) optimization */
+/* Version 1.0 John Green NUWC New London CT December 95 */
+/* (John Green) green_jt@vsdec.nl.nuwc.navy.mil */
+/* green_jt@vsdec.nl.nuwc.navy.mil */
+
+#include <math.h>
+#include "fftlib.h"
+
+#define MAXMROOT 9 /* 2^(MAXMROOT-1) = # of elements in BRcnt */
+
+/* Bit reversed counter */
+static const unsigned char BRcnt[256] = {
+ 0, 128, 64, 192, 32, 160, 96, 224, 16, 144, 80, 208,
+ 48, 176, 112, 240, 8, 136, 72, 200, 40, 168, 104, 232,
+ 24, 152, 88, 216, 56, 184, 120, 248, 4, 132, 68, 196,
+ 36, 164, 100, 228, 20, 148, 84, 212, 52, 180, 116, 244,
+ 12, 140, 76, 204, 44, 172, 108, 236, 28, 156, 92, 220,
+ 60, 188, 124, 252, 2, 130, 66, 194, 34, 162, 98, 226,
+ 18, 146, 82, 210, 50, 178, 114, 242, 10, 138, 74, 202,
+ 42, 170, 106, 234, 26, 154, 90, 218, 58, 186, 122, 250,
+ 6, 134, 70, 198, 38, 166, 102, 230, 22, 150, 86, 214,
+ 54, 182, 118, 246, 14, 142, 78, 206, 46, 174, 110, 238,
+ 30, 158, 94, 222, 62, 190, 126, 254, 1, 129, 65, 193,
+ 33, 161, 97, 225, 17, 145, 81, 209, 49, 177, 113, 241,
+ 9, 137, 73, 201, 41, 169, 105, 233, 25, 153, 89, 217,
+ 57, 185, 121, 249, 5, 133, 69, 197, 37, 165, 101, 229,
+ 21, 149, 85, 213, 53, 181, 117, 245, 13, 141, 77, 205,
+ 45, 173, 109, 237, 29, 157, 93, 221, 61, 189, 125, 253,
+ 3, 131, 67, 195, 35, 163, 99, 227, 19, 147, 83, 211,
+ 51, 179, 115, 243, 11, 139, 75, 203, 43, 171, 107, 235,
+ 27, 155, 91, 219, 59, 187, 123, 251, 7, 135, 71, 199,
+ 39, 167, 103, 231, 23, 151, 87, 215, 55, 183, 119, 247,
+ 15, 143, 79, 207, 47, 175, 111, 239, 31, 159, 95, 223,
+ 63, 191, 127, 255 };
+
+/* Table of powers of 2 */
+static const long Ntbl[21] = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048,
+ 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576};
+
+long FFTInit(long *fftMptr, long fftN, float *Utbl){
+/* Compute cosine table and check size for complex ffts */
+/* INPUTS */
+/* fftN = size of fft */
+/* OUTPUTS */
+/* *fftMptr = log2 of fft size */
+/* *Utbl = cosine table with fftN/4 + 1 entries (angles = 0 to pi/2 inclusive) */
+/* RETURNS */
+/* 1 if fftN is invalid, 0 otherwise */
+long i1, ErrVal;
+ErrVal = 0;
+*fftMptr = (long)(log(fftN)/log(2.0) + 0.5);
+if ((*fftMptr >= 3) & (*fftMptr <= 19) & (int)(pow(2,*fftMptr)+.5) == fftN)
+ for (i1 = 0; i1 <= fftN/4; i1++)
+ Utbl[i1] = cos( ( 3.1415926535897932384626433832795 * 2.0 * i1 ) / fftN );
+else
+ ErrVal = 1;
+return ErrVal;
+}
+
+long rFFTInit(long *fftMptr, long fftN, float *Utbl){
+/* Compute cosine table and check size for a real input fft */
+/* INPUTS */
+/* fftN = size of fft */
+/* OUTPUTS */
+/* *fftMptr = log2 of fft size */
+/* *Utbl = cosine table with fftN/4 + 1 entries (angles = 0 to pi/2 inclusive) */
+/* RETURNS */
+/* 1 if fftN is invalid, 0 otherwise */
+long i1, ErrVal;
+ErrVal = 0;
+*fftMptr = (long)(log(fftN)/log(2.0) + 0.5);
+if ((*fftMptr >= 4) & (*fftMptr <= 20) & (int)(pow(2,*fftMptr)+.5) == fftN)
+
+ for (i1 = 0; i1 <= fftN/4; i1++)
+ Utbl[i1] = cos( ( 3.1415926535897932384626433832795 * 2.0 * i1 ) / fftN );
+else
+ ErrVal = 1;
+return ErrVal;
+}
+
+void ffts(float *ioptr, long M, long Rows, float *Utbl){
+/* Compute in-place complex fft on the rows of the input array */
+/* INPUTS */
+/* M = log2 of fft size */
+/* *ioptr = input data array */
+/* *Utbl = cosine table */
+/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
+/* OUTPUTS */
+/* *ioptr = output data array */
+
+long Flyinc;
+long FlyOffsetA;
+long FlyOffsetAIm;
+long FlyOffsetB;
+long FlyOffsetBIm;
+long NsameU1;
+long NsameU2;
+long NsameU4;
+long diffUcnt;
+long LoopCnt;
+
+float scale;
+float fly0r;
+float fly0i;
+float fly1r;
+float fly1i;
+float fly2r;
+float fly2i;
+float fly3r;
+float fly3i;
+float fly4r;
+float fly4i;
+float fly5r;
+float fly5i;
+float fly6r;
+float fly6i;
+float fly7r;
+float fly7i;
+float U0r;
+float U0i;
+float U1r;
+float U1i;
+float U2r;
+float U2i;
+float U3r;
+float U3i;
+float t0r;
+float t0i;
+float t1r;
+float t1i;
+
+float *fly0P;
+float *fly1P;
+float *fly2P;
+float *fly3P;
+
+float *U0rP;
+float *U0iP;
+float *U1rP;
+float *U1iP;
+float *U2rP;
+float *U2iP;
+float *IOP;
+long U3offset;
+
+long stage;
+long NdiffU;
+long LoopN;
+
+const long BRshift = MAXMROOT - (M>>1);
+const long Nrems2 = Ntbl[M-(M>>1)+1];
+const long Nroot_1_ColInc = (Ntbl[M-1]-Ntbl[M-(M>>1)])*2;
+
+scale = 2.0;
+for (;Rows>0;Rows--){
+
+FlyOffsetA = Ntbl[M] * (2/2);
+FlyOffsetAIm = FlyOffsetA + 1;
+FlyOffsetB = FlyOffsetA + 2;
+FlyOffsetBIm = FlyOffsetB + 1;
+
+/* BitrevR2 ** bit reverse and first radix 2 stage ******/
+
+for (stage = 0; stage < Ntbl[M-(M>>1)]*2; stage += Ntbl[M>>1]*2){
+ for (LoopN = (Ntbl[(M>>1)-1]-1); LoopN >= 0; LoopN--){
+ LoopCnt = (Ntbl[(M>>1)-1]-1);
+ fly0P = ioptr+ Nroot_1_ColInc + ((int)BRcnt[LoopN] >> BRshift)*(2*2) + stage;
+ IOP = ioptr + (LoopN<<(M+1)/2) * 2 + stage;
+ fly1P = IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2);
+ fly0r = *(fly0P);
+ fly0i = *(fly0P+1);
+ fly1r = *(fly0P+FlyOffsetA);
+ fly1i = *(fly0P+FlyOffsetAIm);
+ for (; LoopCnt > LoopN;){
+ fly2r = *(fly0P+2);
+ fly2i = *(fly0P+(2+1));
+ fly3r = *(fly0P+FlyOffsetB);
+ fly3i = *(fly0P+FlyOffsetBIm);
+ fly4r = *(fly1P);
+ fly4i = *(fly1P+1);
+ fly5r = *(fly1P+FlyOffsetA);
+ fly5i = *(fly1P+FlyOffsetAIm);
+ fly6r = *(fly1P+2);
+ fly6i = *(fly1P+(2+1));
+ fly7r = *(fly1P+FlyOffsetB);
+ fly7i = *(fly1P+FlyOffsetBIm);
+
+ t0r = fly0r + fly1r;
+ t0i = fly0i + fly1i;
+ fly1r = fly0r - fly1r;
+ fly1i = fly0i - fly1i;
+ t1r = fly2r + fly3r;
+ t1i = fly2i + fly3i;
+ fly3r = fly2r - fly3r;
+ fly3i = fly2i - fly3i;
+ fly0r = fly4r + fly5r;
+ fly0i = fly4i + fly5i;
+ fly5r = fly4r - fly5r;
+ fly5i = fly4i - fly5i;
+ fly2r = fly6r + fly7r;
+ fly2i = fly6i + fly7i;
+ fly7r = fly6r - fly7r;
+ fly7i = fly6i - fly7i;
+
+ *(fly1P) = t0r;
+ *(fly1P+1) = t0i;
+ *(fly1P+2) = fly1r;
+ *(fly1P+(2+1)) = fly1i;
+ *(fly1P+FlyOffsetA) = t1r;
+ *(fly1P+FlyOffsetAIm) = t1i;
+ *(fly1P+FlyOffsetB) = fly3r;
+ *(fly1P+FlyOffsetBIm) = fly3i;
+ *(fly0P) = fly0r;
+ *(fly0P+1) = fly0i;
+ *(fly0P+2) = fly5r;
+ *(fly0P+(2+1)) = fly5i;
+ *(fly0P+FlyOffsetA) = fly2r;
+ *(fly0P+FlyOffsetAIm) = fly2i;
+ *(fly0P+FlyOffsetB) = fly7r;
+ *(fly0P+FlyOffsetBIm) = fly7i;
+
+ fly0P -= Nrems2;
+ fly0r = *(fly0P);
+ fly0i = *(fly0P+1);
+ fly1r = *(fly0P+FlyOffsetA);
+ fly1i = *(fly0P+FlyOffsetAIm);
+ LoopCnt -= 1;
+ fly1P = (IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2));
+ };
+ fly2r = *(fly0P+2);
+ fly2i = *(fly0P+(2+1));
+ fly3r = *(fly0P+FlyOffsetB);
+ fly3i = *(fly0P+FlyOffsetBIm);
+
+ t0r = fly0r + fly1r;
+ t0i = fly0i + fly1i;
+ fly1r = fly0r - fly1r;
+ fly1i = fly0i - fly1i;
+ t1r = fly2r + fly3r;
+ t1i = fly2i + fly3i;
+ fly3r = fly2r - fly3r;
+ fly3i = fly2i - fly3i;
+
+ *(fly0P) = t0r;
+ *(fly0P+1) = t0i;
+ *(fly0P+2) = fly1r;
+ *(fly0P+(2+1)) = fly1i;
+ *(fly0P+FlyOffsetA) = t1r;
+ *(fly0P+FlyOffsetAIm) = t1i;
+ *(fly0P+FlyOffsetB) = fly3r;
+ *(fly0P+FlyOffsetBIm) = fly3i;
+
+ };
+};
+
+
+
+/**** FFTC **************/
+
+NdiffU = 2;
+Flyinc = (NdiffU);
+
+NsameU4 = Ntbl[M]/4;
+LoopN = Ntbl[M-3];
+
+stage = ((M-1)/3);
+if ( (M-1-(stage * 3)) != 0 ){
+ FlyOffsetA = Flyinc << 2;
+ FlyOffsetB = FlyOffsetA << 1;
+ FlyOffsetAIm = FlyOffsetA + 1;
+ FlyOffsetBIm = FlyOffsetB + 1;
+ if ( (M-1-(stage * 3)) == 1 ){
+ /* 1 radix 2 stage */
+
+ IOP = ioptr;
+ fly0P = IOP;
+ fly1P = (IOP+Flyinc);
+ fly2P = (fly1P+Flyinc);
+ fly3P = (fly2P+Flyinc);
+
+ /* Butterflys */
+ /*
+ t0 - - t0
+ t1 - - t1
+ f2 - 1- f5
+ f1 - -i- f7
+ f4 - - f4
+ f0 - - f0
+ f6 - 1- f2
+ f3 - -i- f1
+ */
+
+ for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){
+ t0r = *(fly0P);
+ t0i = *(fly0P + 1);
+ t1r = *(fly1P);
+ t1i = *(fly1P + 1);
+ fly2r = *(fly2P);
+ fly2i = *(fly2P + 1);
+ fly1r = *(fly3P);
+ fly1i = *(fly3P + 1);
+ fly4r = *(fly0P + FlyOffsetA);
+ fly4i = *(fly0P + FlyOffsetAIm);
+ fly0r = *(fly1P + FlyOffsetA);
+ fly0i = *(fly1P + FlyOffsetAIm);
+ fly6r = *(fly2P + FlyOffsetA);
+ fly6i = *(fly2P + FlyOffsetAIm);
+ fly3r = *(fly3P + FlyOffsetA);
+ fly3i = *(fly3P + FlyOffsetAIm);
+
+ fly5r = t0r - fly2r;
+ fly5i = t0i - fly2i;
+ t0r = t0r + fly2r;
+ t0i = t0i + fly2i;
+
+ fly7r = t1r - fly1i;
+ fly7i = t1i + fly1r;
+ t1r = t1r + fly1i;
+ t1i = t1i - fly1r;
+
+ fly2r = fly4r - fly6r;
+ fly2i = fly4i - fly6i;
+ fly4r = fly4r + fly6r;
+ fly4i = fly4i + fly6i;
+
+ fly1r = fly0r - fly3i;
+ fly1i = fly0i + fly3r;
+ fly0r = fly0r + fly3i;
+ fly0i = fly0i - fly3r;
+
+ *(fly2P) = fly5r;
+ *(fly2P + 1) = fly5i;
+ *(fly0P) = t0r;
+ *(fly0P + 1) = t0i;
+ *(fly3P) = fly7r;
+ *(fly3P + 1) = fly7i;
+ *(fly1P) = t1r;
+ *(fly1P + 1) = t1i;
+ *(fly2P + FlyOffsetA) = fly2r;
+ *(fly2P + FlyOffsetAIm) = fly2i;
+ *(fly0P + FlyOffsetA) = fly4r;
+ *(fly0P + FlyOffsetAIm) = fly4i;
+ *(fly3P + FlyOffsetA) = fly1r;
+ *(fly3P + FlyOffsetAIm) = fly1i;
+ *(fly1P + FlyOffsetA) = fly0r;
+ *(fly1P + FlyOffsetAIm) = fly0i;
+
+ fly0P = (fly0P + FlyOffsetB);
+ fly1P = (fly1P + FlyOffsetB);
+ fly2P = (fly2P + FlyOffsetB);
+ fly3P = (fly3P + FlyOffsetB);
+ };
+
+ NsameU4 >>= 1;
+ LoopN >>= 1;
+ NdiffU <<= 1;
+ Flyinc = Flyinc << 1;
+ }
+ else{
+ /* 1 radix 4 stage */
+ IOP = ioptr;
+
+ U3r = 0.7071067811865475244008443621; /* sqrt(0.5); */
+ U3i = U3r;
+ fly0P = IOP;
+ fly1P = (IOP+Flyinc);
+ fly2P = (fly1P+Flyinc);
+ fly3P = (fly2P+Flyinc);
+
+ /* Butterflys */
+ /*
+ t0 - - t0 - - t0
+ t1 - - t1 - - t1
+ f2 - 1- f5 - - f5
+ f1 - -i- f7 - - f7
+ f4 - - f4 - 1- f6
+ f0 - - f0 -U3 - f3
+ f6 - 1- f2 - -i- f4
+ f3 - -i- f1 -U3a- f2
+ */
+
+ for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){
+ t0r = *(fly0P);
+ t0i = *(fly0P + 1);
+ t1r = *(fly1P);
+ t1i = *(fly1P + 1);
+ fly2r = *(fly2P);
+ fly2i = *(fly2P + 1);
+ fly1r = *(fly3P);
+ fly1i = *(fly3P + 1);
+ fly4r = *(fly0P + FlyOffsetA);
+ fly4i = *(fly0P + FlyOffsetAIm);
+ fly0r = *(fly1P + FlyOffsetA);
+ fly0i = *(fly1P + FlyOffsetAIm);
+ fly6r = *(fly2P + FlyOffsetA);
+ fly6i = *(fly2P + FlyOffsetAIm);
+ fly3r = *(fly3P + FlyOffsetA);
+ fly3i = *(fly3P + FlyOffsetAIm);
+
+ fly5r = t0r - fly2r;
+ fly5i = t0i - fly2i;
+ t0r = t0r + fly2r;
+ t0i = t0i + fly2i;
+
+ fly7r = t1r - fly1i;
+ fly7i = t1i + fly1r;
+ t1r = t1r + fly1i;
+ t1i = t1i - fly1r;
+
+ fly2r = fly4r - fly6r;
+ fly2i = fly4i - fly6i;
+ fly4r = fly4r + fly6r;
+ fly4i = fly4i + fly6i;
+
+ fly1r = fly0r - fly3i;
+ fly1i = fly0i + fly3r;
+ fly0r = fly0r + fly3i;
+ fly0i = fly0i - fly3r;
+
+
+ fly6r = t0r - fly4r;
+ fly6i = t0i - fly4i;
+ t0r = t0r + fly4r;
+ t0i = t0i + fly4i;
+
+ fly3r = fly5r - fly2i;
+ fly3i = fly5i + fly2r;
+ fly5r = fly5r + fly2i;
+ fly5i = fly5i - fly2r;
+
+ fly4r = t1r - U3r * fly0r;
+ fly4r = fly4r - U3i * fly0i;
+ fly4i = t1i + U3i * fly0r;
+ fly4i = fly4i - U3r * fly0i;
+ t1r = scale * t1r - fly4r;
+ t1i = scale * t1i - fly4i;
+
+ fly2r = fly7r + U3i * fly1r;
+ fly2r = fly2r - U3r * fly1i;
+ fly2i = fly7i + U3r * fly1r;
+ fly2i = fly2i + U3i * fly1i;
+ fly7r = scale * fly7r - fly2r;
+ fly7i = scale * fly7i - fly2i;
+
+ *(fly0P + FlyOffsetA) = fly6r;
+ *(fly0P + FlyOffsetAIm) = fly6i;
+ *(fly0P) = t0r;
+ *(fly0P + 1) = t0i;
+ *(fly2P + FlyOffsetA) = fly3r;
+ *(fly2P + FlyOffsetAIm) = fly3i;
+ *(fly2P) = fly5r;
+ *(fly2P + 1) = fly5i;
+ *(fly1P + FlyOffsetA) = fly4r;
+ *(fly1P + FlyOffsetAIm) = fly4i;
+ *(fly1P) = t1r;
+ *(fly1P + 1) = t1i;
+ *(fly3P + FlyOffsetA) = fly2r;
+ *(fly3P + FlyOffsetAIm) = fly2i;
+ *(fly3P) = fly7r;
+ *(fly3P + 1) = fly7i;
+
+ fly0P = (fly0P + FlyOffsetB);
+ fly1P = (fly1P + FlyOffsetB);
+ fly2P = (fly2P + FlyOffsetB);
+ fly3P = (fly3P + FlyOffsetB);
+
+ };
+
+ NsameU4 >>= 2;
+ LoopN >>= 2;
+ NdiffU <<= 2;
+ Flyinc = Flyinc << 2;
+ };
+};
+
+NsameU2 = NsameU4 >> 1;
+NsameU1 = NsameU2 >> 1;
+Flyinc <<= 1;
+FlyOffsetA = Flyinc << 2;
+FlyOffsetB = FlyOffsetA << 1;
+FlyOffsetAIm = FlyOffsetA + 1;
+FlyOffsetBIm = FlyOffsetB + 1;
+LoopN >>= 1;
+/* ****** RADIX 8 Stages */
+for (stage = stage<<1; stage > 0 ; stage--){
+
+ /* an fft stage is done in two parts to ease use of the single quadrant cos table */
+
+/* fftcalc1(iobuf, Utbl, N, NdiffU, LoopN); */
+ if(!(stage&1)){
+ U0rP = &Utbl[0];
+ U0iP = &Utbl[Ntbl[M-2]];
+ U1rP = U0rP;
+ U1iP = U0iP;
+ U2rP = U0rP;
+ U2iP = U0iP;
+ U3offset = (Ntbl[M]) / 8;
+
+ IOP = ioptr;
+
+ U0r = *U0rP,
+ U0i = *U0iP;
+ U1r = *U1rP,
+ U1i = *U1iP;
+ U2r = *U2rP,
+ U2i = *U2iP;
+ U3r = *( U2rP + U3offset);
+ U3i = *( U2iP - U3offset);
+ }
+
+ fly0P = IOP;
+ fly1P = (IOP+Flyinc);
+ fly2P = (fly1P+Flyinc);
+ fly3P = (fly2P+Flyinc);
+
+ for (diffUcnt = (NdiffU)>>1; diffUcnt != 0; diffUcnt--){
+
+ /* Butterflys */
+ /*
+ f0 - - t0 - - t0 - - t0
+ f1 -U0 - t1 - - t1 - - t1
+ f2 - - f2 -U1 - f5 - - f3
+ f3 -U0 - f1 -U1a- f7 - - f7
+ f4 - - f4 - - f4 -U2 - f6
+ f5 -U0 - f0 - - f0 -U3 - f4
+ f6 - - f6 -U1 - f2 -U2a- f2
+ f7 -U0 - f3 -U1a- f1 -U3a- f5
+ */
+
+ fly0r = *(IOP);
+ fly0i = *(IOP+1);
+ fly1r = *(fly1P);
+ fly1i = *(fly1P+1);
+
+ for (LoopCnt = LoopN-1; LoopCnt > 0 ; LoopCnt--){
+
+ fly2r = *(fly2P);
+ fly2i = *(fly2P + 1);
+ fly3r = *(fly3P);
+ fly3i = *(fly3P + 1);
+ fly4r = *(fly0P + FlyOffsetA);
+ fly4i = *(fly0P + FlyOffsetAIm);
+ fly5r = *(fly1P + FlyOffsetA);
+ fly5i = *(fly1P + FlyOffsetAIm);
+ fly6r = *(fly2P + FlyOffsetA);
+ fly6i = *(fly2P + FlyOffsetAIm);
+ fly7r = *(fly3P + FlyOffsetA);
+ fly7i = *(fly3P + FlyOffsetAIm);
+
+ t1r = fly0r - U0r * fly1r;
+ t1r = t1r - U0i * fly1i;
+ t1i = fly0i + U0i * fly1r;
+ t1i = t1i - U0r * fly1i;
+ t0r = scale * fly0r - t1r;
+ t0i = scale * fly0i - t1i;
+
+ fly1r = fly2r - U0r * fly3r;
+ fly1r = fly1r - U0i * fly3i;
+ fly1i = fly2i + U0i * fly3r;
+ fly1i = fly1i - U0r * fly3i;
+ fly2r = scale * fly2r - fly1r;
+ fly2i = scale * fly2i - fly1i;
+
+ fly0r = fly4r - U0r * fly5r;
+ fly0r = fly0r - U0i * fly5i;
+ fly0i = fly4i + U0i * fly5r;
+ fly0i = fly0i - U0r * fly5i;
+ fly4r = scale * fly4r - fly0r;
+ fly4i = scale * fly4i - fly0i;
+
+ fly3r = fly6r - U0r * fly7r;
+ fly3r = fly3r - U0i * fly7i;
+ fly3i = fly6i + U0i * fly7r;
+ fly3i = fly3i - U0r * fly7i;
+ fly6r = scale * fly6r - fly3r;
+ fly6i = scale * fly6i - fly3i;
+
+
+ fly5r = t0r - U1r * fly2r;
+ fly5r = fly5r - U1i * fly2i;
+ fly5i = t0i + U1i * fly2r;
+ fly5i = fly5i - U1r * fly2i;
+ t0r = scale * t0r - fly5r;
+ t0i = scale * t0i - fly5i;
+
+ fly7r = t1r + U1i * fly1r;
+ fly7r = fly7r - U1r * fly1i;
+ fly7i = t1i + U1r * fly1r;
+ fly7i = fly7i + U1i * fly1i;
+ t1r = scale * t1r - fly7r;
+ t1i = scale * t1i - fly7i;
+
+ fly2r = fly4r - U1r * fly6r;
+ fly2r = fly2r - U1i * fly6i;
+ fly2i = fly4i + U1i * fly6r;
+ fly2i = fly2i - U1r * fly6i;
+ fly4r = scale * fly4r - fly2r;
+ fly4i = scale * fly4i - fly2i;
+
+ fly1r = fly0r + U1i * fly3r;
+ fly1r = fly1r - U1r * fly3i;
+ fly1i = fly0i + U1r * fly3r;
+ fly1i = fly1i + U1i * fly3i;
+ fly0r = scale * fly0r - fly1r;
+ fly0i = scale * fly0i - fly1i;
+
+ fly6r = t0r - U2r * fly4r;
+ fly6r = fly6r - U2i * fly4i;
+ fly6i = t0i + U2i * fly4r;
+ fly6i = fly6i - U2r * fly4i;
+ t0r = scale * t0r - fly6r;
+ t0i = scale * t0i - fly6i;
+
+ fly3r = fly5r - U2i * fly2r;
+ fly3r = fly3r + U2r * fly2i;
+ fly3i = fly5i - U2r * fly2r;
+ fly3i = fly3i - U2i * fly2i;
+ fly2r = scale * fly5r - fly3r;
+ fly2i = scale * fly5i - fly3i;
+
+ fly4r = t1r - U3r * fly0r;
+ fly4r = fly4r - U3i * fly0i;
+ fly4i = t1i + U3i * fly0r;
+ fly4i = fly4i - U3r * fly0i;
+ t1r = scale * t1r - fly4r;
+ t1i = scale * t1i - fly4i;
+
+ fly5r = fly7r + U3i * fly1r;
+ fly5r = fly5r - U3r * fly1i;
+ fly5i = fly7i + U3r * fly1r;
+ fly5i = fly5i + U3i * fly1i;
+ fly7r = scale * fly7r - fly5r;
+ fly7i = scale * fly7i - fly5i;
+
+ *(fly0P + FlyOffsetA) = fly6r;
+ *(fly0P + FlyOffsetAIm) = fly6i;
+ *(fly0P) = t0r;
+ *(fly0P + 1) = t0i;
+ *(fly2P) = fly3r;
+ *(fly2P + 1) = fly3i;
+ *(fly2P + FlyOffsetA) = fly2r;
+ *(fly2P + FlyOffsetAIm) = fly2i;
+
+ fly0r = *(fly0P + FlyOffsetB);
+ fly0i = *(fly0P + FlyOffsetBIm);
+
+ *(fly1P + FlyOffsetA) = fly4r;
+ *(fly1P + FlyOffsetAIm) = fly4i;
+ *(fly1P) = t1r;
+ *(fly1P + 1) = t1i;
+
+ fly1r = *(fly1P + FlyOffsetB);
+ fly1i = *(fly1P + FlyOffsetBIm);
+
+ *(fly3P + FlyOffsetA) = fly5r;
+ *(fly3P + FlyOffsetAIm) = fly5i;
+ *(fly3P) = fly7r;
+ *(fly3P + 1) = fly7i;
+
+ fly0P = (fly0P + FlyOffsetB);
+ fly1P = (fly1P + FlyOffsetB);
+ fly2P = (fly2P + FlyOffsetB);
+ fly3P = (fly3P + FlyOffsetB);
+ };
+ fly2r = *(fly2P);
+ fly2i = *(fly2P + 1);
+ fly3r = *(fly3P);
+ fly3i = *(fly3P + 1);
+ fly4r = *(fly0P + FlyOffsetA);
+ fly4i = *(fly0P + FlyOffsetAIm);
+ fly5r = *(fly1P + FlyOffsetA);
+ fly5i = *(fly1P + FlyOffsetAIm);
+ fly6r = *(fly2P + FlyOffsetA);
+ fly6i = *(fly2P + FlyOffsetAIm);
+ fly7r = *(fly3P + FlyOffsetA);
+ fly7i = *(fly3P + FlyOffsetAIm);
+
+ t1r = fly0r - U0r * fly1r;
+ t1r = t1r - U0i * fly1i;
+ t1i = fly0i + U0i * fly1r;
+ t1i = t1i - U0r * fly1i;
+ t0r = scale * fly0r - t1r;
+ t0i = scale * fly0i - t1i;
+
+ fly1r = fly2r - U0r * fly3r;
+ fly1r = fly1r - U0i * fly3i;
+ fly1i = fly2i + U0i * fly3r;
+ fly1i = fly1i - U0r * fly3i;
+ fly2r = scale * fly2r - fly1r;
+ fly2i = scale * fly2i - fly1i;
+
+ fly0r = fly4r - U0r * fly5r;
+ fly0r = fly0r - U0i * fly5i;
+ fly0i = fly4i + U0i * fly5r;
+ fly0i = fly0i - U0r * fly5i;
+ fly4r = scale * fly4r - fly0r;
+ fly4i = scale * fly4i - fly0i;
+
+ fly3r = fly6r - U0r * fly7r;
+ fly3r = fly3r - U0i * fly7i;
+ fly3i = fly6i + U0i * fly7r;
+ fly3i = fly3i - U0r * fly7i;
+ fly6r = scale * fly6r - fly3r;
+ fly6i = scale * fly6i - fly3i;
+
+ fly5r = t0r - U1r * fly2r;
+ fly5r = fly5r - U1i * fly2i;
+ fly5i = t0i + U1i * fly2r;
+ fly5i = fly5i - U1r * fly2i;
+ t0r = scale * t0r - fly5r;
+ t0i = scale * t0i - fly5i;
+
+ fly7r = t1r + U1i * fly1r;
+ fly7r = fly7r - U1r * fly1i;
+ fly7i = t1i + U1r * fly1r;
+ fly7i = fly7i + U1i * fly1i;
+ t1r = scale * t1r - fly7r;
+ t1i = scale * t1i - fly7i;
+
+ fly2r = fly4r - U1r * fly6r;
+ fly2r = fly2r - U1i * fly6i;
+ fly2i = fly4i + U1i * fly6r;
+ fly2i = fly2i - U1r * fly6i;
+ fly4r = scale * fly4r - fly2r;
+ fly4i = scale * fly4i - fly2i;
+
+ fly1r = fly0r + U1i * fly3r;
+ fly1r = fly1r - U1r * fly3i;
+ fly1i = fly0i + U1r * fly3r;
+ fly1i = fly1i + U1i * fly3i;
+ fly0r = scale * fly0r - fly1r;
+ fly0i = scale * fly0i - fly1i;
+
+ U0i = *(U0iP = (U0iP - NsameU4));
+ U0r = *(U0rP = (U0rP + NsameU4));
+ U1r = *(U1rP = (U1rP + NsameU2));
+ U1i = *(U1iP = (U1iP - NsameU2));
+ if(stage&1)
+ U0r = -U0r;
+
+ fly6r = t0r - U2r * fly4r;
+ fly6r = fly6r - U2i * fly4i;
+ fly6i = t0i + U2i * fly4r;
+ fly6i = fly6i - U2r * fly4i;
+ t0r = scale * t0r - fly6r;
+ t0i = scale * t0i - fly6i;
+
+ fly3r = fly5r - U2i * fly2r;
+ fly3r = fly3r + U2r * fly2i;
+ fly3i = fly5i - U2r * fly2r;
+ fly3i = fly3i - U2i * fly2i;
+ fly2r = scale * fly5r - fly3r;
+ fly2i = scale * fly5i - fly3i;
+
+ fly4r = t1r - U3r * fly0r;
+ fly4r = fly4r - U3i * fly0i;
+ fly4i = t1i + U3i * fly0r;
+ fly4i = fly4i - U3r * fly0i;
+ t1r = scale * t1r - fly4r;
+ t1i = scale * t1i - fly4i;
+
+ fly5r = fly7r + U3i * fly1r;
+ fly5r = fly5r - U3r * fly1i;
+ fly5i = fly7i + U3r * fly1r;
+ fly5i = fly5i + U3i * fly1i;
+ fly7r = scale * fly7r - fly5r;
+ fly7i = scale * fly7i - fly5i;
+
+ *(fly0P + FlyOffsetA) = fly6r;
+ *(fly0P + FlyOffsetAIm) = fly6i;
+ *(fly0P) = t0r;
+ *(fly0P + 1) = t0i;
+
+ U2r = *(U2rP = (U2rP + NsameU1));
+ U2i = *(U2iP = (U2iP - NsameU1));
+
+ *(fly2P) = fly3r;
+ *(fly2P + 1) = fly3i;
+ *(fly2P + FlyOffsetA) = fly2r;
+ *(fly2P + FlyOffsetAIm) = fly2i;
+ *(fly1P + FlyOffsetA) = fly4r;
+ *(fly1P + FlyOffsetAIm) = fly4i;
+ *(fly1P) = t1r;
+ *(fly1P + 1) = t1i;
+
+ U3r = *( U2rP + U3offset);
+ U3i = *( U2iP - U3offset);
+
+ *(fly3P + FlyOffsetA) = fly5r;
+ *(fly3P + FlyOffsetAIm) = fly5i;
+ *(fly3P) = fly7r;
+ *(fly3P + 1) = fly7i;
+
+ IOP = IOP + 2;
+ fly0P = IOP;
+ fly1P = (IOP+Flyinc);
+ fly2P = (fly1P+Flyinc);
+ fly3P = (fly2P+Flyinc);
+ };
+ NsameU4 = -NsameU4;
+
+ if(stage&1){
+ LoopN >>= 3;
+ NsameU1 >>= 3;
+ NsameU2 >>= 3;
+ NsameU4 >>= 3;
+ NdiffU <<= 3;
+ Flyinc <<= 3;
+ FlyOffsetA <<= 3;
+ FlyOffsetB <<= 3;
+ FlyOffsetAIm = FlyOffsetA + 1;
+ FlyOffsetBIm = FlyOffsetB + 1;
+ }
+}
+ioptr += 2*Ntbl[M];
+}
+}
+
+void iffts(float *ioptr, long M, long Rows, float *Utbl){
+/* Compute in-place inverse complex fft on the rows of the input array */
+/* INPUTS */
+/* M = log2 of fft size */
+/* *ioptr = input data array */
+/* *Utbl = cosine table */
+/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
+/* OUTPUTS */
+/* *ioptr = output data array */
+
+long Flyinc;
+long FlyOffsetA;
+long FlyOffsetAIm;
+long FlyOffsetB;
+long FlyOffsetBIm;
+long NsameU1;
+long NsameU2;
+long NsameU4;
+long diffUcnt;
+long LoopCnt;
+
+float scale;
+float fly0r;
+float fly0i;
+float fly1r;
+float fly1i;
+float fly2r;
+float fly2i;
+float fly3r;
+float fly3i;
+float fly4r;
+float fly4i;
+float fly5r;
+float fly5i;
+float fly6r;
+float fly6i;
+float fly7r;
+float fly7i;
+float U0r;
+float U0i;
+float U1r;
+float U1i;
+float U2r;
+float U2i;
+float U3r;
+float U3i;
+float t0r;
+float t0i;
+float t1r;
+float t1i;
+
+float *fly0P;
+float *fly1P;
+float *fly2P;
+float *fly3P;
+float *U0rP;
+float *U0iP;
+float *U1rP;
+float *U1iP;
+float *U2rP;
+float *U2iP;
+float *IOP;
+long U3offset;
+
+long stage;
+long NdiffU;
+long LoopN;
+
+const long BRshift = MAXMROOT - (M>>1);
+const long Nrems2 = Ntbl[M-(M>>1)+1];
+const long Nroot_1_ColInc = (Ntbl[M-1]-Ntbl[M-(M>>1)])*2;
+
+for (;Rows>0;Rows--){
+
+FlyOffsetA = Ntbl[M] * 2/2;
+FlyOffsetAIm = FlyOffsetA + 1;
+FlyOffsetB = FlyOffsetA + 2;
+FlyOffsetBIm = FlyOffsetB + 1;
+
+/* BitrevR2 ** bit reverse and first radix 2 stage ******/
+
+scale = 1./Ntbl[M];
+for (stage = 0; stage < Ntbl[M-(M>>1)]*2; stage += Ntbl[M>>1]*2){
+ for (LoopN = (Ntbl[(M>>1)-1]-1); LoopN >= 0; LoopN--){
+ LoopCnt = (Ntbl[(M>>1)-1]-1);
+ fly0P = ioptr + Nroot_1_ColInc + ((int)BRcnt[LoopN] >> BRshift)*(2*2) + stage;
+ IOP = ioptr + (LoopN<<(M+1)/2) * 2 + stage;
+ fly1P = IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2);
+ fly0r = *(fly0P);
+ fly0i = *(fly0P+1);
+ fly1r = *(fly0P+FlyOffsetA);
+ fly1i = *(fly0P+FlyOffsetAIm);
+ for (; LoopCnt > LoopN;){
+ fly2r = *(fly0P+2);
+ fly2i = *(fly0P+(2+1));
+ fly3r = *(fly0P+FlyOffsetB);
+ fly3i = *(fly0P+FlyOffsetBIm);
+ fly4r = *(fly1P);
+ fly4i = *(fly1P+1);
+ fly5r = *(fly1P+FlyOffsetA);
+ fly5i = *(fly1P+FlyOffsetAIm);
+ fly6r = *(fly1P+2);
+ fly6i = *(fly1P+(2+1));
+ fly7r = *(fly1P+FlyOffsetB);
+ fly7i = *(fly1P+FlyOffsetBIm);
+
+ t0r = fly0r + fly1r;
+ t0i = fly0i + fly1i;
+ fly1r = fly0r - fly1r;
+ fly1i = fly0i - fly1i;
+ t1r = fly2r + fly3r;
+ t1i = fly2i + fly3i;
+ fly3r = fly2r - fly3r;
+ fly3i = fly2i - fly3i;
+ fly0r = fly4r + fly5r;
+ fly0i = fly4i + fly5i;
+ fly5r = fly4r - fly5r;
+ fly5i = fly4i - fly5i;
+ fly2r = fly6r + fly7r;
+ fly2i = fly6i + fly7i;
+ fly7r = fly6r - fly7r;
+ fly7i = fly6i - fly7i;
+
+ *(fly1P) = scale*t0r;
+ *(fly1P+1) = scale*t0i;
+ *(fly1P+2) = scale*fly1r;
+ *(fly1P+(2+1)) = scale*fly1i;
+ *(fly1P+FlyOffsetA) = scale*t1r;
+ *(fly1P+FlyOffsetAIm) = scale*t1i;
+ *(fly1P+FlyOffsetB) = scale*fly3r;
+ *(fly1P+FlyOffsetBIm) = scale*fly3i;
+ *(fly0P) = scale*fly0r;
+ *(fly0P+1) = scale*fly0i;
+ *(fly0P+2) = scale*fly5r;
+ *(fly0P+(2+1)) = scale*fly5i;
+ *(fly0P+FlyOffsetA) = scale*fly2r;
+ *(fly0P+FlyOffsetAIm) = scale*fly2i;
+ *(fly0P+FlyOffsetB) = scale*fly7r;
+ *(fly0P+FlyOffsetBIm) = scale*fly7i;
+
+ fly0P -= Nrems2;
+ fly0r = *(fly0P);
+ fly0i = *(fly0P+1);
+ fly1r = *(fly0P+FlyOffsetA);
+ fly1i = *(fly0P+FlyOffsetAIm);
+ LoopCnt -= 1;
+ fly1P = (IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2));
+ };
+ fly2r = *(fly0P+2);
+ fly2i = *(fly0P+(2+1));
+ fly3r = *(fly0P+FlyOffsetB);
+ fly3i = *(fly0P+FlyOffsetBIm);
+
+ t0r = fly0r + fly1r;
+ t0i = fly0i + fly1i;
+ fly1r = fly0r - fly1r;
+ fly1i = fly0i - fly1i;
+ t1r = fly2r + fly3r;
+ t1i = fly2i + fly3i;
+ fly3r = fly2r - fly3r;
+ fly3i = fly2i - fly3i;
+
+ *(fly0P) = scale*t0r;
+ *(fly0P+1) = scale*t0i;
+ *(fly0P+2) = scale*fly1r;
+ *(fly0P+(2+1)) = scale*fly1i;
+ *(fly0P+FlyOffsetA) = scale*t1r;
+ *(fly0P+FlyOffsetAIm) = scale*t1i;
+ *(fly0P+FlyOffsetB) = scale*fly3r;
+ *(fly0P+FlyOffsetBIm) = scale*fly3i;
+
+ };
+};
+
+/**** FFTC **************/
+
+scale = 2.0;
+
+NdiffU = 2;
+Flyinc = (NdiffU);
+
+NsameU4 = Ntbl[M]/4;
+LoopN = Ntbl[M-3];
+
+stage = ((M-1)/3);
+if ( (M-1-(stage * 3)) != 0 ){
+ FlyOffsetA = Flyinc << 2;
+ FlyOffsetB = FlyOffsetA << 1;
+ FlyOffsetAIm = FlyOffsetA + 1;
+ FlyOffsetBIm = FlyOffsetB + 1;
+ if ( (M-1-(stage * 3)) == 1 ){
+ /* 1 radix 2 stage */
+
+ IOP = ioptr;
+ fly0P = IOP;
+ fly1P = (IOP+Flyinc);
+ fly2P = (fly1P+Flyinc);
+ fly3P = (fly2P+Flyinc);
+
+ /* Butterflys */
+ /*
+ t0 - - t0
+ t1 - - t1
+ f2 - 1- f5
+ f1 - -i- f7
+ f4 - - f4
+ f0 - - f0
+ f6 - 1- f2
+ f3 - -i- f1
+ */
+
+ for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){
+ t0r = *(fly0P);
+ t0i = *(fly0P + 1);
+ t1r = *(fly1P);
+ t1i = *(fly1P + 1);
+ fly2r = *(fly2P);
+ fly2i = *(fly2P + 1);
+ fly1r = *(fly3P);
+ fly1i = *(fly3P + 1);
+ fly4r = *(fly0P + FlyOffsetA);
+ fly4i = *(fly0P + FlyOffsetAIm);
+ fly0r = *(fly1P + FlyOffsetA);
+ fly0i = *(fly1P + FlyOffsetAIm);
+ fly6r = *(fly2P + FlyOffsetA);
+ fly6i = *(fly2P + FlyOffsetAIm);
+ fly3r = *(fly3P + FlyOffsetA);
+ fly3i = *(fly3P + FlyOffsetAIm);
+
+ fly5r = t0r - fly2r;
+ fly5i = t0i - fly2i;
+ t0r = t0r + fly2r;
+ t0i = t0i + fly2i;
+
+ fly7r = t1r + fly1i;
+ fly7i = t1i - fly1r;
+ t1r = t1r - fly1i;
+ t1i = t1i + fly1r;
+
+ fly2r = fly4r - fly6r;
+ fly2i = fly4i - fly6i;
+ fly4r = fly4r + fly6r;
+ fly4i = fly4i + fly6i;
+
+ fly1r = fly0r + fly3i;
+ fly1i = fly0i - fly3r;
+ fly0r = fly0r - fly3i;
+ fly0i = fly0i + fly3r;
+
+ *(fly2P) = fly5r;
+ *(fly2P + 1) = fly5i;
+ *(fly0P) = t0r;
+ *(fly0P + 1) = t0i;
+ *(fly3P) = fly7r;
+ *(fly3P + 1) = fly7i;
+ *(fly1P) = t1r;
+ *(fly1P + 1) = t1i;
+ *(fly2P + FlyOffsetA) = fly2r;
+ *(fly2P + FlyOffsetAIm) = fly2i;
+ *(fly0P + FlyOffsetA) = fly4r;
+ *(fly0P + FlyOffsetAIm) = fly4i;
+ *(fly3P + FlyOffsetA) = fly1r;
+ *(fly3P + FlyOffsetAIm) = fly1i;
+ *(fly1P + FlyOffsetA) = fly0r;
+ *(fly1P + FlyOffsetAIm) = fly0i;
+
+ fly0P = (fly0P + FlyOffsetB);
+ fly1P = (fly1P + FlyOffsetB);
+ fly2P = (fly2P + FlyOffsetB);
+ fly3P = (fly3P + FlyOffsetB);
+ };
+
+ NsameU4 >>= 1;
+ LoopN >>= 1;
+ NdiffU <<= 1;
+ Flyinc = Flyinc << 1;
+ }
+ else{
+ /* 1 radix 4 stage */
+ IOP = ioptr;
+
+ U3r = 0.7071067811865475244008443621; /* sqrt(0.5); */
+ U3i = U3r;
+ fly0P = IOP;
+ fly1P = (IOP+Flyinc);
+ fly2P = (fly1P+Flyinc);
+ fly3P = (fly2P+Flyinc);
+
+ /* Butterflys */
+ /*
+ t0 - - t0 - - t0
+ t1 - - t1 - - t1
+ f2 - 1- f5 - - f5
+ f1 - -i- f7 - - f7
+ f4 - - f4 - 1- f6
+ f0 - - f0 -U3 - f3
+ f6 - 1- f2 - -i- f4
+ f3 - -i- f1 -U3a- f2
+ */
+
+ for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){
+ t0r = *(fly0P);
+ t0i = *(fly0P + 1);
+ t1r = *(fly1P);
+ t1i = *(fly1P + 1);
+ fly2r = *(fly2P);
+ fly2i = *(fly2P + 1);
+ fly1r = *(fly3P);
+ fly1i = *(fly3P + 1);
+ fly4r = *(fly0P + FlyOffsetA);
+ fly4i = *(fly0P + FlyOffsetAIm);
+ fly0r = *(fly1P + FlyOffsetA);
+ fly0i = *(fly1P + FlyOffsetAIm);
+ fly6r = *(fly2P + FlyOffsetA);
+ fly6i = *(fly2P + FlyOffsetAIm);
+ fly3r = *(fly3P + FlyOffsetA);
+ fly3i = *(fly3P + FlyOffsetAIm);
+
+ fly5r = t0r - fly2r;
+ fly5i = t0i - fly2i;
+ t0r = t0r + fly2r;
+ t0i = t0i + fly2i;
+
+ fly7r = t1r + fly1i;
+ fly7i = t1i - fly1r;
+ t1r = t1r - fly1i;
+ t1i = t1i + fly1r;
+
+ fly2r = fly4r - fly6r;
+ fly2i = fly4i - fly6i;
+ fly4r = fly4r + fly6r;
+ fly4i = fly4i + fly6i;
+
+ fly1r = fly0r + fly3i;
+ fly1i = fly0i - fly3r;
+ fly0r = fly0r - fly3i;
+ fly0i = fly0i + fly3r;
+
+ fly6r = t0r - fly4r;
+ fly6i = t0i - fly4i;
+ t0r = t0r + fly4r;
+ t0i = t0i + fly4i;
+
+ fly3r = fly5r + fly2i;
+ fly3i = fly5i - fly2r;
+ fly5r = fly5r - fly2i;
+ fly5i = fly5i + fly2r;
+
+ fly4r = t1r - U3r * fly0r;
+ fly4r = fly4r + U3i * fly0i;
+ fly4i = t1i - U3i * fly0r;
+ fly4i = fly4i - U3r * fly0i;
+ t1r = scale * t1r - fly4r;
+ t1i = scale * t1i - fly4i;
+
+ fly2r = fly7r + U3i * fly1r;
+ fly2r = fly2r + U3r * fly1i;
+ fly2i = fly7i - U3r * fly1r;
+ fly2i = fly2i + U3i * fly1i;
+ fly7r = scale * fly7r - fly2r;
+ fly7i = scale * fly7i - fly2i;
+
+ *(fly0P + FlyOffsetA) = fly6r;
+ *(fly0P + FlyOffsetAIm) = fly6i;
+ *(fly0P) = t0r;
+ *(fly0P + 1) = t0i;
+ *(fly2P + FlyOffsetA) = fly3r;
+ *(fly2P + FlyOffsetAIm) = fly3i;
+ *(fly2P) = fly5r;
+ *(fly2P + 1) = fly5i;
+ *(fly1P + FlyOffsetA) = fly4r;
+ *(fly1P + FlyOffsetAIm) = fly4i;
+ *(fly1P) = t1r;
+ *(fly1P + 1) = t1i;
+ *(fly3P + FlyOffsetA) = fly2r;
+ *(fly3P + FlyOffsetAIm) = fly2i;
+ *(fly3P) = fly7r;
+ *(fly3P + 1) = fly7i;
+
+ fly0P = (fly0P + FlyOffsetB);
+ fly1P = (fly1P + FlyOffsetB);
+ fly2P = (fly2P + FlyOffsetB);
+ fly3P = (fly3P + FlyOffsetB);
+
+ };
+
+ NsameU4 >>= 2;
+ LoopN >>= 2;
+ NdiffU <<= 2;
+ Flyinc = Flyinc << 2;
+ };
+};
+
+NsameU2 = NsameU4 >> 1;
+NsameU1 = NsameU2 >> 1;
+Flyinc <<= 1;
+FlyOffsetA = Flyinc << 2;
+FlyOffsetB = FlyOffsetA << 1;
+FlyOffsetAIm = FlyOffsetA + 1;
+FlyOffsetBIm = FlyOffsetB + 1;
+LoopN >>= 1;
+
+/* ****** RADIX 8 Stages */
+for (stage = stage<<1; stage > 0 ; stage--){
+
+ /* an fft stage is done in two parts to ease use of the single quadrant cos table */
+
+/* fftcalc1(iobuf, Utbl, N, NdiffU, LoopN); */
+ if(!(stage&1)){
+ U0rP = &Utbl[0];
+ U0iP = &Utbl[Ntbl[M-2]];
+ U1rP = U0rP;
+ U1iP = U0iP;
+ U2rP = U0rP;
+ U2iP = U0iP;
+ U3offset = (Ntbl[M]) / 8;
+
+ IOP = ioptr;
+
+ U0r = *U0rP,
+ U0i = *U0iP;
+ U1r = *U1rP,
+ U1i = *U1iP;
+ U2r = *U2rP,
+ U2i = *U2iP;
+ U3r = *( U2rP + U3offset);
+ U3i = *( U2iP - U3offset);
+ }
+
+ fly0P = IOP;
+ fly1P = (IOP+Flyinc);
+ fly2P = (fly1P+Flyinc);
+ fly3P = (fly2P+Flyinc);
+
+ for (diffUcnt = (NdiffU)>>1; diffUcnt > 0; diffUcnt--){
+
+ /* Butterflys */
+ /*
+ f0 - - t0 - - t0 - - t0
+ f1 -U0 - t1 - - t1 - - t1
+ f2 - - f2 -U1 - f5 - - f3
+ f3 -U0 - f1 -U1a- f7 - - f7
+ f4 - - f4 - - f4 -U2 - f6
+ f5 -U0 - f0 - - f0 -U3 - f4
+ f6 - - f6 -U1 - f2 -U2a- f2
+ f7 -U0 - f3 -U1a- f1 -U3a- f5
+ */
+
+ fly0r = *(IOP);
+ fly0i = *(IOP+1);
+ fly1r = *(fly1P);
+ fly1i = *(fly1P+1);
+
+ for (LoopCnt = LoopN-1; LoopCnt > 0 ; LoopCnt--){
+
+ fly2r = *(fly2P);
+ fly2i = *(fly2P + 1);
+ fly3r = *(fly3P);
+ fly3i = *(fly3P + 1);
+ fly4r = *(fly0P + FlyOffsetA);
+ fly4i = *(fly0P + FlyOffsetAIm);
+ fly5r = *(fly1P + FlyOffsetA);
+ fly5i = *(fly1P + FlyOffsetAIm);
+ fly6r = *(fly2P + FlyOffsetA);
+ fly6i = *(fly2P + FlyOffsetAIm);
+ fly7r = *(fly3P + FlyOffsetA);
+ fly7i = *(fly3P + FlyOffsetAIm);
+
+ t1r = fly0r - U0r * fly1r;
+ t1r = t1r + U0i * fly1i;
+ t1i = fly0i - U0i * fly1r;
+ t1i = t1i - U0r * fly1i;
+ t0r = scale * fly0r - t1r;
+ t0i = scale * fly0i - t1i;
+
+ fly1r = fly2r - U0r * fly3r;
+ fly1r = fly1r + U0i * fly3i;
+ fly1i = fly2i - U0i * fly3r;
+ fly1i = fly1i - U0r * fly3i;
+ fly2r = scale * fly2r - fly1r;
+ fly2i = scale * fly2i - fly1i;
+
+ fly0r = fly4r - U0r * fly5r;
+ fly0r = fly0r + U0i * fly5i;
+ fly0i = fly4i - U0i * fly5r;
+ fly0i = fly0i - U0r * fly5i;
+ fly4r = scale * fly4r - fly0r;
+ fly4i = scale * fly4i - fly0i;
+
+ fly3r = fly6r - U0r * fly7r;
+ fly3r = fly3r + U0i * fly7i;
+ fly3i = fly6i - U0i * fly7r;
+ fly3i = fly3i - U0r * fly7i;
+ fly6r = scale * fly6r - fly3r;
+ fly6i = scale * fly6i - fly3i;
+
+
+ fly5r = t0r - U1r * fly2r;
+ fly5r = fly5r + U1i * fly2i;
+ fly5i = t0i - U1i * fly2r;
+ fly5i = fly5i - U1r * fly2i;
+ t0r = scale * t0r - fly5r;
+ t0i = scale * t0i - fly5i;
+
+ fly7r = t1r + U1i * fly1r;
+ fly7r = fly7r + U1r * fly1i;
+ fly7i = t1i - U1r * fly1r;
+ fly7i = fly7i + U1i * fly1i;
+ t1r = scale * t1r - fly7r;
+ t1i = scale * t1i - fly7i;
+
+ fly2r = fly4r - U1r * fly6r;
+ fly2r = fly2r + U1i * fly6i;
+ fly2i = fly4i - U1i * fly6r;
+ fly2i = fly2i - U1r * fly6i;
+ fly4r = scale * fly4r - fly2r;
+ fly4i = scale * fly4i - fly2i;
+
+ fly1r = fly0r + U1i * fly3r;
+ fly1r = fly1r + U1r * fly3i;
+ fly1i = fly0i - U1r * fly3r;
+ fly1i = fly1i + U1i * fly3i;
+ fly0r = scale * fly0r - fly1r;
+ fly0i = scale * fly0i - fly1i;
+
+ fly6r = t0r - U2r * fly4r;
+ fly6r = fly6r + U2i * fly4i;
+ fly6i = t0i - U2i * fly4r;
+ fly6i = fly6i - U2r * fly4i;
+ t0r = scale * t0r - fly6r;
+ t0i = scale * t0i - fly6i;
+
+ fly3r = fly5r - U2i * fly2r;
+ fly3r = fly3r - U2r * fly2i;
+ fly3i = fly5i + U2r * fly2r;
+ fly3i = fly3i - U2i * fly2i;
+ fly2r = scale * fly5r - fly3r;
+ fly2i = scale * fly5i - fly3i;
+
+ fly4r = t1r - U3r * fly0r;
+ fly4r = fly4r + U3i * fly0i;
+ fly4i = t1i - U3i * fly0r;
+ fly4i = fly4i - U3r * fly0i;
+ t1r = scale * t1r - fly4r;
+ t1i = scale * t1i - fly4i;
+
+ fly5r = fly7r + U3i * fly1r;
+ fly5r = fly5r + U3r * fly1i;
+ fly5i = fly7i - U3r * fly1r;
+ fly5i = fly5i + U3i * fly1i;
+ fly7r = scale * fly7r - fly5r;
+ fly7i = scale * fly7i - fly5i;
+
+ *(fly0P + FlyOffsetA) = fly6r;
+ *(fly0P + FlyOffsetAIm) = fly6i;
+ *(fly0P) = t0r;
+ *(fly0P + 1) = t0i;
+ *(fly2P) = fly3r;
+ *(fly2P + 1) = fly3i;
+ *(fly2P + FlyOffsetA) = fly2r;
+ *(fly2P + FlyOffsetAIm) = fly2i;
+
+ fly0r = *(fly0P + FlyOffsetB);
+ fly0i = *(fly0P + FlyOffsetBIm);
+
+ *(fly1P + FlyOffsetA) = fly4r;
+ *(fly1P + FlyOffsetAIm) = fly4i;
+ *(fly1P) = t1r;
+ *(fly1P + 1) = t1i;
+
+ fly1r = *(fly1P + FlyOffsetB);
+ fly1i = *(fly1P + FlyOffsetBIm);
+
+ *(fly3P + FlyOffsetA) = fly5r;
+ *(fly3P + FlyOffsetAIm) = fly5i;
+ *(fly3P) = fly7r;
+ *(fly3P + 1) = fly7i;
+
+ fly0P = (fly0P + FlyOffsetB);
+ fly1P = (fly1P + FlyOffsetB);
+ fly2P = (fly2P + FlyOffsetB);
+ fly3P = (fly3P + FlyOffsetB);
+
+ };
+ fly2r = *(fly2P);
+ fly2i = *(fly2P + 1);
+ fly3r = *(fly3P);
+ fly3i = *(fly3P + 1);
+ fly4r = *(fly0P + FlyOffsetA);
+ fly4i = *(fly0P + FlyOffsetAIm);
+ fly5r = *(fly1P + FlyOffsetA);
+ fly5i = *(fly1P + FlyOffsetAIm);
+ fly6r = *(fly2P + FlyOffsetA);
+ fly6i = *(fly2P + FlyOffsetAIm);
+ fly7r = *(fly3P + FlyOffsetA);
+ fly7i = *(fly3P + FlyOffsetAIm);
+
+ t1r = fly0r - U0r * fly1r;
+ t1r = t1r + U0i * fly1i;
+ t1i = fly0i - U0i * fly1r;
+ t1i = t1i - U0r * fly1i;
+ t0r = scale * fly0r - t1r;
+ t0i = scale * fly0i - t1i;
+
+ fly1r = fly2r - U0r * fly3r;
+ fly1r = fly1r + U0i * fly3i;
+ fly1i = fly2i - U0i * fly3r;
+ fly1i = fly1i - U0r * fly3i;
+ fly2r = scale * fly2r - fly1r;
+ fly2i = scale * fly2i - fly1i;
+
+ fly0r = fly4r - U0r * fly5r;
+ fly0r = fly0r + U0i * fly5i;
+ fly0i = fly4i - U0i * fly5r;
+ fly0i = fly0i - U0r * fly5i;
+ fly4r = scale * fly4r - fly0r;
+ fly4i = scale * fly4i - fly0i;
+
+ fly3r = fly6r - U0r * fly7r;
+ fly3r = fly3r + U0i * fly7i;
+ fly3i = fly6i - U0i * fly7r;
+ fly3i = fly3i - U0r * fly7i;
+ fly6r = scale * fly6r - fly3r;
+ fly6i = scale * fly6i - fly3i;
+
+ fly5r = t0r - U1r * fly2r;
+ fly5r = fly5r + U1i * fly2i;
+ fly5i = t0i - U1i * fly2r;
+ fly5i = fly5i - U1r * fly2i;
+ t0r = scale * t0r - fly5r;
+ t0i = scale * t0i - fly5i;
+
+ fly7r = t1r + U1i * fly1r;
+ fly7r = fly7r + U1r * fly1i;
+ fly7i = t1i - U1r * fly1r;
+ fly7i = fly7i + U1i * fly1i;
+ t1r = scale * t1r - fly7r;
+ t1i = scale * t1i - fly7i;
+
+ fly2r = fly4r - U1r * fly6r;
+ fly2r = fly2r + U1i * fly6i;
+ fly2i = fly4i - U1i * fly6r;
+ fly2i = fly2i - U1r * fly6i;
+ fly4r = scale * fly4r - fly2r;
+ fly4i = scale * fly4i - fly2i;
+
+ fly1r = fly0r + U1i * fly3r;
+ fly1r = fly1r + U1r * fly3i;
+ fly1i = fly0i - U1r * fly3r;
+ fly1i = fly1i + U1i * fly3i;
+ fly0r = scale * fly0r - fly1r;
+ fly0i = scale * fly0i - fly1i;
+
+ U0i = *(U0iP = (U0iP - NsameU4));
+ U0r = *(U0rP = (U0rP + NsameU4));
+ U1r = *(U1rP = (U1rP + NsameU2));
+ U1i = *(U1iP = (U1iP - NsameU2));
+ if(stage&1)
+ U0r = - U0r;
+
+ fly6r = t0r - U2r * fly4r;
+ fly6r = fly6r + U2i * fly4i;
+ fly6i = t0i - U2i * fly4r;
+ fly6i = fly6i - U2r * fly4i;
+ t0r = scale * t0r - fly6r;
+ t0i = scale * t0i - fly6i;
+
+ fly3r = fly5r - U2i * fly2r;
+ fly3r = fly3r - U2r * fly2i;
+ fly3i = fly5i + U2r * fly2r;
+ fly3i = fly3i - U2i * fly2i;
+ fly2r = scale * fly5r - fly3r;
+ fly2i = scale * fly5i - fly3i;
+
+ fly4r = t1r - U3r * fly0r;
+ fly4r = fly4r + U3i * fly0i;
+ fly4i = t1i - U3i * fly0r;
+ fly4i = fly4i - U3r * fly0i;
+ t1r = scale * t1r - fly4r;
+ t1i = scale * t1i - fly4i;
+
+ fly5r = fly7r + U3i * fly1r;
+ fly5r = fly5r + U3r * fly1i;
+ fly5i = fly7i - U3r * fly1r;
+ fly5i = fly5i + U3i * fly1i;
+ fly7r = scale * fly7r - fly5r;
+ fly7i = scale * fly7i - fly5i;
+
+ *(fly0P + FlyOffsetA) = fly6r;
+ *(fly0P + FlyOffsetAIm) = fly6i;
+ *(fly0P) = t0r;
+ *(fly0P + 1) = t0i;
+
+ U2r = *(U2rP = (U2rP + NsameU1));
+ U2i = *(U2iP = (U2iP - NsameU1));
+
+ *(fly2P) = fly3r;
+ *(fly2P + 1) = fly3i;
+ *(fly2P + FlyOffsetA) = fly2r;
+ *(fly2P + FlyOffsetAIm) = fly2i;
+ *(fly1P + FlyOffsetA) = fly4r;
+ *(fly1P + FlyOffsetAIm) = fly4i;
+ *(fly1P) = t1r;
+ *(fly1P + 1) = t1i;
+
+ U3r = *( U2rP + U3offset);
+ U3i = *( U2iP - U3offset);
+
+ *(fly3P + FlyOffsetA) = fly5r;
+ *(fly3P + FlyOffsetAIm) = fly5i;
+ *(fly3P) = fly7r;
+ *(fly3P + 1) = fly7i;
+
+ IOP = IOP + 2;
+ fly0P = IOP;
+ fly1P = (IOP+Flyinc);
+ fly2P = (fly1P+Flyinc);
+ fly3P = (fly2P+Flyinc);
+ };
+ NsameU4 = -NsameU4;
+
+ if(stage&1){
+ LoopN >>= 3;
+ NsameU1 >>= 3;
+ NsameU2 >>= 3;
+ NsameU4 >>= 3;
+ NdiffU <<= 3;
+ Flyinc = Flyinc << 3;
+ FlyOffsetA <<= 3;
+ FlyOffsetB <<= 3;
+ FlyOffsetAIm = FlyOffsetA + 1;
+ FlyOffsetBIm = FlyOffsetB + 1;
+ }
+}
+ioptr += 2*Ntbl[M];
+}
+}
+
+/* rffts */
+/* compute multiple real input FFTs on 'Rows' consecutively stored arrays */
+/* ioptr = pointer to the data in and out */
+/* M = log2 of fft size */
+/* Rows = number of FFTs to compute */
+/* Utbl = Pointer to cosine table */
+
+void rffts(float *ioptr, long M, long Rows, float *Utbl){
+/* Compute in-place real fft on the rows of the input array */
+/* INPUTS */
+/* M = log2 of fft size */
+/* *ioptr = real input data array */
+/* *Utbl = cosine table */
+/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
+/* OUTPUTS */
+/* *ioptr = output data array in the following order */
+/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
+
+long Flyinc;
+long FlyOffsetA;
+long FlyOffsetAIm;
+long FlyOffsetB;
+long FlyOffsetBIm;
+long NsameU1;
+long NsameU2;
+long NsameU4;
+long diffUcnt;
+long LoopCnt;
+float scale;
+float fly0r;
+float fly0i;
+float fly1r;
+float fly1i;
+float fly2r;
+float fly2i;
+float fly3r;
+float fly3i;
+float fly4r;
+float fly4i;
+float fly5r;
+float fly5i;
+float fly6r;
+float fly6i;
+float fly7r;
+float fly7i;
+float U0r;
+float U0i;
+float U1r;
+float U1i;
+float U2r;
+float U2i;
+float U3r;
+float U3i;
+float t0r;
+float t0i;
+float t1r;
+float t1i;
+
+float *fly0P;
+float *fly1P;
+float *fly2P;
+float *fly3P;
+
+float *U0rP;
+float *U0iP;
+float *U1rP;
+float *U1iP;
+float *U2rP;
+float *U2iP;
+float *IOP;
+long U3offset;
+
+long stage;
+long NdiffU;
+long LoopN;
+
+const long BRshift = MAXMROOT - ((M-1)>>1); /* for RFFT */
+const long Nrems2 = Ntbl[(M-1)-((M-1)>>1)+1]; /* for RFFT */
+const long Nroot_1_ColInc = (Ntbl[(M-1)-1]-Ntbl[(M-1)-((M-1)>>1)])*2; /* for RFFT */
+
+for (;Rows>0;Rows--){
+
+M=M-1; /* for RFFT */
+
+FlyOffsetA = Ntbl[M] * 2/2;
+FlyOffsetAIm = FlyOffsetA + 1;
+FlyOffsetB = FlyOffsetA + 2;
+FlyOffsetBIm = FlyOffsetB + 1;
+
+/* BitrevR2 ** bit reverse shuffle and first radix 2 stage ******/
+
+scale = 0.5;
+for (stage = 0; stage < Ntbl[M-(M>>1)]*2; stage += Ntbl[M>>1]*2){
+ for (LoopN = (Ntbl[(M>>1)-1]-1); LoopN >= 0; LoopN--){
+ LoopCnt = (Ntbl[(M>>1)-1]-1);
+ fly0P = ioptr + Nroot_1_ColInc + ((int)BRcnt[LoopN] >> BRshift)*(2*2) + stage;
+ IOP = ioptr + (LoopN<<(M+1)/2) * 2 + stage;
+ fly1P = IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2);
+ fly0r = *fly0P;
+ fly0i = *(fly0P+1);
+ fly1r = *(fly0P+FlyOffsetA);
+ fly1i = *(fly0P+FlyOffsetAIm);
+ for (; LoopCnt > LoopN;){
+ fly2r = *(fly0P+2);
+ fly2i = *(fly0P+(2+1));
+ fly3r = *(fly0P+FlyOffsetB);
+ fly3i = *(fly0P+FlyOffsetBIm);
+ fly4r = *fly1P;
+ fly4i = *(fly1P+1);
+ fly5r = *(fly1P+FlyOffsetA);
+ fly5i = *(fly1P+FlyOffsetAIm);
+ fly6r = *(fly1P+2);
+ fly6i = *(fly1P+(2+1));
+ fly7r = *(fly1P+FlyOffsetB);
+ fly7i = *(fly1P+FlyOffsetBIm);
+
+ t0r = fly0r + fly1r;
+ t0i = fly0i + fly1i;
+ fly1r = fly0r - fly1r;
+ fly1i = fly0i - fly1i;
+ t1r = fly2r + fly3r;
+ t1i = fly2i + fly3i;
+ fly3r = fly2r - fly3r;
+ fly3i = fly2i - fly3i;
+ fly0r = fly4r + fly5r;
+ fly0i = fly4i + fly5i;
+ fly5r = fly4r - fly5r;
+ fly5i = fly4i - fly5i;
+ fly2r = fly6r + fly7r;
+ fly2i = fly6i + fly7i;
+ fly7r = fly6r - fly7r;
+ fly7i = fly6i - fly7i;
+
+ *fly1P = scale*t0r;
+ *(fly1P+1) = scale*t0i;
+ *(fly1P+2) = scale*fly1r;
+ *(fly1P+(2+1)) = scale*fly1i;
+ *(fly1P+FlyOffsetA) = scale*t1r;
+ *(fly1P+FlyOffsetAIm) = scale*t1i;
+ *(fly1P+FlyOffsetB) = scale*fly3r;
+ *(fly1P+FlyOffsetBIm) = scale*fly3i;
+ *fly0P = scale*fly0r;
+ *(fly0P+1) = scale*fly0i;
+ *(fly0P+2) = scale*fly5r;
+ *(fly0P+(2+1)) = scale*fly5i;
+ *(fly0P+FlyOffsetA) = scale*fly2r;
+ *(fly0P+FlyOffsetAIm) = scale*fly2i;
+ *(fly0P+FlyOffsetB) = scale*fly7r;
+ *(fly0P+FlyOffsetBIm) = scale*fly7i;
+
+ fly0P -= Nrems2;
+ fly0r = *fly0P;
+ fly0i = *(fly0P+1);
+ fly1r = *(fly0P+FlyOffsetA);
+ fly1i = *(fly0P+FlyOffsetAIm);
+ LoopCnt -= 1;
+ fly1P = (IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2));
+ };
+ fly2r = *(fly0P+2);
+ fly2i = *(fly0P+(2+1));
+ fly3r = *(fly0P+FlyOffsetB);
+ fly3i = *(fly0P+FlyOffsetBIm);
+
+ t0r = fly0r + fly1r;
+ t0i = fly0i + fly1i;
+ fly1r = fly0r - fly1r;
+ fly1i = fly0i - fly1i;
+ t1r = fly2r + fly3r;
+ t1i = fly2i + fly3i;
+ fly3r = fly2r - fly3r;
+ fly3i = fly2i - fly3i;
+
+ *fly0P = scale*t0r;
+ *(fly0P+1) = scale*t0i;
+ *(fly0P+2) = scale*fly1r;
+ *(fly0P+(2+1)) = scale*fly1i;
+ *(fly0P+FlyOffsetA) = scale*t1r;
+ *(fly0P+FlyOffsetAIm) = scale*t1i;
+ *(fly0P+FlyOffsetB) = scale*fly3r;
+ *(fly0P+FlyOffsetBIm) = scale*fly3i;
+
+ };
+};
+
+
+
+/**** FFTC **************/
+
+scale = 2.0;
+
+NdiffU = 2;
+Flyinc = (NdiffU);
+
+NsameU4 = Ntbl[M+1]/4; /* for RFFT */
+LoopN = Ntbl[M-3];
+
+stage = ((M-1)/3);
+if ( (M-1-(stage * 3)) != 0 ){
+ FlyOffsetA = Flyinc << 2;
+ FlyOffsetB = FlyOffsetA << 1;
+ FlyOffsetAIm = FlyOffsetA + 1;
+ FlyOffsetBIm = FlyOffsetB + 1;
+ if ( (M-1-(stage * 3)) == 1 ){
+ /* 1 radix 2 stage */
+
+ IOP = ioptr;
+ fly0P = IOP;
+ fly1P = (IOP+Flyinc);
+ fly2P = (fly1P+Flyinc);
+ fly3P = (fly2P+Flyinc);
+
+ /* Butterflys */
+ /*
+ t0 - - t0
+ t1 - - t1
+ f2 - 1- f5
+ f1 - -i- f7
+ f4 - - f4
+ f0 - - f0
+ f6 - 1- f2
+ f3 - -i- f1
+ */
+
+ for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){
+ t0r = *(fly0P);
+ t0i = *(fly0P + 1);
+ t1r = *(fly1P);
+ t1i = *(fly1P + 1);
+ fly2r = *(fly2P);
+ fly2i = *(fly2P + 1);
+ fly1r = *(fly3P);
+ fly1i = *(fly3P + 1);
+ fly4r = *(fly0P + FlyOffsetA);
+ fly4i = *(fly0P + FlyOffsetAIm);
+ fly0r = *(fly1P + FlyOffsetA);
+ fly0i = *(fly1P + FlyOffsetAIm);
+ fly6r = *(fly2P + FlyOffsetA);
+ fly6i = *(fly2P + FlyOffsetAIm);
+ fly3r = *(fly3P + FlyOffsetA);
+ fly3i = *(fly3P + FlyOffsetAIm);
+
+ fly5r = t0r - fly2r;
+ fly5i = t0i - fly2i;
+ t0r = t0r + fly2r;
+ t0i = t0i + fly2i;
+
+ fly7r = t1r - fly1i;
+ fly7i = t1i + fly1r;
+ t1r = t1r + fly1i;
+ t1i = t1i - fly1r;
+
+ fly2r = fly4r - fly6r;
+ fly2i = fly4i - fly6i;
+ fly4r = fly4r + fly6r;
+ fly4i = fly4i + fly6i;
+
+ fly1r = fly0r - fly3i;
+ fly1i = fly0i + fly3r;
+ fly0r = fly0r + fly3i;
+ fly0i = fly0i - fly3r;
+
+ *(fly2P) = fly5r;
+ *(fly2P + 1) = fly5i;
+ *(fly0P) = t0r;
+ *(fly0P + 1) = t0i;
+ *(fly3P) = fly7r;
+ *(fly3P + 1) = fly7i;
+ *(fly1P) = t1r;
+ *(fly1P + 1) = t1i;
+ *(fly2P + FlyOffsetA) = fly2r;
+ *(fly2P + FlyOffsetAIm) = fly2i;
+ *(fly0P + FlyOffsetA) = fly4r;
+ *(fly0P + FlyOffsetAIm) = fly4i;
+ *(fly3P + FlyOffsetA) = fly1r;
+ *(fly3P + FlyOffsetAIm) = fly1i;
+ *(fly1P + FlyOffsetA) = fly0r;
+ *(fly1P + FlyOffsetAIm) = fly0i;
+
+ fly0P = (fly0P + FlyOffsetB);
+ fly1P = (fly1P + FlyOffsetB);
+ fly2P = (fly2P + FlyOffsetB);
+ fly3P = (fly3P + FlyOffsetB);
+ };
+
+ NsameU4 >>= 1;
+ LoopN >>= 1;
+ NdiffU <<= 1;
+ Flyinc = Flyinc << 1;
+ }
+ else{
+ /* 1 radix 4 stage */
+ IOP = ioptr;
+
+ U3r = 0.7071067811865475244008443621; /* sqrt(0.5); */
+ U3i = U3r;
+ fly0P = IOP;
+ fly1P = (IOP+Flyinc);
+ fly2P = (fly1P+Flyinc);
+ fly3P = (fly2P+Flyinc);
+
+ /* Butterflys */
+ /*
+ t0 - - t0 - - t0
+ t1 - - t1 - - t1
+ f2 - 1- f5 - - f5
+ f1 - -i- f7 - - f7
+ f4 - - f4 - 1- f6
+ f0 - - f0 -U3 - f3
+ f6 - 1- f2 - -i- f4
+ f3 - -i- f1 -U3a- f2
+ */
+
+ for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){
+ t0r = *(fly0P);
+ t0i = *(fly0P + 1);
+ t1r = *(fly1P);
+ t1i = *(fly1P + 1);
+ fly2r = *(fly2P);
+ fly2i = *(fly2P + 1);
+ fly1r = *(fly3P);
+ fly1i = *(fly3P + 1);
+ fly4r = *(fly0P + FlyOffsetA);
+ fly4i = *(fly0P + FlyOffsetAIm);
+ fly0r = *(fly1P + FlyOffsetA);
+ fly0i = *(fly1P + FlyOffsetAIm);
+ fly6r = *(fly2P + FlyOffsetA);
+ fly6i = *(fly2P + FlyOffsetAIm);
+ fly3r = *(fly3P + FlyOffsetA);
+ fly3i = *(fly3P + FlyOffsetAIm);
+
+ fly5r = t0r - fly2r;
+ fly5i = t0i - fly2i;
+ t0r = t0r + fly2r;
+ t0i = t0i + fly2i;
+
+ fly7r = t1r - fly1i;
+ fly7i = t1i + fly1r;
+ t1r = t1r + fly1i;
+ t1i = t1i - fly1r;
+
+ fly2r = fly4r - fly6r;
+ fly2i = fly4i - fly6i;
+ fly4r = fly4r + fly6r;
+ fly4i = fly4i + fly6i;
+
+ fly1r = fly0r - fly3i;
+ fly1i = fly0i + fly3r;
+ fly0r = fly0r + fly3i;
+ fly0i = fly0i - fly3r;
+
+ fly6r = t0r - fly4r;
+ fly6i = t0i - fly4i;
+ t0r = t0r + fly4r;
+ t0i = t0i + fly4i;
+
+ fly3r = fly5r - fly2i;
+ fly3i = fly5i + fly2r;
+ fly5r = fly5r + fly2i;
+ fly5i = fly5i - fly2r;
+
+ fly4r = t1r - U3r * fly0r;
+ fly4r = fly4r - U3i * fly0i;
+ fly4i = t1i + U3i * fly0r;
+ fly4i = fly4i - U3r * fly0i;
+ t1r = scale * t1r - fly4r;
+ t1i = scale * t1i - fly4i;
+
+ fly2r = fly7r + U3i * fly1r;
+ fly2r = fly2r - U3r * fly1i;
+ fly2i = fly7i + U3r * fly1r;
+ fly2i = fly2i + U3i * fly1i;
+ fly7r = scale * fly7r - fly2r;
+ fly7i = scale * fly7i - fly2i;
+
+ *(fly0P + FlyOffsetA) = fly6r;
+ *(fly0P + FlyOffsetAIm) = fly6i;
+ *(fly0P) = t0r;
+ *(fly0P + 1) = t0i;
+ *(fly2P + FlyOffsetA) = fly3r;
+ *(fly2P + FlyOffsetAIm) = fly3i;
+ *(fly2P) = fly5r;
+ *(fly2P + 1) = fly5i;
+ *(fly1P + FlyOffsetA) = fly4r;
+ *(fly1P + FlyOffsetAIm) = fly4i;
+ *(fly1P) = t1r;
+ *(fly1P + 1) = t1i;
+ *(fly3P + FlyOffsetA) = fly2r;
+ *(fly3P + FlyOffsetAIm) = fly2i;
+ *(fly3P) = fly7r;
+ *(fly3P + 1) = fly7i;
+
+ fly0P = (fly0P + FlyOffsetB);
+ fly1P = (fly1P + FlyOffsetB);
+ fly2P = (fly2P + FlyOffsetB);
+ fly3P = (fly3P + FlyOffsetB);
+
+ };
+
+ NsameU4 >>= 2;
+ LoopN >>= 2;
+ NdiffU <<= 2;
+ Flyinc = Flyinc << 2;
+ };
+};
+
+NsameU2 = NsameU4 >> 1;
+NsameU1 = NsameU2 >> 1;
+Flyinc <<= 1;
+FlyOffsetA = Flyinc << 2;
+FlyOffsetB = FlyOffsetA << 1;
+FlyOffsetAIm = FlyOffsetA + 1;
+FlyOffsetBIm = FlyOffsetB + 1;
+LoopN >>= 1;
+
+/* ****** RADIX 8 Stages */
+for (stage = stage<<1; stage > 0 ; stage--){
+
+ /* an fft stage is done in two parts to ease use of the single quadrant cos table */
+
+/* fftcalc1(iobuf, Utbl, N, NdiffU, LoopN); */
+ if(!(stage&1)){
+ U0rP = &Utbl[0];
+ U0iP = &Utbl[Ntbl[M-1]]; /* for RFFT */
+ U1rP = U0rP;
+ U1iP = U0iP;
+ U2rP = U0rP;
+ U2iP = U0iP;
+ U3offset = (Ntbl[M+1]) / 8; /* for RFFT */
+
+ IOP = ioptr;
+
+ U0r = *U0rP,
+ U0i = *U0iP;
+ U1r = *U1rP,
+ U1i = *U1iP;
+ U2r = *U2rP,
+ U2i = *U2iP;
+ U3r = *( U2rP + U3offset);
+ U3i = *( U2iP - U3offset);
+ }
+
+ fly0P = IOP;
+ fly1P = (IOP+Flyinc);
+ fly2P = (fly1P+Flyinc);
+ fly3P = (fly2P+Flyinc);
+
+ for (diffUcnt = (NdiffU)>>1; diffUcnt > 0; diffUcnt--){
+
+ /* Butterflys */
+ /*
+ f0 - - t0 - - t0 - - t0
+ f1 -U0 - t1 - - t1 - - t1
+ f2 - - f2 -U1 - f5 - - f3
+ f3 -U0 - f1 -U1a- f7 - - f7
+ f4 - - f4 - - f4 -U2 - f6
+ f5 -U0 - f0 - - f0 -U3 - f4
+ f6 - - f6 -U1 - f2 -U2a- f2
+ f7 -U0 - f3 -U1a- f1 -U3a- f5
+ */
+
+ fly0r = *(IOP);
+ fly0i = *(IOP+1);
+ fly1r = *(fly1P);
+ fly1i = *(fly1P+1);
+
+ for (LoopCnt = LoopN-1; LoopCnt > 0 ; LoopCnt--){
+
+ fly2r = *(fly2P);
+ fly2i = *(fly2P + 1);
+ fly3r = *(fly3P);
+ fly3i = *(fly3P + 1);
+ fly4r = *(fly0P + FlyOffsetA);
+ fly4i = *(fly0P + FlyOffsetAIm);
+ fly5r = *(fly1P + FlyOffsetA);
+ fly5i = *(fly1P + FlyOffsetAIm);
+ fly6r = *(fly2P + FlyOffsetA);
+ fly6i = *(fly2P + FlyOffsetAIm);
+ fly7r = *(fly3P + FlyOffsetA);
+ fly7i = *(fly3P + FlyOffsetAIm);
+
+ t1r = fly0r - U0r * fly1r;
+ t1r = t1r - U0i * fly1i;
+ t1i = fly0i + U0i * fly1r;
+ t1i = t1i - U0r * fly1i;
+ t0r = scale * fly0r - t1r;
+ t0i = scale * fly0i - t1i;
+
+ fly1r = fly2r - U0r * fly3r;
+ fly1r = fly1r - U0i * fly3i;
+ fly1i = fly2i + U0i * fly3r;
+ fly1i = fly1i - U0r * fly3i;
+ fly2r = scale * fly2r - fly1r;
+ fly2i = scale * fly2i - fly1i;
+
+ fly0r = fly4r - U0r * fly5r;
+ fly0r = fly0r - U0i * fly5i;
+ fly0i = fly4i + U0i * fly5r;
+ fly0i = fly0i - U0r * fly5i;
+ fly4r = scale * fly4r - fly0r;
+ fly4i = scale * fly4i - fly0i;
+
+ fly3r = fly6r - U0r * fly7r;
+ fly3r = fly3r - U0i * fly7i;
+ fly3i = fly6i + U0i * fly7r;
+ fly3i = fly3i - U0r * fly7i;
+ fly6r = scale * fly6r - fly3r;
+ fly6i = scale * fly6i - fly3i;
+
+
+ fly5r = t0r - U1r * fly2r;
+ fly5r = fly5r - U1i * fly2i;
+ fly5i = t0i + U1i * fly2r;
+ fly5i = fly5i - U1r * fly2i;
+ t0r = scale * t0r - fly5r;
+ t0i = scale * t0i - fly5i;
+
+ fly7r = t1r + U1i * fly1r;
+ fly7r = fly7r - U1r * fly1i;
+ fly7i = t1i + U1r * fly1r;
+ fly7i = fly7i + U1i * fly1i;
+ t1r = scale * t1r - fly7r;
+ t1i = scale * t1i - fly7i;
+
+ fly2r = fly4r - U1r * fly6r;
+ fly2r = fly2r - U1i * fly6i;
+ fly2i = fly4i + U1i * fly6r;
+ fly2i = fly2i - U1r * fly6i;
+ fly4r = scale * fly4r - fly2r;
+ fly4i = scale * fly4i - fly2i;
+
+ fly1r = fly0r + U1i * fly3r;
+ fly1r = fly1r - U1r * fly3i;
+ fly1i = fly0i + U1r * fly3r;
+ fly1i = fly1i + U1i * fly3i;
+ fly0r = scale * fly0r - fly1r;
+ fly0i = scale * fly0i - fly1i;
+
+ fly6r = t0r - U2r * fly4r;
+ fly6r = fly6r - U2i * fly4i;
+ fly6i = t0i + U2i * fly4r;
+ fly6i = fly6i - U2r * fly4i;
+ t0r = scale * t0r - fly6r;
+ t0i = scale * t0i - fly6i;
+
+ fly3r = fly5r - U2i * fly2r;
+ fly3r = fly3r + U2r * fly2i;
+ fly3i = fly5i - U2r * fly2r;
+ fly3i = fly3i - U2i * fly2i;
+ fly2r = scale * fly5r - fly3r;
+ fly2i = scale * fly5i - fly3i;
+
+ fly4r = t1r - U3r * fly0r;
+ fly4r = fly4r - U3i * fly0i;
+ fly4i = t1i + U3i * fly0r;
+ fly4i = fly4i - U3r * fly0i;
+ t1r = scale * t1r - fly4r;
+ t1i = scale * t1i - fly4i;
+
+ fly5r = fly7r + U3i * fly1r;
+ fly5r = fly5r - U3r * fly1i;
+ fly5i = fly7i + U3r * fly1r;
+ fly5i = fly5i + U3i * fly1i;
+ fly7r = scale * fly7r - fly5r;
+ fly7i = scale * fly7i - fly5i;
+
+ *(fly0P + FlyOffsetA) = fly6r;
+ *(fly0P + FlyOffsetAIm) = fly6i;
+ *(fly0P) = t0r;
+ *(fly0P + 1) = t0i;
+ *(fly2P) = fly3r;
+ *(fly2P + 1) = fly3i;
+ *(fly2P + FlyOffsetA) = fly2r;
+ *(fly2P + FlyOffsetAIm) = fly2i;
+
+ fly0r = *(fly0P + FlyOffsetB);
+ fly0i = *(fly0P + FlyOffsetBIm);
+
+ *(fly1P + FlyOffsetA) = fly4r;
+ *(fly1P + FlyOffsetAIm) = fly4i;
+ *(fly1P) = t1r;
+ *(fly1P + 1) = t1i;
+
+ fly1r = *(fly1P + FlyOffsetB);
+ fly1i = *(fly1P + FlyOffsetBIm);
+
+ *(fly3P + FlyOffsetA) = fly5r;
+ *(fly3P + FlyOffsetAIm) = fly5i;
+ *(fly3P) = fly7r;
+ *(fly3P + 1) = fly7i;
+
+ fly0P = (fly0P + FlyOffsetB);
+ fly1P = (fly1P + FlyOffsetB);
+ fly2P = (fly2P + FlyOffsetB);
+ fly3P = (fly3P + FlyOffsetB);
+ };
+
+ fly2r = *(fly2P);
+ fly2i = *(fly2P + 1);
+ fly3r = *(fly3P);
+ fly3i = *(fly3P + 1);
+ fly4r = *(fly0P + FlyOffsetA);
+ fly4i = *(fly0P + FlyOffsetAIm);
+ fly5r = *(fly1P + FlyOffsetA);
+ fly5i = *(fly1P + FlyOffsetAIm);
+ fly6r = *(fly2P + FlyOffsetA);
+ fly6i = *(fly2P + FlyOffsetAIm);
+ fly7r = *(fly3P + FlyOffsetA);
+ fly7i = *(fly3P + FlyOffsetAIm);
+
+ t1r = fly0r - U0r * fly1r;
+ t1r = t1r - U0i * fly1i;
+ t1i = fly0i + U0i * fly1r;
+ t1i = t1i - U0r * fly1i;
+ t0r = scale * fly0r - t1r;
+ t0i = scale * fly0i - t1i;
+
+ fly1r = fly2r - U0r * fly3r;
+ fly1r = fly1r - U0i * fly3i;
+ fly1i = fly2i + U0i * fly3r;
+ fly1i = fly1i - U0r * fly3i;
+ fly2r = scale * fly2r - fly1r;
+ fly2i = scale * fly2i - fly1i;
+
+ fly0r = fly4r - U0r * fly5r;
+ fly0r = fly0r - U0i * fly5i;
+ fly0i = fly4i + U0i * fly5r;
+ fly0i = fly0i - U0r * fly5i;
+ fly4r = scale * fly4r - fly0r;
+ fly4i = scale * fly4i - fly0i;
+
+ fly3r = fly6r - U0r * fly7r;
+ fly3r = fly3r - U0i * fly7i;
+ fly3i = fly6i + U0i * fly7r;
+ fly3i = fly3i - U0r * fly7i;
+ fly6r = scale * fly6r - fly3r;
+ fly6i = scale * fly6i - fly3i;
+
+
+ fly5r = t0r - U1r * fly2r;
+ fly5r = fly5r - U1i * fly2i;
+ fly5i = t0i + U1i * fly2r;
+ fly5i = fly5i - U1r * fly2i;
+ t0r = scale * t0r - fly5r;
+ t0i = scale * t0i - fly5i;
+
+ fly7r = t1r + U1i * fly1r;
+ fly7r = fly7r - U1r * fly1i;
+ fly7i = t1i + U1r * fly1r;
+ fly7i = fly7i + U1i * fly1i;
+ t1r = scale * t1r - fly7r;
+ t1i = scale * t1i - fly7i;
+
+ fly2r = fly4r - U1r * fly6r;
+ fly2r = fly2r - U1i * fly6i;
+ fly2i = fly4i + U1i * fly6r;
+ fly2i = fly2i - U1r * fly6i;
+ fly4r = scale * fly4r - fly2r;
+ fly4i = scale * fly4i - fly2i;
+
+ fly1r = fly0r + U1i * fly3r;
+ fly1r = fly1r - U1r * fly3i;
+ fly1i = fly0i + U1r * fly3r;
+ fly1i = fly1i + U1i * fly3i;
+ fly0r = scale * fly0r - fly1r;
+ fly0i = scale * fly0i - fly1i;
+
+ U0i = *(U0iP = (U0iP - NsameU4));
+ U0r = *(U0rP = (U0rP + NsameU4));
+ U1r = *(U1rP = (U1rP + NsameU2));
+ U1i = *(U1iP = (U1iP - NsameU2));
+ if(stage&1)
+ U0r = -U0r;
+
+ fly6r = t0r - U2r * fly4r;
+ fly6r = fly6r - U2i * fly4i;
+ fly6i = t0i + U2i * fly4r;
+ fly6i = fly6i - U2r * fly4i;
+ t0r = scale * t0r - fly6r;
+ t0i = scale * t0i - fly6i;
+
+ fly3r = fly5r - U2i * fly2r;
+ fly3r = fly3r + U2r * fly2i;
+ fly3i = fly5i - U2r * fly2r;
+ fly3i = fly3i - U2i * fly2i;
+ fly2r = scale * fly5r - fly3r;
+ fly2i = scale * fly5i - fly3i;
+
+ fly4r = t1r - U3r * fly0r;
+ fly4r = fly4r - U3i * fly0i;
+ fly4i = t1i + U3i * fly0r;
+ fly4i = fly4i - U3r * fly0i;
+ t1r = scale * t1r - fly4r;
+ t1i = scale * t1i - fly4i;
+
+ fly5r = fly7r + U3i * fly1r;
+ fly5r = fly5r - U3r * fly1i;
+ fly5i = fly7i + U3r * fly1r;
+ fly5i = fly5i + U3i * fly1i;
+ fly7r = scale * fly7r - fly5r;
+ fly7i = scale * fly7i - fly5i;
+
+ *(fly0P + FlyOffsetA) = fly6r;
+ *(fly0P + FlyOffsetAIm) = fly6i;
+ *(fly0P) = t0r;
+ *(fly0P + 1) = t0i;
+
+ U2r = *(U2rP = (U2rP + NsameU1));
+ U2i = *(U2iP = (U2iP - NsameU1));
+
+ *(fly2P) = fly3r;
+ *(fly2P + 1) = fly3i;
+ *(fly2P + FlyOffsetA) = fly2r;
+ *(fly2P + FlyOffsetAIm) = fly2i;
+ *(fly1P + FlyOffsetA) = fly4r;
+ *(fly1P + FlyOffsetAIm) = fly4i;
+ *(fly1P) = t1r;
+ *(fly1P + 1) = t1i;
+
+ U3r = *( U2rP + U3offset);
+ U3i = *( U2iP - U3offset);
+
+ *(fly3P + FlyOffsetA) = fly5r;
+ *(fly3P + FlyOffsetAIm) = fly5i;
+ *(fly3P) = fly7r;
+ *(fly3P + 1) = fly7i;
+
+ IOP = IOP + 2;
+ fly0P = IOP;
+ fly1P = (IOP+Flyinc);
+ fly2P = (fly1P+Flyinc);
+ fly3P = (fly2P+Flyinc);
+ };
+ NsameU4 = -NsameU4;
+
+ if(stage&1){
+ LoopN >>= 3;
+ NsameU1 >>= 3;
+ NsameU2 >>= 3;
+ NsameU4 >>= 3;
+ NdiffU <<= 3;
+ Flyinc = Flyinc << 3;
+ FlyOffsetA <<= 3;
+ FlyOffsetB <<= 3;
+ FlyOffsetAIm = FlyOffsetA + 1;
+ FlyOffsetBIm = FlyOffsetB + 1;
+ }
+}
+
+/* Finish RFFT */
+M=M+1;
+
+FlyOffsetA = Ntbl[M] * 2/4;
+FlyOffsetAIm = Ntbl[M] * 2/4 + 1;
+
+IOP = ioptr;
+
+fly0P = (IOP + Ntbl[M]*2/4);
+fly1P = (IOP + Ntbl[M]*2/8);
+
+U0rP = &Utbl[Ntbl[M-3]];
+
+U0r = *U0rP,
+
+fly0r = *(IOP);
+fly0i = *(IOP + 1);
+fly1r = *(fly0P);
+fly1i = *(fly0P + 1);
+fly2r = *(fly1P);
+fly2i = *(fly1P + 1);
+fly3r = *(fly1P + FlyOffsetA);
+fly3i = *(fly1P + FlyOffsetAIm);
+
+ t0r = scale * fly0r + scale * fly0i; /* compute Re(x[0]) */
+ t0i = scale * fly0r - scale * fly0i; /* compute Re(x[N/2]) */
+ t1r = scale * fly1r;
+ t1i = -scale * fly1i;
+
+ fly0r = fly2r + fly3r;
+ fly0i = fly2i - fly3i;
+ fly1r = fly2i + fly3i;
+ fly1i = fly3r - fly2r;
+
+ fly2r = fly0r + U0r * fly1r;
+ fly2r = fly2r + U0r * fly1i;
+ fly2i = fly0i - U0r * fly1r;
+ fly2i = fly2i + U0r * fly1i;
+ fly3r = scale * fly0r - fly2r;
+ fly3i = fly2i - scale * fly0i;
+
+*(IOP) = t0r;
+*(IOP + 1) = t0i;
+*(fly0P) = t1r;
+*(fly0P + 1) = t1i;
+*(fly1P) = fly2r;
+*(fly1P + 1) = fly2i;
+*(fly1P + FlyOffsetA) = fly3r;
+*(fly1P + FlyOffsetAIm) = fly3i;
+
+U0rP = &Utbl[1];
+U0iP = &Utbl[Ntbl[M-2]-1];
+
+U0r = *U0rP,
+U0i = *U0iP;
+
+fly0P = (IOP + 2);
+fly1P = (IOP + (Ntbl[M-2]-1)*2);
+
+ /* Butterflys */
+ /*
+ f0 - t0 - - f2
+ f1 - t1 -U0 - f3
+ f2 - f0 - - t0
+ f3 - f1 -U0a- t1
+ */
+
+for (diffUcnt = Ntbl[M-3]-1; diffUcnt > 0 ; diffUcnt--){
+
+ fly0r = *(fly0P);
+ fly0i = *(fly0P + 1);
+ fly1r = *(fly1P + FlyOffsetA);
+ fly1i = *(fly1P + FlyOffsetAIm);
+ fly2r = *(fly1P);
+ fly2i = *(fly1P + 1);
+ fly3r = *(fly0P + FlyOffsetA);
+ fly3i = *(fly0P + FlyOffsetAIm);
+
+ t0r = fly0r + fly1r;
+ t0i = fly0i - fly1i;
+ t1r = fly0i + fly1i;
+ t1i = fly1r - fly0r;
+
+ fly0r = fly2r + fly3r;
+ fly0i = fly2i - fly3i;
+ fly1r = fly2i + fly3i;
+ fly1i = fly3r - fly2r;
+
+ fly2r = t0r + U0r * t1r;
+ fly2r = fly2r + U0i * t1i;
+ fly2i = t0i - U0i * t1r;
+ fly2i = fly2i + U0r * t1i;
+ fly3r = scale * t0r - fly2r;
+ fly3i = fly2i - scale * t0i;
+
+ t0r = fly0r + U0i * fly1r;
+ t0r = t0r + U0r * fly1i;
+ t0i = fly0i - U0r * fly1r;
+ t0i = t0i + U0i * fly1i;
+ t1r = scale * fly0r - t0r;
+ t1i = t0i - scale * fly0i;
+
+ *(fly0P) = fly2r;
+ *(fly0P + 1) = fly2i;
+ *(fly1P + FlyOffsetA) = fly3r;
+ *(fly1P + FlyOffsetAIm) = fly3i;
+
+ U0r = *(U0rP = (U0rP + 1));
+ U0i = *(U0iP = (U0iP - 1));
+
+ *(fly1P) = t0r;
+ *(fly1P + 1) = t0i;
+ *(fly0P + FlyOffsetA) = t1r;
+ *(fly0P + FlyOffsetAIm) = t1i;
+
+ fly0P += 2;
+ fly1P -= 2;
+};
+
+ioptr += Ntbl[M];
+}
+}
+
+void riffts(float *ioptr, long M, long Rows, float *Utbl){
+/* Compute in-place real ifft on the rows of the input array */
+/* INPUTS */
+/* M = log2 of fft size */
+/* *ioptr = input data array in the following order */
+/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
+/* *Utbl = cosine table */
+/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
+/* OUTPUTS */
+/* *ioptr = real output data array */
+
+long Flyinc;
+long FlyOffsetA;
+long FlyOffsetAIm;
+long FlyOffsetB;
+long FlyOffsetBIm;
+long NsameU1;
+long NsameU2;
+long NsameU4;
+long diffUcnt;
+long LoopCnt;
+float scale;
+float fly0r;
+float fly0i;
+float fly1r;
+float fly1i;
+float fly2r;
+float fly2i;
+float fly3r;
+float fly3i;
+float fly4r;
+float fly4i;
+float fly5r;
+float fly5i;
+float fly6r;
+float fly6i;
+float fly7r;
+float fly7i;
+float U0r;
+float U0i;
+float U1r;
+float U1i;
+float U2r;
+float U2i;
+float U3r;
+float U3i;
+float t0r;
+float t0i;
+float t1r;
+float t1i;
+
+float *fly0P;
+float *fly1P;
+float *fly2P;
+float *fly3P;
+
+float *U0rP;
+float *U0iP;
+float *U1rP;
+float *U1iP;
+float *U2rP;
+float *U2iP;
+float *IOP;
+long U3offset;
+
+long stage;
+long NdiffU;
+long LoopN;
+
+const long BRshift = MAXMROOT - ((M-1)>>1); /* for RIFFT */
+const long Nrems2 = Ntbl[(M-1)-((M-1)>>1)+1]; /* for RIFFT */
+const long Nroot_1_ColInc = (Ntbl[(M-1)-1]-Ntbl[(M-1)-((M-1)>>1)])*2; /* for RIFFT */
+
+for (;Rows>0;Rows--){
+
+/* Start RIFFT */
+
+FlyOffsetA = Ntbl[M] * 2/4;
+FlyOffsetAIm = Ntbl[M] * 2/4 + 1;
+
+IOP = ioptr;
+
+fly0P = (IOP + Ntbl[M]*2/4);
+fly1P = (IOP + Ntbl[M]*2/8);
+
+U0rP = &Utbl[Ntbl[M-3]];
+
+U0r = *U0rP,
+
+fly0r = *(IOP);
+fly0i = *(IOP + 1);
+fly1r = *(fly0P);
+fly1i = *(fly0P + 1);
+fly2r = *(fly1P);
+fly2i = *(fly1P + 1);
+fly3r = *(fly1P + FlyOffsetA);
+fly3i = *(fly1P + FlyOffsetAIm);
+
+ t0r = fly0r + fly0i;
+ t0i = fly0r - fly0i;
+ t1r = fly1r + fly1r;
+ t1i = -fly1i - fly1i;
+
+ fly0r = fly2r + fly3r;
+ fly0i = fly2i - fly3i;
+ fly1r = fly2r - fly3r;
+ fly1i = fly2i + fly3i;
+
+ fly3r = fly1r * U0r;
+ fly3r = fly3r - U0r * fly1i;
+ fly3i = fly1r * U0r;
+ fly3i = fly3i + U0r * fly1i;
+
+ fly2r = fly0r - fly3i;
+ fly2i = fly0i + fly3r;
+ fly1r = fly0r + fly3i;
+ fly1i = -fly0i + fly3r;
+
+*(IOP) = t0r;
+*(IOP + 1) = t0i;
+*(fly0P) = t1r;
+*(fly0P + 1) = t1i;
+*(fly1P) = fly2r;
+*(fly1P + 1) = fly2i;
+*(fly1P + FlyOffsetA) = fly1r;
+*(fly1P + FlyOffsetAIm) = fly1i;
+
+U0rP = &Utbl[1];
+U0iP = &Utbl[Ntbl[M-2]-1];
+
+U0r = *U0rP,
+U0i = *U0iP;
+
+fly0P = (IOP + 2);
+fly1P = (IOP + (Ntbl[M-2]-1)*2);
+
+ /* Butterflys */
+ /*
+ f0 - t0 - f2
+ f1 -t1 -U0- f3 - f1
+ f2 - f0 - t0
+ f3 -f1 -U0a t1 - f3
+ */
+
+for (diffUcnt = Ntbl[M-3]-1; diffUcnt > 0 ; diffUcnt--){
+
+ fly0r = *(fly0P);
+ fly0i = *(fly0P + 1);
+ fly1r = *(fly1P + FlyOffsetA);
+ fly1i = *(fly1P + FlyOffsetAIm);
+ fly2r = *(fly1P);
+ fly2i = *(fly1P + 1);
+ fly3r = *(fly0P + FlyOffsetA);
+ fly3i = *(fly0P + FlyOffsetAIm);
+
+ t0r = fly0r + fly1r;
+ t0i = fly0i - fly1i;
+ t1r = fly0r - fly1r;
+ t1i = fly0i + fly1i;
+
+ fly0r = fly2r + fly3r;
+ fly0i = fly2i - fly3i;
+ fly1r = fly2r - fly3r;
+ fly1i = fly2i + fly3i;
+
+
+ fly3r = t1r * U0r;
+ fly3r = fly3r - U0i * t1i;
+ fly3i = t1r * U0i;
+ fly3i = fly3i + U0r * t1i;
+
+ t1r = fly1r * U0i;
+ t1r = t1r - U0r * fly1i;
+ t1i = fly1r * U0r;
+ t1i = t1i + U0i * fly1i;
+
+
+ fly2r = t0r - fly3i;
+ fly2i = t0i + fly3r;
+ fly1r = t0r + fly3i;
+ fly1i = -t0i + fly3r;
+
+ t0r = fly0r - t1i;
+ t0i = fly0i + t1r;
+ fly3r = fly0r + t1i;
+ fly3i = -fly0i + t1r;
+
+
+ *(fly0P) = fly2r;
+ *(fly0P + 1) = fly2i;
+ *(fly1P + FlyOffsetA) = fly1r;
+ *(fly1P + FlyOffsetAIm) = fly1i;
+
+ U0r = *(U0rP = (U0rP + 1));
+ U0i = *(U0iP = (U0iP - 1));
+
+ *(fly1P) = t0r;
+ *(fly1P + 1) = t0i;
+ *(fly0P + FlyOffsetA) = fly3r;
+ *(fly0P + FlyOffsetAIm) = fly3i;
+
+ fly0P += 2;
+ fly1P -= 2;
+};
+
+M=M-1; /* for RIFFT */
+
+FlyOffsetA = Ntbl[M] * 2/2;
+FlyOffsetAIm = FlyOffsetA + 1;
+FlyOffsetB = FlyOffsetA + 2;
+FlyOffsetBIm = FlyOffsetB + 1;
+
+/* BitrevR2 ** bit reverse shuffle and first radix 2 stage ******/
+
+scale = 1./Ntbl[M+1];
+for (stage = 0; stage < Ntbl[M-(M>>1)]*2; stage += Ntbl[M>>1]*2){
+ for (LoopN = (Ntbl[(M>>1)-1]-1); LoopN >= 0; LoopN--){
+ LoopCnt = (Ntbl[(M>>1)-1]-1);
+ fly0P = ioptr + Nroot_1_ColInc + ((int)BRcnt[LoopN] >> BRshift)*(2*2) + stage;
+ IOP = ioptr + (LoopN<<(M+1)/2) * 2 + stage;
+ fly1P = IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2);
+ fly0r = *(fly0P);
+ fly0i = *(fly0P+1);
+ fly1r = *(fly0P+FlyOffsetA);
+ fly1i = *(fly0P+FlyOffsetAIm);
+ for (; LoopCnt > LoopN;){
+ fly2r = *(fly0P+2);
+ fly2i = *(fly0P+(2+1));
+ fly3r = *(fly0P+FlyOffsetB);
+ fly3i = *(fly0P+FlyOffsetBIm);
+ fly4r = *(fly1P);
+ fly4i = *(fly1P+1);
+ fly5r = *(fly1P+FlyOffsetA);
+ fly5i = *(fly1P+FlyOffsetAIm);
+ fly6r = *(fly1P+2);
+ fly6i = *(fly1P+(2+1));
+ fly7r = *(fly1P+FlyOffsetB);
+ fly7i = *(fly1P+FlyOffsetBIm);
+
+ t0r = fly0r + fly1r;
+ t0i = fly0i + fly1i;
+ fly1r = fly0r - fly1r;
+ fly1i = fly0i - fly1i;
+ t1r = fly2r + fly3r;
+ t1i = fly2i + fly3i;
+ fly3r = fly2r - fly3r;
+ fly3i = fly2i - fly3i;
+ fly0r = fly4r + fly5r;
+ fly0i = fly4i + fly5i;
+ fly5r = fly4r - fly5r;
+ fly5i = fly4i - fly5i;
+ fly2r = fly6r + fly7r;
+ fly2i = fly6i + fly7i;
+ fly7r = fly6r - fly7r;
+ fly7i = fly6i - fly7i;
+
+ *(fly1P) = scale*t0r;
+ *(fly1P+1) = scale*t0i;
+ *(fly1P+2) = scale*fly1r;
+ *(fly1P+(2+1)) = scale*fly1i;
+ *(fly1P+FlyOffsetA) = scale*t1r;
+ *(fly1P+FlyOffsetAIm) = scale*t1i;
+ *(fly1P+FlyOffsetB) = scale*fly3r;
+ *(fly1P+FlyOffsetBIm) = scale*fly3i;
+ *(fly0P) = scale*fly0r;
+ *(fly0P+1) = scale*fly0i;
+ *(fly0P+2) = scale*fly5r;
+ *(fly0P+(2+1)) = scale*fly5i;
+ *(fly0P+FlyOffsetA) = scale*fly2r;
+ *(fly0P+FlyOffsetAIm) = scale*fly2i;
+ *(fly0P+FlyOffsetB) = scale*fly7r;
+ *(fly0P+FlyOffsetBIm) = scale*fly7i;
+
+ fly0P -= Nrems2;
+ fly0r = *(fly0P);
+ fly0i = *(fly0P+1);
+ fly1r = *(fly0P+FlyOffsetA);
+ fly1i = *(fly0P+FlyOffsetAIm);
+ LoopCnt -= 1;
+ fly1P = (IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2));
+ };
+ fly2r = *(fly0P+2);
+ fly2i = *(fly0P+(2+1));
+ fly3r = *(fly0P+FlyOffsetB);
+ fly3i = *(fly0P+FlyOffsetBIm);
+
+ t0r = fly0r + fly1r;
+ t0i = fly0i + fly1i;
+ fly1r = fly0r - fly1r;
+ fly1i = fly0i - fly1i;
+ t1r = fly2r + fly3r;
+ t1i = fly2i + fly3i;
+ fly3r = fly2r - fly3r;
+ fly3i = fly2i - fly3i;
+
+ *(fly0P) = scale*t0r;
+ *(fly0P+1) = scale*t0i;
+ *(fly0P+2) = scale*fly1r;
+ *(fly0P+(2+1)) = scale*fly1i;
+ *(fly0P+FlyOffsetA) = scale*t1r;
+ *(fly0P+FlyOffsetAIm) = scale*t1i;
+ *(fly0P+FlyOffsetB) = scale*fly3r;
+ *(fly0P+FlyOffsetBIm) = scale*fly3i;
+
+ };
+};
+
+/**** FFTC **************/
+
+scale = 2.0;
+
+NdiffU = 2;
+Flyinc = (NdiffU);
+
+NsameU4 = Ntbl[M+1]/4; /* for RIFFT */
+LoopN = Ntbl[M-3];
+
+stage = ((M-1)/3);
+if ( (M-1-(stage * 3)) != 0 ){
+ FlyOffsetA = Flyinc << 2;
+ FlyOffsetB = FlyOffsetA << 1;
+ FlyOffsetAIm = FlyOffsetA + 1;
+ FlyOffsetBIm = FlyOffsetB + 1;
+ if ( (M-1-(stage * 3)) == 1 ){
+ /* 1 radix 2 stage */
+
+ IOP = ioptr;
+ fly0P = IOP;
+ fly1P = (IOP+Flyinc);
+ fly2P = (fly1P+Flyinc);
+ fly3P = (fly2P+Flyinc);
+
+ /* Butterflys */
+ /*
+ t0 - - t0
+ t1 - - t1
+ f2 - 1- f5
+ f1 - -i- f7
+ f4 - - f4
+ f0 - - f0
+ f6 - 1- f2
+ f3 - -i- f1
+ */
+
+ for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){
+ t0r = *(fly0P);
+ t0i = *(fly0P + 1);
+ t1r = *(fly1P);
+ t1i = *(fly1P + 1);
+ fly2r = *(fly2P);
+ fly2i = *(fly2P + 1);
+ fly1r = *(fly3P);
+ fly1i = *(fly3P + 1);
+ fly4r = *(fly0P + FlyOffsetA);
+ fly4i = *(fly0P + FlyOffsetAIm);
+ fly0r = *(fly1P + FlyOffsetA);
+ fly0i = *(fly1P + FlyOffsetAIm);
+ fly6r = *(fly2P + FlyOffsetA);
+ fly6i = *(fly2P + FlyOffsetAIm);
+ fly3r = *(fly3P + FlyOffsetA);
+ fly3i = *(fly3P + FlyOffsetAIm);
+
+ fly5r = t0r - fly2r;
+ fly5i = t0i - fly2i;
+ t0r = t0r + fly2r;
+ t0i = t0i + fly2i;
+
+ fly7r = t1r + fly1i;
+ fly7i = t1i - fly1r;
+ t1r = t1r - fly1i;
+ t1i = t1i + fly1r;
+
+ fly2r = fly4r - fly6r;
+ fly2i = fly4i - fly6i;
+ fly4r = fly4r + fly6r;
+ fly4i = fly4i + fly6i;
+
+ fly1r = fly0r + fly3i;
+ fly1i = fly0i - fly3r;
+ fly0r = fly0r - fly3i;
+ fly0i = fly0i + fly3r;
+
+ *(fly2P) = fly5r;
+ *(fly2P + 1) = fly5i;
+ *(fly0P) = t0r;
+ *(fly0P + 1) = t0i;
+ *(fly3P) = fly7r;
+ *(fly3P + 1) = fly7i;
+ *(fly1P) = t1r;
+ *(fly1P + 1) = t1i;
+ *(fly2P + FlyOffsetA) = fly2r;
+ *(fly2P + FlyOffsetAIm) = fly2i;
+ *(fly0P + FlyOffsetA) = fly4r;
+ *(fly0P + FlyOffsetAIm) = fly4i;
+ *(fly3P + FlyOffsetA) = fly1r;
+ *(fly3P + FlyOffsetAIm) = fly1i;
+ *(fly1P + FlyOffsetA) = fly0r;
+ *(fly1P + FlyOffsetAIm) = fly0i;
+
+ fly0P = (fly0P + FlyOffsetB);
+ fly1P = (fly1P + FlyOffsetB);
+ fly2P = (fly2P + FlyOffsetB);
+ fly3P = (fly3P + FlyOffsetB);
+ };
+
+ NsameU4 >>= 1;
+ LoopN >>= 1;
+ NdiffU <<= 1;
+ Flyinc = Flyinc << 1;
+ }
+ else{
+ /* 1 radix 4 stage */
+ IOP = ioptr;
+
+ U3r = 0.7071067811865475244008443621; /* sqrt(0.5); */
+ U3i = U3r;
+ fly0P = IOP;
+ fly1P = (IOP+Flyinc);
+ fly2P = (fly1P+Flyinc);
+ fly3P = (fly2P+Flyinc);
+
+ /* Butterflys */
+ /*
+ t0 - - t0 - - t0
+ t1 - - t1 - - t1
+ f2 - 1- f5 - - f5
+ f1 - -i- f7 - - f7
+ f4 - - f4 - 1- f6
+ f0 - - f0 -U3 - f3
+ f6 - 1- f2 - -i- f4
+ f3 - -i- f1 -U3a- f2
+ */
+
+ for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){
+ t0r = *(fly0P);
+ t0i = *(fly0P + 1);
+ t1r = *(fly1P);
+ t1i = *(fly1P + 1);
+ fly2r = *(fly2P);
+ fly2i = *(fly2P + 1);
+ fly1r = *(fly3P);
+ fly1i = *(fly3P + 1);
+ fly4r = *(fly0P + FlyOffsetA);
+ fly4i = *(fly0P + FlyOffsetAIm);
+ fly0r = *(fly1P + FlyOffsetA);
+ fly0i = *(fly1P + FlyOffsetAIm);
+ fly6r = *(fly2P + FlyOffsetA);
+ fly6i = *(fly2P + FlyOffsetAIm);
+ fly3r = *(fly3P + FlyOffsetA);
+ fly3i = *(fly3P + FlyOffsetAIm);
+
+ fly5r = t0r - fly2r;
+ fly5i = t0i - fly2i;
+ t0r = t0r + fly2r;
+ t0i = t0i + fly2i;
+
+ fly7r = t1r + fly1i;
+ fly7i = t1i - fly1r;
+ t1r = t1r - fly1i;
+ t1i = t1i + fly1r;
+
+ fly2r = fly4r - fly6r;
+ fly2i = fly4i - fly6i;
+ fly4r = fly4r + fly6r;
+ fly4i = fly4i + fly6i;
+
+ fly1r = fly0r + fly3i;
+ fly1i = fly0i - fly3r;
+ fly0r = fly0r - fly3i;
+ fly0i = fly0i + fly3r;
+
+ fly6r = t0r - fly4r;
+ fly6i = t0i - fly4i;
+ t0r = t0r + fly4r;
+ t0i = t0i + fly4i;
+
+ fly3r = fly5r + fly2i;
+ fly3i = fly5i - fly2r;
+ fly5r = fly5r - fly2i;
+ fly5i = fly5i + fly2r;
+
+ fly4r = t1r - U3r * fly0r;
+ fly4r = fly4r + U3i * fly0i;
+ fly4i = t1i - U3i * fly0r;
+ fly4i = fly4i - U3r * fly0i;
+ t1r = scale * t1r - fly4r;
+ t1i = scale * t1i - fly4i;
+
+ fly2r = fly7r + U3i * fly1r;
+ fly2r = fly2r + U3r * fly1i;
+ fly2i = fly7i - U3r * fly1r;
+ fly2i = fly2i + U3i * fly1i;
+ fly7r = scale * fly7r - fly2r;
+ fly7i = scale * fly7i - fly2i;
+
+ *(fly0P + FlyOffsetA) = fly6r;
+ *(fly0P + FlyOffsetAIm) = fly6i;
+ *(fly0P) = t0r;
+ *(fly0P + 1) = t0i;
+ *(fly2P + FlyOffsetA) = fly3r;
+ *(fly2P + FlyOffsetAIm) = fly3i;
+ *(fly2P) = fly5r;
+ *(fly2P + 1) = fly5i;
+ *(fly1P + FlyOffsetA) = fly4r;
+ *(fly1P + FlyOffsetAIm) = fly4i;
+ *(fly1P) = t1r;
+ *(fly1P + 1) = t1i;
+ *(fly3P + FlyOffsetA) = fly2r;
+ *(fly3P + FlyOffsetAIm) = fly2i;
+ *(fly3P) = fly7r;
+ *(fly3P + 1) = fly7i;
+
+ fly0P = (fly0P + FlyOffsetB);
+ fly1P = (fly1P + FlyOffsetB);
+ fly2P = (fly2P + FlyOffsetB);
+ fly3P = (fly3P + FlyOffsetB);
+
+ };
+
+ NsameU4 >>= 2;
+ LoopN >>= 2;
+ NdiffU <<= 2;
+ Flyinc = Flyinc << 2;
+ };
+};
+
+NsameU2 = NsameU4 >> 1;
+NsameU1 = NsameU2 >> 1;
+Flyinc <<= 1;
+FlyOffsetA = Flyinc << 2;
+FlyOffsetB = FlyOffsetA << 1;
+FlyOffsetAIm = FlyOffsetA + 1;
+FlyOffsetBIm = FlyOffsetB + 1;
+LoopN >>= 1;
+
+/* ****** RADIX 8 Stages */
+for (stage = stage<<1; stage > 0 ; stage--){
+
+ /* an fft stage is done in two parts to ease use of the single quadrant cos table */
+
+/* fftcalc1(iobuf, Utbl, N, NdiffU, LoopN); */
+ if(!(stage&1)){
+ U0rP = &Utbl[0];
+ U0iP = &Utbl[Ntbl[M-1]]; /* for RIFFT */
+ U1rP = U0rP;
+ U1iP = U0iP;
+ U2rP = U0rP;
+ U2iP = U0iP;
+ U3offset = (Ntbl[M+1]) / 8; /* for RIFFT */
+
+ IOP = ioptr;
+
+ U0r = *U0rP,
+ U0i = *U0iP;
+ U1r = *U1rP,
+ U1i = *U1iP;
+ U2r = *U2rP,
+ U2i = *U2iP;
+ U3r = *( U2rP + U3offset);
+ U3i = *( U2iP - U3offset);
+ }
+
+ fly0P = IOP;
+ fly1P = (IOP+Flyinc);
+ fly2P = (fly1P+Flyinc);
+ fly3P = (fly2P+Flyinc);
+
+ for (diffUcnt = (NdiffU)>>1; diffUcnt > 0; diffUcnt--){
+
+ /* Butterflys */
+ /*
+ f0 - - t0 - - t0 - - t0
+ f1 -U0 - t1 - - t1 - - t1
+ f2 - - f2 -U1 - f5 - - f3
+ f3 -U0 - f1 -U1a- f7 - - f7
+ f4 - - f4 - - f4 -U2 - f6
+ f5 -U0 - f0 - - f0 -U3 - f4
+ f6 - - f6 -U1 - f2 -U2a- f2
+ f7 -U0 - f3 -U1a- f1 -U3a- f5
+ */
+
+ fly0r = *(IOP);
+ fly0i = *(IOP+1);
+ fly1r = *(fly1P);
+ fly1i = *(fly1P+1);
+
+ for (LoopCnt = LoopN-1; LoopCnt > 0 ; LoopCnt--){
+
+ fly2r = *(fly2P);
+ fly2i = *(fly2P + 1);
+ fly3r = *(fly3P);
+ fly3i = *(fly3P + 1);
+ fly4r = *(fly0P + FlyOffsetA);
+ fly4i = *(fly0P + FlyOffsetAIm);
+ fly5r = *(fly1P + FlyOffsetA);
+ fly5i = *(fly1P + FlyOffsetAIm);
+ fly6r = *(fly2P + FlyOffsetA);
+ fly6i = *(fly2P + FlyOffsetAIm);
+ fly7r = *(fly3P + FlyOffsetA);
+ fly7i = *(fly3P + FlyOffsetAIm);
+
+ t1r = fly0r - U0r * fly1r;
+ t1r = t1r + U0i * fly1i;
+ t1i = fly0i - U0i * fly1r;
+ t1i = t1i - U0r * fly1i;
+ t0r = scale * fly0r - t1r;
+ t0i = scale * fly0i - t1i;
+
+ fly1r = fly2r - U0r * fly3r;
+ fly1r = fly1r + U0i * fly3i;
+ fly1i = fly2i - U0i * fly3r;
+ fly1i = fly1i - U0r * fly3i;
+ fly2r = scale * fly2r - fly1r;
+ fly2i = scale * fly2i - fly1i;
+
+ fly0r = fly4r - U0r * fly5r;
+ fly0r = fly0r + U0i * fly5i;
+ fly0i = fly4i - U0i * fly5r;
+ fly0i = fly0i - U0r * fly5i;
+ fly4r = scale * fly4r - fly0r;
+ fly4i = scale * fly4i - fly0i;
+
+ fly3r = fly6r - U0r * fly7r;
+ fly3r = fly3r + U0i * fly7i;
+ fly3i = fly6i - U0i * fly7r;
+ fly3i = fly3i - U0r * fly7i;
+ fly6r = scale * fly6r - fly3r;
+ fly6i = scale * fly6i - fly3i;
+
+
+ fly5r = t0r - U1r * fly2r;
+ fly5r = fly5r + U1i * fly2i;
+ fly5i = t0i - U1i * fly2r;
+ fly5i = fly5i - U1r * fly2i;
+ t0r = scale * t0r - fly5r;
+ t0i = scale * t0i - fly5i;
+
+ fly7r = t1r + U1i * fly1r;
+ fly7r = fly7r + U1r * fly1i;
+ fly7i = t1i - U1r * fly1r;
+ fly7i = fly7i + U1i * fly1i;
+ t1r = scale * t1r - fly7r;
+ t1i = scale * t1i - fly7i;
+
+ fly2r = fly4r - U1r * fly6r;
+ fly2r = fly2r + U1i * fly6i;
+ fly2i = fly4i - U1i * fly6r;
+ fly2i = fly2i - U1r * fly6i;
+ fly4r = scale * fly4r - fly2r;
+ fly4i = scale * fly4i - fly2i;
+
+ fly1r = fly0r + U1i * fly3r;
+ fly1r = fly1r + U1r * fly3i;
+ fly1i = fly0i - U1r * fly3r;
+ fly1i = fly1i + U1i * fly3i;
+ fly0r = scale * fly0r - fly1r;
+ fly0i = scale * fly0i - fly1i;
+
+ fly6r = t0r - U2r * fly4r;
+ fly6r = fly6r + U2i * fly4i;
+ fly6i = t0i - U2i * fly4r;
+ fly6i = fly6i - U2r * fly4i;
+ t0r = scale * t0r - fly6r;
+ t0i = scale * t0i - fly6i;
+
+ fly3r = fly5r - U2i * fly2r;
+ fly3r = fly3r - U2r * fly2i;
+ fly3i = fly5i + U2r * fly2r;
+ fly3i = fly3i - U2i * fly2i;
+ fly2r = scale * fly5r - fly3r;
+ fly2i = scale * fly5i - fly3i;
+
+ fly4r = t1r - U3r * fly0r;
+ fly4r = fly4r + U3i * fly0i;
+ fly4i = t1i - U3i * fly0r;
+ fly4i = fly4i - U3r * fly0i;
+ t1r = scale * t1r - fly4r;
+ t1i = scale * t1i - fly4i;
+
+ fly5r = fly7r + U3i * fly1r;
+ fly5r = fly5r + U3r * fly1i;
+ fly5i = fly7i - U3r * fly1r;
+ fly5i = fly5i + U3i * fly1i;
+ fly7r = scale * fly7r - fly5r;
+ fly7i = scale * fly7i - fly5i;
+
+ *(fly0P + FlyOffsetA) = fly6r;
+ *(fly0P + FlyOffsetAIm) = fly6i;
+ *(fly0P) = t0r;
+ *(fly0P + 1) = t0i;
+ *(fly2P) = fly3r;
+ *(fly2P + 1) = fly3i;
+ *(fly2P + FlyOffsetA) = fly2r;
+ *(fly2P + FlyOffsetAIm) = fly2i;
+
+ fly0r = *(fly0P + FlyOffsetB);
+ fly0i = *(fly0P + FlyOffsetBIm);
+
+ *(fly1P + FlyOffsetA) = fly4r;
+ *(fly1P + FlyOffsetAIm) = fly4i;
+ *(fly1P) = t1r;
+ *(fly1P + 1) = t1i;
+
+ fly1r = *(fly1P + FlyOffsetB);
+ fly1i = *(fly1P + FlyOffsetBIm);
+
+ *(fly3P + FlyOffsetA) = fly5r;
+ *(fly3P + FlyOffsetAIm) = fly5i;
+ *(fly3P) = fly7r;
+ *(fly3P + 1) = fly7i;
+
+ fly0P = (fly0P + FlyOffsetB);
+ fly1P = (fly1P + FlyOffsetB);
+ fly2P = (fly2P + FlyOffsetB);
+ fly3P = (fly3P + FlyOffsetB);
+
+ };
+ fly2r = *(fly2P);
+ fly2i = *(fly2P + 1);
+ fly3r = *(fly3P);
+ fly3i = *(fly3P + 1);
+ fly4r = *(fly0P + FlyOffsetA);
+ fly4i = *(fly0P + FlyOffsetAIm);
+ fly5r = *(fly1P + FlyOffsetA);
+ fly5i = *(fly1P + FlyOffsetAIm);
+ fly6r = *(fly2P + FlyOffsetA);
+ fly6i = *(fly2P + FlyOffsetAIm);
+ fly7r = *(fly3P + FlyOffsetA);
+ fly7i = *(fly3P + FlyOffsetAIm);
+
+ t1r = fly0r - U0r * fly1r;
+ t1r = t1r + U0i * fly1i;
+ t1i = fly0i - U0i * fly1r;
+ t1i = t1i - U0r * fly1i;
+ t0r = scale * fly0r - t1r;
+ t0i = scale * fly0i - t1i;
+
+ fly1r = fly2r - U0r * fly3r;
+ fly1r = fly1r + U0i * fly3i;
+ fly1i = fly2i - U0i * fly3r;
+ fly1i = fly1i - U0r * fly3i;
+ fly2r = scale * fly2r - fly1r;
+ fly2i = scale * fly2i - fly1i;
+
+ fly0r = fly4r - U0r * fly5r;
+ fly0r = fly0r + U0i * fly5i;
+ fly0i = fly4i - U0i * fly5r;
+ fly0i = fly0i - U0r * fly5i;
+ fly4r = scale * fly4r - fly0r;
+ fly4i = scale * fly4i - fly0i;
+
+ fly3r = fly6r - U0r * fly7r;
+ fly3r = fly3r + U0i * fly7i;
+ fly3i = fly6i - U0i * fly7r;
+ fly3i = fly3i - U0r * fly7i;
+ fly6r = scale * fly6r - fly3r;
+ fly6i = scale * fly6i - fly3i;
+
+ fly5r = t0r - U1r * fly2r;
+ fly5r = fly5r + U1i * fly2i;
+ fly5i = t0i - U1i * fly2r;
+ fly5i = fly5i - U1r * fly2i;
+ t0r = scale * t0r - fly5r;
+ t0i = scale * t0i - fly5i;
+
+ fly7r = t1r + U1i * fly1r;
+ fly7r = fly7r + U1r * fly1i;
+ fly7i = t1i - U1r * fly1r;
+ fly7i = fly7i + U1i * fly1i;
+ t1r = scale * t1r - fly7r;
+ t1i = scale * t1i - fly7i;
+
+ fly2r = fly4r - U1r * fly6r;
+ fly2r = fly2r + U1i * fly6i;
+ fly2i = fly4i - U1i * fly6r;
+ fly2i = fly2i - U1r * fly6i;
+ fly4r = scale * fly4r - fly2r;
+ fly4i = scale * fly4i - fly2i;
+
+ fly1r = fly0r + U1i * fly3r;
+ fly1r = fly1r + U1r * fly3i;
+ fly1i = fly0i - U1r * fly3r;
+ fly1i = fly1i + U1i * fly3i;
+ fly0r = scale * fly0r - fly1r;
+ fly0i = scale * fly0i - fly1i;
+
+ U0i = *(U0iP = (U0iP - NsameU4));
+ U0r = *(U0rP = (U0rP + NsameU4));
+ U1r = *(U1rP = (U1rP + NsameU2));
+ U1i = *(U1iP = (U1iP - NsameU2));
+ if(stage&1)
+ U0r = - U0r;
+
+ fly6r = t0r - U2r * fly4r;
+ fly6r = fly6r + U2i * fly4i;
+ fly6i = t0i - U2i * fly4r;
+ fly6i = fly6i - U2r * fly4i;
+ t0r = scale * t0r - fly6r;
+ t0i = scale * t0i - fly6i;
+
+ fly3r = fly5r - U2i * fly2r;
+ fly3r = fly3r - U2r * fly2i;
+ fly3i = fly5i + U2r * fly2r;
+ fly3i = fly3i - U2i * fly2i;
+ fly2r = scale * fly5r - fly3r;
+ fly2i = scale * fly5i - fly3i;
+
+ fly4r = t1r - U3r * fly0r;
+ fly4r = fly4r + U3i * fly0i;
+ fly4i = t1i - U3i * fly0r;
+ fly4i = fly4i - U3r * fly0i;
+ t1r = scale * t1r - fly4r;
+ t1i = scale * t1i - fly4i;
+
+ fly5r = fly7r + U3i * fly1r;
+ fly5r = fly5r + U3r * fly1i;
+ fly5i = fly7i - U3r * fly1r;
+ fly5i = fly5i + U3i * fly1i;
+ fly7r = scale * fly7r - fly5r;
+ fly7i = scale * fly7i - fly5i;
+
+ *(fly0P + FlyOffsetA) = fly6r;
+ *(fly0P + FlyOffsetAIm) = fly6i;
+ *(fly0P) = t0r;
+ *(fly0P + 1) = t0i;
+
+ U2r = *(U2rP = (U2rP + NsameU1));
+ U2i = *(U2iP = (U2iP - NsameU1));
+
+ *(fly2P) = fly3r;
+ *(fly2P + 1) = fly3i;
+ *(fly2P + FlyOffsetA) = fly2r;
+ *(fly2P + FlyOffsetAIm) = fly2i;
+ *(fly1P + FlyOffsetA) = fly4r;
+ *(fly1P + FlyOffsetAIm) = fly4i;
+ *(fly1P) = t1r;
+ *(fly1P + 1) = t1i;
+
+ U3r = *( U2rP + U3offset);
+ U3i = *( U2iP - U3offset);
+
+ *(fly3P + FlyOffsetA) = fly5r;
+ *(fly3P + FlyOffsetAIm) = fly5i;
+ *(fly3P) = fly7r;
+ *(fly3P + 1) = fly7i;
+
+ IOP = IOP + 2;
+ fly0P = IOP;
+ fly1P = (IOP+Flyinc);
+ fly2P = (fly1P+Flyinc);
+ fly3P = (fly2P+Flyinc);
+ };
+ NsameU4 = -NsameU4;
+
+ if(stage&1){
+ LoopN >>= 3;
+ NsameU1 >>= 3;
+ NsameU2 >>= 3;
+ NsameU4 >>= 3;
+ NdiffU <<= 3;
+ Flyinc = Flyinc << 3;
+ FlyOffsetA <<= 3;
+ FlyOffsetB <<= 3;
+ FlyOffsetAIm = FlyOffsetA + 1;
+ FlyOffsetBIm = FlyOffsetB + 1;
+ }
+}
+M=M+1; /* for RIFFT */
+
+ioptr += Ntbl[M];
+}
+}
diff --git a/sc4pd/source/fftlib.h b/sc4pd/source/fftlib.h
new file mode 100644
index 0000000..b03317d
--- /dev/null
+++ b/sc4pd/source/fftlib.h
@@ -0,0 +1,62 @@
+long FFTInit(long *fftMptr, long fftN, float *Utbl);
+/* Compute cosine table and check size for complex ffts */
+/* INPUTS */
+/* fftN = size of fft */
+/* OUTPUTS */
+/* *fftMptr = log2 of fft size */
+/* *Utbl = cosine table with fftN/4 + 1 entries (angles = 0 to pi/2 inclusive) */
+/* RETURNS */
+/* 1 if fftN is invalid, 0 otherwise */
+
+long rFFTInit(long *fftMptr, long fftN, float *Utbl);
+/* Compute cosine table and check size for a real input fft */
+/* INPUTS */
+/* fftN = size of fft */
+/* OUTPUTS */
+/* *fftMptr = log2 of fft size */
+/* *Utbl = cosine table with fftN/4 + 1 entries (angles = 0 to pi/2 inclusive) */
+/* RETURNS */
+/* 1 if fftN is invalid, 0 otherwise */
+
+void ffts(float *ioptr, long M, long Rows, float *Utbl);
+/* Compute in-place complex fft on the rows of the input array */
+/* INPUTS */
+/* M = log2 of fft size */
+/* *ioptr = input data array */
+/* *Utbl = cosine table */
+/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
+/* OUTPUTS */
+/* *ioptr = output data array */
+
+void iffts(float *ioptr, long M, long Rows, float *Utbl);
+/* Compute in-place inverse complex fft on the rows of the input array */
+/* INPUTS */
+/* M = log2 of fft size */
+/* *ioptr = input data array */
+/* *Utbl = cosine table */
+/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
+/* OUTPUTS */
+/* *ioptr = output data array */
+
+void rffts(float *ioptr, long M, long Rows, float *Utbl);
+/* Compute in-place real fft on the rows of the input array */
+/* INPUTS */
+/* M = log2 of fft size */
+/* *ioptr = real input data array */
+/* *Utbl = cosine table */
+/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
+/* OUTPUTS */
+/* *ioptr = output data array in the following order */
+/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
+
+
+void riffts(float *ioptr, long M, long Rows, float *Utbl);
+/* Compute in-place real ifft on the rows of the input array */
+/* INPUTS */
+/* M = log2 of fft size */
+/* *ioptr = input data array in the following order */
+/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
+/* *Utbl = cosine table */
+/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
+/* OUTPUTS */
+/* *ioptr = real output data array */
diff --git a/sc4pd/source/main.cpp b/sc4pd/source/main.cpp
index 13fbc90..fa81e2e 100644
--- a/sc4pd/source/main.cpp
+++ b/sc4pd/source/main.cpp
@@ -1,3 +1,4 @@
+
/* sc4pd
library initialization
@@ -68,7 +69,8 @@ void sc4pd_library_setup()
"BRF~,\n"
" LPZ1(~), HPZ1(~), LPZ2(~), HPZ2(~), BPZ2(~), BRZ2(~), "
"LFDNoise0~,\n"
- " LFDNoise1~, LFDNoise2~, sc+~, sc-~, sc*~, sc/~\n"
+ " LFDNoise1~, LFDNoise2~, sc+~, sc-~, sc*~, sc/~, "
+ "Convolution~\n"
);
//initialize objects
@@ -296,6 +298,11 @@ void sc4pd_library_setup()
FLEXT_DSP_SETUP(scmul_ar);
FLEXT_DSP_SETUP(scdiv_ar);
+
+ FLEXT_DSP_SETUP(Convolution_ar);
+
+ //init ffts
+ init_ffts();
}
FLEXT_LIB_SETUP(sc4pd,sc4pd_library_setup);
diff --git a/sc4pd/source/sc*.cpp b/sc4pd/source/scmul.cpp
index 55ac074..55ac074 100644
--- a/sc4pd/source/sc*.cpp
+++ b/sc4pd/source/scmul.cpp
diff --git a/sc4pd/source/support.cpp b/sc4pd/source/support.cpp
index 077ea45..69aaf2b 100644
--- a/sc4pd/source/support.cpp
+++ b/sc4pd/source/support.cpp
@@ -108,3 +108,77 @@ int32 timeseed()
return (int32)tsec ^ (int32)tusec ^ count--;
}
+
+/* from Convolution.cpp */
+extern "C"
+{
+ float *cosTable[32];
+ float *fftWindow[32];
+}
+
+
+float* create_cosTable(int log2n)
+{
+ int size = 1 << log2n;
+ int size2 = size / 4 + 1;
+ float *win = (float*)malloc(size2 * sizeof(float));
+ double winc = twopi / size;
+ for (int i=0; i<size2; ++i) {
+ double w = i * winc;
+ win[i] = cos(w);
+ }
+ return win;
+}
+
+float* create_fftwindow(int log2n)
+{
+ int size = 1 << log2n;
+ float *win = (float*)malloc(size * sizeof(float));
+ //double winc = twopi / size;
+ double winc = pi / size;
+ for (int i=0; i<size; ++i) {
+ double w = i * winc;
+ //win[i] = 0.5 - 0.5 * cos(w);
+ win[i] = sin(w);
+ }
+ return win;
+}
+
+void init_ffts()
+{
+#if __VEC__
+
+ for (int i=0; i<32; ++i) {
+ fftsetup[i] = 0;
+ }
+ for (int i=0; i<15; ++i) {
+ fftsetup[i] = create_fftsetup(i, kFFTRadix2);
+ }
+#else
+ for (int i=0; i<32; ++i) {
+ cosTable[i] = 0;
+ fftWindow[i] = 0;
+ }
+ for (int i=3; i<15; ++i) {
+ cosTable[i] = create_cosTable(i);
+ fftWindow[i] = create_fftwindow(i);
+ }
+#endif
+}
+
+void DoWindowing(int log2n, float * fftbuf, int bufsize)
+{
+ float *win = fftWindow[log2n];
+
+ //printf("fail? %i %d /n", log2n, win);
+
+ if (!win) return;
+ float *in = fftbuf - 1;
+ win--;
+
+ for (int i=0; i<bufsize; ++i) {
+ *++in *= *++win;
+ }
+}
+
+#include "fftlib.c"
diff --git a/sc4pd/source/support.hpp b/sc4pd/source/support.hpp
index a9258e8..c9cedae 100644
--- a/sc4pd/source/support.hpp
+++ b/sc4pd/source/support.hpp
@@ -115,5 +115,10 @@ WARRANTIES, see the file, "license.txt," in this distribution.
#define SIGFUN(FUN) &thisType::FUN
+/* from Convolution.cpp */
+void init_ffts();
+float* create_fftwindow(int log2n);
+float* create_cosTable(int log2n);
+void DoWindowing(int log2n, float * fftbuf, int bufsize);
#endif