/* FFT library */ /* Library of in-place fast fourier transforms */ /* Forward and inverse complex transforms */ /* and real forward transform */ /* Optimized for RISC processors with many registers */ /* Version 1.1 John Green NUWC New London CT January 96 */ /* Version 1.1 renamed as fftlib from fftbig */ /* Version 1.1 removed (float *)((char *) ptr) optimization */ /* Version 1.0 John Green NUWC New London CT December 95 */ /* (John Green) green_jt@vsdec.nl.nuwc.navy.mil */ /* green_jt@vsdec.nl.nuwc.navy.mil */ #include #include "fftlib.h" #define MAXMROOT 9 /* 2^(MAXMROOT-1) = # of elements in BRcnt */ /* Bit reversed counter */ static const unsigned char BRcnt[256] = { 0, 128, 64, 192, 32, 160, 96, 224, 16, 144, 80, 208, 48, 176, 112, 240, 8, 136, 72, 200, 40, 168, 104, 232, 24, 152, 88, 216, 56, 184, 120, 248, 4, 132, 68, 196, 36, 164, 100, 228, 20, 148, 84, 212, 52, 180, 116, 244, 12, 140, 76, 204, 44, 172, 108, 236, 28, 156, 92, 220, 60, 188, 124, 252, 2, 130, 66, 194, 34, 162, 98, 226, 18, 146, 82, 210, 50, 178, 114, 242, 10, 138, 74, 202, 42, 170, 106, 234, 26, 154, 90, 218, 58, 186, 122, 250, 6, 134, 70, 198, 38, 166, 102, 230, 22, 150, 86, 214, 54, 182, 118, 246, 14, 142, 78, 206, 46, 174, 110, 238, 30, 158, 94, 222, 62, 190, 126, 254, 1, 129, 65, 193, 33, 161, 97, 225, 17, 145, 81, 209, 49, 177, 113, 241, 9, 137, 73, 201, 41, 169, 105, 233, 25, 153, 89, 217, 57, 185, 121, 249, 5, 133, 69, 197, 37, 165, 101, 229, 21, 149, 85, 213, 53, 181, 117, 245, 13, 141, 77, 205, 45, 173, 109, 237, 29, 157, 93, 221, 61, 189, 125, 253, 3, 131, 67, 195, 35, 163, 99, 227, 19, 147, 83, 211, 51, 179, 115, 243, 11, 139, 75, 203, 43, 171, 107, 235, 27, 155, 91, 219, 59, 187, 123, 251, 7, 135, 71, 199, 39, 167, 103, 231, 23, 151, 87, 215, 55, 183, 119, 247, 15, 143, 79, 207, 47, 175, 111, 239, 31, 159, 95, 223, 63, 191, 127, 255 }; /* Table of powers of 2 */ static const long Ntbl[21] = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576}; long FFTInit(long *fftMptr, long fftN, float *Utbl){ /* Compute cosine table and check size for complex ffts */ /* INPUTS */ /* fftN = size of fft */ /* OUTPUTS */ /* *fftMptr = log2 of fft size */ /* *Utbl = cosine table with fftN/4 + 1 entries (angles = 0 to pi/2 inclusive) */ /* RETURNS */ /* 1 if fftN is invalid, 0 otherwise */ long i1, ErrVal; ErrVal = 0; *fftMptr = (long)(log(fftN)/log(2.0) + 0.5); if ((*fftMptr >= 3) & (*fftMptr <= 19) & (int)(pow(2,*fftMptr)+.5) == fftN) for (i1 = 0; i1 <= fftN/4; i1++) Utbl[i1] = cos( ( 3.1415926535897932384626433832795 * 2.0 * i1 ) / fftN ); else ErrVal = 1; return ErrVal; } long rFFTInit(long *fftMptr, long fftN, float *Utbl){ /* Compute cosine table and check size for a real input fft */ /* INPUTS */ /* fftN = size of fft */ /* OUTPUTS */ /* *fftMptr = log2 of fft size */ /* *Utbl = cosine table with fftN/4 + 1 entries (angles = 0 to pi/2 inclusive) */ /* RETURNS */ /* 1 if fftN is invalid, 0 otherwise */ long i1, ErrVal; ErrVal = 0; *fftMptr = (long)(log(fftN)/log(2.0) + 0.5); if ((*fftMptr >= 4) & (*fftMptr <= 20) & (int)(pow(2,*fftMptr)+.5) == fftN) for (i1 = 0; i1 <= fftN/4; i1++) Utbl[i1] = cos( ( 3.1415926535897932384626433832795 * 2.0 * i1 ) / fftN ); else ErrVal = 1; return ErrVal; } void ffts(float *ioptr, long M, long Rows, float *Utbl){ /* Compute in-place complex fft on the rows of the input array */ /* INPUTS */ /* M = log2 of fft size */ /* *ioptr = input data array */ /* *Utbl = cosine table */ /* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */ /* OUTPUTS */ /* *ioptr = output data array */ long Flyinc; long FlyOffsetA; long FlyOffsetAIm; long FlyOffsetB; long FlyOffsetBIm; long NsameU1; long NsameU2; long NsameU4; long diffUcnt; long LoopCnt; float scale; float fly0r; float fly0i; float fly1r; float fly1i; float fly2r; float fly2i; float fly3r; float fly3i; float fly4r; float fly4i; float fly5r; float fly5i; float fly6r; float fly6i; float fly7r; float fly7i; float U0r; float U0i; float U1r; float U1i; float U2r; float U2i; float U3r; float U3i; float t0r; float t0i; float t1r; float t1i; float *fly0P; float *fly1P; float *fly2P; float *fly3P; float *U0rP; float *U0iP; float *U1rP; float *U1iP; float *U2rP; float *U2iP; float *IOP; long U3offset; long stage; long NdiffU; long LoopN; const long BRshift = MAXMROOT - (M>>1); const long Nrems2 = Ntbl[M-(M>>1)+1]; const long Nroot_1_ColInc = (Ntbl[M-1]-Ntbl[M-(M>>1)])*2; scale = 2.0; for (;Rows>0;Rows--){ FlyOffsetA = Ntbl[M] * (2/2); FlyOffsetAIm = FlyOffsetA + 1; FlyOffsetB = FlyOffsetA + 2; FlyOffsetBIm = FlyOffsetB + 1; /* BitrevR2 ** bit reverse and first radix 2 stage ******/ for (stage = 0; stage < Ntbl[M-(M>>1)]*2; stage += Ntbl[M>>1]*2){ for (LoopN = (Ntbl[(M>>1)-1]-1); LoopN >= 0; LoopN--){ LoopCnt = (Ntbl[(M>>1)-1]-1); fly0P = ioptr+ Nroot_1_ColInc + ((int)BRcnt[LoopN] >> BRshift)*(2*2) + stage; IOP = ioptr + (LoopN<<(M+1)/2) * 2 + stage; fly1P = IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2); fly0r = *(fly0P); fly0i = *(fly0P+1); fly1r = *(fly0P+FlyOffsetA); fly1i = *(fly0P+FlyOffsetAIm); for (; LoopCnt > LoopN;){ fly2r = *(fly0P+2); fly2i = *(fly0P+(2+1)); fly3r = *(fly0P+FlyOffsetB); fly3i = *(fly0P+FlyOffsetBIm); fly4r = *(fly1P); fly4i = *(fly1P+1); fly5r = *(fly1P+FlyOffsetA); fly5i = *(fly1P+FlyOffsetAIm); fly6r = *(fly1P+2); fly6i = *(fly1P+(2+1)); fly7r = *(fly1P+FlyOffsetB); fly7i = *(fly1P+FlyOffsetBIm); t0r = fly0r + fly1r; t0i = fly0i + fly1i; fly1r = fly0r - fly1r; fly1i = fly0i - fly1i; t1r = fly2r + fly3r; t1i = fly2i + fly3i; fly3r = fly2r - fly3r; fly3i = fly2i - fly3i; fly0r = fly4r + fly5r; fly0i = fly4i + fly5i; fly5r = fly4r - fly5r; fly5i = fly4i - fly5i; fly2r = fly6r + fly7r; fly2i = fly6i + fly7i; fly7r = fly6r - fly7r; fly7i = fly6i - fly7i; *(fly1P) = t0r; *(fly1P+1) = t0i; *(fly1P+2) = fly1r; *(fly1P+(2+1)) = fly1i; *(fly1P+FlyOffsetA) = t1r; *(fly1P+FlyOffsetAIm) = t1i; *(fly1P+FlyOffsetB) = fly3r; *(fly1P+FlyOffsetBIm) = fly3i; *(fly0P) = fly0r; *(fly0P+1) = fly0i; *(fly0P+2) = fly5r; *(fly0P+(2+1)) = fly5i; *(fly0P+FlyOffsetA) = fly2r; *(fly0P+FlyOffsetAIm) = fly2i; *(fly0P+FlyOffsetB) = fly7r; *(fly0P+FlyOffsetBIm) = fly7i; fly0P -= Nrems2; fly0r = *(fly0P); fly0i = *(fly0P+1); fly1r = *(fly0P+FlyOffsetA); fly1i = *(fly0P+FlyOffsetAIm); LoopCnt -= 1; fly1P = (IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2)); }; fly2r = *(fly0P+2); fly2i = *(fly0P+(2+1)); fly3r = *(fly0P+FlyOffsetB); fly3i = *(fly0P+FlyOffsetBIm); t0r = fly0r + fly1r; t0i = fly0i + fly1i; fly1r = fly0r - fly1r; fly1i = fly0i - fly1i; t1r = fly2r + fly3r; t1i = fly2i + fly3i; fly3r = fly2r - fly3r; fly3i = fly2i - fly3i; *(fly0P) = t0r; *(fly0P+1) = t0i; *(fly0P+2) = fly1r; *(fly0P+(2+1)) = fly1i; *(fly0P+FlyOffsetA) = t1r; *(fly0P+FlyOffsetAIm) = t1i; *(fly0P+FlyOffsetB) = fly3r; *(fly0P+FlyOffsetBIm) = fly3i; }; }; /**** FFTC **************/ NdiffU = 2; Flyinc = (NdiffU); NsameU4 = Ntbl[M]/4; LoopN = Ntbl[M-3]; stage = ((M-1)/3); if ( (M-1-(stage * 3)) != 0 ){ FlyOffsetA = Flyinc << 2; FlyOffsetB = FlyOffsetA << 1; FlyOffsetAIm = FlyOffsetA + 1; FlyOffsetBIm = FlyOffsetB + 1; if ( (M-1-(stage * 3)) == 1 ){ /* 1 radix 2 stage */ IOP = ioptr; fly0P = IOP; fly1P = (IOP+Flyinc); fly2P = (fly1P+Flyinc); fly3P = (fly2P+Flyinc); /* Butterflys */ /* t0 - - t0 t1 - - t1 f2 - 1- f5 f1 - -i- f7 f4 - - f4 f0 - - f0 f6 - 1- f2 f3 - -i- f1 */ for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){ t0r = *(fly0P); t0i = *(fly0P + 1); t1r = *(fly1P); t1i = *(fly1P + 1); fly2r = *(fly2P); fly2i = *(fly2P + 1); fly1r = *(fly3P); fly1i = *(fly3P + 1); fly4r = *(fly0P + FlyOffsetA); fly4i = *(fly0P + FlyOffsetAIm); fly0r = *(fly1P + FlyOffsetA); fly0i = *(fly1P + FlyOffsetAIm); fly6r = *(fly2P + FlyOffsetA); fly6i = *(fly2P + FlyOffsetAIm); fly3r = *(fly3P + FlyOffsetA); fly3i = *(fly3P + FlyOffsetAIm); fly5r = t0r - fly2r; fly5i = t0i - fly2i; t0r = t0r + fly2r; t0i = t0i + fly2i; fly7r = t1r - fly1i; fly7i = t1i + fly1r; t1r = t1r + fly1i; t1i = t1i - fly1r; fly2r = fly4r - fly6r; fly2i = fly4i - fly6i; fly4r = fly4r + fly6r; fly4i = fly4i + fly6i; fly1r = fly0r - fly3i; fly1i = fly0i + fly3r; fly0r = fly0r + fly3i; fly0i = fly0i - fly3r; *(fly2P) = fly5r; *(fly2P + 1) = fly5i; *(fly0P) = t0r; *(fly0P + 1) = t0i; *(fly3P) = fly7r; *(fly3P + 1) = fly7i; *(fly1P) = t1r; *(fly1P + 1) = t1i; *(fly2P + FlyOffsetA) = fly2r; *(fly2P + FlyOffsetAIm) = fly2i; *(fly0P + FlyOffsetA) = fly4r; *(fly0P + FlyOffsetAIm) = fly4i; *(fly3P + FlyOffsetA) = fly1r; *(fly3P + FlyOffsetAIm) = fly1i; *(fly1P + FlyOffsetA) = fly0r; *(fly1P + FlyOffsetAIm) = fly0i; fly0P = (fly0P + FlyOffsetB); fly1P = (fly1P + FlyOffsetB); fly2P = (fly2P + FlyOffsetB); fly3P = (fly3P + FlyOffsetB); }; NsameU4 >>= 1; LoopN >>= 1; NdiffU <<= 1; Flyinc = Flyinc << 1; } else{ /* 1 radix 4 stage */ IOP = ioptr; U3r = 0.7071067811865475244008443621; /* sqrt(0.5); */ U3i = U3r; fly0P = IOP; fly1P = (IOP+Flyinc); fly2P = (fly1P+Flyinc); fly3P = (fly2P+Flyinc); /* Butterflys */ /* t0 - - t0 - - t0 t1 - - t1 - - t1 f2 - 1- f5 - - f5 f1 - -i- f7 - - f7 f4 - - f4 - 1- f6 f0 - - f0 -U3 - f3 f6 - 1- f2 - -i- f4 f3 - -i- f1 -U3a- f2 */ for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){ t0r = *(fly0P); t0i = *(fly0P + 1); t1r = *(fly1P); t1i = *(fly1P + 1); fly2r = *(fly2P); fly2i = *(fly2P + 1); fly1r = *(fly3P); fly1i = *(fly3P + 1); fly4r = *(fly0P + FlyOffsetA); fly4i = *(fly0P + FlyOffsetAIm); fly0r = *(fly1P + FlyOffsetA); fly0i = *(fly1P + FlyOffsetAIm); fly6r = *(fly2P + FlyOffsetA); fly6i = *(fly2P + FlyOffsetAIm); fly3r = *(fly3P + FlyOffsetA); fly3i = *(fly3P + FlyOffsetAIm); fly5r = t0r - fly2r; fly5i = t0i - fly2i; t0r = t0r + fly2r; t0i = t0i + fly2i; fly7r = t1r - fly1i; fly7i = t1i + fly1r; t1r = t1r + fly1i; t1i = t1i - fly1r; fly2r = fly4r - fly6r; fly2i = fly4i - fly6i; fly4r = fly4r + fly6r; fly4i = fly4i + fly6i; fly1r = fly0r - fly3i; fly1i = fly0i + fly3r; fly0r = fly0r + fly3i; fly0i = fly0i - fly3r; fly6r = t0r - fly4r; fly6i = t0i - fly4i; t0r = t0r + fly4r; t0i = t0i + fly4i; fly3r = fly5r - fly2i; fly3i = fly5i + fly2r; fly5r = fly5r + fly2i; fly5i = fly5i - fly2r; fly4r = t1r - U3r * fly0r; fly4r = fly4r - U3i * fly0i; fly4i = t1i + U3i * fly0r; fly4i = fly4i - U3r * fly0i; t1r = scale * t1r - fly4r; t1i = scale * t1i - fly4i; fly2r = fly7r + U3i * fly1r; fly2r = fly2r - U3r * fly1i; fly2i = fly7i + U3r * fly1r; fly2i = fly2i + U3i * fly1i; fly7r = scale * fly7r - fly2r; fly7i = scale * fly7i - fly2i; *(fly0P + FlyOffsetA) = fly6r; *(fly0P + FlyOffsetAIm) = fly6i; *(fly0P) = t0r; *(fly0P + 1) = t0i; *(fly2P + FlyOffsetA) = fly3r; *(fly2P + FlyOffsetAIm) = fly3i; *(fly2P) = fly5r; *(fly2P + 1) = fly5i; *(fly1P + FlyOffsetA) = fly4r; *(fly1P + FlyOffsetAIm) = fly4i; *(fly1P) = t1r; *(fly1P + 1) = t1i; *(fly3P + FlyOffsetA) = fly2r; *(fly3P + FlyOffsetAIm) = fly2i; *(fly3P) = fly7r; *(fly3P + 1) = fly7i; fly0P = (fly0P + FlyOffsetB); fly1P = (fly1P + FlyOffsetB); fly2P = (fly2P + FlyOffsetB); fly3P = (fly3P + FlyOffsetB); }; NsameU4 >>= 2; LoopN >>= 2; NdiffU <<= 2; Flyinc = Flyinc << 2; }; }; NsameU2 = NsameU4 >> 1; NsameU1 = NsameU2 >> 1; Flyinc <<= 1; FlyOffsetA = Flyinc << 2; FlyOffsetB = FlyOffsetA << 1; FlyOffsetAIm = FlyOffsetA + 1; FlyOffsetBIm = FlyOffsetB + 1; LoopN >>= 1; /* ****** RADIX 8 Stages */ for (stage = stage<<1; stage > 0 ; stage--){ /* an fft stage is done in two parts to ease use of the single quadrant cos table */ /* fftcalc1(iobuf, Utbl, N, NdiffU, LoopN); */ if(!(stage&1)){ U0rP = &Utbl[0]; U0iP = &Utbl[Ntbl[M-2]]; U1rP = U0rP; U1iP = U0iP; U2rP = U0rP; U2iP = U0iP; U3offset = (Ntbl[M]) / 8; IOP = ioptr; U0r = *U0rP, U0i = *U0iP; U1r = *U1rP, U1i = *U1iP; U2r = *U2rP, U2i = *U2iP; U3r = *( U2rP + U3offset); U3i = *( U2iP - U3offset); } fly0P = IOP; fly1P = (IOP+Flyinc); fly2P = (fly1P+Flyinc); fly3P = (fly2P+Flyinc); for (diffUcnt = (NdiffU)>>1; diffUcnt != 0; diffUcnt--){ /* Butterflys */ /* f0 - - t0 - - t0 - - t0 f1 -U0 - t1 - - t1 - - t1 f2 - - f2 -U1 - f5 - - f3 f3 -U0 - f1 -U1a- f7 - - f7 f4 - - f4 - - f4 -U2 - f6 f5 -U0 - f0 - - f0 -U3 - f4 f6 - - f6 -U1 - f2 -U2a- f2 f7 -U0 - f3 -U1a- f1 -U3a- f5 */ fly0r = *(IOP); fly0i = *(IOP+1); fly1r = *(fly1P); fly1i = *(fly1P+1); for (LoopCnt = LoopN-1; LoopCnt > 0 ; LoopCnt--){ fly2r = *(fly2P); fly2i = *(fly2P + 1); fly3r = *(fly3P); fly3i = *(fly3P + 1); fly4r = *(fly0P + FlyOffsetA); fly4i = *(fly0P + FlyOffsetAIm); fly5r = *(fly1P + FlyOffsetA); fly5i = *(fly1P + FlyOffsetAIm); fly6r = *(fly2P + FlyOffsetA); fly6i = *(fly2P + FlyOffsetAIm); fly7r = *(fly3P + FlyOffsetA); fly7i = *(fly3P + FlyOffsetAIm); t1r = fly0r - U0r * fly1r; t1r = t1r - U0i * fly1i; t1i = fly0i + U0i * fly1r; t1i = t1i - U0r * fly1i; t0r = scale * fly0r - t1r; t0i = scale * fly0i - t1i; fly1r = fly2r - U0r * fly3r; fly1r = fly1r - U0i * fly3i; fly1i = fly2i + U0i * fly3r; fly1i = fly1i - U0r * fly3i; fly2r = scale * fly2r - fly1r; fly2i = scale * fly2i - fly1i; fly0r = fly4r - U0r * fly5r; fly0r = fly0r - U0i * fly5i; fly0i = fly4i + U0i * fly5r; fly0i = fly0i - U0r * fly5i; fly4r = scale * fly4r - fly0r; fly4i = scale * fly4i - fly0i; fly3r = fly6r - U0r * fly7r; fly3r = fly3r - U0i * fly7i; fly3i = fly6i + U0i * fly7r; fly3i = fly3i - U0r * fly7i; fly6r = scale * fly6r - fly3r; fly6i = scale * fly6i - fly3i; fly5r = t0r - U1r * fly2r; fly5r = fly5r - U1i * fly2i; fly5i = t0i + U1i * fly2r; fly5i = fly5i - U1r * fly2i; t0r = scale * t0r - fly5r; t0i = scale * t0i - fly5i; fly7r = t1r + U1i * fly1r; fly7r = fly7r - U1r * fly1i; fly7i = t1i + U1r * fly1r; fly7i = fly7i + U1i * fly1i; t1r = scale * t1r - fly7r; t1i = scale * t1i - fly7i; fly2r = fly4r - U1r * fly6r; fly2r = fly2r - U1i * fly6i; fly2i = fly4i + U1i * fly6r; fly2i = fly2i - U1r * fly6i; fly4r = scale * fly4r - fly2r; fly4i = scale * fly4i - fly2i; fly1r = fly0r + U1i * fly3r; fly1r = fly1r - U1r * fly3i; fly1i = fly0i + U1r * fly3r; fly1i = fly1i + U1i * fly3i; fly0r = scale * fly0r - fly1r; fly0i = scale * fly0i - fly1i; fly6r = t0r - U2r * fly4r; fly6r = fly6r - U2i * fly4i; fly6i = t0i + U2i * fly4r; fly6i = fly6i - U2r * fly4i; t0r = scale * t0r - fly6r; t0i = scale * t0i - fly6i; fly3r = fly5r - U2i * fly2r; fly3r = fly3r + U2r * fly2i; fly3i = fly5i - U2r * fly2r; fly3i = fly3i - U2i * fly2i; fly2r = scale * fly5r - fly3r; fly2i = scale * fly5i - fly3i; fly4r = t1r - U3r * fly0r; fly4r = fly4r - U3i * fly0i; fly4i = t1i + U3i * fly0r; fly4i = fly4i - U3r * fly0i; t1r = scale * t1r - fly4r; t1i = scale * t1i - fly4i; fly5r = fly7r + U3i * fly1r; fly5r = fly5r - U3r * fly1i; fly5i = fly7i + U3r * fly1r; fly5i = fly5i + U3i * fly1i; fly7r = scale * fly7r - fly5r; fly7i = scale * fly7i - fly5i; *(fly0P + FlyOffsetA) = fly6r; *(fly0P + FlyOffsetAIm) = fly6i; *(fly0P) = t0r; *(fly0P + 1) = t0i; *(fly2P) = fly3r; *(fly2P + 1) = fly3i; *(fly2P + FlyOffsetA) = fly2r; *(fly2P + FlyOffsetAIm) = fly2i; fly0r = *(fly0P + FlyOffsetB); fly0i = *(fly0P + FlyOffsetBIm); *(fly1P + FlyOffsetA) = fly4r; *(fly1P + FlyOffsetAIm) = fly4i; *(fly1P) = t1r; *(fly1P + 1) = t1i; fly1r = *(fly1P + FlyOffsetB); fly1i = *(fly1P + FlyOffsetBIm); *(fly3P + FlyOffsetA) = fly5r; *(fly3P + FlyOffsetAIm) = fly5i; *(fly3P) = fly7r; *(fly3P + 1) = fly7i; fly0P = (fly0P + FlyOffsetB); fly1P = (fly1P + FlyOffsetB); fly2P = (fly2P + FlyOffsetB); fly3P = (fly3P + FlyOffsetB); }; fly2r = *(fly2P); fly2i = *(fly2P + 1); fly3r = *(fly3P); fly3i = *(fly3P + 1); fly4r = *(fly0P + FlyOffsetA); fly4i = *(fly0P + FlyOffsetAIm); fly5r = *(fly1P + FlyOffsetA); fly5i = *(fly1P + FlyOffsetAIm); fly6r = *(fly2P + FlyOffsetA); fly6i = *(fly2P + FlyOffsetAIm); fly7r = *(fly3P + FlyOffsetA); fly7i = *(fly3P + FlyOffsetAIm); t1r = fly0r - U0r * fly1r; t1r = t1r - U0i * fly1i; t1i = fly0i + U0i * fly1r; t1i = t1i - U0r * fly1i; t0r = scale * fly0r - t1r; t0i = scale * fly0i - t1i; fly1r = fly2r - U0r * fly3r; fly1r = fly1r - U0i * fly3i; fly1i = fly2i + U0i * fly3r; fly1i = fly1i - U0r * fly3i; fly2r = scale * fly2r - fly1r; fly2i = scale * fly2i - fly1i; fly0r = fly4r - U0r * fly5r; fly0r = fly0r - U0i * fly5i; fly0i = fly4i + U0i * fly5r; fly0i = fly0i - U0r * fly5i; fly4r = scale * fly4r - fly0r; fly4i = scale * fly4i - fly0i; fly3r = fly6r - U0r * fly7r; fly3r = fly3r - U0i * fly7i; fly3i = fly6i + U0i * fly7r; fly3i = fly3i - U0r * fly7i; fly6r = scale * fly6r - fly3r; fly6i = scale * fly6i - fly3i; fly5r = t0r - U1r * fly2r; fly5r = fly5r - U1i * fly2i; fly5i = t0i + U1i * fly2r; fly5i = fly5i - U1r * fly2i; t0r = scale * t0r - fly5r; t0i = scale * t0i - fly5i; fly7r = t1r + U1i * fly1r; fly7r = fly7r - U1r * fly1i; fly7i = t1i + U1r * fly1r; fly7i = fly7i + U1i * fly1i; t1r = scale * t1r - fly7r; t1i = scale * t1i - fly7i; fly2r = fly4r - U1r * fly6r; fly2r = fly2r - U1i * fly6i; fly2i = fly4i + U1i * fly6r; fly2i = fly2i - U1r * fly6i; fly4r = scale * fly4r - fly2r; fly4i = scale * fly4i - fly2i; fly1r = fly0r + U1i * fly3r; fly1r = fly1r - U1r * fly3i; fly1i = fly0i + U1r * fly3r; fly1i = fly1i + U1i * fly3i; fly0r = scale * fly0r - fly1r; fly0i = scale * fly0i - fly1i; U0i = *(U0iP = (U0iP - NsameU4)); U0r = *(U0rP = (U0rP + NsameU4)); U1r = *(U1rP = (U1rP + NsameU2)); U1i = *(U1iP = (U1iP - NsameU2)); if(stage&1) U0r = -U0r; fly6r = t0r - U2r * fly4r; fly6r = fly6r - U2i * fly4i; fly6i = t0i + U2i * fly4r; fly6i = fly6i - U2r * fly4i; t0r = scale * t0r - fly6r; t0i = scale * t0i - fly6i; fly3r = fly5r - U2i * fly2r; fly3r = fly3r + U2r * fly2i; fly3i = fly5i - U2r * fly2r; fly3i = fly3i - U2i * fly2i; fly2r = scale * fly5r - fly3r; fly2i = scale * fly5i - fly3i; fly4r = t1r - U3r * fly0r; fly4r = fly4r - U3i * fly0i; fly4i = t1i + U3i * fly0r; fly4i = fly4i - U3r * fly0i; t1r = scale * t1r - fly4r; t1i = scale * t1i - fly4i; fly5r = fly7r + U3i * fly1r; fly5r = fly5r - U3r * fly1i; fly5i = fly7i + U3r * fly1r; fly5i = fly5i + U3i * fly1i; fly7r = scale * fly7r - fly5r; fly7i = scale * fly7i - fly5i; *(fly0P + FlyOffsetA) = fly6r; *(fly0P + FlyOffsetAIm) = fly6i; *(fly0P) = t0r; *(fly0P + 1) = t0i; U2r = *(U2rP = (U2rP + NsameU1)); U2i = *(U2iP = (U2iP - NsameU1)); *(fly2P) = fly3r; *(fly2P + 1) = fly3i; *(fly2P + FlyOffsetA) = fly2r; *(fly2P + FlyOffsetAIm) = fly2i; *(fly1P + FlyOffsetA) = fly4r; *(fly1P + FlyOffsetAIm) = fly4i; *(fly1P) = t1r; *(fly1P + 1) = t1i; U3r = *( U2rP + U3offset); U3i = *( U2iP - U3offset); *(fly3P + FlyOffsetA) = fly5r; *(fly3P + FlyOffsetAIm) = fly5i; *(fly3P) = fly7r; *(fly3P + 1) = fly7i; IOP = IOP + 2; fly0P = IOP; fly1P = (IOP+Flyinc); fly2P = (fly1P+Flyinc); fly3P = (fly2P+Flyinc); }; NsameU4 = -NsameU4; if(stage&1){ LoopN >>= 3; NsameU1 >>= 3; NsameU2 >>= 3; NsameU4 >>= 3; NdiffU <<= 3; Flyinc <<= 3; FlyOffsetA <<= 3; FlyOffsetB <<= 3; FlyOffsetAIm = FlyOffsetA + 1; FlyOffsetBIm = FlyOffsetB + 1; } } ioptr += 2*Ntbl[M]; } } void iffts(float *ioptr, long M, long Rows, float *Utbl){ /* Compute in-place inverse complex fft on the rows of the input array */ /* INPUTS */ /* M = log2 of fft size */ /* *ioptr = input data array */ /* *Utbl = cosine table */ /* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */ /* OUTPUTS */ /* *ioptr = output data array */ long Flyinc; long FlyOffsetA; long FlyOffsetAIm; long FlyOffsetB; long FlyOffsetBIm; long NsameU1; long NsameU2; long NsameU4; long diffUcnt; long LoopCnt; float scale; float fly0r; float fly0i; float fly1r; float fly1i; float fly2r; float fly2i; float fly3r; float fly3i; float fly4r; float fly4i; float fly5r; float fly5i; float fly6r; float fly6i; float fly7r; float fly7i; float U0r; float U0i; float U1r; float U1i; float U2r; float U2i; float U3r; float U3i; float t0r; float t0i; float t1r; float t1i; float *fly0P; float *fly1P; float *fly2P; float *fly3P; float *U0rP; float *U0iP; float *U1rP; float *U1iP; float *U2rP; float *U2iP; float *IOP; long U3offset; long stage; long NdiffU; long LoopN; const long BRshift = MAXMROOT - (M>>1); const long Nrems2 = Ntbl[M-(M>>1)+1]; const long Nroot_1_ColInc = (Ntbl[M-1]-Ntbl[M-(M>>1)])*2; for (;Rows>0;Rows--){ FlyOffsetA = Ntbl[M] * 2/2; FlyOffsetAIm = FlyOffsetA + 1; FlyOffsetB = FlyOffsetA + 2; FlyOffsetBIm = FlyOffsetB + 1; /* BitrevR2 ** bit reverse and first radix 2 stage ******/ scale = 1./Ntbl[M]; for (stage = 0; stage < Ntbl[M-(M>>1)]*2; stage += Ntbl[M>>1]*2){ for (LoopN = (Ntbl[(M>>1)-1]-1); LoopN >= 0; LoopN--){ LoopCnt = (Ntbl[(M>>1)-1]-1); fly0P = ioptr + Nroot_1_ColInc + ((int)BRcnt[LoopN] >> BRshift)*(2*2) + stage; IOP = ioptr + (LoopN<<(M+1)/2) * 2 + stage; fly1P = IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2); fly0r = *(fly0P); fly0i = *(fly0P+1); fly1r = *(fly0P+FlyOffsetA); fly1i = *(fly0P+FlyOffsetAIm); for (; LoopCnt > LoopN;){ fly2r = *(fly0P+2); fly2i = *(fly0P+(2+1)); fly3r = *(fly0P+FlyOffsetB); fly3i = *(fly0P+FlyOffsetBIm); fly4r = *(fly1P); fly4i = *(fly1P+1); fly5r = *(fly1P+FlyOffsetA); fly5i = *(fly1P+FlyOffsetAIm); fly6r = *(fly1P+2); fly6i = *(fly1P+(2+1)); fly7r = *(fly1P+FlyOffsetB); fly7i = *(fly1P+FlyOffsetBIm); t0r = fly0r + fly1r; t0i = fly0i + fly1i; fly1r = fly0r - fly1r; fly1i = fly0i - fly1i; t1r = fly2r + fly3r; t1i = fly2i + fly3i; fly3r = fly2r - fly3r; fly3i = fly2i - fly3i; fly0r = fly4r + fly5r; fly0i = fly4i + fly5i; fly5r = fly4r - fly5r; fly5i = fly4i - fly5i; fly2r = fly6r + fly7r; fly2i = fly6i + fly7i; fly7r = fly6r - fly7r; fly7i = fly6i - fly7i; *(fly1P) = scale*t0r; *(fly1P+1) = scale*t0i; *(fly1P+2) = scale*fly1r; *(fly1P+(2+1)) = scale*fly1i; *(fly1P+FlyOffsetA) = scale*t1r; *(fly1P+FlyOffsetAIm) = scale*t1i; *(fly1P+FlyOffsetB) = scale*fly3r; *(fly1P+FlyOffsetBIm) = scale*fly3i; *(fly0P) = scale*fly0r; *(fly0P+1) = scale*fly0i; *(fly0P+2) = scale*fly5r; *(fly0P+(2+1)) = scale*fly5i; *(fly0P+FlyOffsetA) = scale*fly2r; *(fly0P+FlyOffsetAIm) = scale*fly2i; *(fly0P+FlyOffsetB) = scale*fly7r; *(fly0P+FlyOffsetBIm) = scale*fly7i; fly0P -= Nrems2; fly0r = *(fly0P); fly0i = *(fly0P+1); fly1r = *(fly0P+FlyOffsetA); fly1i = *(fly0P+FlyOffsetAIm); LoopCnt -= 1; fly1P = (IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2)); }; fly2r = *(fly0P+2); fly2i = *(fly0P+(2+1)); fly3r = *(fly0P+FlyOffsetB); fly3i = *(fly0P+FlyOffsetBIm); t0r = fly0r + fly1r; t0i = fly0i + fly1i; fly1r = fly0r - fly1r; fly1i = fly0i - fly1i; t1r = fly2r + fly3r; t1i = fly2i + fly3i; fly3r = fly2r - fly3r; fly3i = fly2i - fly3i; *(fly0P) = scale*t0r; *(fly0P+1) = scale*t0i; *(fly0P+2) = scale*fly1r; *(fly0P+(2+1)) = scale*fly1i; *(fly0P+FlyOffsetA) = scale*t1r; *(fly0P+FlyOffsetAIm) = scale*t1i; *(fly0P+FlyOffsetB) = scale*fly3r; *(fly0P+FlyOffsetBIm) = scale*fly3i; }; }; /**** FFTC **************/ scale = 2.0; NdiffU = 2; Flyinc = (NdiffU); NsameU4 = Ntbl[M]/4; LoopN = Ntbl[M-3]; stage = ((M-1)/3); if ( (M-1-(stage * 3)) != 0 ){ FlyOffsetA = Flyinc << 2; FlyOffsetB = FlyOffsetA << 1; FlyOffsetAIm = FlyOffsetA + 1; FlyOffsetBIm = FlyOffsetB + 1; if ( (M-1-(stage * 3)) == 1 ){ /* 1 radix 2 stage */ IOP = ioptr; fly0P = IOP; fly1P = (IOP+Flyinc); fly2P = (fly1P+Flyinc); fly3P = (fly2P+Flyinc); /* Butterflys */ /* t0 - - t0 t1 - - t1 f2 - 1- f5 f1 - -i- f7 f4 - - f4 f0 - - f0 f6 - 1- f2 f3 - -i- f1 */ for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){ t0r = *(fly0P); t0i = *(fly0P + 1); t1r = *(fly1P); t1i = *(fly1P + 1); fly2r = *(fly2P); fly2i = *(fly2P + 1); fly1r = *(fly3P); fly1i = *(fly3P + 1); fly4r = *(fly0P + FlyOffsetA); fly4i = *(fly0P + FlyOffsetAIm); fly0r = *(fly1P + FlyOffsetA); fly0i = *(fly1P + FlyOffsetAIm); fly6r = *(fly2P + FlyOffsetA); fly6i = *(fly2P + FlyOffsetAIm); fly3r = *(fly3P + FlyOffsetA); fly3i = *(fly3P + FlyOffsetAIm); fly5r = t0r - fly2r; fly5i = t0i - fly2i; t0r = t0r + fly2r; t0i = t0i + fly2i; fly7r = t1r + fly1i; fly7i = t1i - fly1r; t1r = t1r - fly1i; t1i = t1i + fly1r; fly2r = fly4r - fly6r; fly2i = fly4i - fly6i; fly4r = fly4r + fly6r; fly4i = fly4i + fly6i; fly1r = fly0r + fly3i; fly1i = fly0i - fly3r; fly0r = fly0r - fly3i; fly0i = fly0i + fly3r; *(fly2P) = fly5r; *(fly2P + 1) = fly5i; *(fly0P) = t0r; *(fly0P + 1) = t0i; *(fly3P) = fly7r; *(fly3P + 1) = fly7i; *(fly1P) = t1r; *(fly1P + 1) = t1i; *(fly2P + FlyOffsetA) = fly2r; *(fly2P + FlyOffsetAIm) = fly2i; *(fly0P + FlyOffsetA) = fly4r; *(fly0P + FlyOffsetAIm) = fly4i; *(fly3P + FlyOffsetA) = fly1r; *(fly3P + FlyOffsetAIm) = fly1i; *(fly1P + FlyOffsetA) = fly0r; *(fly1P + FlyOffsetAIm) = fly0i; fly0P = (fly0P + FlyOffsetB); fly1P = (fly1P + FlyOffsetB); fly2P = (fly2P + FlyOffsetB); fly3P = (fly3P + FlyOffsetB); }; NsameU4 >>= 1; LoopN >>= 1; NdiffU <<= 1; Flyinc = Flyinc << 1; } else{ /* 1 radix 4 stage */ IOP = ioptr; U3r = 0.7071067811865475244008443621; /* sqrt(0.5); */ U3i = U3r; fly0P = IOP; fly1P = (IOP+Flyinc); fly2P = (fly1P+Flyinc); fly3P = (fly2P+Flyinc); /* Butterflys */ /* t0 - - t0 - - t0 t1 - - t1 - - t1 f2 - 1- f5 - - f5 f1 - -i- f7 - - f7 f4 - - f4 - 1- f6 f0 - - f0 -U3 - f3 f6 - 1- f2 - -i- f4 f3 - -i- f1 -U3a- f2 */ for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){ t0r = *(fly0P); t0i = *(fly0P + 1); t1r = *(fly1P); t1i = *(fly1P + 1); fly2r = *(fly2P); fly2i = *(fly2P + 1); fly1r = *(fly3P); fly1i = *(fly3P + 1); fly4r = *(fly0P + FlyOffsetA); fly4i = *(fly0P + FlyOffsetAIm); fly0r = *(fly1P + FlyOffsetA); fly0i = *(fly1P + FlyOffsetAIm); fly6r = *(fly2P + FlyOffsetA); fly6i = *(fly2P + FlyOffsetAIm); fly3r = *(fly3P + FlyOffsetA); fly3i = *(fly3P + FlyOffsetAIm); fly5r = t0r - fly2r; fly5i = t0i - fly2i; t0r = t0r + fly2r; t0i = t0i + fly2i; fly7r = t1r + fly1i; fly7i = t1i - fly1r; t1r = t1r - fly1i; t1i = t1i + fly1r; fly2r = fly4r - fly6r; fly2i = fly4i - fly6i; fly4r = fly4r + fly6r; fly4i = fly4i + fly6i; fly1r = fly0r + fly3i; fly1i = fly0i - fly3r; fly0r = fly0r - fly3i; fly0i = fly0i + fly3r; fly6r = t0r - fly4r; fly6i = t0i - fly4i; t0r = t0r + fly4r; t0i = t0i + fly4i; fly3r = fly5r + fly2i; fly3i = fly5i - fly2r; fly5r = fly5r - fly2i; fly5i = fly5i + fly2r; fly4r = t1r - U3r * fly0r; fly4r = fly4r + U3i * fly0i; fly4i = t1i - U3i * fly0r; fly4i = fly4i - U3r * fly0i; t1r = scale * t1r - fly4r; t1i = scale * t1i - fly4i; fly2r = fly7r + U3i * fly1r; fly2r = fly2r + U3r * fly1i; fly2i = fly7i - U3r * fly1r; fly2i = fly2i + U3i * fly1i; fly7r = scale * fly7r - fly2r; fly7i = scale * fly7i - fly2i; *(fly0P + FlyOffsetA) = fly6r; *(fly0P + FlyOffsetAIm) = fly6i; *(fly0P) = t0r; *(fly0P + 1) = t0i; *(fly2P + FlyOffsetA) = fly3r; *(fly2P + FlyOffsetAIm) = fly3i; *(fly2P) = fly5r; *(fly2P + 1) = fly5i; *(fly1P + FlyOffsetA) = fly4r; *(fly1P + FlyOffsetAIm) = fly4i; *(fly1P) = t1r; *(fly1P + 1) = t1i; *(fly3P + FlyOffsetA) = fly2r; *(fly3P + FlyOffsetAIm) = fly2i; *(fly3P) = fly7r; *(fly3P + 1) = fly7i; fly0P = (fly0P + FlyOffsetB); fly1P = (fly1P + FlyOffsetB); fly2P = (fly2P + FlyOffsetB); fly3P = (fly3P + FlyOffsetB); }; NsameU4 >>= 2; LoopN >>= 2; NdiffU <<= 2; Flyinc = Flyinc << 2; }; }; NsameU2 = NsameU4 >> 1; NsameU1 = NsameU2 >> 1; Flyinc <<= 1; FlyOffsetA = Flyinc << 2; FlyOffsetB = FlyOffsetA << 1; FlyOffsetAIm = FlyOffsetA + 1; FlyOffsetBIm = FlyOffsetB + 1; LoopN >>= 1; /* ****** RADIX 8 Stages */ for (stage = stage<<1; stage > 0 ; stage--){ /* an fft stage is done in two parts to ease use of the single quadrant cos table */ /* fftcalc1(iobuf, Utbl, N, NdiffU, LoopN); */ if(!(stage&1)){ U0rP = &Utbl[0]; U0iP = &Utbl[Ntbl[M-2]]; U1rP = U0rP; U1iP = U0iP; U2rP = U0rP; U2iP = U0iP; U3offset = (Ntbl[M]) / 8; IOP = ioptr; U0r = *U0rP, U0i = *U0iP; U1r = *U1rP, U1i = *U1iP; U2r = *U2rP, U2i = *U2iP; U3r = *( U2rP + U3offset); U3i = *( U2iP - U3offset); } fly0P = IOP; fly1P = (IOP+Flyinc); fly2P = (fly1P+Flyinc); fly3P = (fly2P+Flyinc); for (diffUcnt = (NdiffU)>>1; diffUcnt > 0; diffUcnt--){ /* Butterflys */ /* f0 - - t0 - - t0 - - t0 f1 -U0 - t1 - - t1 - - t1 f2 - - f2 -U1 - f5 - - f3 f3 -U0 - f1 -U1a- f7 - - f7 f4 - - f4 - - f4 -U2 - f6 f5 -U0 - f0 - - f0 -U3 - f4 f6 - - f6 -U1 - f2 -U2a- f2 f7 -U0 - f3 -U1a- f1 -U3a- f5 */ fly0r = *(IOP); fly0i = *(IOP+1); fly1r = *(fly1P); fly1i = *(fly1P+1); for (LoopCnt = LoopN-1; LoopCnt > 0 ; LoopCnt--){ fly2r = *(fly2P); fly2i = *(fly2P + 1); fly3r = *(fly3P); fly3i = *(fly3P + 1); fly4r = *(fly0P + FlyOffsetA); fly4i = *(fly0P + FlyOffsetAIm); fly5r = *(fly1P + FlyOffsetA); fly5i = *(fly1P + FlyOffsetAIm); fly6r = *(fly2P + FlyOffsetA); fly6i = *(fly2P + FlyOffsetAIm); fly7r = *(fly3P + FlyOffsetA); fly7i = *(fly3P + FlyOffsetAIm); t1r = fly0r - U0r * fly1r; t1r = t1r + U0i * fly1i; t1i = fly0i - U0i * fly1r; t1i = t1i - U0r * fly1i; t0r = scale * fly0r - t1r; t0i = scale * fly0i - t1i; fly1r = fly2r - U0r * fly3r; fly1r = fly1r + U0i * fly3i; fly1i = fly2i - U0i * fly3r; fly1i = fly1i - U0r * fly3i; fly2r = scale * fly2r - fly1r; fly2i = scale * fly2i - fly1i; fly0r = fly4r - U0r * fly5r; fly0r = fly0r + U0i * fly5i; fly0i = fly4i - U0i * fly5r; fly0i = fly0i - U0r * fly5i; fly4r = scale * fly4r - fly0r; fly4i = scale * fly4i - fly0i; fly3r = fly6r - U0r * fly7r; fly3r = fly3r + U0i * fly7i; fly3i = fly6i - U0i * fly7r; fly3i = fly3i - U0r * fly7i; fly6r = scale * fly6r - fly3r; fly6i = scale * fly6i - fly3i; fly5r = t0r - U1r * fly2r; fly5r = fly5r + U1i * fly2i; fly5i = t0i - U1i * fly2r; fly5i = fly5i - U1r * fly2i; t0r = scale * t0r - fly5r; t0i = scale * t0i - fly5i; fly7r = t1r + U1i * fly1r; fly7r = fly7r + U1r * fly1i; fly7i = t1i - U1r * fly1r; fly7i = fly7i + U1i * fly1i; t1r = scale * t1r - fly7r; t1i = scale * t1i - fly7i; fly2r = fly4r - U1r * fly6r; fly2r = fly2r + U1i * fly6i; fly2i = fly4i - U1i * fly6r; fly2i = fly2i - U1r * fly6i; fly4r = scale * fly4r - fly2r; fly4i = scale * fly4i - fly2i; fly1r = fly0r + U1i * fly3r; fly1r = fly1r + U1r * fly3i; fly1i = fly0i - U1r * fly3r; fly1i = fly1i + U1i * fly3i; fly0r = scale * fly0r - fly1r; fly0i = scale * fly0i - fly1i; fly6r = t0r - U2r * fly4r; fly6r = fly6r + U2i * fly4i; fly6i = t0i - U2i * fly4r; fly6i = fly6i - U2r * fly4i; t0r = scale * t0r - fly6r; t0i = scale * t0i - fly6i; fly3r = fly5r - U2i * fly2r; fly3r = fly3r - U2r * fly2i; fly3i = fly5i + U2r * fly2r; fly3i = fly3i - U2i * fly2i; fly2r = scale * fly5r - fly3r; fly2i = scale * fly5i - fly3i; fly4r = t1r - U3r * fly0r; fly4r = fly4r + U3i * fly0i; fly4i = t1i - U3i * fly0r; fly4i = fly4i - U3r * fly0i; t1r = scale * t1r - fly4r; t1i = scale * t1i - fly4i; fly5r = fly7r + U3i * fly1r; fly5r = fly5r + U3r * fly1i; fly5i = fly7i - U3r * fly1r; fly5i = fly5i + U3i * fly1i; fly7r = scale * fly7r - fly5r; fly7i = scale * fly7i - fly5i; *(fly0P + FlyOffsetA) = fly6r; *(fly0P + FlyOffsetAIm) = fly6i; *(fly0P) = t0r; *(fly0P + 1) = t0i; *(fly2P) = fly3r; *(fly2P + 1) = fly3i; *(fly2P + FlyOffsetA) = fly2r; *(fly2P + FlyOffsetAIm) = fly2i; fly0r = *(fly0P + FlyOffsetB); fly0i = *(fly0P + FlyOffsetBIm); *(fly1P + FlyOffsetA) = fly4r; *(fly1P + FlyOffsetAIm) = fly4i; *(fly1P) = t1r; *(fly1P + 1) = t1i; fly1r = *(fly1P + FlyOffsetB); fly1i = *(fly1P + FlyOffsetBIm); *(fly3P + FlyOffsetA) = fly5r; *(fly3P + FlyOffsetAIm) = fly5i; *(fly3P) = fly7r; *(fly3P + 1) = fly7i; fly0P = (fly0P + FlyOffsetB); fly1P = (fly1P + FlyOffsetB); fly2P = (fly2P + FlyOffsetB); fly3P = (fly3P + FlyOffsetB); }; fly2r = *(fly2P); fly2i = *(fly2P + 1); fly3r = *(fly3P); fly3i = *(fly3P + 1); fly4r = *(fly0P + FlyOffsetA); fly4i = *(fly0P + FlyOffsetAIm); fly5r = *(fly1P + FlyOffsetA); fly5i = *(fly1P + FlyOffsetAIm); fly6r = *(fly2P + FlyOffsetA); fly6i = *(fly2P + FlyOffsetAIm); fly7r = *(fly3P + FlyOffsetA); fly7i = *(fly3P + FlyOffsetAIm); t1r = fly0r - U0r * fly1r; t1r = t1r + U0i * fly1i; t1i = fly0i - U0i * fly1r; t1i = t1i - U0r * fly1i; t0r = scale * fly0r - t1r; t0i = scale * fly0i - t1i; fly1r = fly2r - U0r * fly3r; fly1r = fly1r + U0i * fly3i; fly1i = fly2i - U0i * fly3r; fly1i = fly1i - U0r * fly3i; fly2r = scale * fly2r - fly1r; fly2i = scale * fly2i - fly1i; fly0r = fly4r - U0r * fly5r; fly0r = fly0r + U0i * fly5i; fly0i = fly4i - U0i * fly5r; fly0i = fly0i - U0r * fly5i; fly4r = scale * fly4r - fly0r; fly4i = scale * fly4i - fly0i; fly3r = fly6r - U0r * fly7r; fly3r = fly3r + U0i * fly7i; fly3i = fly6i - U0i * fly7r; fly3i = fly3i - U0r * fly7i; fly6r = scale * fly6r - fly3r; fly6i = scale * fly6i - fly3i; fly5r = t0r - U1r * fly2r; fly5r = fly5r + U1i * fly2i; fly5i = t0i - U1i * fly2r; fly5i = fly5i - U1r * fly2i; t0r = scale * t0r - fly5r; t0i = scale * t0i - fly5i; fly7r = t1r + U1i * fly1r; fly7r = fly7r + U1r * fly1i; fly7i = t1i - U1r * fly1r; fly7i = fly7i + U1i * fly1i; t1r = scale * t1r - fly7r; t1i = scale * t1i - fly7i; fly2r = fly4r - U1r * fly6r; fly2r = fly2r + U1i * fly6i; fly2i = fly4i - U1i * fly6r; fly2i = fly2i - U1r * fly6i; fly4r = scale * fly4r - fly2r; fly4i = scale * fly4i - fly2i; fly1r = fly0r + U1i * fly3r; fly1r = fly1r + U1r * fly3i; fly1i = fly0i - U1r * fly3r; fly1i = fly1i + U1i * fly3i; fly0r = scale * fly0r - fly1r; fly0i = scale * fly0i - fly1i; U0i = *(U0iP = (U0iP - NsameU4)); U0r = *(U0rP = (U0rP + NsameU4)); U1r = *(U1rP = (U1rP + NsameU2)); U1i = *(U1iP = (U1iP - NsameU2)); if(stage&1) U0r = - U0r; fly6r = t0r - U2r * fly4r; fly6r = fly6r + U2i * fly4i; fly6i = t0i - U2i * fly4r; fly6i = fly6i - U2r * fly4i; t0r = scale * t0r - fly6r; t0i = scale * t0i - fly6i; fly3r = fly5r - U2i * fly2r; fly3r = fly3r - U2r * fly2i; fly3i = fly5i + U2r * fly2r; fly3i = fly3i - U2i * fly2i; fly2r = scale * fly5r - fly3r; fly2i = scale * fly5i - fly3i; fly4r = t1r - U3r * fly0r; fly4r = fly4r + U3i * fly0i; fly4i = t1i - U3i * fly0r; fly4i = fly4i - U3r * fly0i; t1r = scale * t1r - fly4r; t1i = scale * t1i - fly4i; fly5r = fly7r + U3i * fly1r; fly5r = fly5r + U3r * fly1i; fly5i = fly7i - U3r * fly1r; fly5i = fly5i + U3i * fly1i; fly7r = scale * fly7r - fly5r; fly7i = scale * fly7i - fly5i; *(fly0P + FlyOffsetA) = fly6r; *(fly0P + FlyOffsetAIm) = fly6i; *(fly0P) = t0r; *(fly0P + 1) = t0i; U2r = *(U2rP = (U2rP + NsameU1)); U2i = *(U2iP = (U2iP - NsameU1)); *(fly2P) = fly3r; *(fly2P + 1) = fly3i; *(fly2P + FlyOffsetA) = fly2r; *(fly2P + FlyOffsetAIm) = fly2i; *(fly1P + FlyOffsetA) = fly4r; *(fly1P + FlyOffsetAIm) = fly4i; *(fly1P) = t1r; *(fly1P + 1) = t1i; U3r = *( U2rP + U3offset); U3i = *( U2iP - U3offset); *(fly3P + FlyOffsetA) = fly5r; *(fly3P + FlyOffsetAIm) = fly5i; *(fly3P) = fly7r; *(fly3P + 1) = fly7i; IOP = IOP + 2; fly0P = IOP; fly1P = (IOP+Flyinc); fly2P = (fly1P+Flyinc); fly3P = (fly2P+Flyinc); }; NsameU4 = -NsameU4; if(stage&1){ LoopN >>= 3; NsameU1 >>= 3; NsameU2 >>= 3; NsameU4 >>= 3; NdiffU <<= 3; Flyinc = Flyinc << 3; FlyOffsetA <<= 3; FlyOffsetB <<= 3; FlyOffsetAIm = FlyOffsetA + 1; FlyOffsetBIm = FlyOffsetB + 1; } } ioptr += 2*Ntbl[M]; } } /* rffts */ /* compute multiple real input FFTs on 'Rows' consecutively stored arrays */ /* ioptr = pointer to the data in and out */ /* M = log2 of fft size */ /* Rows = number of FFTs to compute */ /* Utbl = Pointer to cosine table */ void rffts(float *ioptr, long M, long Rows, float *Utbl){ /* Compute in-place real fft on the rows of the input array */ /* INPUTS */ /* M = log2 of fft size */ /* *ioptr = real input data array */ /* *Utbl = cosine table */ /* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */ /* OUTPUTS */ /* *ioptr = output data array in the following order */ /* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */ long Flyinc; long FlyOffsetA; long FlyOffsetAIm; long FlyOffsetB; long FlyOffsetBIm; long NsameU1; long NsameU2; long NsameU4; long diffUcnt; long LoopCnt; float scale; float fly0r; float fly0i; float fly1r; float fly1i; float fly2r; float fly2i; float fly3r; float fly3i; float fly4r; float fly4i; float fly5r; float fly5i; float fly6r; float fly6i; float fly7r; float fly7i; float U0r; float U0i; float U1r; float U1i; float U2r; float U2i; float U3r; float U3i; float t0r; float t0i; float t1r; float t1i; float *fly0P; float *fly1P; float *fly2P; float *fly3P; float *U0rP; float *U0iP; float *U1rP; float *U1iP; float *U2rP; float *U2iP; float *IOP; long U3offset; long stage; long NdiffU; long LoopN; const long BRshift = MAXMROOT - ((M-1)>>1); /* for RFFT */ const long Nrems2 = Ntbl[(M-1)-((M-1)>>1)+1]; /* for RFFT */ const long Nroot_1_ColInc = (Ntbl[(M-1)-1]-Ntbl[(M-1)-((M-1)>>1)])*2; /* for RFFT */ for (;Rows>0;Rows--){ M=M-1; /* for RFFT */ FlyOffsetA = Ntbl[M] * 2/2; FlyOffsetAIm = FlyOffsetA + 1; FlyOffsetB = FlyOffsetA + 2; FlyOffsetBIm = FlyOffsetB + 1; /* BitrevR2 ** bit reverse shuffle and first radix 2 stage ******/ scale = 0.5; for (stage = 0; stage < Ntbl[M-(M>>1)]*2; stage += Ntbl[M>>1]*2){ for (LoopN = (Ntbl[(M>>1)-1]-1); LoopN >= 0; LoopN--){ LoopCnt = (Ntbl[(M>>1)-1]-1); fly0P = ioptr + Nroot_1_ColInc + ((int)BRcnt[LoopN] >> BRshift)*(2*2) + stage; IOP = ioptr + (LoopN<<(M+1)/2) * 2 + stage; fly1P = IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2); fly0r = *fly0P; fly0i = *(fly0P+1); fly1r = *(fly0P+FlyOffsetA); fly1i = *(fly0P+FlyOffsetAIm); for (; LoopCnt > LoopN;){ fly2r = *(fly0P+2); fly2i = *(fly0P+(2+1)); fly3r = *(fly0P+FlyOffsetB); fly3i = *(fly0P+FlyOffsetBIm); fly4r = *fly1P; fly4i = *(fly1P+1); fly5r = *(fly1P+FlyOffsetA); fly5i = *(fly1P+FlyOffsetAIm); fly6r = *(fly1P+2); fly6i = *(fly1P+(2+1)); fly7r = *(fly1P+FlyOffsetB); fly7i = *(fly1P+FlyOffsetBIm); t0r = fly0r + fly1r; t0i = fly0i + fly1i; fly1r = fly0r - fly1r; fly1i = fly0i - fly1i; t1r = fly2r + fly3r; t1i = fly2i + fly3i; fly3r = fly2r - fly3r; fly3i = fly2i - fly3i; fly0r = fly4r + fly5r; fly0i = fly4i + fly5i; fly5r = fly4r - fly5r; fly5i = fly4i - fly5i; fly2r = fly6r + fly7r; fly2i = fly6i + fly7i; fly7r = fly6r - fly7r; fly7i = fly6i - fly7i; *fly1P = scale*t0r; *(fly1P+1) = scale*t0i; *(fly1P+2) = scale*fly1r; *(fly1P+(2+1)) = scale*fly1i; *(fly1P+FlyOffsetA) = scale*t1r; *(fly1P+FlyOffsetAIm) = scale*t1i; *(fly1P+FlyOffsetB) = scale*fly3r; *(fly1P+FlyOffsetBIm) = scale*fly3i; *fly0P = scale*fly0r; *(fly0P+1) = scale*fly0i; *(fly0P+2) = scale*fly5r; *(fly0P+(2+1)) = scale*fly5i; *(fly0P+FlyOffsetA) = scale*fly2r; *(fly0P+FlyOffsetAIm) = scale*fly2i; *(fly0P+FlyOffsetB) = scale*fly7r; *(fly0P+FlyOffsetBIm) = scale*fly7i; fly0P -= Nrems2; fly0r = *fly0P; fly0i = *(fly0P+1); fly1r = *(fly0P+FlyOffsetA); fly1i = *(fly0P+FlyOffsetAIm); LoopCnt -= 1; fly1P = (IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2)); }; fly2r = *(fly0P+2); fly2i = *(fly0P+(2+1)); fly3r = *(fly0P+FlyOffsetB); fly3i = *(fly0P+FlyOffsetBIm); t0r = fly0r + fly1r; t0i = fly0i + fly1i; fly1r = fly0r - fly1r; fly1i = fly0i - fly1i; t1r = fly2r + fly3r; t1i = fly2i + fly3i; fly3r = fly2r - fly3r; fly3i = fly2i - fly3i; *fly0P = scale*t0r; *(fly0P+1) = scale*t0i; *(fly0P+2) = scale*fly1r; *(fly0P+(2+1)) = scale*fly1i; *(fly0P+FlyOffsetA) = scale*t1r; *(fly0P+FlyOffsetAIm) = scale*t1i; *(fly0P+FlyOffsetB) = scale*fly3r; *(fly0P+FlyOffsetBIm) = scale*fly3i; }; }; /**** FFTC **************/ scale = 2.0; NdiffU = 2; Flyinc = (NdiffU); NsameU4 = Ntbl[M+1]/4; /* for RFFT */ LoopN = Ntbl[M-3]; stage = ((M-1)/3); if ( (M-1-(stage * 3)) != 0 ){ FlyOffsetA = Flyinc << 2; FlyOffsetB = FlyOffsetA << 1; FlyOffsetAIm = FlyOffsetA + 1; FlyOffsetBIm = FlyOffsetB + 1; if ( (M-1-(stage * 3)) == 1 ){ /* 1 radix 2 stage */ IOP = ioptr; fly0P = IOP; fly1P = (IOP+Flyinc); fly2P = (fly1P+Flyinc); fly3P = (fly2P+Flyinc); /* Butterflys */ /* t0 - - t0 t1 - - t1 f2 - 1- f5 f1 - -i- f7 f4 - - f4 f0 - - f0 f6 - 1- f2 f3 - -i- f1 */ for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){ t0r = *(fly0P); t0i = *(fly0P + 1); t1r = *(fly1P); t1i = *(fly1P + 1); fly2r = *(fly2P); fly2i = *(fly2P + 1); fly1r = *(fly3P); fly1i = *(fly3P + 1); fly4r = *(fly0P + FlyOffsetA); fly4i = *(fly0P + FlyOffsetAIm); fly0r = *(fly1P + FlyOffsetA); fly0i = *(fly1P + FlyOffsetAIm); fly6r = *(fly2P + FlyOffsetA); fly6i = *(fly2P + FlyOffsetAIm); fly3r = *(fly3P + FlyOffsetA); fly3i = *(fly3P + FlyOffsetAIm); fly5r = t0r - fly2r; fly5i = t0i - fly2i; t0r = t0r + fly2r; t0i = t0i + fly2i; fly7r = t1r - fly1i; fly7i = t1i + fly1r; t1r = t1r + fly1i; t1i = t1i - fly1r; fly2r = fly4r - fly6r; fly2i = fly4i - fly6i; fly4r = fly4r + fly6r; fly4i = fly4i + fly6i; fly1r = fly0r - fly3i; fly1i = fly0i + fly3r; fly0r = fly0r + fly3i; fly0i = fly0i - fly3r; *(fly2P) = fly5r; *(fly2P + 1) = fly5i; *(fly0P) = t0r; *(fly0P + 1) = t0i; *(fly3P) = fly7r; *(fly3P + 1) = fly7i; *(fly1P) = t1r; *(fly1P + 1) = t1i; *(fly2P + FlyOffsetA) = fly2r; *(fly2P + FlyOffsetAIm) = fly2i; *(fly0P + FlyOffsetA) = fly4r; *(fly0P + FlyOffsetAIm) = fly4i; *(fly3P + FlyOffsetA) = fly1r; *(fly3P + FlyOffsetAIm) = fly1i; *(fly1P + FlyOffsetA) = fly0r; *(fly1P + FlyOffsetAIm) = fly0i; fly0P = (fly0P + FlyOffsetB); fly1P = (fly1P + FlyOffsetB); fly2P = (fly2P + FlyOffsetB); fly3P = (fly3P + FlyOffsetB); }; NsameU4 >>= 1; LoopN >>= 1; NdiffU <<= 1; Flyinc = Flyinc << 1; } else{ /* 1 radix 4 stage */ IOP = ioptr; U3r = 0.7071067811865475244008443621; /* sqrt(0.5); */ U3i = U3r; fly0P = IOP; fly1P = (IOP+Flyinc); fly2P = (fly1P+Flyinc); fly3P = (fly2P+Flyinc); /* Butterflys */ /* t0 - - t0 - - t0 t1 - - t1 - - t1 f2 - 1- f5 - - f5 f1 - -i- f7 - - f7 f4 - - f4 - 1- f6 f0 - - f0 -U3 - f3 f6 - 1- f2 - -i- f4 f3 - -i- f1 -U3a- f2 */ for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){ t0r = *(fly0P); t0i = *(fly0P + 1); t1r = *(fly1P); t1i = *(fly1P + 1); fly2r = *(fly2P); fly2i = *(fly2P + 1); fly1r = *(fly3P); fly1i = *(fly3P + 1); fly4r = *(fly0P + FlyOffsetA); fly4i = *(fly0P + FlyOffsetAIm); fly0r = *(fly1P + FlyOffsetA); fly0i = *(fly1P + FlyOffsetAIm); fly6r = *(fly2P + FlyOffsetA); fly6i = *(fly2P + FlyOffsetAIm); fly3r = *(fly3P + FlyOffsetA); fly3i = *(fly3P + FlyOffsetAIm); fly5r = t0r - fly2r; fly5i = t0i - fly2i; t0r = t0r + fly2r; t0i = t0i + fly2i; fly7r = t1r - fly1i; fly7i = t1i + fly1r; t1r = t1r + fly1i; t1i = t1i - fly1r; fly2r = fly4r - fly6r; fly2i = fly4i - fly6i; fly4r = fly4r + fly6r; fly4i = fly4i + fly6i; fly1r = fly0r - fly3i; fly1i = fly0i + fly3r; fly0r = fly0r + fly3i; fly0i = fly0i - fly3r; fly6r = t0r - fly4r; fly6i = t0i - fly4i; t0r = t0r + fly4r; t0i = t0i + fly4i; fly3r = fly5r - fly2i; fly3i = fly5i + fly2r; fly5r = fly5r + fly2i; fly5i = fly5i - fly2r; fly4r = t1r - U3r * fly0r; fly4r = fly4r - U3i * fly0i; fly4i = t1i + U3i * fly0r; fly4i = fly4i - U3r * fly0i; t1r = scale * t1r - fly4r; t1i = scale * t1i - fly4i; fly2r = fly7r + U3i * fly1r; fly2r = fly2r - U3r * fly1i; fly2i = fly7i + U3r * fly1r; fly2i = fly2i + U3i * fly1i; fly7r = scale * fly7r - fly2r; fly7i = scale * fly7i - fly2i; *(fly0P + FlyOffsetA) = fly6r; *(fly0P + FlyOffsetAIm) = fly6i; *(fly0P) = t0r; *(fly0P + 1) = t0i; *(fly2P + FlyOffsetA) = fly3r; *(fly2P + FlyOffsetAIm) = fly3i; *(fly2P) = fly5r; *(fly2P + 1) = fly5i; *(fly1P + FlyOffsetA) = fly4r; *(fly1P + FlyOffsetAIm) = fly4i; *(fly1P) = t1r; *(fly1P + 1) = t1i; *(fly3P + FlyOffsetA) = fly2r; *(fly3P + FlyOffsetAIm) = fly2i; *(fly3P) = fly7r; *(fly3P + 1) = fly7i; fly0P = (fly0P + FlyOffsetB); fly1P = (fly1P + FlyOffsetB); fly2P = (fly2P + FlyOffsetB); fly3P = (fly3P + FlyOffsetB); }; NsameU4 >>= 2; LoopN >>= 2; NdiffU <<= 2; Flyinc = Flyinc << 2; }; }; NsameU2 = NsameU4 >> 1; NsameU1 = NsameU2 >> 1; Flyinc <<= 1; FlyOffsetA = Flyinc << 2; FlyOffsetB = FlyOffsetA << 1; FlyOffsetAIm = FlyOffsetA + 1; FlyOffsetBIm = FlyOffsetB + 1; LoopN >>= 1; /* ****** RADIX 8 Stages */ for (stage = stage<<1; stage > 0 ; stage--){ /* an fft stage is done in two parts to ease use of the single quadrant cos table */ /* fftcalc1(iobuf, Utbl, N, NdiffU, LoopN); */ if(!(stage&1)){ U0rP = &Utbl[0]; U0iP = &Utbl[Ntbl[M-1]]; /* for RFFT */ U1rP = U0rP; U1iP = U0iP; U2rP = U0rP; U2iP = U0iP; U3offset = (Ntbl[M+1]) / 8; /* for RFFT */ IOP = ioptr; U0r = *U0rP, U0i = *U0iP; U1r = *U1rP, U1i = *U1iP; U2r = *U2rP, U2i = *U2iP; U3r = *( U2rP + U3offset); U3i = *( U2iP - U3offset); } fly0P = IOP; fly1P = (IOP+Flyinc); fly2P = (fly1P+Flyinc); fly3P = (fly2P+Flyinc); for (diffUcnt = (NdiffU)>>1; diffUcnt > 0; diffUcnt--){ /* Butterflys */ /* f0 - - t0 - - t0 - - t0 f1 -U0 - t1 - - t1 - - t1 f2 - - f2 -U1 - f5 - - f3 f3 -U0 - f1 -U1a- f7 - - f7 f4 - - f4 - - f4 -U2 - f6 f5 -U0 - f0 - - f0 -U3 - f4 f6 - - f6 -U1 - f2 -U2a- f2 f7 -U0 - f3 -U1a- f1 -U3a- f5 */ fly0r = *(IOP); fly0i = *(IOP+1); fly1r = *(fly1P); fly1i = *(fly1P+1); for (LoopCnt = LoopN-1; LoopCnt > 0 ; LoopCnt--){ fly2r = *(fly2P); fly2i = *(fly2P + 1); fly3r = *(fly3P); fly3i = *(fly3P + 1); fly4r = *(fly0P + FlyOffsetA); fly4i = *(fly0P + FlyOffsetAIm); fly5r = *(fly1P + FlyOffsetA); fly5i = *(fly1P + FlyOffsetAIm); fly6r = *(fly2P + FlyOffsetA); fly6i = *(fly2P + FlyOffsetAIm); fly7r = *(fly3P + FlyOffsetA); fly7i = *(fly3P + FlyOffsetAIm); t1r = fly0r - U0r * fly1r; t1r = t1r - U0i * fly1i; t1i = fly0i + U0i * fly1r; t1i = t1i - U0r * fly1i; t0r = scale * fly0r - t1r; t0i = scale * fly0i - t1i; fly1r = fly2r - U0r * fly3r; fly1r = fly1r - U0i * fly3i; fly1i = fly2i + U0i * fly3r; fly1i = fly1i - U0r * fly3i; fly2r = scale * fly2r - fly1r; fly2i = scale * fly2i - fly1i; fly0r = fly4r - U0r * fly5r; fly0r = fly0r - U0i * fly5i; fly0i = fly4i + U0i * fly5r; fly0i = fly0i - U0r * fly5i; fly4r = scale * fly4r - fly0r; fly4i = scale * fly4i - fly0i; fly3r = fly6r - U0r * fly7r; fly3r = fly3r - U0i * fly7i; fly3i = fly6i + U0i * fly7r; fly3i = fly3i - U0r * fly7i; fly6r = scale * fly6r - fly3r; fly6i = scale * fly6i - fly3i; fly5r = t0r - U1r * fly2r; fly5r = fly5r - U1i * fly2i; fly5i = t0i + U1i * fly2r; fly5i = fly5i - U1r * fly2i; t0r = scale * t0r - fly5r; t0i = scale * t0i - fly5i; fly7r = t1r + U1i * fly1r; fly7r = fly7r - U1r * fly1i; fly7i = t1i + U1r * fly1r; fly7i = fly7i + U1i * fly1i; t1r = scale * t1r - fly7r; t1i = scale * t1i - fly7i; fly2r = fly4r - U1r * fly6r; fly2r = fly2r - U1i * fly6i; fly2i = fly4i + U1i * fly6r; fly2i = fly2i - U1r * fly6i; fly4r = scale * fly4r - fly2r; fly4i = scale * fly4i - fly2i; fly1r = fly0r + U1i * fly3r; fly1r = fly1r - U1r * fly3i; fly1i = fly0i + U1r * fly3r; fly1i = fly1i + U1i * fly3i; fly0r = scale * fly0r - fly1r; fly0i = scale * fly0i - fly1i; fly6r = t0r - U2r * fly4r; fly6r = fly6r - U2i * fly4i; fly6i = t0i + U2i * fly4r; fly6i = fly6i - U2r * fly4i; t0r = scale * t0r - fly6r; t0i = scale * t0i - fly6i; fly3r = fly5r - U2i * fly2r; fly3r = fly3r + U2r * fly2i; fly3i = fly5i - U2r * fly2r; fly3i = fly3i - U2i * fly2i; fly2r = scale * fly5r - fly3r; fly2i = scale * fly5i - fly3i; fly4r = t1r - U3r * fly0r; fly4r = fly4r - U3i * fly0i; fly4i = t1i + U3i * fly0r; fly4i = fly4i - U3r * fly0i; t1r = scale * t1r - fly4r; t1i = scale * t1i - fly4i; fly5r = fly7r + U3i * fly1r; fly5r = fly5r - U3r * fly1i; fly5i = fly7i + U3r * fly1r; fly5i = fly5i + U3i * fly1i; fly7r = scale * fly7r - fly5r; fly7i = scale * fly7i - fly5i; *(fly0P + FlyOffsetA) = fly6r; *(fly0P + FlyOffsetAIm) = fly6i; *(fly0P) = t0r; *(fly0P + 1) = t0i; *(fly2P) = fly3r; *(fly2P + 1) = fly3i; *(fly2P + FlyOffsetA) = fly2r; *(fly2P + FlyOffsetAIm) = fly2i; fly0r = *(fly0P + FlyOffsetB); fly0i = *(fly0P + FlyOffsetBIm); *(fly1P + FlyOffsetA) = fly4r; *(fly1P + FlyOffsetAIm) = fly4i; *(fly1P) = t1r; *(fly1P + 1) = t1i; fly1r = *(fly1P + FlyOffsetB); fly1i = *(fly1P + FlyOffsetBIm); *(fly3P + FlyOffsetA) = fly5r; *(fly3P + FlyOffsetAIm) = fly5i; *(fly3P) = fly7r; *(fly3P + 1) = fly7i; fly0P = (fly0P + FlyOffsetB); fly1P = (fly1P + FlyOffsetB); fly2P = (fly2P + FlyOffsetB); fly3P = (fly3P + FlyOffsetB); }; fly2r = *(fly2P); fly2i = *(fly2P + 1); fly3r = *(fly3P); fly3i = *(fly3P + 1); fly4r = *(fly0P + FlyOffsetA); fly4i = *(fly0P + FlyOffsetAIm); fly5r = *(fly1P + FlyOffsetA); fly5i = *(fly1P + FlyOffsetAIm); fly6r = *(fly2P + FlyOffsetA); fly6i = *(fly2P + FlyOffsetAIm); fly7r = *(fly3P + FlyOffsetA); fly7i = *(fly3P + FlyOffsetAIm); t1r = fly0r - U0r * fly1r; t1r = t1r - U0i * fly1i; t1i = fly0i + U0i * fly1r; t1i = t1i - U0r * fly1i; t0r = scale * fly0r - t1r; t0i = scale * fly0i - t1i; fly1r = fly2r - U0r * fly3r; fly1r = fly1r - U0i * fly3i; fly1i = fly2i + U0i * fly3r; fly1i = fly1i - U0r * fly3i; fly2r = scale * fly2r - fly1r; fly2i = scale * fly2i - fly1i; fly0r = fly4r - U0r * fly5r; fly0r = fly0r - U0i * fly5i; fly0i = fly4i + U0i * fly5r; fly0i = fly0i - U0r * fly5i; fly4r = scale * fly4r - fly0r; fly4i = scale * fly4i - fly0i; fly3r = fly6r - U0r * fly7r; fly3r = fly3r - U0i * fly7i; fly3i = fly6i + U0i * fly7r; fly3i = fly3i - U0r * fly7i; fly6r = scale * fly6r - fly3r; fly6i = scale * fly6i - fly3i; fly5r = t0r - U1r * fly2r; fly5r = fly5r - U1i * fly2i; fly5i = t0i + U1i * fly2r; fly5i = fly5i - U1r * fly2i; t0r = scale * t0r - fly5r; t0i = scale * t0i - fly5i; fly7r = t1r + U1i * fly1r; fly7r = fly7r - U1r * fly1i; fly7i = t1i + U1r * fly1r; fly7i = fly7i + U1i * fly1i; t1r = scale * t1r - fly7r; t1i = scale * t1i - fly7i; fly2r = fly4r - U1r * fly6r; fly2r = fly2r - U1i * fly6i; fly2i = fly4i + U1i * fly6r; fly2i = fly2i - U1r * fly6i; fly4r = scale * fly4r - fly2r; fly4i = scale * fly4i - fly2i; fly1r = fly0r + U1i * fly3r; fly1r = fly1r - U1r * fly3i; fly1i = fly0i + U1r * fly3r; fly1i = fly1i + U1i * fly3i; fly0r = scale * fly0r - fly1r; fly0i = scale * fly0i - fly1i; U0i = *(U0iP = (U0iP - NsameU4)); U0r = *(U0rP = (U0rP + NsameU4)); U1r = *(U1rP = (U1rP + NsameU2)); U1i = *(U1iP = (U1iP - NsameU2)); if(stage&1) U0r = -U0r; fly6r = t0r - U2r * fly4r; fly6r = fly6r - U2i * fly4i; fly6i = t0i + U2i * fly4r; fly6i = fly6i - U2r * fly4i; t0r = scale * t0r - fly6r; t0i = scale * t0i - fly6i; fly3r = fly5r - U2i * fly2r; fly3r = fly3r + U2r * fly2i; fly3i = fly5i - U2r * fly2r; fly3i = fly3i - U2i * fly2i; fly2r = scale * fly5r - fly3r; fly2i = scale * fly5i - fly3i; fly4r = t1r - U3r * fly0r; fly4r = fly4r - U3i * fly0i; fly4i = t1i + U3i * fly0r; fly4i = fly4i - U3r * fly0i; t1r = scale * t1r - fly4r; t1i = scale * t1i - fly4i; fly5r = fly7r + U3i * fly1r; fly5r = fly5r - U3r * fly1i; fly5i = fly7i + U3r * fly1r; fly5i = fly5i + U3i * fly1i; fly7r = scale * fly7r - fly5r; fly7i = scale * fly7i - fly5i; *(fly0P + FlyOffsetA) = fly6r; *(fly0P + FlyOffsetAIm) = fly6i; *(fly0P) = t0r; *(fly0P + 1) = t0i; U2r = *(U2rP = (U2rP + NsameU1)); U2i = *(U2iP = (U2iP - NsameU1)); *(fly2P) = fly3r; *(fly2P + 1) = fly3i; *(fly2P + FlyOffsetA) = fly2r; *(fly2P + FlyOffsetAIm) = fly2i; *(fly1P + FlyOffsetA) = fly4r; *(fly1P + FlyOffsetAIm) = fly4i; *(fly1P) = t1r; *(fly1P + 1) = t1i; U3r = *( U2rP + U3offset); U3i = *( U2iP - U3offset); *(fly3P + FlyOffsetA) = fly5r; *(fly3P + FlyOffsetAIm) = fly5i; *(fly3P) = fly7r; *(fly3P + 1) = fly7i; IOP = IOP + 2; fly0P = IOP; fly1P = (IOP+Flyinc); fly2P = (fly1P+Flyinc); fly3P = (fly2P+Flyinc); }; NsameU4 = -NsameU4; if(stage&1){ LoopN >>= 3; NsameU1 >>= 3; NsameU2 >>= 3; NsameU4 >>= 3; NdiffU <<= 3; Flyinc = Flyinc << 3; FlyOffsetA <<= 3; FlyOffsetB <<= 3; FlyOffsetAIm = FlyOffsetA + 1; FlyOffsetBIm = FlyOffsetB + 1; } } /* Finish RFFT */ M=M+1; FlyOffsetA = Ntbl[M] * 2/4; FlyOffsetAIm = Ntbl[M] * 2/4 + 1; IOP = ioptr; fly0P = (IOP + Ntbl[M]*2/4); fly1P = (IOP + Ntbl[M]*2/8); U0rP = &Utbl[Ntbl[M-3]]; U0r = *U0rP, fly0r = *(IOP); fly0i = *(IOP + 1); fly1r = *(fly0P); fly1i = *(fly0P + 1); fly2r = *(fly1P); fly2i = *(fly1P + 1); fly3r = *(fly1P + FlyOffsetA); fly3i = *(fly1P + FlyOffsetAIm); t0r = scale * fly0r + scale * fly0i; /* compute Re(x[0]) */ t0i = scale * fly0r - scale * fly0i; /* compute Re(x[N/2]) */ t1r = scale * fly1r; t1i = -scale * fly1i; fly0r = fly2r + fly3r; fly0i = fly2i - fly3i; fly1r = fly2i + fly3i; fly1i = fly3r - fly2r; fly2r = fly0r + U0r * fly1r; fly2r = fly2r + U0r * fly1i; fly2i = fly0i - U0r * fly1r; fly2i = fly2i + U0r * fly1i; fly3r = scale * fly0r - fly2r; fly3i = fly2i - scale * fly0i; *(IOP) = t0r; *(IOP + 1) = t0i; *(fly0P) = t1r; *(fly0P + 1) = t1i; *(fly1P) = fly2r; *(fly1P + 1) = fly2i; *(fly1P + FlyOffsetA) = fly3r; *(fly1P + FlyOffsetAIm) = fly3i; U0rP = &Utbl[1]; U0iP = &Utbl[Ntbl[M-2]-1]; U0r = *U0rP, U0i = *U0iP; fly0P = (IOP + 2); fly1P = (IOP + (Ntbl[M-2]-1)*2); /* Butterflys */ /* f0 - t0 - - f2 f1 - t1 -U0 - f3 f2 - f0 - - t0 f3 - f1 -U0a- t1 */ for (diffUcnt = Ntbl[M-3]-1; diffUcnt > 0 ; diffUcnt--){ fly0r = *(fly0P); fly0i = *(fly0P + 1); fly1r = *(fly1P + FlyOffsetA); fly1i = *(fly1P + FlyOffsetAIm); fly2r = *(fly1P); fly2i = *(fly1P + 1); fly3r = *(fly0P + FlyOffsetA); fly3i = *(fly0P + FlyOffsetAIm); t0r = fly0r + fly1r; t0i = fly0i - fly1i; t1r = fly0i + fly1i; t1i = fly1r - fly0r; fly0r = fly2r + fly3r; fly0i = fly2i - fly3i; fly1r = fly2i + fly3i; fly1i = fly3r - fly2r; fly2r = t0r + U0r * t1r; fly2r = fly2r + U0i * t1i; fly2i = t0i - U0i * t1r; fly2i = fly2i + U0r * t1i; fly3r = scale * t0r - fly2r; fly3i = fly2i - scale * t0i; t0r = fly0r + U0i * fly1r; t0r = t0r + U0r * fly1i; t0i = fly0i - U0r * fly1r; t0i = t0i + U0i * fly1i; t1r = scale * fly0r - t0r; t1i = t0i - scale * fly0i; *(fly0P) = fly2r; *(fly0P + 1) = fly2i; *(fly1P + FlyOffsetA) = fly3r; *(fly1P + FlyOffsetAIm) = fly3i; U0r = *(U0rP = (U0rP + 1)); U0i = *(U0iP = (U0iP - 1)); *(fly1P) = t0r; *(fly1P + 1) = t0i; *(fly0P + FlyOffsetA) = t1r; *(fly0P + FlyOffsetAIm) = t1i; fly0P += 2; fly1P -= 2; }; ioptr += Ntbl[M]; } } void riffts(float *ioptr, long M, long Rows, float *Utbl){ /* Compute in-place real ifft on the rows of the input array */ /* INPUTS */ /* M = log2 of fft size */ /* *ioptr = input data array in the following order */ /* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */ /* *Utbl = cosine table */ /* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */ /* OUTPUTS */ /* *ioptr = real output data array */ long Flyinc; long FlyOffsetA; long FlyOffsetAIm; long FlyOffsetB; long FlyOffsetBIm; long NsameU1; long NsameU2; long NsameU4; long diffUcnt; long LoopCnt; float scale; float fly0r; float fly0i; float fly1r; float fly1i; float fly2r; float fly2i; float fly3r; float fly3i; float fly4r; float fly4i; float fly5r; float fly5i; float fly6r; float fly6i; float fly7r; float fly7i; float U0r; float U0i; float U1r; float U1i; float U2r; float U2i; float U3r; float U3i; float t0r; float t0i; float t1r; float t1i; float *fly0P; float *fly1P; float *fly2P; float *fly3P; float *U0rP; float *U0iP; float *U1rP; float *U1iP; float *U2rP; float *U2iP; float *IOP; long U3offset; long stage; long NdiffU; long LoopN; const long BRshift = MAXMROOT - ((M-1)>>1); /* for RIFFT */ const long Nrems2 = Ntbl[(M-1)-((M-1)>>1)+1]; /* for RIFFT */ const long Nroot_1_ColInc = (Ntbl[(M-1)-1]-Ntbl[(M-1)-((M-1)>>1)])*2; /* for RIFFT */ for (;Rows>0;Rows--){ /* Start RIFFT */ FlyOffsetA = Ntbl[M] * 2/4; FlyOffsetAIm = Ntbl[M] * 2/4 + 1; IOP = ioptr; fly0P = (IOP + Ntbl[M]*2/4); fly1P = (IOP + Ntbl[M]*2/8); U0rP = &Utbl[Ntbl[M-3]]; U0r = *U0rP, fly0r = *(IOP); fly0i = *(IOP + 1); fly1r = *(fly0P); fly1i = *(fly0P + 1); fly2r = *(fly1P); fly2i = *(fly1P + 1); fly3r = *(fly1P + FlyOffsetA); fly3i = *(fly1P + FlyOffsetAIm); t0r = fly0r + fly0i; t0i = fly0r - fly0i; t1r = fly1r + fly1r; t1i = -fly1i - fly1i; fly0r = fly2r + fly3r; fly0i = fly2i - fly3i; fly1r = fly2r - fly3r; fly1i = fly2i + fly3i; fly3r = fly1r * U0r; fly3r = fly3r - U0r * fly1i; fly3i = fly1r * U0r; fly3i = fly3i + U0r * fly1i; fly2r = fly0r - fly3i; fly2i = fly0i + fly3r; fly1r = fly0r + fly3i; fly1i = -fly0i + fly3r; *(IOP) = t0r; *(IOP + 1) = t0i; *(fly0P) = t1r; *(fly0P + 1) = t1i; *(fly1P) = fly2r; *(fly1P + 1) = fly2i; *(fly1P + FlyOffsetA) = fly1r; *(fly1P + FlyOffsetAIm) = fly1i; U0rP = &Utbl[1]; U0iP = &Utbl[Ntbl[M-2]-1]; U0r = *U0rP, U0i = *U0iP; fly0P = (IOP + 2); fly1P = (IOP + (Ntbl[M-2]-1)*2); /* Butterflys */ /* f0 - t0 - f2 f1 -t1 -U0- f3 - f1 f2 - f0 - t0 f3 -f1 -U0a t1 - f3 */ for (diffUcnt = Ntbl[M-3]-1; diffUcnt > 0 ; diffUcnt--){ fly0r = *(fly0P); fly0i = *(fly0P + 1); fly1r = *(fly1P + FlyOffsetA); fly1i = *(fly1P + FlyOffsetAIm); fly2r = *(fly1P); fly2i = *(fly1P + 1); fly3r = *(fly0P + FlyOffsetA); fly3i = *(fly0P + FlyOffsetAIm); t0r = fly0r + fly1r; t0i = fly0i - fly1i; t1r = fly0r - fly1r; t1i = fly0i + fly1i; fly0r = fly2r + fly3r; fly0i = fly2i - fly3i; fly1r = fly2r - fly3r; fly1i = fly2i + fly3i; fly3r = t1r * U0r; fly3r = fly3r - U0i * t1i; fly3i = t1r * U0i; fly3i = fly3i + U0r * t1i; t1r = fly1r * U0i; t1r = t1r - U0r * fly1i; t1i = fly1r * U0r; t1i = t1i + U0i * fly1i; fly2r = t0r - fly3i; fly2i = t0i + fly3r; fly1r = t0r + fly3i; fly1i = -t0i + fly3r; t0r = fly0r - t1i; t0i = fly0i + t1r; fly3r = fly0r + t1i; fly3i = -fly0i + t1r; *(fly0P) = fly2r; *(fly0P + 1) = fly2i; *(fly1P + FlyOffsetA) = fly1r; *(fly1P + FlyOffsetAIm) = fly1i; U0r = *(U0rP = (U0rP + 1)); U0i = *(U0iP = (U0iP - 1)); *(fly1P) = t0r; *(fly1P + 1) = t0i; *(fly0P + FlyOffsetA) = fly3r; *(fly0P + FlyOffsetAIm) = fly3i; fly0P += 2; fly1P -= 2; }; M=M-1; /* for RIFFT */ FlyOffsetA = Ntbl[M] * 2/2; FlyOffsetAIm = FlyOffsetA + 1; FlyOffsetB = FlyOffsetA + 2; FlyOffsetBIm = FlyOffsetB + 1; /* BitrevR2 ** bit reverse shuffle and first radix 2 stage ******/ scale = 1./Ntbl[M+1]; for (stage = 0; stage < Ntbl[M-(M>>1)]*2; stage += Ntbl[M>>1]*2){ for (LoopN = (Ntbl[(M>>1)-1]-1); LoopN >= 0; LoopN--){ LoopCnt = (Ntbl[(M>>1)-1]-1); fly0P = ioptr + Nroot_1_ColInc + ((int)BRcnt[LoopN] >> BRshift)*(2*2) + stage; IOP = ioptr + (LoopN<<(M+1)/2) * 2 + stage; fly1P = IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2); fly0r = *(fly0P); fly0i = *(fly0P+1); fly1r = *(fly0P+FlyOffsetA); fly1i = *(fly0P+FlyOffsetAIm); for (; LoopCnt > LoopN;){ fly2r = *(fly0P+2); fly2i = *(fly0P+(2+1)); fly3r = *(fly0P+FlyOffsetB); fly3i = *(fly0P+FlyOffsetBIm); fly4r = *(fly1P); fly4i = *(fly1P+1); fly5r = *(fly1P+FlyOffsetA); fly5i = *(fly1P+FlyOffsetAIm); fly6r = *(fly1P+2); fly6i = *(fly1P+(2+1)); fly7r = *(fly1P+FlyOffsetB); fly7i = *(fly1P+FlyOffsetBIm); t0r = fly0r + fly1r; t0i = fly0i + fly1i; fly1r = fly0r - fly1r; fly1i = fly0i - fly1i; t1r = fly2r + fly3r; t1i = fly2i + fly3i; fly3r = fly2r - fly3r; fly3i = fly2i - fly3i; fly0r = fly4r + fly5r; fly0i = fly4i + fly5i; fly5r = fly4r - fly5r; fly5i = fly4i - fly5i; fly2r = fly6r + fly7r; fly2i = fly6i + fly7i; fly7r = fly6r - fly7r; fly7i = fly6i - fly7i; *(fly1P) = scale*t0r; *(fly1P+1) = scale*t0i; *(fly1P+2) = scale*fly1r; *(fly1P+(2+1)) = scale*fly1i; *(fly1P+FlyOffsetA) = scale*t1r; *(fly1P+FlyOffsetAIm) = scale*t1i; *(fly1P+FlyOffsetB) = scale*fly3r; *(fly1P+FlyOffsetBIm) = scale*fly3i; *(fly0P) = scale*fly0r; *(fly0P+1) = scale*fly0i; *(fly0P+2) = scale*fly5r; *(fly0P+(2+1)) = scale*fly5i; *(fly0P+FlyOffsetA) = scale*fly2r; *(fly0P+FlyOffsetAIm) = scale*fly2i; *(fly0P+FlyOffsetB) = scale*fly7r; *(fly0P+FlyOffsetBIm) = scale*fly7i; fly0P -= Nrems2; fly0r = *(fly0P); fly0i = *(fly0P+1); fly1r = *(fly0P+FlyOffsetA); fly1i = *(fly0P+FlyOffsetAIm); LoopCnt -= 1; fly1P = (IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2)); }; fly2r = *(fly0P+2); fly2i = *(fly0P+(2+1)); fly3r = *(fly0P+FlyOffsetB); fly3i = *(fly0P+FlyOffsetBIm); t0r = fly0r + fly1r; t0i = fly0i + fly1i; fly1r = fly0r - fly1r; fly1i = fly0i - fly1i; t1r = fly2r + fly3r; t1i = fly2i + fly3i; fly3r = fly2r - fly3r; fly3i = fly2i - fly3i; *(fly0P) = scale*t0r; *(fly0P+1) = scale*t0i; *(fly0P+2) = scale*fly1r; *(fly0P+(2+1)) = scale*fly1i; *(fly0P+FlyOffsetA) = scale*t1r; *(fly0P+FlyOffsetAIm) = scale*t1i; *(fly0P+FlyOffsetB) = scale*fly3r; *(fly0P+FlyOffsetBIm) = scale*fly3i; }; }; /**** FFTC **************/ scale = 2.0; NdiffU = 2; Flyinc = (NdiffU); NsameU4 = Ntbl[M+1]/4; /* for RIFFT */ LoopN = Ntbl[M-3]; stage = ((M-1)/3); if ( (M-1-(stage * 3)) != 0 ){ FlyOffsetA = Flyinc << 2; FlyOffsetB = FlyOffsetA << 1; FlyOffsetAIm = FlyOffsetA + 1; FlyOffsetBIm = FlyOffsetB + 1; if ( (M-1-(stage * 3)) == 1 ){ /* 1 radix 2 stage */ IOP = ioptr; fly0P = IOP; fly1P = (IOP+Flyinc); fly2P = (fly1P+Flyinc); fly3P = (fly2P+Flyinc); /* Butterflys */ /* t0 - - t0 t1 - - t1 f2 - 1- f5 f1 - -i- f7 f4 - - f4 f0 - - f0 f6 - 1- f2 f3 - -i- f1 */ for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){ t0r = *(fly0P); t0i = *(fly0P + 1); t1r = *(fly1P); t1i = *(fly1P + 1); fly2r = *(fly2P); fly2i = *(fly2P + 1); fly1r = *(fly3P); fly1i = *(fly3P + 1); fly4r = *(fly0P + FlyOffsetA); fly4i = *(fly0P + FlyOffsetAIm); fly0r = *(fly1P + FlyOffsetA); fly0i = *(fly1P + FlyOffsetAIm); fly6r = *(fly2P + FlyOffsetA); fly6i = *(fly2P + FlyOffsetAIm); fly3r = *(fly3P + FlyOffsetA); fly3i = *(fly3P + FlyOffsetAIm); fly5r = t0r - fly2r; fly5i = t0i - fly2i; t0r = t0r + fly2r; t0i = t0i + fly2i; fly7r = t1r + fly1i; fly7i = t1i - fly1r; t1r = t1r - fly1i; t1i = t1i + fly1r; fly2r = fly4r - fly6r; fly2i = fly4i - fly6i; fly4r = fly4r + fly6r; fly4i = fly4i + fly6i; fly1r = fly0r + fly3i; fly1i = fly0i - fly3r; fly0r = fly0r - fly3i; fly0i = fly0i + fly3r; *(fly2P) = fly5r; *(fly2P + 1) = fly5i; *(fly0P) = t0r; *(fly0P + 1) = t0i; *(fly3P) = fly7r; *(fly3P + 1) = fly7i; *(fly1P) = t1r; *(fly1P + 1) = t1i; *(fly2P + FlyOffsetA) = fly2r; *(fly2P + FlyOffsetAIm) = fly2i; *(fly0P + FlyOffsetA) = fly4r; *(fly0P + FlyOffsetAIm) = fly4i; *(fly3P + FlyOffsetA) = fly1r; *(fly3P + FlyOffsetAIm) = fly1i; *(fly1P + FlyOffsetA) = fly0r; *(fly1P + FlyOffsetAIm) = fly0i; fly0P = (fly0P + FlyOffsetB); fly1P = (fly1P + FlyOffsetB); fly2P = (fly2P + FlyOffsetB); fly3P = (fly3P + FlyOffsetB); }; NsameU4 >>= 1; LoopN >>= 1; NdiffU <<= 1; Flyinc = Flyinc << 1; } else{ /* 1 radix 4 stage */ IOP = ioptr; U3r = 0.7071067811865475244008443621; /* sqrt(0.5); */ U3i = U3r; fly0P = IOP; fly1P = (IOP+Flyinc); fly2P = (fly1P+Flyinc); fly3P = (fly2P+Flyinc); /* Butterflys */ /* t0 - - t0 - - t0 t1 - - t1 - - t1 f2 - 1- f5 - - f5 f1 - -i- f7 - - f7 f4 - - f4 - 1- f6 f0 - - f0 -U3 - f3 f6 - 1- f2 - -i- f4 f3 - -i- f1 -U3a- f2 */ for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){ t0r = *(fly0P); t0i = *(fly0P + 1); t1r = *(fly1P); t1i = *(fly1P + 1); fly2r = *(fly2P); fly2i = *(fly2P + 1); fly1r = *(fly3P); fly1i = *(fly3P + 1); fly4r = *(fly0P + FlyOffsetA); fly4i = *(fly0P + FlyOffsetAIm); fly0r = *(fly1P + FlyOffsetA); fly0i = *(fly1P + FlyOffsetAIm); fly6r = *(fly2P + FlyOffsetA); fly6i = *(fly2P + FlyOffsetAIm); fly3r = *(fly3P + FlyOffsetA); fly3i = *(fly3P + FlyOffsetAIm); fly5r = t0r - fly2r; fly5i = t0i - fly2i; t0r = t0r + fly2r; t0i = t0i + fly2i; fly7r = t1r + fly1i; fly7i = t1i - fly1r; t1r = t1r - fly1i; t1i = t1i + fly1r; fly2r = fly4r - fly6r; fly2i = fly4i - fly6i; fly4r = fly4r + fly6r; fly4i = fly4i + fly6i; fly1r = fly0r + fly3i; fly1i = fly0i - fly3r; fly0r = fly0r - fly3i; fly0i = fly0i + fly3r; fly6r = t0r - fly4r; fly6i = t0i - fly4i; t0r = t0r + fly4r; t0i = t0i + fly4i; fly3r = fly5r + fly2i; fly3i = fly5i - fly2r; fly5r = fly5r - fly2i; fly5i = fly5i + fly2r; fly4r = t1r - U3r * fly0r; fly4r = fly4r + U3i * fly0i; fly4i = t1i - U3i * fly0r; fly4i = fly4i - U3r * fly0i; t1r = scale * t1r - fly4r; t1i = scale * t1i - fly4i; fly2r = fly7r + U3i * fly1r; fly2r = fly2r + U3r * fly1i; fly2i = fly7i - U3r * fly1r; fly2i = fly2i + U3i * fly1i; fly7r = scale * fly7r - fly2r; fly7i = scale * fly7i - fly2i; *(fly0P + FlyOffsetA) = fly6r; *(fly0P + FlyOffsetAIm) = fly6i; *(fly0P) = t0r; *(fly0P + 1) = t0i; *(fly2P + FlyOffsetA) = fly3r; *(fly2P + FlyOffsetAIm) = fly3i; *(fly2P) = fly5r; *(fly2P + 1) = fly5i; *(fly1P + FlyOffsetA) = fly4r; *(fly1P + FlyOffsetAIm) = fly4i; *(fly1P) = t1r; *(fly1P + 1) = t1i; *(fly3P + FlyOffsetA) = fly2r; *(fly3P + FlyOffsetAIm) = fly2i; *(fly3P) = fly7r; *(fly3P + 1) = fly7i; fly0P = (fly0P + FlyOffsetB); fly1P = (fly1P + FlyOffsetB); fly2P = (fly2P + FlyOffsetB); fly3P = (fly3P + FlyOffsetB); }; NsameU4 >>= 2; LoopN >>= 2; NdiffU <<= 2; Flyinc = Flyinc << 2; }; }; NsameU2 = NsameU4 >> 1; NsameU1 = NsameU2 >> 1; Flyinc <<= 1; FlyOffsetA = Flyinc << 2; FlyOffsetB = FlyOffsetA << 1; FlyOffsetAIm = FlyOffsetA + 1; FlyOffsetBIm = FlyOffsetB + 1; LoopN >>= 1; /* ****** RADIX 8 Stages */ for (stage = stage<<1; stage > 0 ; stage--){ /* an fft stage is done in two parts to ease use of the single quadrant cos table */ /* fftcalc1(iobuf, Utbl, N, NdiffU, LoopN); */ if(!(stage&1)){ U0rP = &Utbl[0]; U0iP = &Utbl[Ntbl[M-1]]; /* for RIFFT */ U1rP = U0rP; U1iP = U0iP; U2rP = U0rP; U2iP = U0iP; U3offset = (Ntbl[M+1]) / 8; /* for RIFFT */ IOP = ioptr; U0r = *U0rP, U0i = *U0iP; U1r = *U1rP, U1i = *U1iP; U2r = *U2rP, U2i = *U2iP; U3r = *( U2rP + U3offset); U3i = *( U2iP - U3offset); } fly0P = IOP; fly1P = (IOP+Flyinc); fly2P = (fly1P+Flyinc); fly3P = (fly2P+Flyinc); for (diffUcnt = (NdiffU)>>1; diffUcnt > 0; diffUcnt--){ /* Butterflys */ /* f0 - - t0 - - t0 - - t0 f1 -U0 - t1 - - t1 - - t1 f2 - - f2 -U1 - f5 - - f3 f3 -U0 - f1 -U1a- f7 - - f7 f4 - - f4 - - f4 -U2 - f6 f5 -U0 - f0 - - f0 -U3 - f4 f6 - - f6 -U1 - f2 -U2a- f2 f7 -U0 - f3 -U1a- f1 -U3a- f5 */ fly0r = *(IOP); fly0i = *(IOP+1); fly1r = *(fly1P); fly1i = *(fly1P+1); for (LoopCnt = LoopN-1; LoopCnt > 0 ; LoopCnt--){ fly2r = *(fly2P); fly2i = *(fly2P + 1); fly3r = *(fly3P); fly3i = *(fly3P + 1); fly4r = *(fly0P + FlyOffsetA); fly4i = *(fly0P + FlyOffsetAIm); fly5r = *(fly1P + FlyOffsetA); fly5i = *(fly1P + FlyOffsetAIm); fly6r = *(fly2P + FlyOffsetA); fly6i = *(fly2P + FlyOffsetAIm); fly7r = *(fly3P + FlyOffsetA); fly7i = *(fly3P + FlyOffsetAIm); t1r = fly0r - U0r * fly1r; t1r = t1r + U0i * fly1i; t1i = fly0i - U0i * fly1r; t1i = t1i - U0r * fly1i; t0r = scale * fly0r - t1r; t0i = scale * fly0i - t1i; fly1r = fly2r - U0r * fly3r; fly1r = fly1r + U0i * fly3i; fly1i = fly2i - U0i * fly3r; fly1i = fly1i - U0r * fly3i; fly2r = scale * fly2r - fly1r; fly2i = scale * fly2i - fly1i; fly0r = fly4r - U0r * fly5r; fly0r = fly0r + U0i * fly5i; fly0i = fly4i - U0i * fly5r; fly0i = fly0i - U0r * fly5i; fly4r = scale * fly4r - fly0r; fly4i = scale * fly4i - fly0i; fly3r = fly6r - U0r * fly7r; fly3r = fly3r + U0i * fly7i; fly3i = fly6i - U0i * fly7r; fly3i = fly3i - U0r * fly7i; fly6r = scale * fly6r - fly3r; fly6i = scale * fly6i - fly3i; fly5r = t0r - U1r * fly2r; fly5r = fly5r + U1i * fly2i; fly5i = t0i - U1i * fly2r; fly5i = fly5i - U1r * fly2i; t0r = scale * t0r - fly5r; t0i = scale * t0i - fly5i; fly7r = t1r + U1i * fly1r; fly7r = fly7r + U1r * fly1i; fly7i = t1i - U1r * fly1r; fly7i = fly7i + U1i * fly1i; t1r = scale * t1r - fly7r; t1i = scale * t1i - fly7i; fly2r = fly4r - U1r * fly6r; fly2r = fly2r + U1i * fly6i; fly2i = fly4i - U1i * fly6r; fly2i = fly2i - U1r * fly6i; fly4r = scale * fly4r - fly2r; fly4i = scale * fly4i - fly2i; fly1r = fly0r + U1i * fly3r; fly1r = fly1r + U1r * fly3i; fly1i = fly0i - U1r * fly3r; fly1i = fly1i + U1i * fly3i; fly0r = scale * fly0r - fly1r; fly0i = scale * fly0i - fly1i; fly6r = t0r - U2r * fly4r; fly6r = fly6r + U2i * fly4i; fly6i = t0i - U2i * fly4r; fly6i = fly6i - U2r * fly4i; t0r = scale * t0r - fly6r; t0i = scale * t0i - fly6i; fly3r = fly5r - U2i * fly2r; fly3r = fly3r - U2r * fly2i; fly3i = fly5i + U2r * fly2r; fly3i = fly3i - U2i * fly2i; fly2r = scale * fly5r - fly3r; fly2i = scale * fly5i - fly3i; fly4r = t1r - U3r * fly0r; fly4r = fly4r + U3i * fly0i; fly4i = t1i - U3i * fly0r; fly4i = fly4i - U3r * fly0i; t1r = scale * t1r - fly4r; t1i = scale * t1i - fly4i; fly5r = fly7r + U3i * fly1r; fly5r = fly5r + U3r * fly1i; fly5i = fly7i - U3r * fly1r; fly5i = fly5i + U3i * fly1i; fly7r = scale * fly7r - fly5r; fly7i = scale * fly7i - fly5i; *(fly0P + FlyOffsetA) = fly6r; *(fly0P + FlyOffsetAIm) = fly6i; *(fly0P) = t0r; *(fly0P + 1) = t0i; *(fly2P) = fly3r; *(fly2P + 1) = fly3i; *(fly2P + FlyOffsetA) = fly2r; *(fly2P + FlyOffsetAIm) = fly2i; fly0r = *(fly0P + FlyOffsetB); fly0i = *(fly0P + FlyOffsetBIm); *(fly1P + FlyOffsetA) = fly4r; *(fly1P + FlyOffsetAIm) = fly4i; *(fly1P) = t1r; *(fly1P + 1) = t1i; fly1r = *(fly1P + FlyOffsetB); fly1i = *(fly1P + FlyOffsetBIm); *(fly3P + FlyOffsetA) = fly5r; *(fly3P + FlyOffsetAIm) = fly5i; *(fly3P) = fly7r; *(fly3P + 1) = fly7i; fly0P = (fly0P + FlyOffsetB); fly1P = (fly1P + FlyOffsetB); fly2P = (fly2P + FlyOffsetB); fly3P = (fly3P + FlyOffsetB); }; fly2r = *(fly2P); fly2i = *(fly2P + 1); fly3r = *(fly3P); fly3i = *(fly3P + 1); fly4r = *(fly0P + FlyOffsetA); fly4i = *(fly0P + FlyOffsetAIm); fly5r = *(fly1P + FlyOffsetA); fly5i = *(fly1P + FlyOffsetAIm); fly6r = *(fly2P + FlyOffsetA); fly6i = *(fly2P + FlyOffsetAIm); fly7r = *(fly3P + FlyOffsetA); fly7i = *(fly3P + FlyOffsetAIm); t1r = fly0r - U0r * fly1r; t1r = t1r + U0i * fly1i; t1i = fly0i - U0i * fly1r; t1i = t1i - U0r * fly1i; t0r = scale * fly0r - t1r; t0i = scale * fly0i - t1i; fly1r = fly2r - U0r * fly3r; fly1r = fly1r + U0i * fly3i; fly1i = fly2i - U0i * fly3r; fly1i = fly1i - U0r * fly3i; fly2r = scale * fly2r - fly1r; fly2i = scale * fly2i - fly1i; fly0r = fly4r - U0r * fly5r; fly0r = fly0r + U0i * fly5i; fly0i = fly4i - U0i * fly5r; fly0i = fly0i - U0r * fly5i; fly4r = scale * fly4r - fly0r; fly4i = scale * fly4i - fly0i; fly3r = fly6r - U0r * fly7r; fly3r = fly3r + U0i * fly7i; fly3i = fly6i - U0i * fly7r; fly3i = fly3i - U0r * fly7i; fly6r = scale * fly6r - fly3r; fly6i = scale * fly6i - fly3i; fly5r = t0r - U1r * fly2r; fly5r = fly5r + U1i * fly2i; fly5i = t0i - U1i * fly2r; fly5i = fly5i - U1r * fly2i; t0r = scale * t0r - fly5r; t0i = scale * t0i - fly5i; fly7r = t1r + U1i * fly1r; fly7r = fly7r + U1r * fly1i; fly7i = t1i - U1r * fly1r; fly7i = fly7i + U1i * fly1i; t1r = scale * t1r - fly7r; t1i = scale * t1i - fly7i; fly2r = fly4r - U1r * fly6r; fly2r = fly2r + U1i * fly6i; fly2i = fly4i - U1i * fly6r; fly2i = fly2i - U1r * fly6i; fly4r = scale * fly4r - fly2r; fly4i = scale * fly4i - fly2i; fly1r = fly0r + U1i * fly3r; fly1r = fly1r + U1r * fly3i; fly1i = fly0i - U1r * fly3r; fly1i = fly1i + U1i * fly3i; fly0r = scale * fly0r - fly1r; fly0i = scale * fly0i - fly1i; U0i = *(U0iP = (U0iP - NsameU4)); U0r = *(U0rP = (U0rP + NsameU4)); U1r = *(U1rP = (U1rP + NsameU2)); U1i = *(U1iP = (U1iP - NsameU2)); if(stage&1) U0r = - U0r; fly6r = t0r - U2r * fly4r; fly6r = fly6r + U2i * fly4i; fly6i = t0i - U2i * fly4r; fly6i = fly6i - U2r * fly4i; t0r = scale * t0r - fly6r; t0i = scale * t0i - fly6i; fly3r = fly5r - U2i * fly2r; fly3r = fly3r - U2r * fly2i; fly3i = fly5i + U2r * fly2r; fly3i = fly3i - U2i * fly2i; fly2r = scale * fly5r - fly3r; fly2i = scale * fly5i - fly3i; fly4r = t1r - U3r * fly0r; fly4r = fly4r + U3i * fly0i; fly4i = t1i - U3i * fly0r; fly4i = fly4i - U3r * fly0i; t1r = scale * t1r - fly4r; t1i = scale * t1i - fly4i; fly5r = fly7r + U3i * fly1r; fly5r = fly5r + U3r * fly1i; fly5i = fly7i - U3r * fly1r; fly5i = fly5i + U3i * fly1i; fly7r = scale * fly7r - fly5r; fly7i = scale * fly7i - fly5i; *(fly0P + FlyOffsetA) = fly6r; *(fly0P + FlyOffsetAIm) = fly6i; *(fly0P) = t0r; *(fly0P + 1) = t0i; U2r = *(U2rP = (U2rP + NsameU1)); U2i = *(U2iP = (U2iP - NsameU1)); *(fly2P) = fly3r; *(fly2P + 1) = fly3i; *(fly2P + FlyOffsetA) = fly2r; *(fly2P + FlyOffsetAIm) = fly2i; *(fly1P + FlyOffsetA) = fly4r; *(fly1P + FlyOffsetAIm) = fly4i; *(fly1P) = t1r; *(fly1P + 1) = t1i; U3r = *( U2rP + U3offset); U3i = *( U2iP - U3offset); *(fly3P + FlyOffsetA) = fly5r; *(fly3P + FlyOffsetAIm) = fly5i; *(fly3P) = fly7r; *(fly3P + 1) = fly7i; IOP = IOP + 2; fly0P = IOP; fly1P = (IOP+Flyinc); fly2P = (fly1P+Flyinc); fly3P = (fly2P+Flyinc); }; NsameU4 = -NsameU4; if(stage&1){ LoopN >>= 3; NsameU1 >>= 3; NsameU2 >>= 3; NsameU4 >>= 3; NdiffU <<= 3; Flyinc = Flyinc << 3; FlyOffsetA <<= 3; FlyOffsetB <<= 3; FlyOffsetAIm = FlyOffsetA + 1; FlyOffsetBIm = FlyOffsetB + 1; } } M=M+1; /* for RIFFT */ ioptr += Ntbl[M]; } }