aboutsummaryrefslogtreecommitdiff
path: root/externals/grill/vasp/source/mixfft.cpp
diff options
context:
space:
mode:
authorThomas Grill <xovo@users.sourceforge.net>2009-04-01 21:13:09 +0000
committerThomas Grill <xovo@users.sourceforge.net>2009-04-01 21:13:09 +0000
commit0ed7a8b68dd73e2b0473b8127aeca99f3bac9061 (patch)
tree5c67818b38a5cc2f9caa5ca7f8640ca356adf02b /externals/grill/vasp/source/mixfft.cpp
parentbb4c7f6a245394d09dac9adfb2efb093d3d98452 (diff)
cleaned up grill externals - replaced with svn:externals to svn.grrrr.org/ext/trunk/
svn path=/trunk/; revision=10951
Diffstat (limited to 'externals/grill/vasp/source/mixfft.cpp')
-rw-r--r--externals/grill/vasp/source/mixfft.cpp588
1 files changed, 0 insertions, 588 deletions
diff --git a/externals/grill/vasp/source/mixfft.cpp b/externals/grill/vasp/source/mixfft.cpp
deleted file mode 100644
index 73d1fca7..00000000
--- a/externals/grill/vasp/source/mixfft.cpp
+++ /dev/null
@@ -1,588 +0,0 @@
-
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#ifdef _MSC_VER
-#pragma warning(disable: 4244)
-#endif
-
-/************************************************************************
- fft(int n, double xRe[], double xIm[], double yRe[], double yIm[])
- ------------------------------------------------------------------------
- NOTE : This is copyrighted material, Not public domain. See below.
- ------------------------------------------------------------------------
- Input/output:
- int n transformation length.
- double xRe[] real part of input sequence.
- double xIm[] imaginary part of input sequence.
- double yRe[] real part of output sequence.
- double yIm[] imaginary part of output sequence.
- ------------------------------------------------------------------------
- Function:
- The procedure performs a fast discrete Fourier transform (FFT) of
- a complex sequence, x, of an arbitrary length, n. The output, y,
- is also a complex sequence of length n.
-
- y[k] = sum(x[m]*exp(-i*2*pi*k*m/n), m=0..(n-1)), k=0,...,(n-1)
-
- The largest prime factor of n must be less than or equal to the
- constant maxPrimeFactor defined below.
- ------------------------------------------------------------------------
- Author:
- Jens Joergen Nielsen For non-commercial use only.
- Bakkehusene 54 A $100 fee must be paid if used
- DK-2970 Hoersholm commercially. Please contact.
- DENMARK
-
- E-mail : jjn@get2net.dk All rights reserved. October 2000.
- Homepage : http://home.get2net.dk/jjn
- ------------------------------------------------------------------------
- Implementation notes:
- The general idea is to factor the length of the DFT, n, into
- factors that are efficiently handled by the routines.
-
- A number of short DFT's are implemented with a minimum of
- arithmetical operations and using (almost) straight line code
- resulting in very fast execution when the factors of n belong
- to this set. Especially radix-10 is optimized.
-
- Prime factors, that are not in the set of short DFT's are handled
- with direct evaluation of the DFP expression.
-
- Please report any problems to the author.
- Suggestions and improvements are welcomed.
- ------------------------------------------------------------------------
- Benchmarks:
- The Microsoft Visual C++ compiler was used with the following
- compile options:
- /nologo /Gs /G2 /W4 /AH /Ox /D "NDEBUG" /D "_DOS" /FR
- and the FFTBENCH test executed on a 50MHz 486DX :
-
- Length Time [s] Accuracy [dB]
-
- 128 0.0054 -314.8
- 256 0.0116 -309.8
- 512 0.0251 -290.8
- 1024 0.0567 -313.6
- 2048 0.1203 -306.4
- 4096 0.2600 -291.8
- 8192 0.5800 -305.1
- 100 0.0040 -278.5
- 200 0.0099 -280.3
- 500 0.0256 -278.5
- 1000 0.0540 -278.5
- 2000 0.1294 -280.6
- 5000 0.3300 -278.4
- 10000 0.7133 -278.5
- ------------------------------------------------------------------------
- The following procedures are used :
- factorize : factor the transformation length.
- transTableSetup : setup table with sofar-, actual-, and remainRadix.
- permute : permutation allows in-place calculations.
- twiddleTransf : twiddle multiplications and DFT's for one stage.
- initTrig : initialise sine/cosine table.
- fft_4 : length 4 DFT, a la Nussbaumer.
- fft_5 : length 5 DFT, a la Nussbaumer.
- fft_10 : length 10 DFT using prime factor FFT.
- fft_odd : length n DFT, n odd.
-*************************************************************************/
-
-/************************************************************************
-
- changes by Thomas Grill:
-
- - introduced REAL type for numbers
- - made functions static
- - threw fft_n functions out of twiddleTransf
- if feasible, these will be inlined by the compiler
- - changed log prints (to post)
-
-************************************************************************/
-
-#define REAL float
-extern "C" void post(const char *c,...);
-
-/************************************************************************/
-
-
-#define maxPrimeFactor 8000 // all static data should fit into 256kB of cache
-#define maxPrimeFactorDiv2 (maxPrimeFactor+1)/2
-#define maxFactorCount 100
-
-static double c3_1 = -1.5000000000000E+00; /* c3_1 = cos(2*pi/3)-1; */
-static double c3_2 = 8.6602540378444E-01; /* c3_2 = sin(2*pi/3); */
-
-static double u5 = 1.2566370614359E+00; /* u5 = 2*pi/5; */
-static double c5_1 = -1.2500000000000E+00; /* c5_1 = (cos(u5)+cos(2*u5))/2-1;*/
-static double c5_2 = 5.5901699437495E-01; /* c5_2 = (cos(u5)-cos(2*u5))/2; */
-static double c5_3 = -9.5105651629515E-01; /* c5_3 = -sin(u5); */
-static double c5_4 = -1.5388417685876E+00; /* c5_4 = -(sin(u5)+sin(2*u5)); */
-static double c5_5 = 3.6327126400268E-01; /* c5_5 = (sin(u5)-sin(2*u5)); */
-static double c8 = 7.0710678118655E-01; /* c8 = 1/sqrt(2); */
-
-static double pi;
-static int groupOffset,dataOffset,blockOffset,adr;
-static int groupNo,dataNo,blockNo,twNo;
-static double omega;
-static REAL tw_re,tw_im;
-static REAL twiddleRe[maxPrimeFactor], twiddleIm[maxPrimeFactor];
-static REAL trigRe[maxPrimeFactor], trigIm[maxPrimeFactor];
-static REAL zRe[maxPrimeFactor], zIm[maxPrimeFactor];
-static REAL vRe[maxPrimeFactorDiv2], vIm[maxPrimeFactorDiv2];
-static REAL wRe[maxPrimeFactorDiv2], wIm[maxPrimeFactorDiv2];
-
-
-static void factorize(int n, int *nFact, int fact[])
-{
- int i,j,k;
- int nRadix;
- int radices[7];
- int factors[maxFactorCount];
-
- nRadix = 6;
- radices[1]= 2;
- radices[2]= 3;
- radices[3]= 4;
- radices[4]= 5;
- radices[5]= 8;
- radices[6]= 10;
-
- if (n==1)
- {
- j=1;
- factors[1]=1;
- }
- else j=0;
- i=nRadix;
- while ((n>1) && (i>0))
- {
- if ((n % radices[i]) == 0)
- {
- n=n / radices[i];
- j=j+1;
- factors[j]=radices[i];
- }
- else i=i-1;
- }
- if (factors[j] == 2) /*substitute factors 2*8 with 4*4 */
- {
- i = j-1;
- while ((i>0) && (factors[i] != 8)) i--;
- if (i>0)
- {
- factors[j] = 4;
- factors[i] = 4;
- }
- }
- if (n>1)
- {
- for (k=2; k<sqrt((double)n)+1; k++)
- while ((n % k) == 0)
- {
- n=n / k;
- j=j+1;
- factors[j]=k;
- }
- if (n>1)
- {
- j=j+1;
- factors[j]=n;
- }
- }
- for (i=1; i<=j; i++)
- {
- fact[i] = factors[j-i+1];
- }
- *nFact=j;
-} /* factorize */
-
-/****************************************************************************
- After N is factored the parameters that control the stages are generated.
- For each stage we have:
- sofar : the product of the radices so far.
- actual : the radix handled in this stage.
- remain : the product of the remaining radices.
- ****************************************************************************/
-
-static bool transTableSetup(int sofar[], int actual[], int remain[],
- int *nFact,
- int *nPoints)
-{
- int i;
-
- factorize(*nPoints, nFact, actual);
- if (actual[1] > maxPrimeFactor)
- {
- // T.Grill - replaced the printfs by a post
- post("FFT: Prime factor of FFT length is too large (%d) - aborted",actual[1]);
- return false;
- }
-
- remain[0]=*nPoints;
- sofar[1]=1;
- remain[1]=*nPoints / actual[1];
- for (i=2; i<=*nFact; i++)
- {
- sofar[i]=sofar[i-1]*actual[i-1];
- remain[i]=remain[i-1] / actual[i];
- }
- return true;
-} /* transTableSetup */
-
-/****************************************************************************
- The sequence y is the permuted input sequence x so that the following
- transformations can be performed in-place, and the final result is the
- normal order.
- ****************************************************************************/
-
-static void permute(int nPoint, int nFact,
- int fact[], int remain[],
- REAL xRe[], REAL xIm[],
- REAL yRe[], REAL yIm[])
-
-{
- int i,j,k;
- int count[maxFactorCount];
-
- for (i=1; i<=nFact; i++) count[i]=0;
- k=0;
- for (i=0; i<=nPoint-2; i++)
- {
- yRe[i] = xRe[k];
- yIm[i] = xIm[k];
- j=1;
- k=k+remain[j];
- count[1] = count[1]+1;
- while (count[j] >= fact[j])
- {
- count[j]=0;
- k=k-remain[j-1]+remain[j+1];
- j=j+1;
- count[j]=count[j]+1;
- }
- }
- yRe[nPoint-1]=xRe[nPoint-1];
- yIm[nPoint-1]=xIm[nPoint-1];
-} /* permute */
-
-
-/****************************************************************************
- Twiddle factor multiplications and transformations are performed on a
- group of data. The number of multiplications with 1 are reduced by skipping
- the twiddle multiplication of the first stage and of the first group of the
- following stages.
- ***************************************************************************/
-
-static void initTrig(int radix)
-{
- int i;
- double w,xre,xim,xre1,xim1;
-
- w=2*pi/radix;
- trigRe[0]=1; trigIm[0]=0;
- xre1=xre=cos(w);
- xim1=xim=-sin(w);
- trigRe[1]=xre; trigIm[1]=xim;
- for (i=2; i<radix; i++)
- {
- trigRe[i] = xre1 = xre*trigRe[i-1] - xim*trigIm[i-1];
- trigIm[i] = xim1 = xim*trigRe[i-1] + xre*trigIm[i-1];
-// trigRe[i] = xre1 = xre*xre1 - xim*xim1;
-// trigIm[i] = xim1 = xim*xre1 + xre*xim1;
- }
-} /* initTrig */
-
-static void fft_2(REAL aRe[], REAL aIm[])
-{
- double gem;
- gem=zRe[0] + zRe[1];
- zRe[1]=zRe[0] - zRe[1]; zRe[0]=gem;
- gem=zIm[0] + zIm[1];
- zIm[1]=zIm[0] - zIm[1]; zIm[0]=gem;
-}
-
-static void fft_3(REAL aRe[], REAL aIm[])
-{
- REAL t1_re,t1_im;
- REAL m2_re,m2_im;
- REAL m1_re,m1_im;
- REAL s1_re,s1_im;
- t1_re=zRe[1] + zRe[2]; t1_im=zIm[1] + zIm[2];
- zRe[0]=zRe[0] + t1_re; zIm[0]=zIm[0] + t1_im;
- m1_re=c3_1*t1_re; m1_im=c3_1*t1_im;
- m2_re=c3_2*(zIm[1] - zIm[2]);
- m2_im=c3_2*(zRe[2] - zRe[1]);
- s1_re=zRe[0] + m1_re; s1_im=zIm[0] + m1_im;
- zRe[1]=s1_re + m2_re; zIm[1]=s1_im + m2_im;
- zRe[2]=s1_re - m2_re; zIm[2]=s1_im - m2_im;
-}
-
-static void fft_4(REAL aRe[], REAL aIm[])
-{
- REAL t1_re,t1_im, t2_re,t2_im;
- REAL m2_re,m2_im, m3_re,m3_im;
-
- t1_re=aRe[0] + aRe[2]; t1_im=aIm[0] + aIm[2];
- t2_re=aRe[1] + aRe[3]; t2_im=aIm[1] + aIm[3];
-
- m2_re=aRe[0] - aRe[2]; m2_im=aIm[0] - aIm[2];
- m3_re=aIm[1] - aIm[3]; m3_im=aRe[3] - aRe[1];
-
- aRe[0]=t1_re + t2_re; aIm[0]=t1_im + t2_im;
- aRe[2]=t1_re - t2_re; aIm[2]=t1_im - t2_im;
- aRe[1]=m2_re + m3_re; aIm[1]=m2_im + m3_im;
- aRe[3]=m2_re - m3_re; aIm[3]=m2_im - m3_im;
-} /* fft_4 */
-
-
-static void fft_5(REAL aRe[], REAL aIm[])
-{
- REAL t1_re,t1_im, t2_re,t2_im, t3_re,t3_im;
- REAL t4_re,t4_im, t5_re,t5_im;
- REAL m2_re,m2_im, m3_re,m3_im, m4_re,m4_im;
- REAL m1_re,m1_im, m5_re,m5_im;
- REAL s1_re,s1_im, s2_re,s2_im, s3_re,s3_im;
- REAL s4_re,s4_im, s5_re,s5_im;
-
- t1_re=aRe[1] + aRe[4]; t1_im=aIm[1] + aIm[4];
- t2_re=aRe[2] + aRe[3]; t2_im=aIm[2] + aIm[3];
- t3_re=aRe[1] - aRe[4]; t3_im=aIm[1] - aIm[4];
- t4_re=aRe[3] - aRe[2]; t4_im=aIm[3] - aIm[2];
- t5_re=t1_re + t2_re; t5_im=t1_im + t2_im;
- aRe[0]=aRe[0] + t5_re; aIm[0]=aIm[0] + t5_im;
- m1_re=c5_1*t5_re; m1_im=c5_1*t5_im;
- m2_re=c5_2*(t1_re - t2_re); m2_im=c5_2*(t1_im - t2_im);
-
- m3_re=-c5_3*(t3_im + t4_im); m3_im=c5_3*(t3_re + t4_re);
- m4_re=-c5_4*t4_im; m4_im=c5_4*t4_re;
- m5_re=-c5_5*t3_im; m5_im=c5_5*t3_re;
-
- s3_re=m3_re - m4_re; s3_im=m3_im - m4_im;
- s5_re=m3_re + m5_re; s5_im=m3_im + m5_im;
- s1_re=aRe[0] + m1_re; s1_im=aIm[0] + m1_im;
- s2_re=s1_re + m2_re; s2_im=s1_im + m2_im;
- s4_re=s1_re - m2_re; s4_im=s1_im - m2_im;
-
- aRe[1]=s2_re + s3_re; aIm[1]=s2_im + s3_im;
- aRe[2]=s4_re + s5_re; aIm[2]=s4_im + s5_im;
- aRe[3]=s4_re - s5_re; aIm[3]=s4_im - s5_im;
- aRe[4]=s2_re - s3_re; aIm[4]=s2_im - s3_im;
-} /* fft_5 */
-
-static void fft_8()
-{
- REAL aRe[4], aIm[4], bRe[4], bIm[4], gem;
-
- aRe[0] = zRe[0]; bRe[0] = zRe[1];
- aRe[1] = zRe[2]; bRe[1] = zRe[3];
- aRe[2] = zRe[4]; bRe[2] = zRe[5];
- aRe[3] = zRe[6]; bRe[3] = zRe[7];
-
- aIm[0] = zIm[0]; bIm[0] = zIm[1];
- aIm[1] = zIm[2]; bIm[1] = zIm[3];
- aIm[2] = zIm[4]; bIm[2] = zIm[5];
- aIm[3] = zIm[6]; bIm[3] = zIm[7];
-
- fft_4(aRe, aIm); fft_4(bRe, bIm);
-
- gem = c8*(bRe[1] + bIm[1]);
- bIm[1] = c8*(bIm[1] - bRe[1]);
- bRe[1] = gem;
- gem = bIm[2];
- bIm[2] =-bRe[2];
- bRe[2] = gem;
- gem = c8*(bIm[3] - bRe[3]);
- bIm[3] =-c8*(bRe[3] + bIm[3]);
- bRe[3] = gem;
-
- zRe[0] = aRe[0] + bRe[0]; zRe[4] = aRe[0] - bRe[0];
- zRe[1] = aRe[1] + bRe[1]; zRe[5] = aRe[1] - bRe[1];
- zRe[2] = aRe[2] + bRe[2]; zRe[6] = aRe[2] - bRe[2];
- zRe[3] = aRe[3] + bRe[3]; zRe[7] = aRe[3] - bRe[3];
-
- zIm[0] = aIm[0] + bIm[0]; zIm[4] = aIm[0] - bIm[0];
- zIm[1] = aIm[1] + bIm[1]; zIm[5] = aIm[1] - bIm[1];
- zIm[2] = aIm[2] + bIm[2]; zIm[6] = aIm[2] - bIm[2];
- zIm[3] = aIm[3] + bIm[3]; zIm[7] = aIm[3] - bIm[3];
-} /* fft_8 */
-
-static void fft_10()
-{
- REAL aRe[5], aIm[5], bRe[5], bIm[5];
-
- aRe[0] = zRe[0]; bRe[0] = zRe[5];
- aRe[1] = zRe[2]; bRe[1] = zRe[7];
- aRe[2] = zRe[4]; bRe[2] = zRe[9];
- aRe[3] = zRe[6]; bRe[3] = zRe[1];
- aRe[4] = zRe[8]; bRe[4] = zRe[3];
-
- aIm[0] = zIm[0]; bIm[0] = zIm[5];
- aIm[1] = zIm[2]; bIm[1] = zIm[7];
- aIm[2] = zIm[4]; bIm[2] = zIm[9];
- aIm[3] = zIm[6]; bIm[3] = zIm[1];
- aIm[4] = zIm[8]; bIm[4] = zIm[3];
-
- fft_5(aRe, aIm); fft_5(bRe, bIm);
-
- zRe[0] = aRe[0] + bRe[0]; zRe[5] = aRe[0] - bRe[0];
- zRe[6] = aRe[1] + bRe[1]; zRe[1] = aRe[1] - bRe[1];
- zRe[2] = aRe[2] + bRe[2]; zRe[7] = aRe[2] - bRe[2];
- zRe[8] = aRe[3] + bRe[3]; zRe[3] = aRe[3] - bRe[3];
- zRe[4] = aRe[4] + bRe[4]; zRe[9] = aRe[4] - bRe[4];
-
- zIm[0] = aIm[0] + bIm[0]; zIm[5] = aIm[0] - bIm[0];
- zIm[6] = aIm[1] + bIm[1]; zIm[1] = aIm[1] - bIm[1];
- zIm[2] = aIm[2] + bIm[2]; zIm[7] = aIm[2] - bIm[2];
- zIm[8] = aIm[3] + bIm[3]; zIm[3] = aIm[3] - bIm[3];
- zIm[4] = aIm[4] + bIm[4]; zIm[9] = aIm[4] - bIm[4];
-} /* fft_10 */
-
-static void fft_odd(int radix)
-{
- REAL rere, reim, imre, imim;
- int i,j,k,n,max;
-
- n = radix;
- max = (n + 1)/2;
- for (j=1; j < max; j++)
- {
- vRe[j] = zRe[j] + zRe[n-j];
- vIm[j] = zIm[j] - zIm[n-j];
- wRe[j] = zRe[j] - zRe[n-j];
- wIm[j] = zIm[j] + zIm[n-j];
- }
-
- for (j=1; j < max; j++)
- {
- zRe[j]=zRe[0];
- zIm[j]=zIm[0];
- zRe[n-j]=zRe[0];
- zIm[n-j]=zIm[0];
- k=j;
- for (i=1; i < max; i++)
- {
- rere = trigRe[k] * vRe[i];
- imim = trigIm[k] * vIm[i];
- reim = trigRe[k] * wIm[i];
- imre = trigIm[k] * wRe[i];
-
- zRe[n-j] += rere + imim;
- zIm[n-j] += reim - imre;
- zRe[j] += rere - imim;
- zIm[j] += reim + imre;
-
- k = k + j;
- if (k >= n) k = k - n;
- }
- }
- for (j=1; j < max; j++)
- {
- zRe[0]=zRe[0] + vRe[j];
- zIm[0]=zIm[0] + wIm[j];
- }
-} /* fft_odd */
-
-
-static void twiddleTransf(int sofarRadix, int radix, int remainRadix,
- REAL yRe[], REAL yIm[])
-
-{ /* twiddleTransf */
- double cosw, sinw, gem;
-
- initTrig(radix);
- omega = 2*pi/(double)(sofarRadix*radix);
- cosw = cos(omega);
- sinw = -sin(omega);
- tw_re = 1.0;
- tw_im = 0;
- dataOffset=0;
- groupOffset=dataOffset;
- adr=groupOffset;
- for (dataNo=0; dataNo<sofarRadix; dataNo++)
- {
- if (sofarRadix>1)
- {
- twiddleRe[0] = 1.0;
- twiddleIm[0] = 0.0;
- twiddleRe[1] = tw_re;
- twiddleIm[1] = tw_im;
- for (twNo=2; twNo<radix; twNo++)
- {
- twiddleRe[twNo]=tw_re*twiddleRe[twNo-1]
- - tw_im*twiddleIm[twNo-1];
- twiddleIm[twNo]=tw_im*twiddleRe[twNo-1]
- + tw_re*twiddleIm[twNo-1];
- }
- gem = cosw*tw_re - sinw*tw_im;
- tw_im = sinw*tw_re + cosw*tw_im;
- tw_re = gem;
- }
- for (groupNo=0; groupNo<remainRadix; groupNo++)
- {
- if ((sofarRadix>1) && (dataNo > 0))
- {
- zRe[0]=yRe[adr];
- zIm[0]=yIm[adr];
- blockNo=1;
- do {
- adr = adr + sofarRadix;
- zRe[blockNo]= twiddleRe[blockNo] * yRe[adr]
- - twiddleIm[blockNo] * yIm[adr];
- zIm[blockNo]= twiddleRe[blockNo] * yIm[adr]
- + twiddleIm[blockNo] * yRe[adr];
-
- blockNo++;
- } while (blockNo < radix);
- }
- else
- for (blockNo=0; blockNo<radix; blockNo++)
- {
- zRe[blockNo]=yRe[adr];
- zIm[blockNo]=yIm[adr];
- adr=adr+sofarRadix;
- }
- switch(radix) {
- // T.Grill - replaced the inlined code by their function counterparts
- case 2 : fft_2(zRe,zIm); break;
- case 3 : fft_3(zRe,zIm); break;
- case 4 : fft_4(zRe,zIm); break;
- case 5 : fft_5(zRe,zIm); break;
- case 8 : fft_8(); break;
- case 10 : fft_10(); break;
- default : fft_odd(radix); break;
- }
- adr=groupOffset;
- for (blockNo=0; blockNo<radix; blockNo++)
- {
- yRe[adr]=zRe[blockNo]; yIm[adr]=zIm[blockNo];
- adr=adr+sofarRadix;
- }
- groupOffset=groupOffset+sofarRadix*radix;
- adr=groupOffset;
- }
- dataOffset=dataOffset+1;
- groupOffset=dataOffset;
- adr=groupOffset;
- }
-} /* twiddleTransf */
-
-bool mixfft(int n, REAL *xRe, REAL *xIm,REAL *yRe, REAL *yIm)
-{
- int sofarRadix[maxFactorCount],
- actualRadix[maxFactorCount],
- remainRadix[maxFactorCount];
- int nFactor;
- int count;
-
- pi = 4*atan(1.);
-
- if(!transTableSetup(sofarRadix, actualRadix, remainRadix, &nFactor, &n)) return false;
- permute(n, nFactor, actualRadix, remainRadix, xRe, xIm, yRe, yIm);
-
- for (count=1; count<=nFactor; count++)
- twiddleTransf(sofarRadix[count], actualRadix[count], remainRadix[count],
- yRe, yIm);
- return true;
-} /* fft */
-