From 9815096db22c73cacdbb65512d1b61d633db7fa8 Mon Sep 17 00:00:00 2001 From: Thomas Grill Date: Mon, 2 Dec 2002 19:21:08 +0000 Subject: "version 0.1.1" svn path=/trunk/; revision=267 --- externals/grill/vasp/source/ops_dft.cpp | 651 ++++++++++++++++++++++++++++++++ 1 file changed, 651 insertions(+) create mode 100644 externals/grill/vasp/source/ops_dft.cpp (limited to 'externals/grill/vasp/source/ops_dft.cpp') diff --git a/externals/grill/vasp/source/ops_dft.cpp b/externals/grill/vasp/source/ops_dft.cpp new file mode 100644 index 00000000..a83d80fc --- /dev/null +++ b/externals/grill/vasp/source/ops_dft.cpp @@ -0,0 +1,651 @@ +/* + +VASP modular - vector assembling signal processor / objects for Max/MSP and PD + +Copyright (c) 2002 Thomas Grill (xovo@gmx.net) +For information on usage and redistribution, and for a DISCLAIMER OF ALL +WARRANTIES, see the file, "license.txt," in this distribution. + +*/ + +/*! \file ops_dft.cpp + \brief Implementation of DFT routines + + \todo align temporary memory allocations + + All DFTs are normalized by 1/sqrt(n), hence the complex ones are repeatable + + - complex FFT radix-2: in-place + - real FFT radix-2 (split-radix): in-place + - complex DFT radix-n (split-radix): out-of-place + - real DFT radix-n: out-of-place / based on complex DFT radix-n (inefficient) + + In-place transformation is only possible for stride=1 +*/ + +#include "ops_dft.h" +#include +#include + +/////////////////////////////////////////////////////////////// + +BL mixfft(I n,F *xRe,F *xIm,F *yRe,F *yIm); + +#ifdef FLEXT_THREADS +static flext::ThrMutex mixmtx; +#endif + +//! Real forward DFT radix-n (managing routine) +static BL fft_fwd_real_any(I cnt,F *rsdt,I _rss,F *rddt,I _rds) +{ + if(!rddt) rddt = rsdt,_rds = _rss; + +#ifdef VASP_CHN1 + const I rds = 1,rss = 1; +#else + const I rds = _rds,rss = _rss; +#endif + + const BL rst = rss != 1; + const BL rdt = rsdt == rddt || rds != 1; + + F *rstmp,*istmp; + register I i; + + if(rst) { + rstmp = new F[cnt]; + // happens only if rss != 1, no optimization necessary + for(i = 0; i < cnt; ++i) rstmp[i] = rsdt[i*rss]; + } + else + rstmp = rsdt; + + istmp = new F[cnt]; + memset(istmp,0,cnt*sizeof(*istmp)); + + F *rdtmp = rdt?new F[cnt]:rddt; + F *idtmp = new F[cnt]; + + BL ret; + { + // mixfft is not thread-safe +#ifdef FLEXT_THREADS + mixmtx.Lock(); +#endif + ret = mixfft(cnt,rstmp,istmp,rdtmp,idtmp); +#ifdef FLEXT_THREADS + mixmtx.Unlock(); +#endif + } + if(ret) { + const F nrm = 1./sqrt(cnt); + const I n2 = cnt/2; + +#ifndef VASP_COMPACT + if(rds == 1) { + for(i = 0; i <= n2; ++i) rddt[i] = rdtmp[i]*nrm; + for(i = 1; i < cnt-n2; ++i) rddt[i+n2] = idtmp[i]*nrm; + } + else +#endif + { + for(i = 0; i <= n2; ++i) rddt[i*rds] = rdtmp[i]*nrm; + for(i = 1; i < cnt-n2; ++i) rddt[(i+n2)*rds] = idtmp[i]*nrm; + } + } + + if(rst) delete[] rstmp; + delete[] istmp; + if(rdt) delete[] rdtmp; + delete[] idtmp; + + return ret; +} + + +//! Real inverse DFT radix-n (managing routine) +static BL fft_inv_real_any(I cnt,F *rsdt,I _rss,F *rddt,I _rds) +{ + if(!rddt) rddt = rsdt,_rds = _rss; + +#ifdef VASP_CHN1 + const I rds = 1,rss = 1; +#else + const I rds = _rds,rss = _rss; +#endif + + const BL rst = rss != 1; + const BL rdt = rsdt == rddt || rds != 1; + + const I n2 = cnt/2; + F *rstmp,*istmp; + istmp = new F[cnt]; + register I i; + + if(rst) { + rstmp = new F[cnt]; + // happens only if rss != 1, no optimization necessary + for(i = 0; i <= n2; ++i) rstmp[i] = rsdt[i*rss]; + for(i = 1; i < cnt-n2; ++i) istmp[cnt-i] = rsdt[(n2+i)*rss]; + } + else { + rstmp = rsdt; + for(i = 1; i < cnt-n2; ++i) istmp[cnt-i] = rsdt[n2+i]; + } + + // make symmetric parts + for(i = 1; i < cnt-n2; ++i) { + istmp[i] = -istmp[cnt-i]; + rstmp[cnt-i] = rstmp[i]; + } + istmp[0] = 0; + if(cnt%2 == 0) istmp[n2] = 0; + + + F *rdtmp = rdt?new F[cnt]:rddt; + F *idtmp = new F[cnt]; + + BL ret; + { +#ifdef FLEXT_THREADS + mixmtx.Lock(); +#endif + // mixfft is not thread-safe + ret = mixfft(cnt,rstmp,istmp,rdtmp,idtmp); +#ifdef FLEXT_THREADS + mixmtx.Unlock(); +#endif + } + if(ret) { + const F nrm = 1./sqrt(cnt); +#ifndef VASP_COMPACT + if(rds == 1) + for(i = 0; i < cnt; ++i) + rddt[i] = rdtmp[i]*nrm; + else +#endif + for(i = 0; i < cnt; ++i) + rddt[i*rds] = rdtmp[i]*nrm; + } + + if(rst) delete[] rstmp; + delete[] istmp; + if(rdt) delete[] rdtmp; + delete[] idtmp; + + return ret; +} + +/////////////////////////////////////////////////////////////// + +//! Complex forward DFT radix-n (managing routine) +static BL fft_fwd_complex_any(I cnt,F *rsdt,I _rss,F *isdt,I _iss,F *rddt,I _rds,F *iddt,I _ids) +{ + if(!rddt) rddt = rsdt,_rds = _rss; + if(!iddt) iddt = isdt,_ids = _iss; + +#ifdef VASP_CHN1 + const I rds = 1,ids = 1,rss = 1,iss = 1; +#else + const I rds = _rds,ids = _ids,rss = _rss,iss = _iss; +#endif + + const BL rst = rss != 1; + const BL ist = iss != 1; + const BL rdt = rsdt == rddt || rds != 1; + const BL idt = isdt == iddt || ids != 1; + + F *rstmp,*istmp; + register I i; + + if(rst) { + rstmp = new F[cnt]; + // happens only if rss != 1, no optimization necessary + for(i = 0; i < cnt; ++i) rstmp[i] = rsdt[i*rss]; + } + else + rstmp = rsdt; + + if(ist) { + istmp = new F[cnt]; + // happens only if iss != 1, no optimization necessary + for(i = 0; i < cnt; ++i) istmp[i] = isdt[i*iss]; + } + else + istmp = isdt; + + F *rdtmp = rdt?new F[cnt]:rddt; + F *idtmp = idt?new F[cnt]:iddt; + + BL ret; + { +#ifdef FLEXT_THREADS + mixmtx.Lock(); +#endif + // mixfft is not thread-safe + ret = mixfft(cnt,rstmp,istmp,rdtmp,idtmp); +#ifdef FLEXT_THREADS + mixmtx.Unlock(); +#endif + } + if(ret) { + const F nrm = 1./sqrt(cnt); + +#ifdef VASP_COMPACT + for(i = 0; i < cnt; ++i) { + rddt[i*rds] = rdtmp[i]*nrm; + iddt[i*ids] = idtmp[i]*nrm; + } +#else + if(rdt) { + if(rds != 1) + for(i = 0; i < cnt; ++i) rddt[i*rds] = rdtmp[i]*nrm; + else + for(i = 0; i < cnt; ++i) rddt[i] = rdtmp[i]*nrm; + } + else // ok, this branch is not absolutely necessary + if(rds != 1) + for(i = 0; i < cnt; ++i) rddt[i*rds] *= nrm; + else + for(i = 0; i < cnt; ++i) rddt[i] *= nrm; + + if(idt) { + if(ids != 1) + for(i = 0; i < cnt; ++i) iddt[i*ids] = idtmp[i]*nrm; + else + for(i = 0; i < cnt; ++i) iddt[i] = idtmp[i]*nrm; + } + else // ok, this branch is not absolutely necessary + if(ids != 1) + for(i = 0; i < cnt; ++i) iddt[i*ids] *= nrm; + else + for(i = 0; i < cnt; ++i) iddt[i] *= nrm; +#endif + } + + if(rst) delete[] rstmp; + if(ist) delete[] istmp; + if(rdt) delete[] rdtmp; + if(idt) delete[] idtmp; + + return ret; +} + +//! Complex inverse DFT radix-n (managing routine) +static BL fft_inv_complex_any(I cnt,F *rsdt,I _rss,F *isdt,I _iss,F *rddt,I _rds,F *iddt,I _ids) +{ + I i; + + if(!rddt) rddt = rsdt,_rds = _rss; + if(!iddt) iddt = isdt,_ids = _iss; + +#ifdef VASP_CHN1 + const I rds = 1,ids = 1,rss = 1,iss = 1; +#else + const I rds = _rds,ids = _ids,rss = _rss,iss = _iss; +#endif + +#ifndef VASP_COMPACT + if(iss == 1) + for(i = 0; i < cnt; ++i) isdt[i] = -isdt[i]; + else +#endif + for(i = 0; i < cnt; ++i) isdt[i*iss] *= -1; + + BL ret = fft_fwd_complex_any(cnt,rsdt,rss,isdt,iss,rddt,rds,iddt,ids); + + if(ret) { +#ifndef VASP_COMPACT + if(ids == 1) + for(i = 0; i < cnt; ++i) iddt[i] = -iddt[i]; + else +#endif + for(i = 0; i < cnt; ++i) iddt[i*ids] *= -1; + } + + // reverse minus on input + if(isdt != iddt) { +#ifndef VASP_COMPACT + if(iss == 1) + for(i = 0; i < cnt; ++i) isdt[i] = -isdt[i]; + else +#endif + for(i = 0; i < cnt; ++i) isdt[i*iss] *= -1; + } + return ret; +} + +/////////////////////////////////////////////////////////////// + +bool fft_bidir_complex_radix2(int size,float *real,float *imag,int dir); + +//! Complex forward FFT radix-2 (managing routine) +static BL fft_complex_radix2(I cnt,F *rsdt,I _rss,F *isdt,I _iss,F *rddt,I _rds,F *iddt,I _ids,I dir) +{ + if(!rddt) rddt = rsdt,_rds = _rss; + if(!iddt) iddt = isdt,_ids = _iss; + +#ifdef VASP_CHN1 + const I rds = 1,ids = 1,rss = 1,iss = 1; +#else + const I rds = _rds,ids = _ids,rss = _rss,iss = _iss; +#endif + + BL rt = false,it = false; + F *rtmp,*itmp; + register I i; + + if(rss == 1) + rtmp = rsdt; + else { + if(rsdt == rddt || rds != 1) + rtmp = new F[cnt],rt = true; + else + rtmp = rddt; + for(i = 0; i < cnt; ++i) rtmp[i] = rsdt[i*rss]; + } + + if(iss == 1) + itmp = isdt; + else { + if(isdt == iddt || ids != 1) + itmp = new F[cnt],it = true; + else + itmp = iddt; + for(i = 0; i < cnt; ++i) itmp[i] = isdt[i*iss]; + } + + BL ret = fft_bidir_complex_radix2(cnt,rtmp,itmp,dir); + + if(ret) { + const F nrm = 1./sqrt(cnt); + +#ifndef VASP_COMPACT + if(rtmp == rddt) + for(i = 0; i < cnt; ++i) rddt[i] *= nrm; + else if(rds == 1) + for(i = 0; i < cnt; ++i) rddt[i] = rtmp[i]*nrm; + else +#endif + for(i = 0; i < cnt; ++i) rddt[i*rds] = rtmp[i]*nrm; + +#ifndef VASP_COMPACT + if(itmp == iddt) + for(i = 0; i < cnt; ++i) iddt[i] *= nrm; + else if(ids == 1) + for(i = 0; i < cnt; ++i) iddt[i] = itmp[i]*nrm; + else +#endif + for(i = 0; i < cnt; ++i) iddt[i*ids] = itmp[i]*nrm; + } + + if(rt) delete[] rtmp; + if(it) delete[] itmp; + + return ret; +} + +inline BL fft_fwd_complex_radix2(I cnt,F *rsdt,I _rss,F *isdt,I _iss,F *rddt,I _rds,F *iddt,I _ids) +{ + return fft_complex_radix2(cnt,rsdt,_rss,isdt,_iss,rddt,_rds,iddt,_ids,1); +} + +inline BL fft_inv_complex_radix2(I cnt,F *rsdt,I _rss,F *isdt,I _iss,F *rddt,I _rds,F *iddt,I _ids) +{ + return fft_complex_radix2(cnt,rsdt,_rss,isdt,_iss,rddt,_rds,iddt,_ids,-1); +} + +/////////////////////////////////////////////////////////////// + +void realfft_split(float *data,int n); +void irealfft_split(float *data,int n); + +// normalize and reverse imaginary part in-place +static void nrmirev(float *data,int n,float fn) +{ + int i; + const I n2 = n/2,n4 = n2/2; + for(i = 0; i <= n2; ++i) data[i] *= fn; + for(i = 1; i < n4; ++i) { + register F tmp = data[n2+i]; + data[n2+i] = data[n-i]*fn; + data[n-i] = tmp*fn; + } + if(n2%2 == 0) data[n2+n4] *= fn; +} + +//! Real forward FFT radix-2 (managing routine) +BL fft_fwd_real_radix2(I cnt,F *src,I _sstr,F *dst,I _dstr) +{ +#ifdef VASP_CHN1 + const I dstr = 1,sstr = 1; +#else + const I dstr = _dstr,sstr = _sstr; +#endif + + register I i; + const I n2 = cnt/2; + const F fn = (F)(1./sqrt(cnt)); + F *stmp; + if(!dst || src == dst) { + // in-place + + if(sstr == 1) + stmp = src; + else { + stmp = new F[cnt]; + for(i = 0; i < cnt; ++i) stmp[i] = src[i*sstr]; + } + + realfft_split(stmp,cnt); + + if(sstr == 1) { + // src == stmp !!! + nrmirev(stmp,cnt,fn); + } + else { + for(i = 0; i <= n2; ++i) src[i*sstr] = stmp[i]*fn; + for(i = 1; i < n2; ++i) src[(n2+i)*sstr] = stmp[cnt-i]*fn; + delete[] stmp; + } + } + else { + // out of place + + if(sstr == 1) + stmp = src; + else { + stmp = dstr == 1?dst:new F[cnt]; + for(i = 0; i < cnt; ++i) stmp[i] = src[i*sstr]; + } + + realfft_split(stmp,cnt); + + if(sstr == 1) { +#ifdef VASP_COMPACT + if(dstr == 1) { + for(i = 0; i <= n2; ++i) dst[i] = stmp[i]*fn; + for(i = 1; i < n2; ++i) dst[n2+i] = stmp[cnt-i]*fn; + } + else +#endif + { + for(i = 0; i <= n2; ++i) dst[i*dstr] = stmp[i]*fn; + for(i = 1; i < n2; ++i) dst[(n2+i)*dstr] = stmp[cnt-i]*fn; + } + } + else { + if(dstr == 1) { + // dst == stmp !!! + nrmirev(stmp,cnt,fn); + } + else { + for(i = 0; i <= n2; ++i) dst[i*dstr] = stmp[i]*fn; + for(i = 1; i < n2; ++i) dst[(n2+i)*dstr] = stmp[cnt-i]*fn; + delete[] dst; + } + } + } + + return true; +} + +//! Real inverse FFT radix-2 (managing routine) +BL fft_inv_real_radix2(I cnt,F *src,I _sstr,F *dst,I _dstr) +{ +#ifdef VASP_CHN1 + const I dstr = 1,sstr = 1; +#else + const I dstr = _dstr,sstr = _sstr; +#endif + + register I i; + const I n2 = cnt/2; + const F fn = (F)(1./sqrt(cnt)); + F *stmp; + if(!dst || src == dst) { + // in-place + + if(sstr == 1) { + stmp = src; + nrmirev(stmp,cnt,fn); + } + else { + stmp = new F[cnt]; + +#ifdef VASP_COMPACT + if(sstr == 1) { + for(i = 0; i <= n2; ++i) stmp[i] = src[i]*fn; + for(i = 1; i < n2; ++i) stmp[cnt-i] = src[n2+i]*fn; + } + else +#endif + { + for(i = 0; i <= n2; ++i) stmp[i] = src[i*sstr]*fn; + for(i = 1; i < n2; ++i) stmp[cnt-i] = src[(n2+i)*sstr]*fn; + } + } + + irealfft_split(stmp,cnt); + + if(sstr != 1) { + for(i = 0; i < cnt; ++i) src[i*sstr] = stmp[i]; + delete[] stmp; + } + } + else { + // out of place + + if(dstr == 1) { + stmp = dst; +#ifdef VASP_COMPACT + if(sstr == 1) { + for(i = 0; i <= n2; ++i) stmp[i] = src[i]*fn; + for(i = 1; i < n2; ++i) stmp[cnt-i] = src[n2+i]*fn; + } + else +#endif + { + for(i = 0; i <= n2; ++i) stmp[i] = src[i*sstr]*fn; + for(i = 1; i < n2; ++i) stmp[cnt-i] = src[(n2+i)*sstr]*fn; + } + } + else { + stmp = new F[cnt]; + if(sstr == 1) { + // dst == stmp !!! + nrmirev(stmp,cnt,fn); + } + else { + for(i = 0; i <= n2; ++i) stmp[i] = src[i*sstr]*fn; + for(i = 1; i < n2; ++i) stmp[cnt-i] = src[(n2+i)*sstr]*fn; + } + } + + irealfft_split(stmp,cnt); + + if(dstr != 1) { + for(i = 0; i < cnt; ++i) dst[i*dstr] = stmp[i]; + delete[] stmp; + } + } + + return true; +} + +/////////////////////////////////////////////////////////////// + +//! Determine if size is radix-2 +static I radix2(I size) +{ + I i,j; + for(i = j = 1; j < size; i++,j <<= 1) (void)0; + return j == size?i:-1; +} + +Vasp *VaspOp::m_rfft(OpParam &p,Vasp &src,Vasp *dst,BL inv) +{ + RVecBlock *vecs = GetRVecs(p.opname,src,dst); + if(vecs) { + BL ok = true; + for(I i = 0; ok && i < vecs->Vecs(); ++i) { + VBuffer *s = vecs->Src(i); + VBuffer *d = vecs->Dst(i); + if(!d) d = s; + + if(vecs->Frames() > 1) + if(radix2(vecs->Frames()) >= 1) + // radix-2 + if(inv) + ok = fft_inv_real_radix2(vecs->Frames(),s->Pointer(),s->Channels(),d->Pointer(),d->Channels()); + else + ok = fft_fwd_real_radix2(vecs->Frames(),s->Pointer(),s->Channels(),d->Pointer(),d->Channels()); + else + // radix-n + if(inv) + ok = fft_inv_real_any(vecs->Frames(),s->Pointer(),s->Channels(),d->Pointer(),d->Channels()); + else + ok = fft_fwd_real_any(vecs->Frames(),s->Pointer(),s->Channels(),d->Pointer(),d->Channels()); + } + return ok?vecs->ResVasp():NULL; + } + else + return NULL; +} + +Vasp *VaspOp::m_cfft(OpParam &p,Vasp &src,Vasp *dst,BL inv) +{ + CVecBlock *vecs = GetCVecs(p.opname,src,dst,true); + if(vecs) { + BL ok = true; + for(I i = 0; ok && i < vecs->Pairs(); ++i) { + VBuffer *sre = vecs->ReSrc(i),*sim = vecs->ImSrc(i); + VBuffer *dre = vecs->ReDst(i),*dim = vecs->ImDst(i); + if(!dre) dre = sre; + if(!dim) dim = sim; + + if(vecs->Frames() > 1) + if(radix2(vecs->Frames()) >= 1) + // radix-2 + if(inv) + ok = fft_inv_complex_radix2(vecs->Frames(),sre->Pointer(),sre->Channels(),sim?sim->Pointer():NULL,sim?sim->Channels():0,dre->Pointer(),dre->Channels(),dim->Pointer(),dim->Channels()); + else + ok = fft_fwd_complex_radix2(vecs->Frames(),sre->Pointer(),sre->Channels(),sim?sim->Pointer():NULL,sim?sim->Channels():0,dre->Pointer(),dre->Channels(),dim->Pointer(),dim->Channels()); + else + // radix-n + if(inv) + ok = fft_inv_complex_any(vecs->Frames(),sre->Pointer(),sre->Channels(),sim?sim->Pointer():NULL,sim?sim->Channels():0,dre->Pointer(),dre->Channels(),dim->Pointer(),dim->Channels()); + else + ok = fft_fwd_complex_any(vecs->Frames(),sre->Pointer(),sre->Channels(),sim?sim->Pointer():NULL,sim?sim->Channels():0,dre->Pointer(),dre->Channels(),dim->Pointer(),dim->Channels()); + } + return ok?vecs->ResVasp():NULL; + } + else + return NULL; +} + +VASP_UNARY("vasp.rfft",rfft,true,"Real DFT") +VASP_UNARY("vasp.r!fft",rifft,true,"Real inverse DFT") +VASP_UNARY("vasp.cfft",cfft,true,"Complex DFT") +VASP_UNARY("vasp.c!fft",cifft,true,"Complex inverse DFT") + -- cgit v1.2.1