From bc6f43fbe1b22b1c2c63a32372126e0eaaaa08b0 Mon Sep 17 00:00:00 2001 From: Thomas Grill Date: Sat, 15 Mar 2003 04:35:48 +0000 Subject: "" svn path=/trunk/; revision=475 --- externals/grill/vasp/changes.txt | 1 + externals/grill/vasp/source/main.cpp | 211 -------------- externals/grill/vasp/source/obj_imm.cpp | 2 +- externals/grill/vasp/source/opbase.cpp | 8 +- externals/grill/vasp/source/opbase.h | 8 +- externals/grill/vasp/source/opdefs.h | 461 +++++++++++++----------------- externals/grill/vasp/source/opfuns.h | 102 +++++-- externals/grill/vasp/source/oploop.h | 94 ++++-- externals/grill/vasp/source/oppermute.h | 10 +- externals/grill/vasp/source/ops_arith.h | 57 ++-- externals/grill/vasp/source/ops_cmp.h | 4 +- externals/grill/vasp/source/ops_flt.cpp | 13 +- externals/grill/vasp/source/ops_rearr.cpp | 23 +- externals/grill/vasp/vasp.cw | Bin 172574 -> 233776 bytes externals/grill/vasp/vasp.dsp | 6 +- 15 files changed, 424 insertions(+), 576 deletions(-) diff --git a/externals/grill/vasp/changes.txt b/externals/grill/vasp/changes.txt index 0f6e4736..7baeedc2 100644 --- a/externals/grill/vasp/changes.txt +++ b/externals/grill/vasp/changes.txt @@ -3,6 +3,7 @@ Version history: 0.1.3: - CHANGE: exchanged #defines for templates in vector calculation loops - FIX: bug for binary operations of complex vasps +- ADD: implemented (factor-4) loop unrolling for vector calculation loops 0.1.2: - FIX: bug in vasp.frames* ... wrong argument diff --git a/externals/grill/vasp/source/main.cpp b/externals/grill/vasp/source/main.cpp index 504f90ba..3b2c0f75 100644 --- a/externals/grill/vasp/source/main.cpp +++ b/externals/grill/vasp/source/main.cpp @@ -1,422 +1,211 @@ /* - - VASP modular - vector assembling signal processor / objects for Max/MSP and PD - - Copyright (c) 2002 Thomas Grill (xovo@gmx.net) - For information on usage and redistribution, and for a DISCLAIMER OF ALL - WARRANTIES, see the file, "license.txt," in this distribution. - - */ - - #include "main.h" - #include "classes.h" - - - const C *VASP_VERSION = "0.1.3pre1"; - - V lib_setup() - { - post(""); - post("-----------------------------------------"); - post(" VASP modular %s ",VASP_VERSION); - post(" vector assembling signal processor "); - post(" (C)2002-2003 Thomas Grill "); - #ifdef FLEXT_DEBUG - post(" DEBUG BUILD - " __DATE__ " " __TIME__); - #endif - post(""); - post(" http://www.parasitaere-kapazitaeten.net "); - post("-----------------------------------------"); - post(""); - - // call the objects' setup routines - - VASP_SETUP(v); // vasp - VASP_SETUP(multi); // vasp.m - - VASP_SETUP(check); // vasp.check - VASP_SETUP(update); // vasp.update - // VASP_SETUP(post); // vasp.post - - VASP_SETUP(sync); // vasp.sync - - - VASP_SETUP(radio); // vasp.radio - - VASP_SETUP(vector); // vasp.vector - VASP_SETUP(qvectors); // vasp.vectors? - - VASP_SETUP(size); // vasp.size - VASP_SETUP(dsize); // vasp.size+ - VASP_SETUP(qsize); // vasp.size? - VASP_SETUP(msize); // vasp.size* - VASP_SETUP(rsize); // vasp.size/ - - VASP_SETUP(offset); // vasp.offset - VASP_SETUP(doffset); // vasp.offset+ - VASP_SETUP(qoffset); // vasp.offset? - - VASP_SETUP(frames); // vasp.frames - VASP_SETUP(dframes); // vasp.frames+ - VASP_SETUP(qframes); // vasp.frames? - VASP_SETUP(mframes); // vasp.frames* - VASP_SETUP(rframes); // vasp.frames/ - - VASP_SETUP(channel); // vasp.channel - VASP_SETUP(qchannel); // vasp.channel? - - VASP_SETUP(split); - VASP_SETUP(join); - VASP_SETUP(spit); - VASP_SETUP(gather); - VASP_SETUP(part); - - VASP_SETUP(list); - VASP_SETUP(nonzero); - - VASP_SETUP(imm); // vasp.imm - - VASP__SETUP(set); - VASP__SETUP(cset); - VASP_SETUP(copy); - VASP_SETUP(ccopy); - - VASP__SETUP(add); - VASP__SETUP(cadd); - VASP__SETUP(sub); - VASP__SETUP(csub); - VASP__SETUP(subr); - VASP__SETUP(csubr); - VASP__SETUP(mul); - VASP__SETUP(cmul); - VASP__SETUP(div); - VASP__SETUP(cdiv); - VASP__SETUP(divr); - VASP__SETUP(cdivr); - VASP__SETUP(mod); - - VASP__SETUP(sign) - VASP__SETUP(abs) - VASP__SETUP(cabs) - - VASP__SETUP(lwr) - VASP__SETUP(gtr) - VASP__SETUP(alwr) - VASP__SETUP(agtr) - VASP__SETUP(leq) - VASP__SETUP(geq) - VASP__SETUP(aleq) - VASP__SETUP(ageq) - VASP__SETUP(equ) - VASP__SETUP(neq) - - VASP__SETUP(min) - VASP__SETUP(rmin) - VASP__SETUP(max) - VASP__SETUP(rmax) - - VASP__SETUP(minmax) - - VASP_SETUP(qmin) - VASP_SETUP(qmax) - VASP_SETUP(qamin) - VASP_SETUP(qamax) - VASP_SETUP(qrmin) - VASP_SETUP(qrmax) - - VASP__SETUP(gate); - VASP__SETUP(rgate); - // VASP__SETUP(igate); - // VASP__SETUP(rigate); - - VASP_SETUP(peaks) - VASP_SETUP(valleys) - VASP_SETUP(rpeaks) - VASP_SETUP(rvalleys) - - VASP_SETUP(qpeaks); - - /* - VASP_SETUP(qvalleys); - VASP_SETUP(qrpeaks); - VASP_SETUP(qrvalleys); - */ - - VASP__SETUP(sqr) - VASP__SETUP(ssqr) - VASP__SETUP(csqr) - VASP__SETUP(sqrt) - VASP__SETUP(ssqrt) - VASP__SETUP(pow) - VASP__SETUP(cpowi) - VASP__SETUP(rpow); - VASP__SETUP(radd); - - VASP__SETUP(exp) - VASP__SETUP(log) - - VASP__SETUP(polar) - VASP__SETUP(rect) - - VASP__SETUP(cnorm) - // VASP__SETUP(cswap) - VASP__SETUP(cconj) - - VASP_SETUP(shift) - VASP_SETUP(xshift) - VASP__SETUP(rot) - VASP__SETUP(xrot) - VASP__SETUP(mirr) - VASP__SETUP(xmirr) - - VASP__SETUP(osc) - VASP__SETUP(mosc) - VASP__SETUP(cosc) - VASP__SETUP(mcosc) - VASP__SETUP(phasor) - VASP__SETUP(mphasor) - VASP__SETUP(noise) - VASP__SETUP(cnoise) - - VASP__SETUP(window) - VASP__SETUP(mwindow) - VASP__SETUP(iwindow) - VASP__SETUP(miwindow) - VASP__SETUP(xwindow) - VASP__SETUP(mxwindow) - - VASP__SETUP(flp) - VASP__SETUP(fhp) - - VASP__SETUP(int) - VASP__SETUP(dif) - - VASP__SETUP(fix) - - VASP__SETUP(tilt) - VASP__SETUP(xtilt) - - VASP__SETUP(soffset) - VASP__SETUP(sframes) - - VASP__SETUP(rfft) - VASP__SETUP(rifft) - VASP__SETUP(cfft) - VASP__SETUP(cifft) - } - - FLEXT_LIB_SETUP(vasp,lib_setup) -/////////////////////////////////////////////////////////////////////////// - - - diff --git a/externals/grill/vasp/source/obj_imm.cpp b/externals/grill/vasp/source/obj_imm.cpp index 35ea1a92..1c324e37 100644 --- a/externals/grill/vasp/source/obj_imm.cpp +++ b/externals/grill/vasp/source/obj_imm.cpp @@ -84,7 +84,7 @@ public: S *dst = imm.Pointer(); const S *src = buf->Pointer(); register int i; - _D_LOOP(i,len) *(dst++) = *src,src += chns; _E_LOOP + _DE_LOOP(i,len, ( *(dst++) = *src,src += chns ) ) Vasp ret(len,Vasp::Ref(imm)); ToOutVasp(0,ret); diff --git a/externals/grill/vasp/source/opbase.cpp b/externals/grill/vasp/source/opbase.cpp index f3d10ea4..c33a5782 100644 --- a/externals/grill/vasp/source/opbase.cpp +++ b/externals/grill/vasp/source/opbase.cpp @@ -12,7 +12,7 @@ WARRANTIES, see the file, "license.txt," in this distribution. #include "opbase.h" #include "opdefs.h" -Vasp *VaspOp::m_run(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun *fun) +Vasp *VaspOp::m_run(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun fun) { Vasp *ret = NULL; RVecBlock *vecs = GetRVecs(p.opname,src,dst); @@ -24,7 +24,7 @@ Vasp *VaspOp::m_run(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun *fun) return ret; } -Vasp *VaspOp::m_cun(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun *fun) +Vasp *VaspOp::m_cun(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun fun) { Vasp *ret = NULL; CVecBlock *vecs = GetCVecs(p.opname,src,dst); @@ -36,7 +36,7 @@ Vasp *VaspOp::m_cun(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun *fun) return ret; } -Vasp *VaspOp::m_rbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp::opfun *fun) +Vasp *VaspOp::m_rbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp::opfun fun) { Vasp *ret = NULL; BL argvasp = arg.IsVasp(); @@ -58,7 +58,7 @@ Vasp *VaspOp::m_rbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp: return ret; } -Vasp *VaspOp::m_cbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp::opfun *fun) +Vasp *VaspOp::m_cbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp::opfun fun) { Vasp *ret = NULL; BL argvasp = arg.IsVasp(); diff --git a/externals/grill/vasp/source/opbase.h b/externals/grill/vasp/source/opbase.h index 47cc61cf..3d9b81c6 100644 --- a/externals/grill/vasp/source/opbase.h +++ b/externals/grill/vasp/source/opbase.h @@ -101,11 +101,11 @@ namespace VaspOp { // -------- transformations ----------------------------------- // unary functions - Vasp *m_run(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun *fun); // real unary (one vec or real) - Vasp *m_cun(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun *fun); // complex unary (one vec or complex) + Vasp *m_run(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun fun); // real unary (one vec or real) + Vasp *m_cun(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun fun); // complex unary (one vec or complex) // binary functions - Vasp *m_rbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp::opfun *fun); // real binary (one vec or real) - Vasp *m_cbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp::opfun *fun); // complex binary (one vec or complex) + Vasp *m_rbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp::opfun fun); // real binary (one vec or real) + Vasp *m_cbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp::opfun fun); // complex binary (one vec or complex) } #endif diff --git a/externals/grill/vasp/source/opdefs.h b/externals/grill/vasp/source/opdefs.h index 18863f61..47006db6 100644 --- a/externals/grill/vasp/source/opdefs.h +++ b/externals/grill/vasp/source/opdefs.h @@ -20,345 +20,288 @@ WARRANTIES, see the file, "license.txt," in this distribution. #define _D_ALWAYS1 0 #endif + namespace VecOp { /*! \brief skeleton for unary real operations */ -template BL _F__run(OpParam &p) +//template BL V__run(register const T *sr,I rss,register T *dr,I rds,I frames) +template BL V__run(register const T *sr,I rss,register T *dr,I rds,I frames) { - register const S *sr = p.rsdt; - register S *dr = p.rddt; - register I i; - if(sr == dr) - if(_D_ALWAYS1 || p.rds == 1) - _D_LOOP(i,p.frames) - { FUN(*dr,*dr); dr++; } - _E_LOOP + register I i; + if(sr == dr) + if(_D_ALWAYS1 || rds == 1) + _DE_LOOP(i,frames, ( OP::run(*dr,*dr), dr++ ) ) else - _D_LOOP(i,p.frames) - { FUN(*dr,*dr); dr += p.rds; } - _E_LOOP + _DE_LOOP(i,frames, ( OP::run(*dr,*dr), dr += rds ) ) else - if(_D_ALWAYS1 || (p.rss == 1 && p.rds == 1)) - _D_LOOP(i,p.frames) - { FUN(*dr,*sr); sr++,dr++; } - _E_LOOP + if(_D_ALWAYS1 || (rss == 1 && rds == 1)) + _DE_LOOP(i,frames, ( OP::run(*dr,*sr), sr++,dr++ ) ) else - _D_LOOP(i,p.frames) - { FUN(*dr,*sr); sr += p.rss,dr += p.rds; } - _E_LOOP + _DE_LOOP(i,frames, ( OP::run(*dr,*sr), sr += rss,dr += rds ) ) return true; } -template inline BL _D__run(OpParam &p) { return _F__run(p); } -template inline BL d__run(OpParam &p) { return _d__run(CL::run,p); } /*! \brief skeleton for unary complex operations */ -template BL _F__cun(OpParam &p) +template BL V__cun(register const T *sr,register const T *si,I rss,I iss,register T *dr,register T *di,I rds,I ids,I frames) { - register const S *sr = p.rsdt,*si = p.isdt; - register S *dr = p.rddt,*di = p.iddt; - register I i; + register I i; if(sr == dr && si == di) - if(_D_ALWAYS1 || (p.rds == 1 && p.ids == 1)) - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*dr,*di); dr++,di++; } - _E_LOOP + if(_D_ALWAYS1 || (rds == 1 && ids == 1)) + _DE_LOOP(i,frames, ( OP::cun(*dr,*di,*dr,*di), dr++,di++ ) ) else - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*dr,*di); dr += p.rds,di += p.ids; } - _E_LOOP + _DE_LOOP(i,frames, ( OP::cun(*dr,*di,*dr,*di), dr += rds,di += ids ) ) else - if(_D_ALWAYS1 || (p.rss == 1 && p.iss == 1 && p.rds == 1 && p.ids == 1)) - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*sr,*si); sr++,si++,dr++,di++; } - _E_LOOP + if(_D_ALWAYS1 || (rss == 1 && iss == 1 && rds == 1 && ids == 1)) + _DE_LOOP(i,frames, ( OP::cun(*dr,*di,*sr,*si), sr++,si++,dr++,di++ ) ) else - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*sr,*si); sr += p.rss,si += p.iss,dr += p.rds,di += p.ids; } - _E_LOOP + _DE_LOOP(i,frames, ( OP::cun(*dr,*di,*sr,*si), sr += rss,si += iss,dr += rds,di += ids ) ) return true; } -template inline BL _D__cun(OpParam &p) { return _F__cun(p); } -template inline BL d__cun(OpParam &p) { return _d__cun(CL::cun,p); } +template BL V__vun(I layers,register const T *sr,register T *dr,I frames) +{ + register I i; + switch(layers) { + case 1: + V__run(sr,1,dr,1,frames); + break; + case 2: + _DF_LOOP(i,frames, ( vec_un(dr,sr), sr += 2, dr += 2) ) + break; + case 3: + _DF_LOOP(i,frames, ( vec_un(dr,sr), sr += 3, dr += 3) ) + break; + case 4: + _DF_LOOP(i,frames, ( vec_un(dr,sr), sr += 4, dr += 4) ) + break; + default: + _DF_LOOP(i,frames, ( vec_un(dr,sr,layers), sr += layers, dr += layers) ) + break; + } + return true; +} + + +template BL V__rbin(register const T *sr,I rss,register T *dr,I rds,register const T *ar,I ras,I frames) +{ + register I i; + if(sr == dr) + if(_D_ALWAYS1 || (rds == 1 && ras == 1)) + _DE_LOOP(i,frames, ( OP::rbin(*dr,*dr,*ar), dr++,ar++ ) ) + else + _DE_LOOP(i,frames, ( OP::rbin(*dr,*dr,*ar), dr += rds,ar += ras ) ) + else + if(_D_ALWAYS1 || (rss == 1 && rds == 1 && ras == 1)) + _DE_LOOP(i,frames, ( OP::rbin(*dr,*sr,*ar), sr++,dr++,ar++ ) ) + else + _DE_LOOP(i,frames, ( OP::rbin(*dr,*sr,*ar), sr += rss,dr += rds,ar += ras ) ) + return true; +} + +template BL V__cbin(register const T *sr,register const T *si,I rss,I iss,register T *dr,register T *di,I ids,I rds,const T *ar,const T *ai,I ras,I ias,I frames) +{ + register I i; + if(sr == dr && si == di) + if(_D_ALWAYS1 || (rds == 1 && ids == 1 && ras == 1 && ias == 1)) + _DE_LOOP(i,frames, ( OP::cbin(*dr,*di,*dr,*di,*ar,*ai), dr++,di++,ar++,ai++ ) ) + else + _DE_LOOP(i,frames, ( OP::cbin(*dr,*di,*dr,*di,*ar,*ai), dr += rds,di += ids,ar += ras,ai += ias ) ) + else + _DE_LOOP(i,frames, ( OP::cbin(*dr,*di,*sr,*si,*ar,*ai), sr += rss,si += iss,dr += rds,di += ids,ar += ras,ai += ias ) ) + return true; +} + + +template class _A__vector { +public: + static TR ev(const TR *a,I i,I m) { return a[i*m]; } +}; + +template class _A__scalar { +public: + static TR ev(TR a,I i,I m) { return a; } +}; + +class _A__env { +public: + static R ev(Env::Iter &a,I i,I m) { return a.ValFwd(i); } +}; + +template BL Vx__rbin(register const T *sr,I rss,register T *dr,I rds,TA ar,I frames) +{ + register I i; + if(sr == dr) + if(_D_ALWAYS1 || rds == 1) + _DE_LOOP(i,frames, ( OP::rbin(*dr,*dr,EVARG::ev(ar,i,1)), dr++ ) ) + else + _DE_LOOP(i,frames, ( OP::rbin(*dr,*dr,EVARG::ev(ar,i,1)), dr += rds ) ) + else + if(_D_ALWAYS1 || rss == 1 && rds == 1) + _DE_LOOP(i,frames, ( OP::rbin(*dr,*sr,EVARG::ev(ar,i,1)), sr++,dr++ ) ) + else + _DE_LOOP(i,frames, ( OP::rbin(*dr,*sr,EVARG::ev(ar,i,1)), sr += rss,dr += rds ) ) + return true; +} + +template BL Vx__cbin(register const T *sr,register const T *si,I rss,I iss,register T *dr,register T *di,I ids,I rds,TA1 ar,TA2 ai,I ras,I ias,I frames) +{ + register I i; + if(sr == dr && si == di) + if(_D_ALWAYS1 || (rds == 1 && ids == 1 && ras == 1 && ias == 1)) + _DE_LOOP(i,frames, ( OP::cbin(*dr,*di,*dr,*di,EVARG1::ev(ar,i,1),EVARG2::ev(ai,i,1)), dr++,di++ ) ) + else + _DE_LOOP(i,frames, ( OP::cbin(*dr,*di,*dr,*di,EVARG1::ev(ar,i,ras),EVARG2::ev(ai,i,ias)), dr += rds,di += ids ) ) + else + _DE_LOOP(i,frames, ( OP::cbin(*dr,*di,*sr,*si,EVARG1::ev(ar,i,ras),EVARG2::ev(ai,i,ias)), sr += rss,si += iss,dr += rds,di += ids ) ) + return true; +} +template BL Vx__vbin(I layers,register const T *sr,register T *dr,TA ar,I frames) +{ + register I i; + switch(layers) { + case 1: + Vx__rbin(sr,1,dr,1,ar,frames); + break; + case 2: + _DF_LOOP(i,frames, ( vec_bin(dr,sr,EVARG::ev(ar,i,2)), sr += 2, dr += 2) ) + break; + case 3: + _DF_LOOP(i,frames, ( vec_bin(dr,sr,EVARG::ev(ar,i,3)), sr += 3, dr += 3) ) + break; + case 4: + _DF_LOOP(i,frames, ( vec_bin(dr,sr,EVARG::ev(ar,i,4)), sr += 4, dr += 4) ) + break; + default: + _DF_LOOP(i,frames, ( vec_bin(dr,sr,EVARG::ev(ar,i,layers),layers), sr += layers, dr += layers) ) + break; + } + return true; +} + +template inline BL V__vbin(I layers,register const T *sr,register T *dr,register const T *ar,I frames) +{ + return Vx__vbin >(layers,sr,dr,ar,frames); +} /*! \brief skeleton for binary real operations */ -template BL _F__rbin(OpParam &p) +template BL _F__rbin(OpParam &p) { - register const S *sr = p.rsdt; - register S *dr = p.rddt; - register I i; if(p.HasArg() && p.arg[0].Is()) { switch(p.arg[0].argtp) { - case OpParam::Arg::arg_v: { - register const S *ar = p.arg[0].v.rdt; - if(p.rsdt == p.rddt) - if(_D_ALWAYS1 || (p.rds == 1 && p.arg[0].v.rs == 1)) - _D_LOOP(i,p.frames) - { FUN(*dr,*dr,*ar); dr++,ar++; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*dr,*ar); dr += p.rds,ar += p.arg[0].v.rs; } - _E_LOOP - else - if(_D_ALWAYS1 || (p.rss == 1 && p.rds == 1 && p.arg[0].v.rs == 1)) - _D_LOOP(i,p.frames) - { FUN(*dr,*sr,*ar); sr++,dr++,ar++; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*sr,*ar); sr += p.rss,dr += p.rds,ar += p.arg[0].v.rs; } - _E_LOOP + case OpParam::Arg::arg_v: { + V__rbin(p.rsdt,p.rss,p.rddt,p.rds,p.arg[0].v.rdt,p.arg[0].v.rs,p.frames); break; } case OpParam::Arg::arg_env: { - Env::Iter it(*p.arg[0].e.env); it.Init(0); - if(p.rsdt == p.rddt) - if(_D_ALWAYS1 || p.rds == 1) - _D_LOOP(i,p.frames) - { FUN(*dr,*dr,it.ValFwd(i)); dr++; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*dr,it.ValFwd(i)); dr += p.rds; } - _E_LOOP - else - if(_D_ALWAYS1 || (p.rss == 1 && p.rds == 1)) - _D_LOOP(i,p.frames) - { FUN(*dr,*sr,it.ValFwd(i)); sr++,dr++; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*sr,it.ValFwd(i)); sr += p.rss,dr += p.rds; } - _E_LOOP + Env::Iter it(*p.arg[0].e.env); it.Init(0); + Vx__rbin(p.rsdt,p.rss,p.rddt,p.rds,it,p.frames); break; } case OpParam::Arg::arg_x: { - const R v = p.arg[0].x.r; - if(p.rsdt == p.rddt) - if(_D_ALWAYS1 || p.rds == 1) - _D_LOOP(i,p.frames) - { FUN(*dr,*dr,v); dr++; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*dr,v); dr += p.rds; } - _E_LOOP - else - if(_D_ALWAYS1 || (p.rss == 1 && p.rds == 1)) - _D_LOOP(i,p.frames) - { FUN(*dr,*sr,v); sr++,dr++; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*sr,v); sr += p.rss,dr += p.rds; } - _E_LOOP + Vx__rbin >(p.rsdt,p.rss,p.rddt,p.rds,p.arg[0].x.r,p.frames); break; } } } else { - register const S v = p.rbin.arg; - if(p.rsdt == p.rddt) - if(_D_ALWAYS1 || p.rds == 1) - _D_LOOP(i,p.frames) - { FUN(*dr,*dr,v); dr++; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*dr,v); dr += p.rds; } - _E_LOOP - else - if(_D_ALWAYS1 || (p.rss == 1 && p.rds == 1)) - _D_LOOP(i,p.frames) - { FUN(*dr,*sr,v); sr++,dr++; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*sr,v); sr += p.rss,dr += p.rds; } - _E_LOOP + Vx__rbin >(p.rsdt,p.rss,p.rddt,p.rds,p.rbin.arg,p.frames); } return true; } -template inline BL _D__rbin(OpParam &p) { return _F__rbin(p); } -template inline BL d__rbin(OpParam &p) { return _d__rbin(CL::rbin,p); } - /*! \brief skeleton for binary complex operations */ -template BL _F__cbin(OpParam &p) +template BL _F__cbin(OpParam &p) { - register const S *sr = p.rsdt,*si = p.isdt; - register S *dr = p.rddt,*di = p.iddt; - register I i; if(p.HasArg() && p.arg[0].Is()) { switch(p.arg[0].argtp) { case OpParam::Arg::arg_v: { - register const S *ar = p.arg[0].v.rdt,*ai = p.arg[0].v.idt; - if(ai) - if(sr == dr && si == di) - if(_D_ALWAYS1 || (p.rds == 1 && p.ids == 1 && p.arg[0].v.rs == 1 && p.arg[0].v.is == 1)) - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*dr,*di,*ar,*ai); dr++,di++,ar++,ai++; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*dr,*di,*ar,*ai); dr += p.rds,di += p.ids,ar += p.arg[0].v.rs,ai += p.arg[0].v.is; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*sr,*si,*ar,*ai); sr += p.rss,si += p.iss,dr += p.rds,di += p.ids,ar += p.arg[0].v.rs,ai += p.arg[0].v.is; } - _E_LOOP - else - if(sr == dr && si == di) - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*dr,*di,*ar,0); dr += p.rds,di += p.ids,ar += p.arg[0].v.rs; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*sr,*si,*ar,0); sr += p.rss,si += p.iss,dr += p.rds,di += p.ids,ar += p.arg[0].v.rs; } - _E_LOOP + if(p.arg[0].v.idt) + V__cbin(p.rsdt,p.isdt,p.rss,p.iss,p.rddt,p.iddt,p.rds,p.ids,p.arg[0].v.rdt,p.arg[0].v.idt,p.arg[0].v.rs,p.arg[0].v.is,p.frames); + else + Vx__cbin,_A__scalar >(p.rsdt,p.isdt,p.rss,p.iss,p.rddt,p.iddt,p.rds,p.ids,p.arg[0].v.rdt,0,p.arg[0].v.rs,1,p.frames); break; } case OpParam::Arg::arg_env: { Env::Iter it(*p.arg[0].e.env); it.Init(0); - if(sr == dr && si == di) - if(_D_ALWAYS1 || (p.rds == 1 && p.ids == 1)) - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*dr,*di,it.ValFwd(i),0); dr++,di++; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*dr,*di,it.ValFwd(i),0); dr += p.rds,di += p.ids; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*sr,*si,it.ValFwd(i),0); sr += p.rss,si += p.iss,dr += p.rds,di += p.ids; } - _E_LOOP + Vx__cbin >(p.rsdt,p.isdt,p.rss,p.iss,p.rddt,p.iddt,p.rds,p.ids,it,0,1,1,p.frames); break; } case OpParam::Arg::arg_x: { - register const R ar = p.arg[0].x.r,ai = p.arg[0].x.i; - if(sr == dr && si == di) - if(_D_ALWAYS1 || (p.rds == 1 && p.ids == 1)) - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*dr,*di,ar,ai); dr++,di++; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*dr,*di,ar,ai); dr += p.rds,di += p.ids; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*sr,*si,ar,ai); sr += p.rss,si += p.iss,dr += p.rds,di += p.ids; } - _E_LOOP + Vx__cbin,_A__scalar >(p.rsdt,p.isdt,p.rss,p.iss,p.rddt,p.iddt,p.rds,p.ids,p.arg[0].x.r,p.arg[0].x.i,1,1,p.frames); break; } } } else { - register const S rv = p.cbin.rarg,iv = p.cbin.iarg; - if(sr == dr && si == di) - if(_D_ALWAYS1 || (p.rds == 1 && p.ids == 1)) - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*dr,*di,rv,iv); dr++,di++; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*dr,*di,rv,iv); dr += p.rds,di += p.ids; } - _E_LOOP - else - if(_D_ALWAYS1 || (p.rds == 1 && p.ids == 1 && p.rss == 1 && p.iss == 1)) - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*sr,*si,rv,iv); sr++,si++,dr++,di++; } - _E_LOOP - else - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*sr,*si,rv,iv); sr += p.rss,si += p.iss,dr += p.rds,di += p.ids; } - _E_LOOP + Vx__cbin,_A__scalar >(p.rsdt,p.isdt,p.rss,p.iss,p.rddt,p.iddt,p.rds,p.ids,p.cbin.rarg,p.cbin.iarg,1,1,p.frames); } return true; } -template inline BL _D__cbin(OpParam &p) { return _F__cbin(p); } -template inline BL d__cbin(OpParam &p) { return _d__cbin(CL::cbin,p); } - - /*! \brief skeleton for real operations with parameter block */ -template BL _F__rop(OpParam &p) +template BL V__rop(ARG p,register const S *sr,I rss,register S *dr,I rds,I frames) { - register const S *sr = p.rsdt; - register S *dr = p.rddt; register I i; if(sr == dr) - if(_D_ALWAYS1 || p.rds == 1) - _D_LOOP(i,p.frames) - { FUN(*dr,*dr,p); dr++; } - _E_LOOP + if(_D_ALWAYS1 || rds == 1) + _DE_LOOP(i,frames, ( OP::rop(*dr,*dr,p), dr++ ) ) else - _D_LOOP(i,p.frames) - { FUN(*dr,*dr,p); dr += p.rds; } - _E_LOOP + _DE_LOOP(i,frames, ( OP::rop(*dr,*dr,p), dr += rds ) ) else - if(_D_ALWAYS1 || (p.rss == 1 && p.rds == 1)) - _D_LOOP(i,p.frames) - { FUN(*dr,*sr,p); sr++,dr++; } - _E_LOOP + if(_D_ALWAYS1 || (rss == 1 && p.rds == 1)) + _DE_LOOP(i,frames, ( OP::rop(*dr,*sr,p), sr++,dr++ ) ) else - _D_LOOP(i,p.frames) - { FUN(*dr,*sr,p); sr += p.rss,dr += p.rds; } - _E_LOOP + _DE_LOOP(i,frames, ( OP::rop(*dr,*sr,p), sr += rss,dr += rds ) ) return true; } -template inline BL _D__rop(OpParam &p) { return _F__rop(p); } -template inline BL d__rop(OpParam &p) { return _d__rop(CL::rop,p); } - - /*! \brief skeleton for complex operations with parameter block */ -template BL _F__cop(OpParam &p) +template BL V__cop(ARG p,register const S *sr,register const S *si,I rss,I iss,register S *dr,register S *di,I rds,I ids,I frames) { - register const S *sr = p.rsdt,*si = p.isdt; - register S *dr = p.rddt,*di = p.iddt; register I i; if(sr == dr && si == di) - if(_D_ALWAYS1 || (p.rds == 1 && p.ids == 1)) - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*dr,*di,p); dr++,di++; } - _E_LOOP + if(_D_ALWAYS1 || (rds == 1 && ids == 1)) + _DE_LOOP(i,frames, ( OP::cop(*dr,*di,*dr,*di,p), dr++,di++ ) ) else - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*dr,*di,p); dr += p.rds,di += p.ids; } - _E_LOOP + _DE_LOOP(i,frames, ( OP::cop(*dr,*di,*dr,*di,p), dr += rds,di += ids ) ) else if(_D_ALWAYS1 || (p.rss == 1 && p.iss == 1 && p.rds == 1 && p.ids == 1)) - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*sr,*si,p); sr++,si++,dr++,di++; } - _E_LOOP + _DE_LOOP(i,frames, ( OP::cop(*dr,*di,*sr,*si,p), sr++,si++,dr++,di++ ) ) else - _D_LOOP(i,p.frames) - { FUN(*dr,*di,*sr,*si,p); sr += p.rss,si += p.iss,dr += p.rds,di += p.ids; } - _E_LOOP + _DE_LOOP(i,frames, ( OP::cop(*dr,*di,*sr,*si,p), sr += rss,si += iss,dr += rds,di += ids ) ) return true; } -template inline BL _D__cop(OpParam &p) { return _F__cop(p); } +template inline BL _D__run(OpParam &p) { return V__run(p.rsdt,p.rss,p.rddt,p.rds,p.frames); } +template inline BL _D__cun(OpParam &p) { return V__cun(p.rsdt,p.isdt,p.rss,p.iss,p.rddt,p.iddt,p.rds,p.ids,p.frames); } +template inline BL _D__rbin(OpParam &p) { return _F__rbin(p); } +template inline BL _D__cbin(OpParam &p) { return _F__cbin(p); } +template inline BL _D__rop(OpParam &p) { return V__rop(p,p.rsdt,p.rss,p.rddt,p.rds,p.frames); } +template inline BL _D__cop(OpParam &p) { return V__cop(p,p.rsdt,p.isdt,p.rss,p.iss,p.rddt,p.iddt,p.rds,p.ids,p.frames); } + + +template inline BL d__run(OpParam &p) { return _d__run(CL::run,p); } +template inline BL d__cun(OpParam &p) { return _d__cun(CL::cun,p); } +template inline BL d__rbin(OpParam &p) { return _d__rbin(CL::rbin,p); } +template inline BL d__cbin(OpParam &p) { return _d__cbin(CL::cbin,p); } +template inline BL d__rop(OpParam &p) { return _d__rop(CL::rop,p); } template inline BL d__cop(OpParam &p) { return _d__cop(CL::cop,p); } -template inline BL f__run(OpParam &p) { return _d__run(FUN,p); } -template inline BL f__cun(OpParam &p) { return _d__cun(FUN,p); } -template inline BL f__rbin(OpParam &p) { return _d__rbin(FUN,p); } -template inline BL f__cbin(OpParam &p) { return _d__cbin(FUN,p); } -template inline BL f__rop(OpParam &p) { return _d__rop(FUN,p); } -template inline BL f__cop(OpParam &p) { return _d__cop(FUN,p); } +template inline BL f__run(OpParam &p) { return _d__run(CL::run,p); } +template inline BL f__cun(OpParam &p) { return _d__cun(CL::cun,p); } +template inline BL f__rbin(OpParam &p) { return _d__rbin(CL::rbin,p); } +template inline BL f__cbin(OpParam &p) { return _d__cbin(CL::cbin,p); } +template inline BL f__rop(OpParam &p) { return _d__rop(CL::rop,p); } +template inline BL f__cop(OpParam &p) { return _d__cop(CL::cop,p); } #ifdef VASP_COMPACT @@ -368,12 +311,12 @@ template inline BL D__rbin(OpParam &p) { return _d__rbin(CL template inline BL D__cbin(OpParam &p) { return _d__cbin(CL::cbin,p); } template inline BL D__rop(OpParam &p) { return _d__rop(CL::rop,p); } template inline BL D__cop(OpParam &p) { return _d__cop(CL::cop,p); } -template inline BL F__run(OpParam &p) { return _d__run(FUN,p); } -template inline BL F__cun(OpParam &p) { return _d__cun(FUN,p); } -template inline BL F__rbin(OpParam &p) { return _d__rbin(FUN,p); } -template inline BL F__cbin(OpParam &p) { return _d__cbin(FUN,p); } -template inline BL F__rop(OpParam &p) { return _d__rop(FUN,p); } -template inline BL F__cop(OpParam &p) { return _d__cop(FUN,p); } +template inline BL F__run(OpParam &p) { return _d__run(CL::run,p); } +template inline BL F__cun(OpParam &p) { return _d__cun(CL::cun,p); } +template inline BL F__rbin(OpParam &p) { return _d__rbin(CL::rbin,p); } +template inline BL F__cbin(OpParam &p) { return _d__cbin(CL::cbin,p); } +template inline BL F__rop(OpParam &p) { return _d__rop(CL::rop,p); } +template inline BL F__cop(OpParam &p) { return _d__cop(CL::cop,p); } #else template inline BL D__run(OpParam &p) { return _D__run(p); } template inline BL D__cun(OpParam &p) { return _D__cun(p); } @@ -381,12 +324,12 @@ template inline BL D__rbin(OpParam &p) { return _D__rbin template inline BL D__cbin(OpParam &p) { return _D__cbin(p); } template inline BL D__rop(OpParam &p) { return _D__rop(p); } template inline BL D__cop(OpParam &p) { return _D__cop(p); } -template inline BL F__run(OpParam &p) { return _F__run(p); } -template inline BL F__cun(OpParam &p) { return _F__cun(p); } -template inline BL F__rbin(OpParam &p) { return _F__rbin(p); } -template inline BL F__cbin(OpParam &p) { return _F__cbin(p); } -template inline BL F__rop(OpParam &p) { return _F__rop(p); } -template inline BL F__cop(OpParam &p) { return _F__cop(p); } +template inline BL F__run(OpParam &p) { return V__run(p.rsdt,p.rss,p.rddt,p.rds,p.frames); } +template inline BL F__cun(OpParam &p) { return V__cun(p.rsdt,p.isdt,p.rss,p.iss,p.rddt,p.iddt,p.rds,p.ids,p.frames); } +template inline BL F__rbin(OpParam &p) { return _F__rbin(p); } +template inline BL V__cbin(OpParam &p) { return _F__cbin(p); } +template inline BL F__rop(OpParam &p) { return V__rop(p,p.rsdt,p.rss,p.rddt,p.rds,p.frames); } +template inline BL F__cop(OpParam &p) { return V__cop(p,p.rsdt,p.isdt,p.rss,p.iss,p.rddt,p.iddt,p.rds,p.ids,p.frames); } #endif } // namespace VecOp diff --git a/externals/grill/vasp/source/opfuns.h b/externals/grill/vasp/source/opfuns.h index 99fca899..5ab8d48a 100644 --- a/externals/grill/vasp/source/opfuns.h +++ b/externals/grill/vasp/source/opfuns.h @@ -13,8 +13,44 @@ WARRANTIES, see the file, "license.txt," in this distribution. #include "opdefs.h" + namespace VecOp { + // multi-layer templates + + template + static V vec_un(T *v,const T *a,I n = 0) { + const I _n = N?N:n; + for(I i = 0; i < _n; ++i) FUN(v[i],a[i]); + } + + template + static V vec_un(T *v,T a,I n = 0) { + const I _n = N?N:n; + for(I i = 0; i < _n; ++i) FUN(v[i],a); + } + + template + static V vec_bin(T *v,const T *a,const T *b,I n = 0) { + const I _n = N?N:n; + for(I i = 0; i < _n; ++i) FUN(v[i],a[i],b[i]); + } + + template + static V vec_bin(T *v,const T *a,T b,I n = 0) { + const I _n = N?N:n; + for(I i = 0; i < _n; ++i) FUN(v[i],a[i],b); + } + + + template + static V cvec_un(T *v,const T *a,I n = 0) { vec_un(v,a,n); } + + template + static V cvec_bin(T *v,const T *a,const T *b,I n = 0) { vec_bin(v,a,b,n); } + + + // assignment template class f_copy { @@ -61,7 +97,7 @@ namespace VecOp { static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { - register const R den = sqabs(rb,ib); + register const T den = sqabs(rb,ib); rv = (ra*rb+ia*ib)/den; iv = (ia*rb-ra*ib)/den; } @@ -73,7 +109,7 @@ namespace VecOp { static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { - register const R den = sqabs(ra,ia); + register const T den = sqabs(ra,ia); rv = (rb*ra+ib*ia)/den; iv = (ib*ra-rb*ia)/den; } @@ -114,7 +150,7 @@ namespace VecOp { static V cop(T &rv,T &iv,T ra,T ia,OpParam &p) { register const I powi = p.ibin.arg; - register S rt,it; f_sqr::cun(rt,it,ra,ia); + register T rt,it; f_sqr::cun(rt,it,ra,ia); for(I i = 2; i < powi; ++i) f_mul::cbin(rt,it,rt,it,ra,ia); rv = rt,iv = it; } @@ -126,9 +162,9 @@ namespace VecOp { static V cbin(T &rv,T &iv,T ra,T ia,T rb,T) { - register const R _abs = sqrt(sqabs(ra,ia)); + register const T _abs = sqrt(sqabs(ra,ia)); if(_abs) { - register const R _p = pow(_abs,rb)/_abs; + register const T _p = pow(_abs,rb)/_abs; rv = _p*ra,iv = _p*ia; } else @@ -270,17 +306,23 @@ namespace VecOp { } }; - template V f_aminq(T &,T ra,OpParam &p) - { - register T s = fabs(ra); - if(s < p.norm.minmax) p.norm.minmax = s; - } + template class f_aminq { + public: + static V rop(T &,T ra,OpParam &p) + { + register T s = fabs(ra); + if(s < p.norm.minmax) p.norm.minmax = s; + } + }; - template V f_amaxq(T &,T ra,OpParam &p) - { - register T s = fabs(ra); - if(s > p.norm.minmax) p.norm.minmax = s; - } + template class f_amaxq { + public: + static V rop(T &,T ra,OpParam &p) + { + register T s = fabs(ra); + if(s > p.norm.minmax) p.norm.minmax = s; + } + }; // gating @@ -342,8 +384,8 @@ namespace VecOp { public: static V cbin(T &rv,T &iv,T ra,T ia,T rb,T) { - register const R _abs = sqrt(sqabs(ra,ia))+rb; - register const R _phi = arg(ra,ia); + register const T _abs = sqrt(sqabs(ra,ia))+rb; + register const T _phi = arg(ra,ia); rv = _abs*cos(_phi),iv = _abs*sin(_phi); } @@ -366,7 +408,7 @@ namespace VecOp { else { // denormal bashing (doesn't propagate to the next stage) - static const F anti_denormal = 1e-18F; + static const T anti_denormal = (T)1.e-18; a += anti_denormal; a -= anti_denormal; v = a; @@ -375,4 +417,28 @@ namespace VecOp { }; } + +template +class VecFun { +public: + // strided real data + static BL r_add(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } + static BL r_sub(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } + static BL r_subr(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } + static BL r_mul(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } + static BL r_div(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } + static BL r_divr(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } + static BL r_mod(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } + + // multi-layer data (non-strided) + static BL v_add(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } + static BL v_sub(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } + static BL v_subr(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } + static BL v_mul(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } + static BL v_div(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } + static BL v_divr(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } + static BL v_mod(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } +}; + + #endif diff --git a/externals/grill/vasp/source/oploop.h b/externals/grill/vasp/source/oploop.h index 6b268022..888a3f84 100755 --- a/externals/grill/vasp/source/oploop.h +++ b/externals/grill/vasp/source/oploop.h @@ -16,23 +16,85 @@ WARRANTIES, see the file, "license.txt," in this distribution. #define _D_MIN(a,b) ((a) < (b)?(a):(b)) #ifdef FLEXT_THREADS -#define _D_LOOP(VAR,LEN) { \ - register I __i__; \ - for(__i__ = 0; __i__ < LEN; flext_base::ThrYield()) { \ - I __m__ = _D_MIN(LEN,__i__+_D_BLOCK); for(; (VAR = __i__) < __m__; ++__i__) { -#define _E_LOOP } if(__i__ < __m__) break; }} -#define _D_WHILE(COND) { \ - for(; (COND) ; flext_base::ThrYield()) { \ - register I __i__ = 0; \ - for(; __i__ < _D_BLOCK && (COND); ++__i__) { -#define _E_WHILE } if(__i__ < _D_BLOCK) break; }} +/* + #define _D_LOOP(VAR,LEN) { \ + register I __i__; \ + for(__i__ = 0; __i__ < LEN; flext_base::ThrYield()) { \ + I __m__ = _D_MIN(LEN,__i__+_D_BLOCK); for(; (VAR = __i__) < __m__; ++__i__) { + + #define _E_LOOP } if(__i__ < __m__) break; }} +*/ + #define _D_LOOP(VAR,LEN) { \ + for(VAR = 0; VAR < LEN; flext_base::ThrYield()) { \ + register const I __m__ = _D_MIN(LEN,VAR+_D_BLOCK); \ + for(; VAR < __m__; ++VAR) { + + #define _E_LOOP }}} + + #define _D_WHILE(COND) { \ + for(; (COND) ; flext_base::ThrYield()) { \ + register I __i__ = 0; \ + for(; __i__ < _D_BLOCK && (COND); ++__i__) { + + #define _E_WHILE } if(__i__ < _D_BLOCK) break; }} + +#else + + #define _D_LOOP(VAR,LEN) { \ + for(VAR = 0; VAR < LEN; ++VAR) { + + #define _E_LOOP }} + + #define _D_WHILE(COND) { \ + while(COND) { + + #define _E_WHILE }} + +#endif + + +#define _DE_WHILE(COND,BODY) { _D_WHILE(COND) BODY; _E_WHILE } +#define _DF_LOOP(VAR,LEN,BODY) { _D_LOOP(VAR,LEN) BODY; _E_LOOP } + +#ifdef VASP_COMPACT + + #define _DE_LOOP(VAR,LEN,BODY) _DF_LOOP(VAR,LEN,BODY) + #else -#define _D_LOOP(VAR,LEN) { \ - for(VAR = 0; VAR < LEN; ++VAR) { -#define _E_LOOP }} -#define _D_WHILE(COND) { \ - while(COND) { -#define _E_WHILE }} + + #ifdef FLEXT_THREADS + + #define _DE_LOOP(VAR,LEN,BODY) { \ + for(VAR = 0; VAR < LEN; flext_base::ThrYield()) { \ + register const I __m__ = _D_MIN(LEN,VAR+_D_BLOCK); \ + for(; VAR <= __m__-4; VAR += 4) { \ + BODY; \ + BODY; \ + BODY; \ + BODY; \ + } \ + for(; VAR < __m__; ++VAR) { \ + BODY; \ + } \ + } \ + } + + #else + + #define _DE_LOOP(VAR,LEN,BODY) { \ + for(VAR = 0; VAR <= LEN-4; VAR += 4) { \ + BODY; \ + BODY; \ + BODY; \ + BODY; \ + } \ + for(; VAR < LEN; ++VAR) { \ + BODY; \ + } \ + } + + #endif + #endif #endif diff --git a/externals/grill/vasp/source/oppermute.h b/externals/grill/vasp/source/oppermute.h index c307a3ae..45221a1f 100644 --- a/externals/grill/vasp/source/oppermute.h +++ b/externals/grill/vasp/source/oppermute.h @@ -37,7 +37,7 @@ void permutation1(OpParam &p,int (*origination)(int pos, int sz,OpParam &p)) const T *sdt = p.rsdt; const I ss = p.rss; I i; - _D_LOOP(i,sz) ddt[origination(i,sz,p)*ds] = sdt[i*ss]; _E_LOOP + _DE_LOOP(i,sz, ( ddt[origination(i,sz,p)*ds] = sdt[i*ss] ) ) } else { // in place @@ -86,9 +86,9 @@ void permutation2(OpParam &p,int (*origination)(int pos, int sz,OpParam &p)) const I ss = p.rss; I i; if(ss == 1 && rds == 1) - _D_LOOP(i,sz) *(rddt++) = *(sdt++); _E_LOOP + _DE_LOOP(i,sz, ( *(rddt++) = *(sdt++) ) ) else - _D_LOOP(i,sz) *rddt = *sdt,rddt += rds,sdt += ss; _E_LOOP + _DE_LOOP(i,sz, ( *rddt = *sdt,rddt += rds,sdt += ss ) ) rddt = p.rddt; } else permutation1(p,origination); @@ -98,9 +98,9 @@ void permutation2(OpParam &p,int (*origination)(int pos, int sz,OpParam &p)) const I ss = p.iss; I i; if(ss == 1 && ids == 1) - _D_LOOP(i,sz) *(iddt++) = *(sdt++); _E_LOOP + _DE_LOOP(i,sz, ( *(iddt++) = *(sdt++) ) ) else - _D_LOOP(i,sz) *iddt = *sdt,iddt += ids,sdt += ss; _E_LOOP + _DE_LOOP(i,sz, ( *iddt = *sdt,iddt += ids,sdt += ss ) ) iddt = p.iddt; } else { diff --git a/externals/grill/vasp/source/ops_arith.h b/externals/grill/vasp/source/ops_arith.h index 7f22b3f6..8d54d960 100644 --- a/externals/grill/vasp/source/ops_arith.h +++ b/externals/grill/vasp/source/ops_arith.h @@ -15,36 +15,35 @@ WARRANTIES, see the file, "license.txt," in this distribution. // Arithmetic math functions -namespace VecOp { - inline BL d_add(OpParam &p) { return D__rbin >(p); } - inline BL d_sub(OpParam &p) { return D__rbin >(p); } - inline BL d_subr(OpParam &p) { return D__rbin >(p); } - inline BL d_mul(OpParam &p) { return D__rbin >(p); } - inline BL d_div(OpParam &p) { return D__rbin >(p); } - inline BL d_divr(OpParam &p) { return D__rbin >(p); } - inline BL d_mod(OpParam &p) { return D__rbin >(p); } - - inline BL d_sqr(OpParam &p) { return D__run >(p); } - inline BL d_ssqr(OpParam &p) { return d__run >(p); } - - inline BL d_sign(OpParam &p) { return D__run >(p); } - inline BL d_abs(OpParam &p) { return D__run >(p); } -} - namespace VaspOp { - inline Vasp *m_add(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_add); } // add to (one vec or real) - inline Vasp *m_sub(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_sub); } // sub from (one vec or real) - inline Vasp *m_subr(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_subr); } // reverse sub from (one vec or real) - inline Vasp *m_mul(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_mul); } // mul with (one vec or real) - inline Vasp *m_div(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_div); } // div by (one vec or real) - inline Vasp *m_divr(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_divr); } // reverse div by (one vec or real) - inline Vasp *m_mod(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_mod); } // modulo by (one vec or real) - - inline Vasp *m_sqr(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,VecOp::d_sqr); } // unsigned square - inline Vasp *m_ssqr(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,VecOp::d_ssqr); } // signed square - - inline Vasp *m_sign(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,VecOp::d_sign); } // sign function - inline Vasp *m_abs(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,VecOp::d_abs); } // absolute values + inline BL d_add(OpParam &p) { return VecOp::D__rbin >(p); } + inline BL d_sub(OpParam &p) { return VecOp::D__rbin >(p); } + inline BL d_subr(OpParam &p) { return VecOp::D__rbin >(p); } + inline BL d_mul(OpParam &p) { return VecOp::D__rbin >(p); } + inline BL d_div(OpParam &p) { return VecOp::D__rbin >(p); } + inline BL d_divr(OpParam &p) { return VecOp::D__rbin >(p); } + inline BL d_mod(OpParam &p) { return VecOp::D__rbin >(p); } + + inline BL d_sqr(OpParam &p) { return VecOp::D__run >(p); } + inline BL d_ssqr(OpParam &p) { return VecOp::d__run >(p); } + + inline BL d_sign(OpParam &p) { return VecOp::D__run >(p); } + inline BL d_abs(OpParam &p) { return VecOp::D__run >(p); } + + + inline Vasp *m_add(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_add); } // add to (one vec or real) + inline Vasp *m_sub(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_sub); } // sub from (one vec or real) + inline Vasp *m_subr(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_subr); } // reverse sub from (one vec or real) + inline Vasp *m_mul(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_mul); } // mul with (one vec or real) + inline Vasp *m_div(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_div); } // div by (one vec or real) + inline Vasp *m_divr(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_divr); } // reverse div by (one vec or real) + inline Vasp *m_mod(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_mod); } // modulo by (one vec or real) + + inline Vasp *m_sqr(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,d_sqr); } // unsigned square + inline Vasp *m_ssqr(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,d_ssqr); } // signed square + + inline Vasp *m_sign(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,d_sign); } // sign function + inline Vasp *m_abs(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,d_abs); } // absolute values } #endif diff --git a/externals/grill/vasp/source/ops_cmp.h b/externals/grill/vasp/source/ops_cmp.h index 73ab132d..1302120f 100644 --- a/externals/grill/vasp/source/ops_cmp.h +++ b/externals/grill/vasp/source/ops_cmp.h @@ -38,8 +38,8 @@ namespace VecOp { inline BL d_maxq(OpParam &p) { return D__rop >(p); } inline BL d_rminq(OpParam &p) { return d__cop >(p); } inline BL d_rmaxq(OpParam &p) { return d__cop >(p); } - inline BL d_aminq(OpParam &p) { return F__rop >(p); } - inline BL d_amaxq(OpParam &p) { return F__rop >(p); } + inline BL d_aminq(OpParam &p) { return d__rop >(p); } + inline BL d_amaxq(OpParam &p) { return d__rop >(p); } inline BL d_gate(OpParam &p) { return D__rbin >(p); } inline BL d_igate(OpParam &p) { return d__rbin >(p); } diff --git a/externals/grill/vasp/source/ops_flt.cpp b/externals/grill/vasp/source/ops_flt.cpp index f92f585e..deecc5ff 100644 --- a/externals/grill/vasp/source/ops_flt.cpp +++ b/externals/grill/vasp/source/ops_flt.cpp @@ -149,10 +149,7 @@ BL VecOp::d_int(OpParam &p) register S d = p.intdif.carry; register I i; - _D_LOOP(i,p.frames) - *p.rddt = (d += *p.rsdt); - p.rsdt += p.rss,p.rddt += p.rds; - _E_LOOP + _DE_LOOP(i,p.frames, ( *p.rddt = (d += *p.rsdt), p.rsdt += p.rss,p.rddt += p.rds ) ) p.intdif.carry = d; return true; } @@ -168,13 +165,9 @@ BL VecOp::d_dif(OpParam &p) if(p.revdir) post("%s - reversed operation direction due to overlap: opposite sample delay",p.opname); - register S d = p.intdif.carry; + register S d = p.intdif.carry,d1; register I i; - _D_LOOP(i,p.frames) - register S d1 = *p.rsdt; - *p.rddt = d1-d,d = d1; - p.rsdt += p.rss,p.rddt += p.rds; - _E_LOOP + _DE_LOOP(i,p.frames, ( d1 = *p.rsdt, *p.rddt = d1-d,d = d1, p.rsdt += p.rss,p.rddt += p.rds ) ) p.intdif.carry = d; return true; } diff --git a/externals/grill/vasp/source/ops_rearr.cpp b/externals/grill/vasp/source/ops_rearr.cpp index 683d5581..d3f32c2e 100644 --- a/externals/grill/vasp/source/ops_rearr.cpp +++ b/externals/grill/vasp/source/ops_rearr.cpp @@ -73,11 +73,11 @@ BL VecOp::d_shift(OpParam &p) // do shift if(cnt > 0) { if(p.rss == 1 && p.rds == 1) - _D_LOOP(i,cnt) *(dd++) = *(sd++); _E_LOOP + _DE_LOOP(i,cnt, ( *(dd++) = *(sd++) ) ) else if(p.rss == -1 && p.rds == -1) - _D_LOOP(i,cnt) *(dd--) = *(sd--); _E_LOOP + _DE_LOOP(i,cnt, ( *(dd--) = *(sd--) ) ) else - _D_LOOP(i,cnt) *dd = *sd,sd += p.rss,dd += p.rds; _E_LOOP + _DE_LOOP(i,cnt, ( *dd = *sd,sd += p.rss,dd += p.rds ) ) } // fill spaces @@ -85,11 +85,11 @@ BL VecOp::d_shift(OpParam &p) S vfill = p.sh.fill == 1?0:dd[-p.rds]; I aish = abs(ish); if(p.rds == 1) - _D_LOOP(i,aish) *(dd++) = vfill; _E_LOOP + _DE_LOOP(i,aish, ( *(dd++) = vfill ) ) else if(p.rds == -1) - _D_LOOP(i,aish) *(dd--) = vfill; _E_LOOP + _DE_LOOP(i,aish, ( *(dd--) = vfill ) ) else - _D_LOOP(i,aish) *dd = vfill,dd += p.rds; _E_LOOP + _DE_LOOP(i,aish, ( *dd = vfill,dd += p.rds ) ) } return true; @@ -217,19 +217,14 @@ BL VecOp::d_mirr(OpParam &p) if(p.rsdt == p.rddt) { S *dl = p.rddt,*du = p.rddt+(p.frames-1)*p.rds; - _D_WHILE(dl < du) - register S t; - t = *dl; *dl = *du; *du = t; - dl += p.rds,du -= p.rds; - _E_WHILE + register S t; + _DE_WHILE(dl < du, ( t = *dl, *dl = *du, *du = t, dl += p.rds,du -= p.rds ) ) } else { I i; const S *ds = p.rsdt; S *dd = p.rddt+(p.frames-1)*p.rds; - _D_LOOP(i,p.frames) - *dd = *ds,ds += p.rss,dd -= p.rds; - _E_LOOP + _DE_LOOP(i,p.frames, ( *dd = *ds,ds += p.rss,dd -= p.rds ) ) } return true; } diff --git a/externals/grill/vasp/vasp.cw b/externals/grill/vasp/vasp.cw index 77e952b0..212243fb 100644 Binary files a/externals/grill/vasp/vasp.cw and b/externals/grill/vasp/vasp.cw differ diff --git a/externals/grill/vasp/vasp.dsp b/externals/grill/vasp/vasp.dsp index 9fbbd71a..39440734 100644 --- a/externals/grill/vasp/vasp.dsp +++ b/externals/grill/vasp/vasp.dsp @@ -45,7 +45,7 @@ RSC=rc.exe # PROP Ignore_Export_Lib 0 # PROP Target_Dir "" # ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "VASP_EXPORTS" /YX /FD /c -# ADD CPP /nologo /G6 /W3 /O2 /Ob2 /I "c:\programme\audio\pd\src" /I "f:\prog\max\flext\source" /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D FLEXT_SYS=2 /YX"main.h" /FD /c +# ADD CPP /nologo /G6 /W3 /Ox /Ot /Og /Ob2 /I "c:\programme\audio\pd\src" /I "f:\prog\max\flext\source" /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D FLEXT_SYS=2 /YX"main.h" /FD /c # ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32 # ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32 # ADD BASE RSC /l 0xc07 /d "NDEBUG" @@ -72,7 +72,7 @@ LINK32=link.exe # PROP Ignore_Export_Lib 0 # PROP Target_Dir "" # ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "VASP_EXPORTS" /YX /FD /GZ /c -# ADD CPP /nologo /W3 /Gm /ZI /Od /I "c:\programme\audio\pd\src" /I "f:\prog\max\flext\source" /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D FLEXT_SYS=2 /D "VASP_COMPACT" /FR /YX"main.h" /FD /GZ /c +# ADD CPP /nologo /W3 /Gm /ZI /Od /Ob0 /I "c:\programme\audio\pd\src" /I "f:\prog\max\flext\source" /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D FLEXT_SYS=2 /D "VASP_COMPACT" /FR /YX"main.h" /FD /GZ /c # ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32 # ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32 # ADD BASE RSC /l 0xc07 /d "_DEBUG" @@ -126,7 +126,7 @@ LINK32=link.exe # PROP Ignore_Export_Lib 0 # PROP Target_Dir "" # ADD BASE CPP /nologo /MT /W3 /GR /GX /O2 /Ob2 /I "c:\programme\audio\pd\src" /I "f:\prog\max\flext" /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "PD" /D "NT" /YX /FD /c -# ADD CPP /nologo /G6 /MT /W3 /O2 /Ob2 /I "c:\programme\audio\pd\src" /I "f:\prog\max\flext\source" /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D FLEXT_SYS=2 /D "FLEXT_THREADS" /YX"main.h" /FD /c +# ADD CPP /nologo /G6 /MT /W3 /Ox /Ot /Og /Ob2 /I "c:\programme\audio\pd\src" /I "f:\prog\max\flext\source" /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D FLEXT_SYS=2 /D "FLEXT_THREADS" /YX"main.h" /FD /c # ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32 # ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32 # ADD BASE RSC /l 0xc07 /d "NDEBUG" -- cgit v1.2.1