From 3eb7ec9a67e867275b862f9947deafe387012819 Mon Sep 17 00:00:00 2001 From: Thomas Grill Date: Mon, 17 Mar 2003 04:36:54 +0000 Subject: "" svn path=/trunk/; revision=476 --- externals/grill/vasp/build-pd-cygwin.sh | 15 ++ externals/grill/vasp/changes.txt | 1 + externals/grill/vasp/config-pd-cygwin.txt | 35 ++++ externals/grill/vasp/config-pd-linux.txt | 5 + externals/grill/vasp/makefile.pd-cygwin | 83 +++++++++ externals/grill/vasp/makefile.pd-darwin | 7 +- externals/grill/vasp/makefile.pd-linux | 6 +- externals/grill/vasp/source/main.cpp | 2 + externals/grill/vasp/source/opbase.cpp | 13 +- externals/grill/vasp/source/opbase.h | 90 ++-------- externals/grill/vasp/source/opdefs.h | 280 ++++++++++++++++++++++-------- externals/grill/vasp/source/opfuns.h | 183 +++++++++++++------ externals/grill/vasp/source/oploop.h | 4 +- externals/grill/vasp/source/ops_arith.cpp | 4 +- externals/grill/vasp/source/ops_arith.h | 57 +++--- externals/grill/vasp/source/ops_assign.h | 11 +- externals/grill/vasp/source/ops_carith.h | 21 ++- externals/grill/vasp/source/ops_cmp.h | 61 ++++--- externals/grill/vasp/source/ops_cplx.h | 13 +- externals/grill/vasp/source/ops_flt.h | 4 +- externals/grill/vasp/source/ops_trnsc.h | 14 +- externals/grill/vasp/source/opvecs.cpp | 4 +- externals/grill/vasp/source/util.cpp | 3 +- externals/grill/vasp/source/util.h | 36 +++- externals/grill/vasp/vasp.cw | Bin 233776 -> 231740 bytes 25 files changed, 624 insertions(+), 328 deletions(-) create mode 100644 externals/grill/vasp/build-pd-cygwin.sh create mode 100644 externals/grill/vasp/config-pd-cygwin.txt create mode 100644 externals/grill/vasp/makefile.pd-cygwin (limited to 'externals/grill/vasp') diff --git a/externals/grill/vasp/build-pd-cygwin.sh b/externals/grill/vasp/build-pd-cygwin.sh new file mode 100644 index 00000000..94778444 --- /dev/null +++ b/externals/grill/vasp/build-pd-cygwin.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +SYS=pd-cygwin + +. config-${SYS}.txt + +make -f makefile.${SYS} && +{ + if [ $INSTDIR != "" ]; then + make -f makefile.${SYS} install + fi + if [ $HELPDIR != "" ]; then + make -f makefile.${SYS} install-help + fi +} diff --git a/externals/grill/vasp/changes.txt b/externals/grill/vasp/changes.txt index 7baeedc2..23e0f917 100644 --- a/externals/grill/vasp/changes.txt +++ b/externals/grill/vasp/changes.txt @@ -4,6 +4,7 @@ Version history: - CHANGE: exchanged #defines for templates in vector calculation loops - FIX: bug for binary operations of complex vasps - ADD: implemented (factor-4) loop unrolling for vector calculation loops +- CHANGE: completely restructured vector optimization and introduced multi-dimensional multi-layered functionality 0.1.2: - FIX: bug in vasp.frames* ... wrong argument diff --git a/externals/grill/vasp/config-pd-cygwin.txt b/externals/grill/vasp/config-pd-cygwin.txt new file mode 100644 index 00000000..d9932ada --- /dev/null +++ b/externals/grill/vasp/config-pd-cygwin.txt @@ -0,0 +1,35 @@ +# VASP - vector assembling vector processor +# Copyright(c) 2002-2003 Thomas Grill (xovo@gmx.net) +# + +# your c++ compiler (define only if it's different than g++) +# CXX=g++ + +# where does the PD installation reside? +PD=/cygdrive/c/programme/audio/pd + +# where are the PD header files? +# leave it blank if it is a system directory (like /usr/local/include), +# since gcc 3.2 complains about it +PDINC=${PD}/src + +# where do the flext libraries reside? +FLEXTPATH=${PD}/flext + +# where should flext libraries be built? +TARGDIR=./pd-cygwin + +# where should VASP be installed? +# (leave blank to omit installation) +# a subfolder "vasp" will be created for the files +INSTDIR=${PD}/extra + +# where should the VASP help be installed? +# (leave blank to omit installation) +# a subfolder "vasp" will be created for the files +HELPDIR=${PD}/doc/5.reference + +# additional compiler flags +# (check if they fit to your system!) +UFLAGS=-mcpu=pentium3 -msse -mfpmath=sse -fprefetch-loop-arrays +# UFLAGS=-mcpu=pentiumpro diff --git a/externals/grill/vasp/config-pd-linux.txt b/externals/grill/vasp/config-pd-linux.txt index ca1188dc..9aca43b3 100644 --- a/externals/grill/vasp/config-pd-linux.txt +++ b/externals/grill/vasp/config-pd-linux.txt @@ -28,3 +28,8 @@ INSTDIR=${PD}/extra # (leave blank to omit installation) # a subfolder "vasp" will be created for the files HELPDIR=${PD}/doc/5.reference + +# additional compiler flags +# (check if they fit to your system!) +# UFLAGS=-mcpu=pentium3 -msse -mfpmath=sse -fprefetch-loop-arrays +UFLAGS=-mcpu=pentiumpro diff --git a/externals/grill/vasp/makefile.pd-cygwin b/externals/grill/vasp/makefile.pd-cygwin new file mode 100644 index 00000000..454c3167 --- /dev/null +++ b/externals/grill/vasp/makefile.pd-cygwin @@ -0,0 +1,83 @@ +# VASP - vector assembling vector processor +# Copyright (c)2002-2003 Thomas Grill (xovo@gmx.net) +# +# Makefile for gcc @ cygwin +# +# usage: +# to build run "make -f makefile.pd-cygwin" +# to install (as root), do "make -f makefile.pd-cygwin install" +# to install help, do "make -f makefile.pd-cygwin install-help" +# + +include config-pd-cygwin.txt + +FLEXTLIB=$(FLEXTPATH)/flext_t-pdwin.lib +PDLIBS=$(PD)/bin/pd.dll $(PD)/bin/pthreadVC.dll + +# compiler stuff +INCLUDES=/usr/include $(PDINC) +FLAGS=-DFLEXT_SYS=2 -DFLEXT_THREADS -fno-exceptions +CFLAGS=-O6 ${UFLAGS} -fmove-all-movables -frerun-loop-opt +#CFLAGS+=-funroll-loops -fmove-all-movables -freduce-all-givs -fschedule-insns2 -foptimize-register-move +LDFLAGS=-Wl,-s + +LIBS=m + +# ---------------------------------------------- +# the rest can stay untouched +# ---------------------------------------------- + +NAME=vasp +SRCDIR=source + +include make-files.txt + +MAKEFILE=makefile.pd-cygwin +TARGET=$(TARGDIR)/$(NAME).dll + + +# default target +all: $(TARGDIR) $(TARGET) + +$(patsubst %,$(SRCDIR)/%,$(SRCS)): $(patsubst %,$(SRCDIR)/%,$(HDRS)) $(MAKEFILE) + touch $(patsubst %,$(SRCDIR)/%,$(SRCS)) + +$(TARGDIR): + -mkdir $(TARGDIR) + +$(TARGDIR)/%.o : $(SRCDIR)/%.cpp + $(CXX) -c $(CFLAGS) $(FLAGS) $(patsubst %,-I%,$(INCLUDES) $(FLEXTPATH)) $< -o $@ + +$(TARGET) : $(patsubst %.cpp,$(TARGDIR)/%.o,$(SRCS)) $(FLEXTLIB) + $(CXX) $(LDFLAGS) -shared $^ ${PDLIBS} $(patsubst %,-l%,$(LIBS)) -o $@ + strip --strip-unneeded $@ + chmod 755 $@ + + +$(INSTDIR): + -mkdir $(INSTDIR) + +$(INSTDIR)/vasp: $(INSTDIR) + -mkdir $(INSTDIR)/vasp + +install:: $(INSTDIR)/vasp + +install:: $(TARGET) pd + cp -R $^ $(INSTDIR)/vasp +# chown -R root.root $(INSTDIR)/vasp + + +$(HELPDIR)/vasp: + -mkdir $(HELPDIR)/vasp + +install-help:: $(HELPDIR)/vasp + +install-help:: ./pd-help + cp $^/*.* $(HELPDIR)/vasp +# chown -R root.root $(HELPDIR)/vasp + + +.PHONY: clean +clean: + rm -f $(TARGDIR)/*.o $(TARGET) + diff --git a/externals/grill/vasp/makefile.pd-darwin b/externals/grill/vasp/makefile.pd-darwin index 34c702d2..89e96614 100755 --- a/externals/grill/vasp/makefile.pd-darwin +++ b/externals/grill/vasp/makefile.pd-darwin @@ -1,5 +1,5 @@ # VASP - vector assembling vector processor -# Copyright (c)2002 Thomas Grill (xovo@gmx.net) +# Copyright (c)2002-2003 Thomas Grill (xovo@gmx.net) # # Makefile for gcc @ OSX (darwin) # @@ -17,8 +17,9 @@ FLEXTLIB=$(FLEXTPATH)/flext_t.a # compiler stuff INCLUDES=$(PDINC) -FLAGS=-DFLEXT_SYS=2 -DFLEXT_THREADS -CFLAGS=-O6 ${UFLAGS} -Wno-unused -Wno-parentheses -Wno-switch -Wstrict-prototypes +FLAGS=-DFLEXT_SYS=2 -DFLEXT_THREADS -Wno-unused -Wno-parentheses -Wno-switch -Wstrict-prototypes -fno-exceptions +CFLAGS=-O6 ${UFLAGS} -fmove-all-movables -frerun-loop-opt -fprefetch-loop-arrays +#CFLAGS+=-funroll-loops -freduce-all-givs -fschedule-insns2 -foptimize-register-move LIBS=m LDFLAGS=-bundle -bundle_loader $(PDBIN) FRAMEWORKS=Carbon diff --git a/externals/grill/vasp/makefile.pd-linux b/externals/grill/vasp/makefile.pd-linux index a76cdbaa..6f9342cb 100644 --- a/externals/grill/vasp/makefile.pd-linux +++ b/externals/grill/vasp/makefile.pd-linux @@ -1,5 +1,5 @@ # VASP - vector assembling vector processor -# Copyright (c)2002 Thomas Grill (xovo@gmx.net) +# Copyright (c)2002-2003 Thomas Grill (xovo@gmx.net) # # Makefile for gcc @ linux # @@ -16,8 +16,8 @@ FLEXTLIB=$(FLEXTPATH)/flext_t.a # compiler stuff INCLUDES=$(PDINC) FLAGS=-DFLEXT_SYS=2 -DFLEXT_THREADS -fno-exceptions -CFLAGS=-O6 -mcpu=pentiumpro -fmove-all-movables -frerun-loop-opt -finline-functions # -funroll-loops -#CFLAGS=-O6 -mcpu=pentium3 -msse -mfpmath=sse -fmove-all-movables -frerun-loop-opt -finline-functions # -funroll-loops +CFLAGS=-O6 ${UFLAGS} -fmove-all-movables -frerun-loop-opt +#CFLAGS+=-funroll-loops -fmove-all-movables -freduce-all-givs -fschedule-insns2 -foptimize-register-move LDFLAGS=-Wl,-s LIBS=m diff --git a/externals/grill/vasp/source/main.cpp b/externals/grill/vasp/source/main.cpp index 3b2c0f75..5f63895a 100644 --- a/externals/grill/vasp/source/main.cpp +++ b/externals/grill/vasp/source/main.cpp @@ -14,6 +14,8 @@ WARRANTIES, see the file, "license.txt," in this distribution. const C *VASP_VERSION = "0.1.3pre1"; +#include "opfuns.h" + V lib_setup() { post(""); diff --git a/externals/grill/vasp/source/opbase.cpp b/externals/grill/vasp/source/opbase.cpp index c33a5782..5abcb9fa 100644 --- a/externals/grill/vasp/source/opbase.cpp +++ b/externals/grill/vasp/source/opbase.cpp @@ -12,7 +12,7 @@ WARRANTIES, see the file, "license.txt," in this distribution. #include "opbase.h" #include "opdefs.h" -Vasp *VaspOp::m_run(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun fun) +Vasp *VaspOp::m_run(OpParam &p,CVasp &src,CVasp *dst,opfun fun) { Vasp *ret = NULL; RVecBlock *vecs = GetRVecs(p.opname,src,dst); @@ -24,7 +24,7 @@ Vasp *VaspOp::m_run(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun fun) return ret; } -Vasp *VaspOp::m_cun(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun fun) +Vasp *VaspOp::m_cun(OpParam &p,CVasp &src,CVasp *dst,opfun fun) { Vasp *ret = NULL; CVecBlock *vecs = GetCVecs(p.opname,src,dst); @@ -36,7 +36,7 @@ Vasp *VaspOp::m_cun(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun fun) return ret; } -Vasp *VaspOp::m_rbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp::opfun fun) +Vasp *VaspOp::m_rbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,opfun fun) { Vasp *ret = NULL; BL argvasp = arg.IsVasp(); @@ -58,7 +58,7 @@ Vasp *VaspOp::m_rbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp: return ret; } -Vasp *VaspOp::m_cbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp::opfun fun) +Vasp *VaspOp::m_cbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,opfun fun) { Vasp *ret = NULL; BL argvasp = arg.IsVasp(); @@ -83,8 +83,3 @@ Vasp *VaspOp::m_cbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp: return ret; } - -#ifdef FLEXT_THREADS -flext::ThrMutex VecOp::C_base::mtx; -#endif - diff --git a/externals/grill/vasp/source/opbase.h b/externals/grill/vasp/source/opbase.h index 3d9b81c6..bba4a3a6 100644 --- a/externals/grill/vasp/source/opbase.h +++ b/externals/grill/vasp/source/opbase.h @@ -16,96 +16,32 @@ WARRANTIES, see the file, "license.txt," in this distribution. #include "vecblk.h" #include "opparam.h" -namespace VecOp { - typedef BL opfun(OpParam &p); - - class C_base { - public: - #ifdef FLEXT_THREADS - static flext::ThrMutex mtx; - static V Lock() { mtx.Lock(); } - static V Unlock() { mtx.Unlock(); } - #else - static V Lock() {} - static V Unlock() {} - #endif - }; - - template class C_run: public C_base { - public: - static BL Do(V f(T &v,T a),OpParam &p) { Lock(); fun = f; _D__run >(p); Unlock(); return true; } - static V run(T &v,T a) { fun(v,a); } - static V (*fun)(T &v,T a); - }; - template V (*C_run::fun)(T &v,T a); - - template class C_cun: public C_base { - public: - static BL Do(V f(T &rv,T &iv,T ra,T ia),OpParam &p) { Lock(); fun = f; _D__cun >(p); Unlock(); return true; } - static V cun(T &rv,T &iv,T ra,T ia) { fun(rv,iv,ra,ia); } - static V (*fun)(T &rv,T &iv,T ra,T ia); - }; - template V (*C_cun::fun)(T &rv,T &iv,T ra,T ia); - - template class C_rbin: public C_base { - public: - static BL Do(V f(T &v,T a,T b),OpParam &p) { Lock(); fun = f; _D__rbin >(p); Unlock(); return true; } - static V rbin(T &v,T a,T b) { fun(v,a,b); } - static V (*fun)(T &v,T a,T b); - }; - template V (*C_rbin::fun)(T &v,T a,T b); - - template class C_cbin: public C_base { - public: - static BL Do(V f(T &rv,T &iv,T ra,T ia,T rb,T ib),OpParam &p) { Lock(); fun = f; _D__cbin >(p); Unlock(); return true; } - static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { fun(rv,iv,ra,ia,rb,ib); } - static V (*fun)(T &rv,T &iv,T ra,T ia,T rb,T ib); - }; - template V (*C_cbin::fun)(T &rv,T &iv,T ra,T ia,T rb,T ib); - - template class C_rop: public C_base { - public: - static BL Do(V f(T &v,T a,OpParam &p),OpParam &p) { Lock(); fun = f; _D__rop >(p); Unlock(); return true; } - static V rop(T &v,T a,OpParam &p) { fun(v,a,p); } - static V (*fun)(T &v,T a,OpParam &p); - }; - template V (*C_rop::fun)(T &v,T a,OpParam &p); - - template class C_cop: public C_base { - public: - static BL Do(V f(T &rv,T &iv,T ra,T ia,OpParam &p),OpParam &p) { Lock(); fun = f; _D__cop >(p); Unlock(); return true; } - static V cop(T &rv,T &iv,T ra,T ia,OpParam &p) { fun(rv,iv,ra,ia,p); } - static V (*fun)(T &rv,T &iv,T ra,T ia,OpParam &p); - }; - template V (*C_cop::fun)(T &rv,T &iv,T ra,T ia,OpParam &p); +namespace VaspOp { + typedef BL opfun(OpParam &p); - template BL _d__run(V fun(T &v,T a),OpParam &p) { return C_run::Do(fun,p); } - template BL _d__cun(V fun(T &rv,T &iv,T ra,T ia),OpParam &p) { return C_cun::Do(fun,p); } - template BL _d__rbin(V fun(T &v,T a,T b),OpParam &p) { return C_rbin::Do(fun,p); } - template BL _d__cbin(V fun(T &rv,T &iv,T ra,T ia,T rb,T ib),OpParam &p) { return C_cbin::Do(fun,p); } - template BL _d__rop(V fun(T &v,T a,OpParam &p),OpParam &p) { return C_rop::Do(fun,p); } - template BL _d__cop(V fun(T &rv,T &iv,T ra,T ia,OpParam &p),OpParam &p) { return C_cop::Do(fun,p); } -} - + // -------- prepare vectors and do vector operation ----------- + // in opvecs.cpp -namespace VaspOp { RVecBlock *GetRVecs(const C *op,CVasp &src,CVasp *dst = NULL); CVecBlock *GetCVecs(const C *op,CVasp &src,CVasp *dst = NULL,BL full = false); RVecBlock *GetRVecs(const C *op,CVasp &src,const CVasp &arg,CVasp *dst = NULL,I multi = -1,BL ssize = true); CVecBlock *GetCVecs(const C *op,CVasp &src,const CVasp &arg,CVasp *dst = NULL,I multi = -1,BL ssize = true,BL full = false); - Vasp *DoOp(RVecBlock *vecs,VecOp::opfun *fun,OpParam &p,BL symm = false); - Vasp *DoOp(CVecBlock *vecs,VecOp::opfun *fun,OpParam &p,BL symm = false); + Vasp *DoOp(RVecBlock *vecs,opfun *fun,OpParam &p,BL symm = false); + Vasp *DoOp(CVecBlock *vecs,opfun *fun,OpParam &p,BL symm = false); // -------- transformations ----------------------------------- + // in opbase.cpp // unary functions - Vasp *m_run(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun fun); // real unary (one vec or real) - Vasp *m_cun(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun fun); // complex unary (one vec or complex) + Vasp *m_run(OpParam &p,CVasp &src,CVasp *dst,opfun fun); // real unary (one vec or real) + Vasp *m_cun(OpParam &p,CVasp &src,CVasp *dst,opfun fun); // complex unary (one vec or complex) + // binary functions - Vasp *m_rbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp::opfun fun); // real binary (one vec or real) - Vasp *m_cbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp::opfun fun); // complex binary (one vec or complex) + Vasp *m_rbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,opfun fun); // real binary (one vec or real) + Vasp *m_cbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,opfun fun); // complex binary (one vec or complex) + } #endif diff --git a/externals/grill/vasp/source/opdefs.h b/externals/grill/vasp/source/opdefs.h index 47006db6..8d7d522c 100644 --- a/externals/grill/vasp/source/opdefs.h +++ b/externals/grill/vasp/source/opdefs.h @@ -2,7 +2,7 @@ VASP modular - vector assembling signal processor / objects for Max/MSP and PD -Copyright (c) 2002 Thomas Grill (xovo@gmx.net) +Copyright (c) 2002-2003 Thomas Grill (xovo@gmx.net) For information on usage and redistribution, and for a DISCLAIMER OF ALL WARRANTIES, see the file, "license.txt," in this distribution. @@ -23,19 +23,48 @@ WARRANTIES, see the file, "license.txt," in this distribution. namespace VecOp { +// multi-layer templates + +template +inline BL vec_un(T *v,const T *a,I n = 0) { + const I _n = LR?LR:n; + for(I i = 0; i < _n; ++i) OP::run(v[i],a[i]); + return true; +} + +template +inline BL vec_un(T *v,T a,I n = 0) { + const I _n = LR?LR:n; + for(I i = 0; i < _n; ++i) OP::run(v[i],a); + return true; +} + +template +inline BL vec_bin(T *v,const T *a,const TR *b,I n = 0) { + const I _n = LR?LR:n; + for(I i = 0; i < _n; ++i) OP::rbin(v[i],a[i],b[i]); + return true; +} + +template +inline BL vec_bin(T *v,const T *a,TR b,I n = 0) { + const I _n = LR?LR:n; + for(I i = 0; i < _n; ++i) OP::rbin(v[i],a[i],b); + return true; +} + /*! \brief skeleton for unary real operations */ -//template BL V__run(register const T *sr,I rss,register T *dr,I rds,I frames) template BL V__run(register const T *sr,I rss,register T *dr,I rds,I frames) { register I i; - if(sr == dr) - if(_D_ALWAYS1 || rds == 1) + if(sr == dr && OP::run_opt() >= 3) + if((_D_ALWAYS1 || rds == 1) && OP::run_opt() >= 2) _DE_LOOP(i,frames, ( OP::run(*dr,*dr), dr++ ) ) else _DE_LOOP(i,frames, ( OP::run(*dr,*dr), dr += rds ) ) else - if(_D_ALWAYS1 || (rss == 1 && rds == 1)) + if((_D_ALWAYS1 || (rss == 1 && rds == 1)) && OP::run_opt() >= 2) _DE_LOOP(i,frames, ( OP::run(*dr,*sr), sr++,dr++ ) ) else _DE_LOOP(i,frames, ( OP::run(*dr,*sr), sr += rss,dr += rds ) ) @@ -48,13 +77,13 @@ template BL V__run(register const T *sr,I rss,register T *dr,I template BL V__cun(register const T *sr,register const T *si,I rss,I iss,register T *dr,register T *di,I rds,I ids,I frames) { register I i; - if(sr == dr && si == di) - if(_D_ALWAYS1 || (rds == 1 && ids == 1)) + if(sr == dr && si == di && OP::cun_opt() >= 3) + if((_D_ALWAYS1 || (rds == 1 && ids == 1)) && OP::cun_opt() >= 2) _DE_LOOP(i,frames, ( OP::cun(*dr,*di,*dr,*di), dr++,di++ ) ) else _DE_LOOP(i,frames, ( OP::cun(*dr,*di,*dr,*di), dr += rds,di += ids ) ) else - if(_D_ALWAYS1 || (rss == 1 && iss == 1 && rds == 1 && ids == 1)) + if((_D_ALWAYS1 || (rss == 1 && iss == 1 && rds == 1 && ids == 1)) && OP::cun_opt() >= 2) _DE_LOOP(i,frames, ( OP::cun(*dr,*di,*sr,*si), sr++,si++,dr++,di++ ) ) else _DE_LOOP(i,frames, ( OP::cun(*dr,*di,*sr,*si), sr += rss,si += iss,dr += rds,di += ids ) ) @@ -69,13 +98,13 @@ template BL V__vun(I layers,register const T *sr,register T *d V__run(sr,1,dr,1,frames); break; case 2: - _DF_LOOP(i,frames, ( vec_un(dr,sr), sr += 2, dr += 2) ) + _DF_LOOP(i,frames, ( vec_un(dr,sr,2), sr += 2, dr += 2) ) break; case 3: - _DF_LOOP(i,frames, ( vec_un(dr,sr), sr += 3, dr += 3) ) + _DF_LOOP(i,frames, ( vec_un(dr,sr,3), sr += 3, dr += 3) ) break; case 4: - _DF_LOOP(i,frames, ( vec_un(dr,sr), sr += 4, dr += 4) ) + _DF_LOOP(i,frames, ( vec_un(dr,sr,4), sr += 4, dr += 4) ) break; default: _DF_LOOP(i,frames, ( vec_un(dr,sr,layers), sr += layers, dr += layers) ) @@ -88,13 +117,13 @@ template BL V__vun(I layers,register const T *sr,register T *d template BL V__rbin(register const T *sr,I rss,register T *dr,I rds,register const T *ar,I ras,I frames) { register I i; - if(sr == dr) - if(_D_ALWAYS1 || (rds == 1 && ras == 1)) + if(sr == dr && OP::rbin_opt() >= 3) + if((_D_ALWAYS1 || (rds == 1 && ras == 1)) && OP::rbin_opt() >= 2) _DE_LOOP(i,frames, ( OP::rbin(*dr,*dr,*ar), dr++,ar++ ) ) else _DE_LOOP(i,frames, ( OP::rbin(*dr,*dr,*ar), dr += rds,ar += ras ) ) else - if(_D_ALWAYS1 || (rss == 1 && rds == 1 && ras == 1)) + if((_D_ALWAYS1 || (rss == 1 && rds == 1 && ras == 1)) && OP::rbin_opt() >= 2) _DE_LOOP(i,frames, ( OP::rbin(*dr,*sr,*ar), sr++,dr++,ar++ ) ) else _DE_LOOP(i,frames, ( OP::rbin(*dr,*sr,*ar), sr += rss,dr += rds,ar += ras ) ) @@ -104,8 +133,8 @@ template BL V__rbin(register const T *sr,I rss,register T *dr, template BL V__cbin(register const T *sr,register const T *si,I rss,I iss,register T *dr,register T *di,I ids,I rds,const T *ar,const T *ai,I ras,I ias,I frames) { register I i; - if(sr == dr && si == di) - if(_D_ALWAYS1 || (rds == 1 && ids == 1 && ras == 1 && ias == 1)) + if(sr == dr && si == di && OP::cbin_opt() >= 3) + if((_D_ALWAYS1 || (rds == 1 && ids == 1 && ras == 1 && ias == 1)) && OP::cbin_opt() >= 2) _DE_LOOP(i,frames, ( OP::cbin(*dr,*di,*dr,*di,*ar,*ai), dr++,di++,ar++,ai++ ) ) else _DE_LOOP(i,frames, ( OP::cbin(*dr,*di,*dr,*di,*ar,*ai), dr += rds,di += ids,ar += ras,ai += ias ) ) @@ -117,45 +146,48 @@ template BL V__cbin(register const T *sr,register const T *si, template class _A__vector { public: + static BL unroll() { return true; } static TR ev(const TR *a,I i,I m) { return a[i*m]; } }; template class _A__scalar { public: + static BL unroll() { return true; } static TR ev(TR a,I i,I m) { return a; } }; class _A__env { public: + static BL unroll() { return false; } static R ev(Env::Iter &a,I i,I m) { return a.ValFwd(i); } }; template BL Vx__rbin(register const T *sr,I rss,register T *dr,I rds,TA ar,I frames) { register I i; - if(sr == dr) - if(_D_ALWAYS1 || rds == 1) - _DE_LOOP(i,frames, ( OP::rbin(*dr,*dr,EVARG::ev(ar,i,1)), dr++ ) ) + if(sr == dr && OP::rbin_opt() >= 3) + if((_D_ALWAYS1 || rds == 1) && OP::rbin_opt() >= 2) + _DQ_LOOP(EVARG::unroll(),i,frames, ( OP::rbin(*dr,*dr,EVARG::ev(ar,i,1)), dr++ ) ) else - _DE_LOOP(i,frames, ( OP::rbin(*dr,*dr,EVARG::ev(ar,i,1)), dr += rds ) ) + _DQ_LOOP(EVARG::unroll(),i,frames, ( OP::rbin(*dr,*dr,EVARG::ev(ar,i,1)), dr += rds ) ) else - if(_D_ALWAYS1 || rss == 1 && rds == 1) - _DE_LOOP(i,frames, ( OP::rbin(*dr,*sr,EVARG::ev(ar,i,1)), sr++,dr++ ) ) + if((_D_ALWAYS1 || rss == 1 && rds == 1) && OP::rbin_opt() >= 2) + _DQ_LOOP(EVARG::unroll(),i,frames, ( OP::rbin(*dr,*sr,EVARG::ev(ar,i,1)), sr++,dr++ ) ) else - _DE_LOOP(i,frames, ( OP::rbin(*dr,*sr,EVARG::ev(ar,i,1)), sr += rss,dr += rds ) ) + _DQ_LOOP(EVARG::unroll(),i,frames, ( OP::rbin(*dr,*sr,EVARG::ev(ar,i,1)), sr += rss,dr += rds ) ) return true; } template BL Vx__cbin(register const T *sr,register const T *si,I rss,I iss,register T *dr,register T *di,I ids,I rds,TA1 ar,TA2 ai,I ras,I ias,I frames) { register I i; - if(sr == dr && si == di) - if(_D_ALWAYS1 || (rds == 1 && ids == 1 && ras == 1 && ias == 1)) - _DE_LOOP(i,frames, ( OP::cbin(*dr,*di,*dr,*di,EVARG1::ev(ar,i,1),EVARG2::ev(ai,i,1)), dr++,di++ ) ) + if(sr == dr && si == di && OP::cbin_opt() >= 3) + if((_D_ALWAYS1 || (rds == 1 && ids == 1 && ras == 1 && ias == 1)) && OP::cbin_opt() >= 2) + _DQ_LOOP(EVARG1::unroll() && EVARG2::unroll(),i,frames, ( OP::cbin(*dr,*di,*dr,*di,EVARG1::ev(ar,i,1),EVARG2::ev(ai,i,1)), dr++,di++ ) ) else - _DE_LOOP(i,frames, ( OP::cbin(*dr,*di,*dr,*di,EVARG1::ev(ar,i,ras),EVARG2::ev(ai,i,ias)), dr += rds,di += ids ) ) + _DQ_LOOP(EVARG1::unroll() && EVARG2::unroll(),i,frames, ( OP::cbin(*dr,*di,*dr,*di,EVARG1::ev(ar,i,ras),EVARG2::ev(ai,i,ias)), dr += rds,di += ids ) ) else - _DE_LOOP(i,frames, ( OP::cbin(*dr,*di,*sr,*si,EVARG1::ev(ar,i,ras),EVARG2::ev(ai,i,ias)), sr += rss,si += iss,dr += rds,di += ids ) ) + _DQ_LOOP(EVARG1::unroll() && EVARG2::unroll(),i,frames, ( OP::cbin(*dr,*di,*sr,*si,EVARG1::ev(ar,i,ras),EVARG2::ev(ai,i,ias)), sr += rss,si += iss,dr += rds,di += ids ) ) return true; } @@ -167,16 +199,16 @@ template BL Vx__vbin(I layers,re Vx__rbin(sr,1,dr,1,ar,frames); break; case 2: - _DF_LOOP(i,frames, ( vec_bin(dr,sr,EVARG::ev(ar,i,2)), sr += 2, dr += 2) ) + _DF_LOOP(i,frames, ( vec_bin(dr,sr,EVARG::ev(ar,i,2),2), sr += 2, dr += 2) ) break; case 3: - _DF_LOOP(i,frames, ( vec_bin(dr,sr,EVARG::ev(ar,i,3)), sr += 3, dr += 3) ) + _DF_LOOP(i,frames, ( vec_bin(dr,sr,EVARG::ev(ar,i,3),3), sr += 3, dr += 3) ) break; case 4: - _DF_LOOP(i,frames, ( vec_bin(dr,sr,EVARG::ev(ar,i,4)), sr += 4, dr += 4) ) + _DF_LOOP(i,frames, ( vec_bin(dr,sr,EVARG::ev(ar,i,4),4), sr += 4, dr += 4) ) break; default: - _DF_LOOP(i,frames, ( vec_bin(dr,sr,EVARG::ev(ar,i,layers),layers), sr += layers, dr += layers) ) + _DF_LOOP(i,frames, ( vec_bin(dr,sr,EVARG::ev(ar,i,layers),layers), sr += layers, dr += layers) ) break; } return true; @@ -250,13 +282,13 @@ template BL _F__cbin(OpParam &p) template BL V__rop(ARG p,register const S *sr,I rss,register S *dr,I rds,I frames) { register I i; - if(sr == dr) - if(_D_ALWAYS1 || rds == 1) + if(sr == dr && OP::rop_opt() >= 3) + if((_D_ALWAYS1 || rds == 1) && OP::rop_opt() >= 2) _DE_LOOP(i,frames, ( OP::rop(*dr,*dr,p), dr++ ) ) else _DE_LOOP(i,frames, ( OP::rop(*dr,*dr,p), dr += rds ) ) else - if(_D_ALWAYS1 || (rss == 1 && p.rds == 1)) + if((_D_ALWAYS1 || (rss == 1 && p.rds == 1)) && OP::rop_opt() >= 2) _DE_LOOP(i,frames, ( OP::rop(*dr,*sr,p), sr++,dr++ ) ) else _DE_LOOP(i,frames, ( OP::rop(*dr,*sr,p), sr += rss,dr += rds ) ) @@ -268,13 +300,13 @@ template BL V__rop(ARG p,register const S *sr,I rss, template BL V__cop(ARG p,register const S *sr,register const S *si,I rss,I iss,register S *dr,register S *di,I rds,I ids,I frames) { register I i; - if(sr == dr && si == di) - if(_D_ALWAYS1 || (rds == 1 && ids == 1)) + if(sr == dr && si == di && OP::cop_opt() >= 3) + if((_D_ALWAYS1 || (rds == 1 && ids == 1)) && OP::cop_opt() >= 2) _DE_LOOP(i,frames, ( OP::cop(*dr,*di,*dr,*di,p), dr++,di++ ) ) else _DE_LOOP(i,frames, ( OP::cop(*dr,*di,*dr,*di,p), dr += rds,di += ids ) ) else - if(_D_ALWAYS1 || (p.rss == 1 && p.iss == 1 && p.rds == 1 && p.ids == 1)) + if((_D_ALWAYS1 || (p.rss == 1 && p.iss == 1 && p.rds == 1 && p.ids == 1)) && OP::cop_opt() >= 2) _DE_LOOP(i,frames, ( OP::cop(*dr,*di,*sr,*si,p), sr++,si++,dr++,di++ ) ) else _DE_LOOP(i,frames, ( OP::cop(*dr,*di,*sr,*si,p), sr += rss,si += iss,dr += rds,di += ids ) ) @@ -289,49 +321,149 @@ template inline BL _D__rop(OpParam &p) { return V__rop inline BL _D__cop(OpParam &p) { return V__cop(p,p.rsdt,p.isdt,p.rss,p.iss,p.rddt,p.iddt,p.rds,p.ids,p.frames); } -template inline BL d__run(OpParam &p) { return _d__run(CL::run,p); } -template inline BL d__cun(OpParam &p) { return _d__cun(CL::cun,p); } -template inline BL d__rbin(OpParam &p) { return _d__rbin(CL::rbin,p); } -template inline BL d__cbin(OpParam &p) { return _d__cbin(CL::cbin,p); } -template inline BL d__rop(OpParam &p) { return _d__rop(CL::rop,p); } -template inline BL d__cop(OpParam &p) { return _d__cop(CL::cop,p); } +template BL _d__run(V fun(T &v,T a),OpParam &p) +{ + int i; + if(p.rds == 1 && p.rss == 1) + _DE_LOOP(i,p.frames, ( fun(p.rddt[i],p.rsdt[i]) ) ) + else + _DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.rsdt[p.rss*i]) ) ) + return true; +} + +template BL _d__cun(V fun(T &rv,T &iv,T ra,T ia),OpParam &p) +{ + int i; + if(p.rds == 1 && p.ids == 1 && p.rss == 1 && p.iss == 1) + _DE_LOOP(i,p.frames, ( fun(p.rddt[i],p.iddt[i],p.rsdt[i],p.isdt[i]) ) ) + else + _DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.iddt[p.ids*i],p.rsdt[p.rss*i],p.isdt[p.iss*i]) ) ) + return true; +} + +template BL _d__rbin(V fun(T &v,T a,T b),OpParam &p) +{ + int i; + if(p.HasArg() && p.arg[0].Is()) { + switch(p.arg[0].argtp) { + case OpParam::Arg::arg_v: { + const T *adr = p.arg[0].v.rdt; + const I asr = p.arg[0].v.rs; + _DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.rsdt[p.rss*i],adr[asr*i]) ) ) + break; + } + case OpParam::Arg::arg_env: { + Env::Iter it(*p.arg[0].e.env); it.Init(0); + _DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.rsdt[p.rss*i],it.ValFwd(i)) ) ) + break; + } + case OpParam::Arg::arg_x: { + const T av = p.arg[0].x.r; + _DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.rsdt[p.rss*i],av) ) ) + break; + } + } + } + else { + _DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.rsdt[p.rss*i],p.rbin.arg) ) ) + } + return true; +} + +template BL _d__cbin(V fun(T &rv,T &iv,T ra,T ia,T rb,T ib),OpParam &p) +{ + int i; + if(p.HasArg() && p.arg[0].Is()) { + switch(p.arg[0].argtp) { + case OpParam::Arg::arg_v: { + const T *adr = p.arg[0].v.rdt,*adi = p.arg[0].v.idt; + const I asr = p.arg[0].v.rs,asi = p.arg[0].v.is; + if(adi) + _DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.iddt[p.ids*i],p.rsdt[p.rss*i],p.isdt[p.iss*i],adr[asr*i],adi[asi*i]) ) ) + else + _DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.iddt[p.ids*i],p.rsdt[p.rss*i],p.isdt[p.iss*i],adr[asr*i],0) ) ) + break; + } + case OpParam::Arg::arg_env: { + Env::Iter it(*p.arg[0].e.env); it.Init(0); + _DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.iddt[p.ids*i],p.rsdt[p.rss*i],p.isdt[p.iss*i],it.ValFwd(i),0) ) ) + break; + } + case OpParam::Arg::arg_x: { + const T avr = p.arg[0].x.r,avi = p.arg[0].x.i; + _DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.iddt[p.ids*i],p.rsdt[p.rss*i],p.isdt[p.iss*i],avr,avi) ) ) + break; + } + } + } + else { + _DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.iddt[p.ids*i],p.rsdt[p.rss*i],p.isdt[p.iss*i],p.cbin.rarg,p.cbin.iarg) ) ) + } + return true; +} + +template BL _d__rop(V fun(T &v,T a,OpParam &p),OpParam &p) +{ + int i; + if(p.rds == 1 && p.rss == 1) + _DE_LOOP(i,p.frames, ( fun(p.rddt[i],p.rsdt[i],p) ) ) + else + _DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.rsdt[p.rss*i],p) ) ) + return true; +} + +template BL _d__cop(V fun(T &rv,T &iv,T ra,T ia,OpParam &p),OpParam &p) +{ + int i; + if(p.rds == 1 && p.ids == 1 && p.rss == 1 && p.iss == 1) + _DE_LOOP(i,p.frames, ( fun(p.rddt[i],p.iddt[i],p.rsdt[i],p.isdt[i],p) ) ) + else + _DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.iddt[p.ids*i],p.rsdt[p.rss*i],p.isdt[p.iss*i],p) ) ) + return true; +} -template inline BL f__run(OpParam &p) { return _d__run(CL::run,p); } -template inline BL f__cun(OpParam &p) { return _d__cun(CL::cun,p); } -template inline BL f__rbin(OpParam &p) { return _d__rbin(CL::rbin,p); } -template inline BL f__cbin(OpParam &p) { return _d__cbin(CL::cbin,p); } -template inline BL f__rop(OpParam &p) { return _d__rop(CL::rop,p); } -template inline BL f__cop(OpParam &p) { return _d__cop(CL::cop,p); } #ifdef VASP_COMPACT -template inline BL D__run(OpParam &p) { return _d__run(CL::run,p); } -template inline BL D__cun(OpParam &p) { return _d__cun(CL::cun,p); } -template inline BL D__rbin(OpParam &p) { return _d__rbin(CL::rbin,p); } -template inline BL D__cbin(OpParam &p) { return _d__cbin(CL::cbin,p); } -template inline BL D__rop(OpParam &p) { return _d__rop(CL::rop,p); } -template inline BL D__cop(OpParam &p) { return _d__cop(CL::cop,p); } -template inline BL F__run(OpParam &p) { return _d__run(CL::run,p); } -template inline BL F__cun(OpParam &p) { return _d__cun(CL::cun,p); } -template inline BL F__rbin(OpParam &p) { return _d__rbin(CL::rbin,p); } -template inline BL F__cbin(OpParam &p) { return _d__cbin(CL::cbin,p); } -template inline BL F__rop(OpParam &p) { return _d__rop(CL::rop,p); } -template inline BL F__cop(OpParam &p) { return _d__cop(CL::cop,p); } + template inline BL D__run(OpParam &p) { return _d__run(CL::run,p); } + template inline BL D__cun(OpParam &p) { return _d__cun(CL::cun,p); } + template inline BL D__rbin(OpParam &p) { return _d__rbin(CL::rbin,p); } + template inline BL D__cbin(OpParam &p) { return _d__cbin(CL::cbin,p); } + template inline BL D__rop(OpParam &p) { return _d__rop(CL::rop,p); } + template inline BL D__cop(OpParam &p) { return _d__cop(CL::cop,p); } #else -template inline BL D__run(OpParam &p) { return _D__run(p); } -template inline BL D__cun(OpParam &p) { return _D__cun(p); } -template inline BL D__rbin(OpParam &p) { return _D__rbin(p); } -template inline BL D__cbin(OpParam &p) { return _D__cbin(p); } -template inline BL D__rop(OpParam &p) { return _D__rop(p); } -template inline BL D__cop(OpParam &p) { return _D__cop(p); } -template inline BL F__run(OpParam &p) { return V__run(p.rsdt,p.rss,p.rddt,p.rds,p.frames); } -template inline BL F__cun(OpParam &p) { return V__cun(p.rsdt,p.isdt,p.rss,p.iss,p.rddt,p.iddt,p.rds,p.ids,p.frames); } -template inline BL F__rbin(OpParam &p) { return _F__rbin(p); } -template inline BL V__cbin(OpParam &p) { return _F__cbin(p); } -template inline BL F__rop(OpParam &p) { return V__rop(p,p.rsdt,p.rss,p.rddt,p.rds,p.frames); } -template inline BL F__cop(OpParam &p) { return V__cop(p,p.rsdt,p.isdt,p.rss,p.iss,p.rddt,p.iddt,p.rds,p.ids,p.frames); } + template inline BL D__run(OpParam &p) { return CL::run_opt()?_D__run(p):_d__run(CL::run,p); } + template inline BL D__cun(OpParam &p) { return CL::cun_opt()?_D__cun(p):_d__cun(CL::cun,p); } + template inline BL D__rbin(OpParam &p) { return CL::rbin_opt()?_D__rbin(p):_d__rbin(CL::rbin,p); } + template inline BL D__cbin(OpParam &p) { return CL::cbin_opt()?_D__cbin(p):_d__cbin(CL::cbin,p); } + template inline BL D__rop(OpParam &p) { return CL::rop_opt()?_D__rop(p):_d__rop(CL::rop,p); } + template inline BL D__cop(OpParam &p) { return CL::cop_opt()?_D__cop(p):_d__cop(CL::cop,p); } #endif + +// process multi-dimensional data + +template inline BL V__vmulti(BL vbin(I layers,const T *sr,T *dr,const T *ar,I len),I layers,const T *sr,T *dr,const T *ar,I dim,const I *dims) +{ + if(dim == 1 || !dims) { + return vbin(layers,sr,dr,ar,dims?dims[0]:dim); + } + else if(dim > 1) { + // calculate stride for next dimensions + I i,s,str = layers*dims[0]; + for(i = 1; i < dim-1; ++i) str *= dims[i]; + const I dimn = dims[i]; + + for(s = i = 0; i < dimn; ++i,s += str) + V__vmulti(vbin,layers,sr+s,dr+s,ar+s,dim-1,dims); + return true; + } + else + return false; +} + + + } // namespace VecOp #endif diff --git a/externals/grill/vasp/source/opfuns.h b/externals/grill/vasp/source/opfuns.h index 5ab8d48a..2228641d 100644 --- a/externals/grill/vasp/source/opfuns.h +++ b/externals/grill/vasp/source/opfuns.h @@ -12,56 +12,27 @@ WARRANTIES, see the file, "license.txt," in this distribution. #define __VASP_OPFUNS_H #include "opdefs.h" +#include +#include "util.h" namespace VecOp { - // multi-layer templates - - template - static V vec_un(T *v,const T *a,I n = 0) { - const I _n = N?N:n; - for(I i = 0; i < _n; ++i) FUN(v[i],a[i]); - } - - template - static V vec_un(T *v,T a,I n = 0) { - const I _n = N?N:n; - for(I i = 0; i < _n; ++i) FUN(v[i],a); - } - - template - static V vec_bin(T *v,const T *a,const T *b,I n = 0) { - const I _n = N?N:n; - for(I i = 0; i < _n; ++i) FUN(v[i],a[i],b[i]); - } - - template - static V vec_bin(T *v,const T *a,T b,I n = 0) { - const I _n = N?N:n; - for(I i = 0; i < _n; ++i) FUN(v[i],a[i],b); - } - - - template - static V cvec_un(T *v,const T *a,I n = 0) { vec_un(v,a,n); } - - template - static V cvec_bin(T *v,const T *a,const T *b,I n = 0) { vec_bin(v,a,b,n); } - - - // assignment template class f_copy { public: + static I run_opt() { return 3; } static V run(T &v,T a) { v = a; } + static I cun_opt() { return 2; } static V cun(T &rv,T &iv,T ra,T ia) { rv = ra,iv = ia; } }; template class f_set { public: + static I rbin_opt() { return 3; } static V rbin(T &v,T,T b) { v = b; } + static I cbin_opt() { return 2; } static V cbin(T &rv,T &iv,T,T,T rb,T ib) { rv = rb,iv = ib; } }; @@ -69,32 +40,42 @@ namespace VecOp { template class f_add { public: + static I rbin_opt() { return 3; } static V rbin(T &v,T a,T b) { v = a+b; } + static I cbin_opt() { return 2; } static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { rv = ra+rb,iv = ia+ib; } }; template class f_sub { public: + static I rbin_opt() { return 3; } static V rbin(T &v,T a,T b) { v = a-b; } + static I cbin_opt() { return 2; } static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { rv = ra-rb,iv = ia-ib; } }; template class f_subr { public: + static I rbin_opt() { return 2; } static V rbin(T &v,T a,T b) { v = b-a; } + static I cbin_opt() { return 2; } static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { rv = rb-ra,iv = ib-ia; } }; template class f_mul { public: + static I rbin_opt() { return 3; } static V rbin(T &v,T a,T b) { v = a*b; } + static I cbin_opt() { return 1; } static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { rv = ra*rb-ia*ib, iv = ra*ib+rb*ia; } }; template class f_div { public: + static I rbin_opt() { return 2; } static V rbin(T &v,T a,T b) { v = a/b; } + static I cbin_opt() { return 0; } static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { register const T den = sqabs(rb,ib); @@ -105,8 +86,10 @@ namespace VecOp { template class f_divr { public: + static I rbin_opt() { return 2; } static V rbin(T &v,T a,T b) { v = b/a; } + static I cbin_opt() { return 0; } static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { register const T den = sqabs(ra,ia); @@ -117,28 +100,35 @@ namespace VecOp { template class f_mod { public: + static I rbin_opt() { return 0; } static V rbin(T &v,T a,T b) { v = fmod(a,b); } }; template class f_abs { public: + static I run_opt() { return 0; } static V run(T &v,T a) { v = fabs(a); } + static I cun_opt() { return 0; } static V cun(T &rv,T &iv,T ra,T ia) { rv = sqrt(ra*ra+ia*ia),iv = 0; } }; template class f_sign { public: + static I run_opt() { return 0; } static V run(T &v,T a) { v = (a == 0?0:(a < 0?-1.:1.)); } }; template class f_sqr { public: + static I run_opt() { return 3; } static V run(T &v,T a) { v = a*a; } + static I cun_opt() { return 1; } static V cun(T &rv,T &iv,T ra,T ia) { rv = ra*ra-ia*ia; iv = ra*ia*2; } }; template class f_ssqr { public: + static I run_opt() { return 0; } static V run(T &v,T a) { v = a*fabs(a); } }; @@ -147,19 +137,22 @@ namespace VecOp { template class f_powi { public: + static I cop_opt() { return 0; } static V cop(T &rv,T &iv,T ra,T ia,OpParam &p) { register const I powi = p.ibin.arg; - register T rt,it; f_sqr::cun(rt,it,ra,ia); - for(I i = 2; i < powi; ++i) f_mul::cbin(rt,it,rt,it,ra,ia); + register T rt,it; VecOp::f_sqr::cun(rt,it,ra,ia); + for(I i = 2; i < powi; ++i) VecOp::f_mul::cbin(rt,it,rt,it,ra,ia); rv = rt,iv = it; } }; template class f_pow { public: + static I rbin_opt() { return 0; } static V rbin(T &v,T a,T b) { v = pow(fabs(a),b)*sgn(a); } + static I cbin_opt() { return 0; } static V cbin(T &rv,T &iv,T ra,T ia,T rb,T) { register const T _abs = sqrt(sqabs(ra,ia)); @@ -170,26 +163,32 @@ namespace VecOp { else rv = iv = 0; } + protected: + static T sgn(T x) { return x?(x > 0?1:-1):0; } }; template class f_sqrt { public: + static I run_opt() { return 0; } static V run(T &v,T a) { v = sqrt(fabs(a)); } }; template class f_ssqrt { public: + static I run_opt() { return 0; } static V run(T &v,T a) { v = sqrt(fabs(a))*sgn(a); } }; template class f_exp { public: + static I run_opt() { return 0; } static V run(T &v,T a) { v = exp(a); } }; template class f_log { public: + static I run_opt() { return 0; } static V run(T &v,T a) { v = log(a); } // \todo detect NANs }; @@ -197,51 +196,61 @@ namespace VecOp { template class f_lwr { public: + static I rbin_opt() { return 1; } static V rbin(T &v,T a,T b) { v = a < b?1:0; } }; template class f_gtr { public: + static I rbin_opt() { return 1; } static V rbin(T &v,T a,T b) { v = a > b?1:0; } }; template class f_alwr { public: + static I rbin_opt() { return 0; } static V rbin(T &v,T a,T b) { v = fabs(a) < fabs(b)?1:0; } }; template class f_agtr { public: + static I rbin_opt() { return 0; } static V rbin(T &v,T a,T b) { v = fabs(a) > fabs(b)?1:0; } }; template class f_leq { public: + static I rbin_opt() { return 1; } static V rbin(T &v,T a,T b) { v = a <= b?1:0; } }; template class f_geq { public: + static I rbin_opt() { return 1; } static V rbin(T &v,T a,T b) { v = a >= b?1:0; } }; template class f_aleq { public: + static I rbin_opt() { return 0; } static V rbin(T &v,T a,T b) { v = fabs(a) <= fabs(b)?1:0; } }; template class f_ageq { public: + static I rbin_opt() { return 0; } static V rbin(T &v,T a,T b) { v = fabs(a) >= fabs(b)?1:0; } }; template class f_equ { public: + static I rbin_opt() { return 1; } static V rbin(T &v,T a,T b) { v = a == b?1:0; } }; template class f_neq { public: + static I rbin_opt() { return 1; } static V rbin(T &v,T a,T b) { v = a != b?1:0; } }; @@ -249,8 +258,10 @@ namespace VecOp { template class f_min { public: + static I rbin_opt() { return 1; } static V rbin(T &v,T a,T b) { v = a < b?a:b; } + static I cbin_opt() { return 0; } static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { if(sqabs(ra,ia) < sqabs(rb,ib)) rv = ra,iv = ia; @@ -260,8 +271,10 @@ namespace VecOp { template class f_max { public: + static I rbin_opt() { return 1; } static V rbin(T &v,T a,T b) { v = a > b?a:b; } + static I cbin_opt() { return 0; } static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { if(sqabs(ra,ia) > sqabs(rb,ib)) rv = ra,iv = ia; @@ -271,6 +284,7 @@ namespace VecOp { template class f_minmax { public: + static I cun_opt() { return 0; } static V cun(T &rv,T &iv,T ra,T ia) { if(ra < ia) rv = ra,iv = ia; @@ -280,11 +294,13 @@ namespace VecOp { template class f_minq { public: + static I rop_opt() { return 0; } static V rop(T &,T ra,OpParam &p) { if(ra < p.norm.minmax) p.norm.minmax = ra; } + static I cop_opt() { return 0; } static V cop(T &,T &,T ra,T ia,OpParam &p) { register T s = sqabs(ra,ia); @@ -294,11 +310,13 @@ namespace VecOp { template class f_maxq { public: + static I rop_opt() { return 0; } static V rop(T &,T ra,OpParam &p) { if(ra > p.norm.minmax) p.norm.minmax = ra; } + static I cop_opt() { return 0; } static V cop(T &,T &,T ra,T ia,OpParam &p) { register T s = sqabs(ra,ia); @@ -308,6 +326,7 @@ namespace VecOp { template class f_aminq { public: + static I rop_opt() { return 0; } static V rop(T &,T ra,OpParam &p) { register T s = fabs(ra); @@ -317,6 +336,7 @@ namespace VecOp { template class f_amaxq { public: + static I rop_opt() { return 0; } static V rop(T &,T ra,OpParam &p) { register T s = fabs(ra); @@ -329,8 +349,10 @@ namespace VecOp { template class f_gate { public: + static I rbin_opt() { return 0; } static V rbin(T &rv,T ra,T rb) { rv = fabs(ra) >= rb?ra:0; } + static I cbin_opt() { return 0; } static V cbin(T &rv,T &iv,T ra,T ia,T rb,T) { register const T _abs = sqabs(ra,ia); @@ -342,8 +364,10 @@ namespace VecOp { template class f_igate { public: + static I rbin_opt() { return 0; } static V rbin(T &rv,T ra,T rb) { rv = fabs(ra) <= rb?ra:0; } + static I cbin_opt() { return 0; } static V cbin(T &rv,T &iv,T ra,T ia,T rb,T) { register const T _abs = sqabs(ra,ia); @@ -357,6 +381,7 @@ namespace VecOp { template class f_norm { public: + static I cun_opt() { return 0; } static V cun(T &rv,T &iv,T ra,T ia) { register T f = sqabs(ra,ia); @@ -367,21 +392,25 @@ namespace VecOp { template class f_conj { public: + static I cun_opt() { return 2; } static V cun(T &,T &iv,T,T ia) { iv = -ia; } }; template class f_polar { public: + static I cun_opt() { return 0; } static V cun(T &rv,T &iv,T ra,T ia) { rv = sqrt(sqabs(ra,ia)),iv = arg(ra,ia); } }; template class f_rect { public: + static I cun_opt() { return 0; } static V cun(T &rv,T &iv,T ra,T ia) { rv = ra*cos(ia),iv = ra*sin(ia); } }; template class f_radd { public: + static I cbin_opt() { return 0; } static V cbin(T &rv,T &iv,T ra,T ia,T rb,T) { register const T _abs = sqrt(sqabs(ra,ia))+rb; @@ -401,6 +430,7 @@ namespace VecOp { \param v destination vasp (NULL for in-place operation) \return normalized destination vasp */ + static I run_opt() { return 0; } static V run(T &v,T a) { if(a != a) // NAN @@ -415,30 +445,75 @@ namespace VecOp { } } }; + } +#define DEFOP(T,FUN,OP,KIND) \ +namespace VecOp { inline BL FUN(OpParam &p) { return D__##KIND >(p); } } + + +#define DEFVEC_R(T,OP) \ + static BL r_##OP (I len,T *dr,I rds,const T *sr,I rss) { return VecOp::V__rbin >(sr,rss,dr,rds,len); } \ + static BL v_##OP##_(I layers,const T *sr,T *dr,const T *ar,I len) { return VecOp::V__vbin >(layers,sr,dr,ar,len); } \ + static BL v_##OP (I dim,const I *dims,I layers,T *dr,const T *sr,const T *ar) { return VecOp::V__vmulti(v_##OP##_,layers,sr,dr,ar,dim,dims); } + +#define DEFVEC_C(T,OP) \ + static BL c_##OP (I len,T *dr,T *di,I rds,I ids,const T *sr,I rss,I iss) { return VecOp::V__cbin >(sr,rss,iss,dr,rds,ids,len); } + +#define DEFVEC_B(T,OP) DEFVEC_R(T,OP) DEFVEC_C(T,OP) + + template class VecFun { public: - // strided real data - static BL r_add(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } - static BL r_sub(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } - static BL r_subr(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } - static BL r_mul(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } - static BL r_div(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } - static BL r_divr(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } - static BL r_mod(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } - - // multi-layer data (non-strided) - static BL v_add(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } - static BL v_sub(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } - static BL v_subr(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } - static BL v_mul(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } - static BL v_div(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } - static BL v_divr(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } - static BL v_mod(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } + DEFVEC_B(T,copy) + + DEFVEC_B(T,add) + DEFVEC_B(T,sub) + DEFVEC_B(T,subr) + DEFVEC_B(T,mul) + DEFVEC_B(T,div) + DEFVEC_B(T,divr) + DEFVEC_R(T,mod) + DEFVEC_B(T,abs) + DEFVEC_R(T,sign) + DEFVEC_B(T,sqr) + DEFVEC_R(T,ssqr) + + DEFVEC_C(T,powi) + DEFVEC_B(T,pow) + DEFVEC_R(T,sqrt) + DEFVEC_R(T,ssqrt) + DEFVEC_R(T,exp) + DEFVEC_R(T,log) + + DEFVEC_R(T,lwr) + DEFVEC_R(T,gtr) + DEFVEC_R(T,alwr) + DEFVEC_R(T,agtr) + DEFVEC_R(T,leq) + DEFVEC_R(T,geq) + DEFVEC_R(T,aleq) + DEFVEC_R(T,ageq) + DEFVEC_R(T,equ) + DEFVEC_R(T,neq) + + DEFVEC_B(T,min) + DEFVEC_B(T,max) + DEFVEC_C(T,minmax) + DEFVEC_C(T,gate) + DEFVEC_C(T,igate) + + DEFVEC_C(T,norm) + DEFVEC_C(T,conj) + DEFVEC_C(T,polar) + DEFVEC_C(T,rect) + DEFVEC_C(T,radd) + + DEFVEC_R(T,fix) }; + #endif diff --git a/externals/grill/vasp/source/oploop.h b/externals/grill/vasp/source/oploop.h index 888a3f84..603cd7ce 100755 --- a/externals/grill/vasp/source/oploop.h +++ b/externals/grill/vasp/source/oploop.h @@ -11,7 +11,7 @@ WARRANTIES, see the file, "license.txt," in this distribution. #ifndef __VASP_OPLOOP_H #define __VASP_OPLOOP_H -#define _D_BLOCK 1024 +#define _D_BLOCK 4096 #define _D_MIN(a,b) ((a) < (b)?(a):(b)) @@ -97,4 +97,6 @@ WARRANTIES, see the file, "license.txt," in this distribution. #endif +#define _DQ_LOOP(UNROLL,VAR,LEN,BODY) { if(UNROLL) _DE_LOOP(VAR,LEN,BODY) else _DF_LOOP(VAR,LEN,BODY) } + #endif diff --git a/externals/grill/vasp/source/ops_arith.cpp b/externals/grill/vasp/source/ops_arith.cpp index 5eb11e7d..fb8ad4b7 100644 --- a/externals/grill/vasp/source/ops_arith.cpp +++ b/externals/grill/vasp/source/ops_arith.cpp @@ -2,7 +2,7 @@ VASP modular - vector assembling signal processor / objects for Max/MSP and PD -Copyright (c) 2002 Thomas Grill (xovo@gmx.net) +Copyright (c) 2002-2003 Thomas Grill (xovo@gmx.net) For information on usage and redistribution, and for a DISCLAIMER OF ALL WARRANTIES, see the file, "license.txt," in this distribution. @@ -31,5 +31,5 @@ VASP_UNARY("vasp.ssqr",ssqr,true,"Calculates the square with preservation of the // ----------------------------------------------------- VASP_UNARY("vasp.sign",sign,true,"Calculates the sign (signum function)") -VASP_UNARY("vasp.abs",abs,true,"Calulates the absolute value") +VASP_UNARY("vasp.abs",abs,true,"Calculates the absolute value") diff --git a/externals/grill/vasp/source/ops_arith.h b/externals/grill/vasp/source/ops_arith.h index 8d54d960..064ecb0e 100644 --- a/externals/grill/vasp/source/ops_arith.h +++ b/externals/grill/vasp/source/ops_arith.h @@ -15,35 +15,36 @@ WARRANTIES, see the file, "license.txt," in this distribution. // Arithmetic math functions +DEFOP(S,d_add,add,rbin) +DEFOP(S,d_sub,sub,rbin) +DEFOP(S,d_subr,subr,rbin) +DEFOP(S,d_mul,mul,rbin) +DEFOP(S,d_div,div,rbin) +DEFOP(S,d_divr,divr,rbin) +DEFOP(S,d_mod,mod,rbin) + +DEFOP(S,d_sqr,sqr,run) +DEFOP(S,d_ssqr,ssqr,run) + +DEFOP(S,d_sign,sign,run) +DEFOP(S,d_abs,abs,run) + + namespace VaspOp { - inline BL d_add(OpParam &p) { return VecOp::D__rbin >(p); } - inline BL d_sub(OpParam &p) { return VecOp::D__rbin >(p); } - inline BL d_subr(OpParam &p) { return VecOp::D__rbin >(p); } - inline BL d_mul(OpParam &p) { return VecOp::D__rbin >(p); } - inline BL d_div(OpParam &p) { return VecOp::D__rbin >(p); } - inline BL d_divr(OpParam &p) { return VecOp::D__rbin >(p); } - inline BL d_mod(OpParam &p) { return VecOp::D__rbin >(p); } - - inline BL d_sqr(OpParam &p) { return VecOp::D__run >(p); } - inline BL d_ssqr(OpParam &p) { return VecOp::d__run >(p); } - - inline BL d_sign(OpParam &p) { return VecOp::D__run >(p); } - inline BL d_abs(OpParam &p) { return VecOp::D__run >(p); } - - - inline Vasp *m_add(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_add); } // add to (one vec or real) - inline Vasp *m_sub(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_sub); } // sub from (one vec or real) - inline Vasp *m_subr(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_subr); } // reverse sub from (one vec or real) - inline Vasp *m_mul(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_mul); } // mul with (one vec or real) - inline Vasp *m_div(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_div); } // div by (one vec or real) - inline Vasp *m_divr(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_divr); } // reverse div by (one vec or real) - inline Vasp *m_mod(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_mod); } // modulo by (one vec or real) - - inline Vasp *m_sqr(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,d_sqr); } // unsigned square - inline Vasp *m_ssqr(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,d_ssqr); } // signed square - - inline Vasp *m_sign(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,d_sign); } // sign function - inline Vasp *m_abs(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,d_abs); } // absolute values + + inline Vasp *m_add(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_add); } // add to (one vec or real) + inline Vasp *m_sub(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_sub); } // sub from (one vec or real) + inline Vasp *m_subr(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_subr); } // reverse sub from (one vec or real) + inline Vasp *m_mul(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_mul); } // mul with (one vec or real) + inline Vasp *m_div(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_div); } // div by (one vec or real) + inline Vasp *m_divr(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_divr); } // reverse div by (one vec or real) + inline Vasp *m_mod(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_mod); } // modulo by (one vec or real) + + inline Vasp *m_sqr(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,VecOp::d_sqr); } // unsigned square + inline Vasp *m_ssqr(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,VecOp::d_ssqr); } // signed square + + inline Vasp *m_sign(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,VecOp::d_sign); } // sign function + inline Vasp *m_abs(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,VecOp::d_abs); } // absolute values } #endif diff --git a/externals/grill/vasp/source/ops_assign.h b/externals/grill/vasp/source/ops_assign.h index a495eb3b..78a3392e 100644 --- a/externals/grill/vasp/source/ops_assign.h +++ b/externals/grill/vasp/source/ops_assign.h @@ -15,13 +15,12 @@ WARRANTIES, see the file, "license.txt," in this distribution. // Assignment functions -namespace VecOp { - inline BL d_copy(OpParam &p) { return D__run >(p); } - inline BL d_ccopy(OpParam &p) { return D__cun >(p); } +DEFOP(S,d_copy,copy,run) +DEFOP(S,d_ccopy,copy,cun) + +DEFOP(S,d_set,set,rbin) +DEFOP(S,d_cset,set,cbin) - inline BL d_set(OpParam &p) { return D__rbin >(p); } - inline BL d_cset(OpParam &p) { return D__cbin >(p); } -} namespace VaspOp { inline Vasp *m_set(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_set); } // copy to (one vec or real) diff --git a/externals/grill/vasp/source/ops_carith.h b/externals/grill/vasp/source/ops_carith.h index 0b049fb4..04514a10 100644 --- a/externals/grill/vasp/source/ops_carith.h +++ b/externals/grill/vasp/source/ops_carith.h @@ -15,20 +15,19 @@ WARRANTIES, see the file, "license.txt," in this distribution. // Arithmetic math functions -namespace VecOp { - inline BL d_cadd(OpParam &p) { return D__cbin >(p); } - inline BL d_csub(OpParam &p) { return D__cbin >(p); } - inline BL d_csubr(OpParam &p) { return D__cbin >(p); } - inline BL d_cmul(OpParam &p) { return D__cbin >(p); } - inline BL d_cdiv(OpParam &p) { return d__cbin >(p); } - inline BL d_cdivr(OpParam &p) { return d__cbin >(p); } +DEFOP(S,d_cadd,add,cbin) +DEFOP(S,d_csub,sub,cbin) +DEFOP(S,d_csubr,subr,cbin) +DEFOP(S,d_cmul,mul,cbin) +DEFOP(S,d_cdiv,div,cbin) +DEFOP(S,d_cdivr,divr,cbin) - inline BL d_csqr(OpParam &p) { return D__cun >(p); } +DEFOP(S,d_csqr,sqr,cun) - inline BL d_cpowi(OpParam &p) { return d__cop >(p); } +DEFOP(S,d_cpowi,powi,cop) + +DEFOP(S,d_cabs,abs,cun) - inline BL d_cabs(OpParam &p) { return D__cun >(p); } -} namespace VaspOp { inline Vasp *m_cadd(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_cbin(p,src,arg,dst,VecOp::d_cadd); } // complex add (pairs of vecs or complex) diff --git a/externals/grill/vasp/source/ops_cmp.h b/externals/grill/vasp/source/ops_cmp.h index 1302120f..ae445b94 100644 --- a/externals/grill/vasp/source/ops_cmp.h +++ b/externals/grill/vasp/source/ops_cmp.h @@ -15,37 +15,36 @@ WARRANTIES, see the file, "license.txt," in this distribution. // Comparison functions -namespace VecOp { - inline BL d_lwr(OpParam &p) { return D__rbin >(p); } - inline BL d_gtr(OpParam &p) { return D__rbin >(p); } - inline BL d_alwr(OpParam &p) { return D__rbin >(p); } - inline BL d_agtr(OpParam &p) { return D__rbin >(p); } - inline BL d_leq(OpParam &p) { return D__rbin >(p); } - inline BL d_geq(OpParam &p) { return D__rbin >(p); } - inline BL d_aleq(OpParam &p) { return D__rbin >(p); } - inline BL d_ageq(OpParam &p) { return D__rbin >(p); } - inline BL d_equ(OpParam &p) { return D__rbin >(p); } - inline BL d_neq(OpParam &p) { return D__rbin >(p); } - - inline BL d_min(OpParam &p) { return D__rbin >(p); } - inline BL d_max(OpParam &p) { return D__rbin >(p); } - inline BL d_rmin(OpParam &p) { return d__cbin >(p); } - inline BL d_rmax(OpParam &p) { return d__cbin >(p); } - - inline BL d_minmax(OpParam &p) { return d__cun >(p); } - - inline BL d_minq(OpParam &p) { return D__rop >(p); } - inline BL d_maxq(OpParam &p) { return D__rop >(p); } - inline BL d_rminq(OpParam &p) { return d__cop >(p); } - inline BL d_rmaxq(OpParam &p) { return d__cop >(p); } - inline BL d_aminq(OpParam &p) { return d__rop >(p); } - inline BL d_amaxq(OpParam &p) { return d__rop >(p); } - - inline BL d_gate(OpParam &p) { return D__rbin >(p); } - inline BL d_igate(OpParam &p) { return d__rbin >(p); } - inline BL d_rgate(OpParam &p) { return d__cbin >(p); } - inline BL d_rigate(OpParam &p) { return d__cbin >(p); } -} +DEFOP(S,d_lwr,lwr,rbin) +DEFOP(S,d_gtr,gtr,rbin) +DEFOP(S,d_alwr,alwr,rbin) +DEFOP(S,d_agtr,agtr,rbin) +DEFOP(S,d_leq,leq,rbin) +DEFOP(S,d_geq,geq,rbin) +DEFOP(S,d_aleq,aleq,rbin) +DEFOP(S,d_ageq,ageq,rbin) +DEFOP(S,d_equ,equ,rbin) +DEFOP(S,d_neq,neq,rbin) + +DEFOP(S,d_min,min,rbin) +DEFOP(S,d_max,max,rbin) +DEFOP(S,d_rmin,min,cbin) +DEFOP(S,d_rmax,max,cbin) + +DEFOP(S,d_minmax,minmax,cun) + +DEFOP(S,d_minq,minq,rop) +DEFOP(S,d_maxq,maxq,rop) +DEFOP(S,d_rminq,minq,cop) +DEFOP(S,d_rmaxq,maxq,cop) +DEFOP(S,d_aminq,aminq,rop) +DEFOP(S,d_amaxq,amaxq,rop) + +DEFOP(S,d_gate,gate,rbin) +DEFOP(S,d_igate,igate,rbin) +DEFOP(S,d_rgate,gate,cbin) +DEFOP(S,d_rigate,igate,cbin) + namespace VaspOp { inline Vasp *m_lwr(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_lwr); } // lower than diff --git a/externals/grill/vasp/source/ops_cplx.h b/externals/grill/vasp/source/ops_cplx.h index c77de675..696aae55 100644 --- a/externals/grill/vasp/source/ops_cplx.h +++ b/externals/grill/vasp/source/ops_cplx.h @@ -15,16 +15,15 @@ WARRANTIES, see the file, "license.txt," in this distribution. // Complex functions -namespace VecOp { - inline BL d_polar(OpParam &p) { return d__cun >(p); } - inline BL d_rect(OpParam &p) { return d__cun >(p); } +DEFOP(S,d_polar,polar,cun) +DEFOP(S,d_rect,rect,cun) - inline BL d_radd(OpParam &p) { return d__cbin >(p); } +DEFOP(S,d_radd,radd,cbin) - inline BL d_cnorm(OpParam &p) { return d__cun >(p); } +DEFOP(S,d_cnorm,norm,cun) + +DEFOP(S,d_cconj,conj,cun) - inline BL d_cconj(OpParam &p) { return D__cun >(p); } -} namespace VaspOp { inline Vasp *m_polar(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_cun(p,src,dst,VecOp::d_polar); } // cartesian -> polar (each two) diff --git a/externals/grill/vasp/source/ops_flt.h b/externals/grill/vasp/source/ops_flt.h index ea7fe5ce..debb4039 100644 --- a/externals/grill/vasp/source/ops_flt.h +++ b/externals/grill/vasp/source/ops_flt.h @@ -21,10 +21,10 @@ namespace VecOp { BL d_int(OpParam &p); BL d_dif(OpParam &p); - - inline BL d_fix(OpParam &p) { return D__run >(p); } } +DEFOP(S,d_fix,fix,run) + namespace VaspOp { // passive filters Vasp *m_fhp(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL,BL hp = true); //! hi pass diff --git a/externals/grill/vasp/source/ops_trnsc.h b/externals/grill/vasp/source/ops_trnsc.h index 73617d9a..408aeb9b 100644 --- a/externals/grill/vasp/source/ops_trnsc.h +++ b/externals/grill/vasp/source/ops_trnsc.h @@ -15,16 +15,14 @@ WARRANTIES, see the file, "license.txt," in this distribution. // Transcendent math functions -namespace VecOp { - inline BL d_pow(OpParam &p) { return d__rbin >(p); } - inline BL d_rpow(OpParam &p) { return d__cbin >(p); } +DEFOP(S,d_pow,pow,rbin) +DEFOP(S,d_rpow,pow,cbin) - inline BL d_sqrt(OpParam &p) { return d__run >(p); } - inline BL d_ssqrt(OpParam &p) { return d__run >(p); } +DEFOP(S,d_sqrt,sqrt,run) +DEFOP(S,d_ssqrt,ssqrt,run) - inline BL d_exp(OpParam &p) { return d__run >(p); } - inline BL d_log(OpParam &p) { return d__run >(p); } -} +DEFOP(S,d_exp,exp,run) +DEFOP(S,d_log,log,run) namespace VaspOp { inline Vasp *m_pow(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_pow); } // power diff --git a/externals/grill/vasp/source/opvecs.cpp b/externals/grill/vasp/source/opvecs.cpp index e29f39bc..f78210be 100644 --- a/externals/grill/vasp/source/opvecs.cpp +++ b/externals/grill/vasp/source/opvecs.cpp @@ -418,7 +418,7 @@ CVecBlock *VaspOp::GetCVecs(const C *op,CVasp &src,const CVasp &arg,CVasp *dst,I \remark operative function must be capable of handling reversed direction */ -Vasp *VaspOp::DoOp(RVecBlock *vecs,VecOp::opfun *fun,OpParam &p,BL symm) +Vasp *VaspOp::DoOp(RVecBlock *vecs,opfun *fun,OpParam &p,BL symm) { BL ok = true; @@ -521,7 +521,7 @@ Vasp *VaspOp::DoOp(RVecBlock *vecs,VecOp::opfun *fun,OpParam &p,BL symm) \remark operative function must be capable of handling reversed direction */ -Vasp *VaspOp::DoOp(CVecBlock *vecs,VecOp::opfun *fun,OpParam &p,BL symm) +Vasp *VaspOp::DoOp(CVecBlock *vecs,opfun *fun,OpParam &p,BL symm) { BL ok = true; diff --git a/externals/grill/vasp/source/util.cpp b/externals/grill/vasp/source/util.cpp index 417b74c2..0b1050c1 100644 --- a/externals/grill/vasp/source/util.cpp +++ b/externals/grill/vasp/source/util.cpp @@ -12,6 +12,7 @@ WARRANTIES, see the file, "license.txt," in this distribution. #include "util.h" #include +/* R arg(R re,R im) { if(re) @@ -20,4 +21,4 @@ R arg(R re,R im) if(im || re) return im > 0?PI/2:-PI/2; else return 0; } - +*/ diff --git a/externals/grill/vasp/source/util.h b/externals/grill/vasp/source/util.h index 1b41c447..859f37e7 100644 --- a/externals/grill/vasp/source/util.h +++ b/externals/grill/vasp/source/util.h @@ -20,14 +20,32 @@ WARRANTIES, see the file, "license.txt," in this distribution. #define BIG 1.e10 -R arg(R re,R im); -inline R arg(const CX &c) { return arg(c.real,c.imag); } -inline F sqabs(F re,F im) { return re*re+im*im; } -inline F sqabs(const CX &c) { return sqabs(c.real,c.imag); } -inline F sgn(F x) { return x < 0.?-1.F:1.F; } -inline V swap(F &a,F &b) { F c = a; a = b; b = c; } - -inline I min(I a,I b) { return a < b?a:b; } -inline I max(I a,I b) { return a > b?a:b; } +template +inline V swap(T &a,T &b) { T c = a; a = b; b = c; } + +template +inline T min(T a,T b) { return a < b?a:b; } + +template +inline T max(T a,T b) { return a > b?a:b; } + + +template +T arg(T re,T im) +{ + if(re) + return (T)(fmod(atan(im/re)+(re < 0?2*PI:PI),2*PI)-PI); + else + if(im || re) return (T)(im > 0?PI/2:-PI/2); + else return 0; +} + +template +inline T sgn(T x) { return (T)(x?(x < 0?-1:1):0); } + +template +inline T sqabs(T re,T im) { return re*re+im*im; } + + #endif diff --git a/externals/grill/vasp/vasp.cw b/externals/grill/vasp/vasp.cw index 212243fb..1601dbcb 100644 Binary files a/externals/grill/vasp/vasp.cw and b/externals/grill/vasp/vasp.cw differ -- cgit v1.2.1