27 files changed, 627 insertions, 331 deletions
diff --git a/externals/grill/flext/flext.cw b/externals/grill/flext/flext.cw
index b418effb..9d5eecd8 100644
--- a/externals/grill/flext/flext.cw
+++ b/externals/grill/flext/flext.cw
diff --git a/externals/grill/flext/makefile.pd-cygwin b/externals/grill/flext/makefile.pd-cygwin
index 60cfa4f5..5239346d 100644
--- a/externals/grill/flext/makefile.pd-cygwin
+++ b/externals/grill/flext/makefile.pd-cygwin
@@ -17,7 +17,7 @@ include $(CONFIG)
 
 
 # compiler+linker stuff
-INCLUDES=$(PDPATH)/src 
+INCLUDES=/usr/include $(PDPATH)/src 
 
 FLAGS=-DFLEXT_SYS=2 -fno-exceptions $(UFLAGS)
 CFLAGS=-O6 
@@ -55,7 +55,7 @@ TARGET_D=$(TARGDIR)/$(NAME)_d-pdwin.lib
 TARGET_T=$(TARGDIR)/$(NAME)_t-pdwin.lib
 TARGET_TD=$(TARGDIR)/$(NAME)_td-pdwin.lib
 
-all: $(TARGDIR) $(TARGET) $(TARGET_D) # $(TARGET_T) $(TARGET_TD)
+all: $(TARGDIR) $(TARGET) $(TARGET_D) $(TARGET_T) $(TARGET_TD)
 
 $(TARGDIR):
 	-mkdir $(TARGDIR)
@@ -102,7 +102,7 @@ $(INSTDIR):
 install:: $(INSTDIR)
 endif
 
-install:: $(TARGET) $(TARGET_D) $(patsubst %,$(SRCDIR)/%,$(HDRS)) # $(TARGET_T) $(TARGET_TD)  
+install:: $(TARGET) $(TARGET_D) $(TARGET_T) $(TARGET_TD) $(patsubst %,$(SRCDIR)/%,$(HDRS)) 
 	cp $^ $(INSTDIR)
 
 
diff --git a/externals/grill/vasp/build-pd-cygwin.sh b/externals/grill/vasp/build-pd-cygwin.sh
new file mode 100644
index 00000000..94778444
--- /dev/null
+++ b/externals/grill/vasp/build-pd-cygwin.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# Build VASP for PD under cygwin, then run the install steps enabled in the config.
+SYS=pd-cygwin
+# Read INSTDIR/HELPDIR; the explicit "./" stops sh from looking the file up in PATH.
+. ./config-${SYS}.txt
+# Install only after a successful build; the quoted -n tests skip a step whose directory is left blank.
+make -f makefile.${SYS} &&
+{
+	if [ -n "$INSTDIR" ]; then
+		make -f makefile.${SYS} install
+	fi
+	if [ -n "$HELPDIR" ]; then
+		make -f makefile.${SYS} install-help
+	fi
+}
diff --git a/externals/grill/vasp/changes.txt b/externals/grill/vasp/changes.txt
index 7baeedc2..23e0f917 100644
--- a/externals/grill/vasp/changes.txt
+++ b/externals/grill/vasp/changes.txt
@@ -4,6 +4,7 @@ Version history:
 - CHANGE: exchanged #defines for templates in vector calculation loops
 - FIX: bug for binary operations of complex vasps
 - ADD: implemented (factor-4) loop unrolling for vector calculation loops
+- CHANGE: completely restructured vector optimization and introduced multi-dimensional multi-layered functionality
 
 0.1.2:
 - FIX: bug in vasp.frames* ... wrong argument
diff --git a/externals/grill/vasp/config-pd-cygwin.txt b/externals/grill/vasp/config-pd-cygwin.txt
new file mode 100644
index 00000000..d9932ada
--- /dev/null
+++ b/externals/grill/vasp/config-pd-cygwin.txt
@@ -0,0 +1,35 @@
+# VASP - vector assembling vector processor
+# Copyright(c) 2002-2003 Thomas Grill (xovo@gmx.net)
+#
+
+# your C++ compiler (define only if it is not g++)
+# CXX=g++
+
+# where does the PD installation reside?
+PD=/cygdrive/c/programme/audio/pd
+
+# where are the PD header files?
+# leave it blank if it is a system directory (like /usr/local/include),
+# 	since gcc 3.2 complains about it
+PDINC=${PD}/src
+
+# where do the flext libraries reside?
+FLEXTPATH=${PD}/flext
+
+# where should the VASP object files and the resulting vasp.dll be built?
+TARGDIR=./pd-cygwin
+
+# where should VASP be installed?
+# (leave blank to omit installation)
+# a subfolder "vasp" will be created for the files
+INSTDIR=${PD}/extra
+
+# where should the VASP help be installed?
+# (leave blank to omit installation)
+# a subfolder "vasp" will be created for the files
+HELPDIR=${PD}/doc/5.reference
+
+# additional compiler flags
+# (check if they fit to your system!)
+UFLAGS=-mcpu=pentium3 -msse -mfpmath=sse -fprefetch-loop-arrays
+# UFLAGS=-mcpu=pentiumpro
diff --git a/externals/grill/vasp/config-pd-linux.txt b/externals/grill/vasp/config-pd-linux.txt
index ca1188dc..9aca43b3 100644
--- a/externals/grill/vasp/config-pd-linux.txt
+++ b/externals/grill/vasp/config-pd-linux.txt
@@ -28,3 +28,8 @@ INSTDIR=${PD}/extra
 # (leave blank to omit installation)
 # a subfolder "vasp" will be created for the files
 HELPDIR=${PD}/doc/5.reference
+
+# additional compiler flags
+# (check if they fit to your system!)
+# UFLAGS=-mcpu=pentium3 -msse -mfpmath=sse -fprefetch-loop-arrays
+UFLAGS=-mcpu=pentiumpro
diff --git a/externals/grill/vasp/makefile.pd-cygwin b/externals/grill/vasp/makefile.pd-cygwin
new file mode 100644
index 00000000..454c3167
--- /dev/null
+++ b/externals/grill/vasp/makefile.pd-cygwin
@@ -0,0 +1,83 @@
+# VASP - vector assembling vector processor
+# Copyright (c)2002-2003 Thomas Grill (xovo@gmx.net)
+#
+# Makefile for gcc @ cygwin
+#
+# usage:
+# to build run "make -f makefile.pd-cygwin"
+# to install (as root), do "make -f makefile.pd-cygwin install"
+# to install help, do "make -f makefile.pd-cygwin install-help"
+#
+
+include config-pd-cygwin.txt
+# link inputs: the flext static library (prerequisite of the target) and the PD DLLs given to the linker
+FLEXTLIB=$(FLEXTPATH)/flext_t-pdwin.lib 
+PDLIBS=$(PD)/bin/pd.dll $(PD)/bin/pthreadVC.dll
+
+# compiler stuff: each INCLUDES entry is turned into -I<dir>; FLAGS and CFLAGS are both passed to $(CXX)
+INCLUDES=/usr/include $(PDINC) 
+FLAGS=-DFLEXT_SYS=2 -DFLEXT_THREADS -fno-exceptions
+CFLAGS=-O6 ${UFLAGS} -fmove-all-movables -frerun-loop-opt
+#CFLAGS+=-funroll-loops -fmove-all-movables -freduce-all-givs -fschedule-insns2 -foptimize-register-move
+LDFLAGS=-Wl,-s
+# extra libraries; each entry is passed to the linker as -l<name>
+LIBS=m 
+
+# ----------------------------------------------
+# the rest can stay untouched
+# ----------------------------------------------
+
+NAME=vasp
+SRCDIR=source
+# NOTE(review): make-files.txt presumably defines the SRCS and HDRS lists used by the rules - confirm
+include make-files.txt
+# the sources list this makefile as a prerequisite, so editing it triggers a rebuild
+MAKEFILE=makefile.pd-cygwin
+TARGET=$(TARGDIR)/$(NAME).dll
+
+
+# default target: builds the external into $(TARGDIR)
+all: $(TARGDIR) $(TARGET)
+# a changed header or makefile marks every source older than it as modified, which forces its recompilation
+$(patsubst %,$(SRCDIR)/%,$(SRCS)): $(patsubst %,$(SRCDIR)/%,$(HDRS)) $(MAKEFILE)
+	touch $@
+# build directory; -p creates missing parents and succeeds if the directory already exists
+$(TARGDIR):
+	mkdir -p $@
+# order-only prerequisite: the directory must exist first (also under -j), but its mtime never forces a recompile
+$(TARGDIR)/%.o : $(SRCDIR)/%.cpp | $(TARGDIR)
+	$(CXX) -c $(CFLAGS) $(FLAGS) $(patsubst %,-I%,$(INCLUDES) $(FLEXTPATH)) $< -o $@
+
+$(TARGET) : $(patsubst %.cpp,$(TARGDIR)/%.o,$(SRCS)) $(FLEXTLIB)
+	$(CXX) $(LDFLAGS) -shared $^ ${PDLIBS} $(patsubst %,-l%,$(LIBS)) -o $@
+	strip --strip-unneeded $@
+	chmod 755 $@
+
+# installation directories; the "vasp" subfolder only needs its parent to exist, hence order-only
+$(INSTDIR):
+	mkdir -p $@
+
+$(INSTDIR)/vasp: | $(INSTDIR)
+	mkdir -p $@
+
+install:: $(INSTDIR)/vasp
+
+install:: $(TARGET) pd
+	cp -R $^ $(INSTDIR)/vasp
+#	chown -R root.root $(INSTDIR)/vasp
+
+# the help files are copied into their own "vasp" subfolder of $(HELPDIR)
+$(HELPDIR)/vasp:
+	mkdir -p $@
+
+install-help:: $(HELPDIR)/vasp
+
+install-help:: ./pd-help
+	cp $^/*.* $(HELPDIR)/vasp
+#	chown -R root.root $(HELPDIR)/vasp
+
+# these names are commands, not files; a stray file called "install" or "all" must not block them
+.PHONY: all install install-help clean
+clean:
+	rm -f $(TARGDIR)/*.o $(TARGET)
+	
diff --git a/externals/grill/vasp/makefile.pd-darwin b/externals/grill/vasp/makefile.pd-darwin
index 34c702d2..89e96614 100755
--- a/externals/grill/vasp/makefile.pd-darwin
+++ b/externals/grill/vasp/makefile.pd-darwin
@@ -1,5 +1,5 @@
 # VASP - vector assembling vector processor
-# Copyright (c)2002 Thomas Grill (xovo@gmx.net)
+# Copyright (c)2002-2003 Thomas Grill (xovo@gmx.net)
 #
 # Makefile for gcc @ OSX (darwin)
 #
@@ -17,8 +17,9 @@ FLEXTLIB=$(FLEXTPATH)/flext_t.a
 
 # compiler stuff
 INCLUDES=$(PDINC) 
-FLAGS=-DFLEXT_SYS=2 -DFLEXT_THREADS
-CFLAGS=-O6 ${UFLAGS} -Wno-unused -Wno-parentheses -Wno-switch -Wstrict-prototypes 
+FLAGS=-DFLEXT_SYS=2 -DFLEXT_THREADS -Wno-unused -Wno-parentheses -Wno-switch -Wstrict-prototypes -fno-exceptions 
+CFLAGS=-O6 ${UFLAGS} -fmove-all-movables -frerun-loop-opt -fprefetch-loop-arrays 
+#CFLAGS+=-funroll-loops -freduce-all-givs -fschedule-insns2 -foptimize-register-move
 LIBS=m
 LDFLAGS=-bundle -bundle_loader $(PDBIN)
 FRAMEWORKS=Carbon
diff --git a/externals/grill/vasp/makefile.pd-linux b/externals/grill/vasp/makefile.pd-linux
index a76cdbaa..6f9342cb 100644
--- a/externals/grill/vasp/makefile.pd-linux
+++ b/externals/grill/vasp/makefile.pd-linux
@@ -1,5 +1,5 @@
 # VASP - vector assembling vector processor
-# Copyright (c)2002 Thomas Grill (xovo@gmx.net)
+# Copyright (c)2002-2003 Thomas Grill (xovo@gmx.net)
 #
 # Makefile for gcc @ linux
 #
@@ -16,8 +16,8 @@ FLEXTLIB=$(FLEXTPATH)/flext_t.a
 # compiler stuff
 INCLUDES=$(PDINC) 
 FLAGS=-DFLEXT_SYS=2 -DFLEXT_THREADS -fno-exceptions
-CFLAGS=-O6 -mcpu=pentiumpro -fmove-all-movables -frerun-loop-opt -finline-functions # -funroll-loops 
-#CFLAGS=-O6 -mcpu=pentium3 -msse -mfpmath=sse -fmove-all-movables -frerun-loop-opt -finline-functions # -funroll-loops 
+CFLAGS=-O6 ${UFLAGS} -fmove-all-movables -frerun-loop-opt
+#CFLAGS+=-funroll-loops -fmove-all-movables -freduce-all-givs -fschedule-insns2 -foptimize-register-move
 LDFLAGS=-Wl,-s
 
 LIBS=m
diff --git a/externals/grill/vasp/source/main.cpp b/externals/grill/vasp/source/main.cpp
index 3b2c0f75..5f63895a 100644
--- a/externals/grill/vasp/source/main.cpp
+++ b/externals/grill/vasp/source/main.cpp
@@ -14,6 +14,8 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 
 const C *VASP_VERSION = "0.1.3pre1";
 
+#include "opfuns.h"
+
 V lib_setup()
 {
 	post("");
diff --git a/externals/grill/vasp/source/opbase.cpp b/externals/grill/vasp/source/opbase.cpp
index c33a5782..5abcb9fa 100644
--- a/externals/grill/vasp/source/opbase.cpp
+++ b/externals/grill/vasp/source/opbase.cpp
@@ -12,7 +12,7 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 #include "opbase.h"
 #include "opdefs.h"
 
-Vasp *VaspOp::m_run(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun fun) 
+Vasp *VaspOp::m_run(OpParam &p,CVasp &src,CVasp *dst,opfun fun) 
 { 
 	Vasp *ret = NULL;
 	RVecBlock *vecs = GetRVecs(p.opname,src,dst);
@@ -24,7 +24,7 @@ Vasp *VaspOp::m_run(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun fun)
 	return ret;
 }
 
-Vasp *VaspOp::m_cun(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun fun) 
+Vasp *VaspOp::m_cun(OpParam &p,CVasp &src,CVasp *dst,opfun fun) 
 { 
 	Vasp *ret = NULL;
 	CVecBlock *vecs = GetCVecs(p.opname,src,dst);
@@ -36,7 +36,7 @@ Vasp *VaspOp::m_cun(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun fun)
 	return ret;
 }
 
-Vasp *VaspOp::m_rbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp::opfun fun) 
+Vasp *VaspOp::m_rbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,opfun fun) 
 { 
 	Vasp *ret = NULL;
 	BL argvasp = arg.IsVasp();
@@ -58,7 +58,7 @@ Vasp *VaspOp::m_rbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp:
 	return ret;
 }
 
-Vasp *VaspOp::m_cbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp::opfun fun) 
+Vasp *VaspOp::m_cbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,opfun fun) 
 { 
 	Vasp *ret = NULL;
 	BL argvasp = arg.IsVasp();
@@ -83,8 +83,3 @@ Vasp *VaspOp::m_cbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp:
 
 	return ret;
 }
-
-#ifdef FLEXT_THREADS
-flext::ThrMutex VecOp::C_base::mtx;
-#endif
-
diff --git a/externals/grill/vasp/source/opbase.h b/externals/grill/vasp/source/opbase.h
index 3d9b81c6..bba4a3a6 100644
--- a/externals/grill/vasp/source/opbase.h
+++ b/externals/grill/vasp/source/opbase.h
@@ -16,96 +16,32 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 #include "vecblk.h"
 #include "opparam.h"
 
-namespace VecOp {
-	typedef BL opfun(OpParam &p);
-
-    class C_base {
-    public:
-    #ifdef FLEXT_THREADS
-        static flext::ThrMutex mtx;
-        static V Lock() { mtx.Lock(); }
-        static V Unlock() { mtx.Unlock(); }
-    #else
-        static V Lock() {}
-        static V Unlock() {}
-    #endif
-    };
-
-    template<class T> class C_run: public C_base {
-    public: 
-        static BL Do(V f(T &v,T a),OpParam &p) { Lock(); fun = f; _D__run<T,C_run<T> >(p); Unlock(); return true; }
-        static V run(T &v,T a) { fun(v,a); } 
-        static V (*fun)(T &v,T a);
-    };
-    template<class T> V (*C_run<T>::fun)(T &v,T a);
-
-    template<class T> class C_cun: public C_base {
-    public: 
-        static BL Do(V f(T &rv,T &iv,T ra,T ia),OpParam &p) { Lock(); fun = f; _D__cun<T,C_cun<T> >(p); Unlock(); return true; }
-        static V cun(T &rv,T &iv,T ra,T ia) { fun(rv,iv,ra,ia); } 
-        static V (*fun)(T &rv,T &iv,T ra,T ia);
-    };
-    template<class T> V (*C_cun<T>::fun)(T &rv,T &iv,T ra,T ia);
-
-    template<class T> class C_rbin: public C_base {
-    public: 
-        static BL Do(V f(T &v,T a,T b),OpParam &p) { Lock(); fun = f; _D__rbin<T,C_rbin<T> >(p); Unlock(); return true; }
-        static V rbin(T &v,T a,T b) { fun(v,a,b); } 
-        static V (*fun)(T &v,T a,T b);
-    };
-    template<class T> V (*C_rbin<T>::fun)(T &v,T a,T b);
-
-    template<class T> class C_cbin: public C_base {
-    public: 
-        static BL Do(V f(T &rv,T &iv,T ra,T ia,T rb,T ib),OpParam &p) { Lock(); fun = f; _D__cbin<T,C_cbin<T> >(p); Unlock(); return true; }
-        static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { fun(rv,iv,ra,ia,rb,ib); } 
-        static V (*fun)(T &rv,T &iv,T ra,T ia,T rb,T ib);
-    };
-    template<class T> V (*C_cbin<T>::fun)(T &rv,T &iv,T ra,T ia,T rb,T ib);
-
-    template<class T> class C_rop: public C_base {
-    public: 
-        static BL Do(V f(T &v,T a,OpParam &p),OpParam &p) { Lock(); fun = f; _D__rop<T,C_rop<T> >(p); Unlock(); return true; }
-        static V rop(T &v,T a,OpParam &p) { fun(v,a,p); } 
-        static V (*fun)(T &v,T a,OpParam &p);
-    };
-    template<class T> V (*C_rop<T>::fun)(T &v,T a,OpParam &p);
-
-    template<class T> class C_cop: public C_base {
-    public: 
-        static BL Do(V f(T &rv,T &iv,T ra,T ia,OpParam &p),OpParam &p) { Lock(); fun = f; _D__cop<T,C_cop<T> >(p); Unlock(); return true; }
-        static V cop(T &rv,T &iv,T ra,T ia,OpParam &p) { fun(rv,iv,ra,ia,p); } 
-        static V (*fun)(T &rv,T &iv,T ra,T ia,OpParam &p);
-    };
-    template<class T> V (*C_cop<T>::fun)(T &rv,T &iv,T ra,T ia,OpParam &p);
 
+namespace VaspOp {
+	typedef BL opfun(OpParam &p);
 
-    template<class T> BL _d__run(V fun(T &v,T a),OpParam &p)	{ return C_run<T>::Do(fun,p); }
-    template<class T> BL _d__cun(V fun(T &rv,T &iv,T ra,T ia),OpParam &p) { return C_cun<T>::Do(fun,p); }
-    template<class T> BL _d__rbin(V fun(T &v,T a,T b),OpParam &p) { return C_rbin<T>::Do(fun,p); }
-    template<class T> BL _d__cbin(V fun(T &rv,T &iv,T ra,T ia,T rb,T ib),OpParam &p) { return C_cbin<T>::Do(fun,p); }
-    template<class T> BL _d__rop(V fun(T &v,T a,OpParam &p),OpParam &p) { return C_rop<T>::Do(fun,p); }
-    template<class T> BL _d__cop(V fun(T &rv,T &iv,T ra,T ia,OpParam &p),OpParam &p) { return C_cop<T>::Do(fun,p); }
-}
-
+	// -------- prepare vectors and do vector operation -----------
+	// in opvecs.cpp
 
-namespace VaspOp {
 	RVecBlock *GetRVecs(const C *op,CVasp &src,CVasp *dst = NULL);
 	CVecBlock *GetCVecs(const C *op,CVasp &src,CVasp *dst = NULL,BL full = false);
 	RVecBlock *GetRVecs(const C *op,CVasp &src,const CVasp &arg,CVasp *dst = NULL,I multi = -1,BL ssize = true);
 	CVecBlock *GetCVecs(const C *op,CVasp &src,const CVasp &arg,CVasp *dst = NULL,I multi = -1,BL ssize = true,BL full = false);
 	
-	Vasp *DoOp(RVecBlock *vecs,VecOp::opfun *fun,OpParam &p,BL symm = false);
-	Vasp *DoOp(CVecBlock *vecs,VecOp::opfun *fun,OpParam &p,BL symm = false);
+	Vasp *DoOp(RVecBlock *vecs,opfun *fun,OpParam &p,BL symm = false);
+	Vasp *DoOp(CVecBlock *vecs,opfun *fun,OpParam &p,BL symm = false);
 
 	// -------- transformations -----------------------------------
+	// in opbase.cpp
 
 	// unary functions
-	Vasp *m_run(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun fun); // real unary (one vec or real)
-	Vasp *m_cun(OpParam &p,CVasp &src,CVasp *dst,VecOp::opfun fun); // complex unary (one vec or complex)
+	Vasp *m_run(OpParam &p,CVasp &src,CVasp *dst,opfun fun); // real unary (one vec or real)
+	Vasp *m_cun(OpParam &p,CVasp &src,CVasp *dst,opfun fun); // complex unary (one vec or complex)
+
 	// binary functions
-	Vasp *m_rbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp::opfun fun); // real binary (one vec or real)
-	Vasp *m_cbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,VecOp::opfun fun); // complex binary (one vec or complex)
+	Vasp *m_rbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,opfun fun); // real binary (one vec or real)
+	Vasp *m_cbin(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst,opfun fun); // complex binary (one vec or complex)
+
 }
 
 #endif
diff --git a/externals/grill/vasp/source/opdefs.h b/externals/grill/vasp/source/opdefs.h
index 47006db6..8d7d522c 100644
--- a/externals/grill/vasp/source/opdefs.h
+++ b/externals/grill/vasp/source/opdefs.h
@@ -2,7 +2,7 @@
 
 VASP modular - vector assembling signal processor / objects for Max/MSP and PD
 
-Copyright (c) 2002 Thomas Grill (xovo@gmx.net)
+Copyright (c) 2002-2003 Thomas Grill (xovo@gmx.net)
 For information on usage and redistribution, and for a DISCLAIMER OF ALL
 WARRANTIES, see the file, "license.txt," in this distribution.  
 
@@ -23,19 +23,48 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 
 namespace VecOp {
 
+// multi-layer templates
+
+// Layer-wise unary op: OP::run(v[k],a[k]) for k ascending; a non-zero LR fixes the layer count at compile time, otherwise n is used.
+template<class T,class OP,int LR>
+inline BL vec_un(T *v,const T *a,I n = 0) { 
+    for(I left = LR?LR:n; left > 0; --left) OP::run(*v++,*a++);
+    return true;
+}
+
+// Scalar-argument variant: every layer v[k] is processed with the same value a (layer count: LR if non-zero, else n).
+template<class T,class OP,int LR>
+inline BL vec_un(T *v,T a,I n = 0) { 
+    for(I left = LR?LR:n; left > 0; --left) OP::run(*v++,a);
+    return true;
+}
+
+// Layer-wise binary op: OP::rbin(v[k],a[k],b[k]) for k ascending; a non-zero LR fixes the layer count at compile time, otherwise n is used.
+template<class T,class TR,class OP,int LR>
+inline BL vec_bin(T *v,const T *a,const TR *b,I n = 0) { 
+    for(I left = LR?LR:n; left > 0; --left) OP::rbin(*v++,*a++,*b++);
+    return true;
+}
+
+// Scalar-argument variant: OP::rbin(v[k],a[k],b) with the same b for every layer (layer count: LR if non-zero, else n).
+template<class T,class TR,class OP,int LR>
+inline BL vec_bin(T *v,const T *a,TR b,I n = 0) { 
+    for(I left = LR?LR:n; left > 0; --left) OP::rbin(*v++,*a++,b);
+    return true;
+}
+
 /*! \brief skeleton for unary real operations
 */
-//template<class T,V FUN(T &v,T a)> BL V__run(register const T *sr,I rss,register T *dr,I rds,I frames)
 template<class T,class OP> BL V__run(register const T *sr,I rss,register T *dr,I rds,I frames)
 {																
     register I i;
-    if(sr == dr)												
-		if(_D_ALWAYS1 || rds == 1)							
+    if(sr == dr && OP::run_opt() >= 3)												
+		if((_D_ALWAYS1 || rds == 1) && OP::run_opt() >= 2)							
             _DE_LOOP(i,frames, ( OP::run(*dr,*dr), dr++ ) )
 		else													
 			_DE_LOOP(i,frames, ( OP::run(*dr,*dr), dr += rds ) )
 	else														
-		if(_D_ALWAYS1 || (rss == 1 && rds == 1))			
+		if((_D_ALWAYS1 || (rss == 1 && rds == 1)) && OP::run_opt() >= 2)			
 			_DE_LOOP(i,frames, ( OP::run(*dr,*sr), sr++,dr++ ) )
 		else													
 			_DE_LOOP(i,frames, ( OP::run(*dr,*sr), sr += rss,dr += rds ) )
@@ -48,13 +77,13 @@ template<class T,class OP> BL V__run(register const T *sr,I rss,register T *dr,I
 template<class T,class OP> BL V__cun(register const T *sr,register const T *si,I rss,I iss,register T *dr,register T *di,I rds,I ids,I frames)
 {																
     register I i;
-	if(sr == dr && si == di)									
-		if(_D_ALWAYS1 || (rds == 1 && ids == 1))			
+	if(sr == dr && si == di && OP::cun_opt() >= 3)									
+		if((_D_ALWAYS1 || (rds == 1 && ids == 1)) && OP::cun_opt() >= 2)			
             _DE_LOOP(i,frames, ( OP::cun(*dr,*di,*dr,*di), dr++,di++ ) )
 		else													
 			_DE_LOOP(i,frames, ( OP::cun(*dr,*di,*dr,*di), dr += rds,di += ids ) )
 	else														
-		if(_D_ALWAYS1 || (rss == 1 && iss == 1 && rds == 1 && ids == 1)) 
+		if((_D_ALWAYS1 || (rss == 1 && iss == 1 && rds == 1 && ids == 1)) && OP::cun_opt() >= 2) 
 			_DE_LOOP(i,frames, ( OP::cun(*dr,*di,*sr,*si), sr++,si++,dr++,di++ ) )
 		else													
 			_DE_LOOP(i,frames, ( OP::cun(*dr,*di,*sr,*si), sr += rss,si += iss,dr += rds,di += ids ) )
@@ -69,13 +98,13 @@ template<class T,class OP> BL V__vun(I layers,register const T *sr,register T *d
             V__run<T,OP>(sr,1,dr,1,frames); 
             break;
     case 2: 
-			_DF_LOOP(i,frames, ( vec_un<T,OP,2>(dr,sr), sr += 2, dr += 2) )
+			_DF_LOOP(i,frames, ( vec_un<T,OP,2>(dr,sr,2), sr += 2, dr += 2) )
             break;
     case 3: 
-			_DF_LOOP(i,frames, ( vec_un<T,OP,3>(dr,sr), sr += 3, dr += 3) )
+			_DF_LOOP(i,frames, ( vec_un<T,OP,3>(dr,sr,3), sr += 3, dr += 3) )
             break;
     case 4: 
-			_DF_LOOP(i,frames, ( vec_un<T,OP,4>(dr,sr), sr += 4, dr += 4) )
+			_DF_LOOP(i,frames, ( vec_un<T,OP,4>(dr,sr,4), sr += 4, dr += 4) )
             break;
     default:
 			_DF_LOOP(i,frames, ( vec_un<T,OP,0>(dr,sr,layers), sr += layers, dr += layers) )
@@ -88,13 +117,13 @@ template<class T,class OP> BL V__vun(I layers,register const T *sr,register T *d
 template<class T,class OP> BL V__rbin(register const T *sr,I rss,register T *dr,I rds,register const T *ar,I ras,I frames)
 {																
 	register I i;												
-	if(sr == dr)								
-		if(_D_ALWAYS1 || (rds == 1 && ras == 1))				
+	if(sr == dr && OP::rbin_opt() >= 3)								
+		if((_D_ALWAYS1 || (rds == 1 && ras == 1)) && OP::rbin_opt() >= 2)				
             _DE_LOOP(i,frames, ( OP::rbin(*dr,*dr,*ar), dr++,ar++ ) )
 		else												
 			_DE_LOOP(i,frames, ( OP::rbin(*dr,*dr,*ar), dr += rds,ar += ras ) )
 	else													
-		if(_D_ALWAYS1 || (rss == 1 && rds == 1 && ras == 1))	
+		if((_D_ALWAYS1 || (rss == 1 && rds == 1 && ras == 1)) && OP::rbin_opt() >= 2)	
 			_DE_LOOP(i,frames, ( OP::rbin(*dr,*sr,*ar), sr++,dr++,ar++ ) )
 		else												
 			_DE_LOOP(i,frames, ( OP::rbin(*dr,*sr,*ar), sr += rss,dr += rds,ar += ras ) )
@@ -104,8 +133,8 @@ template<class T,class OP> BL V__rbin(register const T *sr,I rss,register T *dr,
 template<class T,class OP> BL V__cbin(register const T *sr,register const T *si,I rss,I iss,register T *dr,register T *di,I ids,I rds,const T *ar,const T *ai,I ras,I ias,I frames)
 {																
 	register I i;												
-	if(sr == dr && si == di)							
-		if(_D_ALWAYS1 || (rds == 1 && ids == 1 && ras == 1 && ias == 1)) 
+	if(sr == dr && si == di && OP::cbin_opt() >= 3)							
+		if((_D_ALWAYS1 || (rds == 1 && ids == 1 && ras == 1 && ias == 1)) && OP::cbin_opt() >= 2) 
             _DE_LOOP(i,frames, ( OP::cbin(*dr,*di,*dr,*di,*ar,*ai), dr++,di++,ar++,ai++ ) )
 		else											
 			_DE_LOOP(i,frames, ( OP::cbin(*dr,*di,*dr,*di,*ar,*ai), dr += rds,di += ids,ar += ras,ai += ias ) )
@@ -117,45 +146,48 @@ template<class T,class OP> BL V__cbin(register const T *sr,register const T *si,
 
 template<class TR> class _A__vector {
 public:
+	static BL unroll() { return true; } // flag handed to _DQ_LOOP as its first argument by Vx__rbin/Vx__cbin; ev() below is a plain strided array read a[i*m]
     static TR ev(const TR *a,I i,I m) { return a[i*m]; }
 };
 
 template<class TR> class _A__scalar {
 public:
+	static BL unroll() { return true; } // flag handed to _DQ_LOOP as its first argument by Vx__rbin/Vx__cbin; ev() below ignores i and m and returns the constant a
     static TR ev(TR a,I i,I m) { return a; }
 };
 
 class _A__env {
 public:
+	static BL unroll() { return false; } // the only evaluator that returns false to _DQ_LOOP; NOTE(review): presumably because Env::Iter::ValFwd must be called with increasing i - confirm
     static R ev(Env::Iter &a,I i,I m) { return a.ValFwd(i); }
 };
 
 template<class T,class TA,class TR,class OP,class EVARG> BL Vx__rbin(register const T *sr,I rss,register T *dr,I rds,TA ar,I frames)
 {																
 	register I i;												
-	if(sr == dr)								
-		if(_D_ALWAYS1 || rds == 1)				
-            _DE_LOOP(i,frames, ( OP::rbin(*dr,*dr,EVARG::ev(ar,i,1)), dr++ ) )
+	if(sr == dr && OP::rbin_opt() >= 3)
+		if((_D_ALWAYS1 || rds == 1) && OP::rbin_opt() >= 2)				
+            _DQ_LOOP(EVARG::unroll(),i,frames, ( OP::rbin(*dr,*dr,EVARG::ev(ar,i,1)), dr++ ) )
 		else												
-            _DE_LOOP(i,frames, ( OP::rbin(*dr,*dr,EVARG::ev(ar,i,1)), dr += rds ) )
+            _DQ_LOOP(EVARG::unroll(),i,frames, ( OP::rbin(*dr,*dr,EVARG::ev(ar,i,1)), dr += rds ) )
 	else													
-		if(_D_ALWAYS1 || rss == 1 && rds == 1)	
-            _DE_LOOP(i,frames, ( OP::rbin(*dr,*sr,EVARG::ev(ar,i,1)), sr++,dr++ ) )
+		if((_D_ALWAYS1 || rss == 1 && rds == 1) && OP::rbin_opt() >= 2)	
+            _DQ_LOOP(EVARG::unroll(),i,frames, ( OP::rbin(*dr,*sr,EVARG::ev(ar,i,1)), sr++,dr++ ) )
 		else												
-            _DE_LOOP(i,frames, ( OP::rbin(*dr,*sr,EVARG::ev(ar,i,1)), sr += rss,dr += rds ) )
+            _DQ_LOOP(EVARG::unroll(),i,frames, ( OP::rbin(*dr,*sr,EVARG::ev(ar,i,1)), sr += rss,dr += rds ) )
     return true;
 }
 
 template<class T,class TA1,class TA2,class TR,class OP,class EVARG1,class EVARG2> BL Vx__cbin(register const T *sr,register const T *si,I rss,I iss,register T *dr,register T *di,I ids,I rds,TA1 ar,TA2 ai,I ras,I ias,I frames)
 {																
 	register I i;												
-	if(sr == dr && si == di)							
-		if(_D_ALWAYS1 || (rds == 1 && ids == 1 && ras == 1 && ias == 1)) 
-            _DE_LOOP(i,frames, ( OP::cbin(*dr,*di,*dr,*di,EVARG1::ev(ar,i,1),EVARG2::ev(ai,i,1)), dr++,di++ ) )
+	if(sr == dr && si == di && OP::cbin_opt() >= 3)							
+		if((_D_ALWAYS1 || (rds == 1 && ids == 1 && ras == 1 && ias == 1)) && OP::cbin_opt() >= 2) 
+            _DQ_LOOP(EVARG1::unroll() && EVARG2::unroll(),i,frames, ( OP::cbin(*dr,*di,*dr,*di,EVARG1::ev(ar,i,1),EVARG2::ev(ai,i,1)), dr++,di++ ) )
 		else											
-            _DE_LOOP(i,frames, ( OP::cbin(*dr,*di,*dr,*di,EVARG1::ev(ar,i,ras),EVARG2::ev(ai,i,ias)), dr += rds,di += ids ) )
+            _DQ_LOOP(EVARG1::unroll() && EVARG2::unroll(),i,frames, ( OP::cbin(*dr,*di,*dr,*di,EVARG1::ev(ar,i,ras),EVARG2::ev(ai,i,ias)), dr += rds,di += ids ) )
 	else												
-        _DE_LOOP(i,frames, ( OP::cbin(*dr,*di,*sr,*si,EVARG1::ev(ar,i,ras),EVARG2::ev(ai,i,ias)), sr += rss,si += iss,dr += rds,di += ids ) )
+        _DQ_LOOP(EVARG1::unroll() && EVARG2::unroll(),i,frames, ( OP::cbin(*dr,*di,*sr,*si,EVARG1::ev(ar,i,ras),EVARG2::ev(ai,i,ias)), sr += rss,si += iss,dr += rds,di += ids ) )
     return true;
 }
 
@@ -167,16 +199,16 @@ template<class T,class TA,class TR,class OP,class EVARG> BL Vx__vbin(I layers,re
             Vx__rbin<T,TA,TR,OP,EVARG>(sr,1,dr,1,ar,frames); 
             break;
     case 2: 
-			_DF_LOOP(i,frames, ( vec_bin<T,OP,2>(dr,sr,EVARG::ev(ar,i,2)), sr += 2, dr += 2) )
+			_DF_LOOP(i,frames, ( vec_bin<T,TR,OP,2>(dr,sr,EVARG::ev(ar,i,2),2), sr += 2, dr += 2) )
             break;
     case 3: 
-			_DF_LOOP(i,frames, ( vec_bin<T,OP,3>(dr,sr,EVARG::ev(ar,i,3)), sr += 3, dr += 3) )
+			_DF_LOOP(i,frames, ( vec_bin<T,TR,OP,3>(dr,sr,EVARG::ev(ar,i,3),3), sr += 3, dr += 3) )
             break;
     case 4: 
-			_DF_LOOP(i,frames, ( vec_bin<T,OP,4>(dr,sr,EVARG::ev(ar,i,4)), sr += 4, dr += 4) )
+			_DF_LOOP(i,frames, ( vec_bin<T,TR,OP,4>(dr,sr,EVARG::ev(ar,i,4),4), sr += 4, dr += 4) )
             break;
     default:
-			_DF_LOOP(i,frames, ( vec_bin<T,OP,0>(dr,sr,EVARG::ev(ar,i,layers),layers), sr += layers, dr += layers) )
+			_DF_LOOP(i,frames, ( vec_bin<T,TR,OP,0>(dr,sr,EVARG::ev(ar,i,layers),layers), sr += layers, dr += layers) )
             break;
     }
     return true;
@@ -250,13 +282,13 @@ template<class T,class OP> BL _F__cbin(OpParam &p)
 template<class T,class ARG,class OP> BL V__rop(ARG p,register const S *sr,I rss,register S *dr,I rds,I frames)
 {																
 	register I i;												
-	if(sr == dr)												
-		if(_D_ALWAYS1 || rds == 1)											
+	if(sr == dr && OP::rop_opt() >= 3)												
+		if((_D_ALWAYS1 || rds == 1) && OP::rop_opt() >= 2)											
             _DE_LOOP(i,frames, ( OP::rop(*dr,*dr,p), dr++ ) )
 		else													
 			_DE_LOOP(i,frames, ( OP::rop(*dr,*dr,p), dr += rds ) )
 	else														
-		if(_D_ALWAYS1 || (rss == 1 && p.rds == 1))			
+		if((_D_ALWAYS1 || (rss == 1 && p.rds == 1)) && OP::rop_opt() >= 2)			
 			_DE_LOOP(i,frames, ( OP::rop(*dr,*sr,p), sr++,dr++ ) )
 		else													
 			_DE_LOOP(i,frames, ( OP::rop(*dr,*sr,p), sr += rss,dr += rds ) )
@@ -268,13 +300,13 @@ template<class T,class ARG,class OP> BL V__rop(ARG p,register const S *sr,I rss,
 template<class T,class ARG,class OP> BL V__cop(ARG p,register const S *sr,register const S *si,I rss,I iss,register S *dr,register S *di,I rds,I ids,I frames)
 {																
 	register I i;												
-	if(sr == dr && si == di)									
-		if(_D_ALWAYS1 || (rds == 1 && ids == 1))			
+	if(sr == dr && si == di && OP::cop_opt() >= 3)									
+		if((_D_ALWAYS1 || (rds == 1 && ids == 1)) && OP::cop_opt() >= 2)			
 			_DE_LOOP(i,frames, ( OP::cop(*dr,*di,*dr,*di,p), dr++,di++ ) )
 		else													
 			_DE_LOOP(i,frames, ( OP::cop(*dr,*di,*dr,*di,p), dr += rds,di += ids ) )
 	else														
-		if(_D_ALWAYS1 || (p.rss == 1 && p.iss == 1 && p.rds == 1 && p.ids == 1)) 
+		if((_D_ALWAYS1 || (p.rss == 1 && p.iss == 1 && p.rds == 1 && p.ids == 1)) && OP::cop_opt() >= 2) 
 			_DE_LOOP(i,frames, ( OP::cop(*dr,*di,*sr,*si,p), sr++,si++,dr++,di++ ) )
 		else													
 			_DE_LOOP(i,frames, ( OP::cop(*dr,*di,*sr,*si,p), sr += rss,si += iss,dr += rds,di += ids ) )
@@ -289,49 +321,149 @@ template<class T,class CL> inline BL _D__rop(OpParam &p) { return V__rop<T,OpPar
 template<class T,class CL> inline BL _D__cop(OpParam &p) { return V__cop<T,OpParam &,CL>(p,p.rsdt,p.isdt,p.rss,p.iss,p.rddt,p.iddt,p.rds,p.ids,p.frames); }
 
 
-template<class T,class CL> inline BL d__run(OpParam &p) { return _d__run<T>(CL::run,p); }
-template<class T,class CL> inline BL d__cun(OpParam &p) { return _d__cun<T>(CL::cun,p); }
-template<class T,class CL> inline BL d__rbin(OpParam &p) { return _d__rbin<T>(CL::rbin,p); }
-template<class T,class CL> inline BL d__cbin(OpParam &p) { return _d__cbin<T>(CL::cbin,p); }
-template<class T,class CL> inline BL d__rop(OpParam &p) { return _d__rop<T>(CL::rop,p); }
-template<class T,class CL> inline BL d__cop(OpParam &p) { return _d__cop<T>(CL::cop,p); }
+template<class T> BL _d__run(V fun(T &v,T a),OpParam &p) // real unary skeleton: the element function arrives as a pointer and is called as fun(dst,src)
+{ 
+	int i; // loop index handed to the loop macros; NOTE(review): _DE_LOOP/_DF_LOOP are defined elsewhere - presumably they step i over 0..p.frames-1
+	if(p.rds == 1 && p.rss == 1) // destination and source strides are both 1: index the data directly
+		_DE_LOOP(i,p.frames, ( fun(p.rddt[i],p.rsdt[i]) ) ) 
+	else // general case: scale the index by the destination/source stride
+		_DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.rsdt[p.rss*i]) ) ) 
+	return true; // no failure path
+}
+
+template<class T> BL _d__cun(V fun(T &rv,T &iv,T ra,T ia),OpParam &p) // complex unary skeleton: fun(dst_re,dst_im,src_re,src_im) is called through a function pointer
+{ 
+	int i; // loop index handed to the loop macros (defined elsewhere)
+	if(p.rds == 1 && p.ids == 1 && p.rss == 1 && p.iss == 1) // all four strides are 1: index the data directly
+		_DE_LOOP(i,p.frames, ( fun(p.rddt[i],p.iddt[i],p.rsdt[i],p.isdt[i]) ) ) 
+	else // general case: each of the four data pointers is indexed with its own stride
+		_DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.iddt[p.ids*i],p.rsdt[p.rss*i],p.isdt[p.iss*i]) ) ) 
+	return true; // no failure path
+}
+
+template<class T> BL _d__rbin(V fun(T &v,T a,T b),OpParam &p) // real binary skeleton: fun(dst,src,arg) is called through a function pointer; the branches below choose where arg comes from
+{ 
+	int i; // loop index handed to _DF_LOOP (defined elsewhere)
+	if(p.HasArg() && p.arg[0].Is()) {							// an argument object is attached to the operation
+		switch(p.arg[0].argtp) {								// NOTE(review): no default case - any other argtp leaves the data untouched and still returns true
+		case OpParam::Arg::arg_v: {	// argument is read per index from a data pointer with its own stride
+			const T *adr = p.arg[0].v.rdt;
+			const I asr = p.arg[0].v.rs;
+			_DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.rsdt[p.rss*i],adr[asr*i]) ) ) 
+			break;												
+		}														
+		case OpParam::Arg::arg_env: {							// argument is produced per index by an envelope iterator (ValFwd)
+			Env::Iter it(*p.arg[0].e.env); it.Init(0);
+			_DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.rsdt[p.rss*i],it.ValFwd(i)) ) ) 
+			break;												
+		}														
+		case OpParam::Arg::arg_x: { // one constant (the real part x.r) is used for every index
+			const T av = p.arg[0].x.r;
+			_DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.rsdt[p.rss*i],av) ) ) 
+			break;												
+		}														
+		}														
+	}															
+	else {														// no argument object: use the value stored in p.rbin.arg
+		_DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.rsdt[p.rss*i],p.rbin.arg) ) ) 
+	}															
+	return true; // no failure path
+}
+
+template<class T> BL _d__cbin(V fun(T &rv,T &iv,T ra,T ia,T rb,T ib),OpParam &p) // complex binary skeleton: fun(dst_re,dst_im,src_re,src_im,arg_re,arg_im) is called through a function pointer
+{ 
+	int i; // loop index handed to _DF_LOOP (defined elsewhere)
+	if(p.HasArg() && p.arg[0].Is()) {							// an argument object is attached to the operation
+		switch(p.arg[0].argtp) {									// NOTE(review): no default case - any other argtp leaves the data untouched and still returns true
+		case OpParam::Arg::arg_v: {									// argument is read per index from real/imaginary data pointers with their own strides
+			const T *adr = p.arg[0].v.rdt,*adi = p.arg[0].v.idt;
+			const I asr = p.arg[0].v.rs,asi = p.arg[0].v.is;
+			if(adi) // an imaginary part is present
+				_DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.iddt[p.ids*i],p.rsdt[p.rss*i],p.isdt[p.iss*i],adr[asr*i],adi[asi*i]) ) ) 
+            else // null imaginary pointer: pass 0 as the imaginary argument
+				_DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.iddt[p.ids*i],p.rsdt[p.rss*i],p.isdt[p.iss*i],adr[asr*i],0) ) ) 
+			break;												
+		}														
+		case OpParam::Arg::arg_env: {							// the envelope supplies the real argument only; the imaginary argument is 0
+			Env::Iter it(*p.arg[0].e.env); it.Init(0);			
+			_DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.iddt[p.ids*i],p.rsdt[p.rss*i],p.isdt[p.iss*i],it.ValFwd(i),0) ) ) 
+			break;												
+		}														
+		case OpParam::Arg::arg_x: {								// one complex constant (x.r, x.i) is used for every index
+			const T avr = p.arg[0].x.r,avi = p.arg[0].x.i;
+			_DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.iddt[p.ids*i],p.rsdt[p.rss*i],p.isdt[p.iss*i],avr,avi) ) ) 
+			break;
+		}														
+		}														
+	}															
+	else {														// no argument object: use the values stored in p.cbin.rarg / p.cbin.iarg
+		_DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.iddt[p.ids*i],p.rsdt[p.rss*i],p.isdt[p.iss*i],p.cbin.rarg,p.cbin.iarg) ) ) 
+	}															
+	return true; // no failure path
+}
+
+template<class T> BL _d__rop(V fun(T &v,T a,OpParam &p),OpParam &p) // real skeleton for element functions that also receive the parameter block: fun(dst,src,p)
+{ 
+	int i; // loop index handed to the loop macros (defined elsewhere)
+	if(p.rds == 1 && p.rss == 1) // destination and source strides are both 1: index the data directly
+		_DE_LOOP(i,p.frames, ( fun(p.rddt[i],p.rsdt[i],p) ) ) 
+	else // general case: scale the index by the destination/source stride
+		_DF_LOOP(i,p.frames, ( fun(p.rddt[p.rds*i],p.rsdt[p.rss*i],p) ) ) 
+	return true; // no failure path
+}
+
+template<class T> BL _d__cop(V fun(T &rv,T &iv,T ra,T ia,OpParam &p),OpParam &p)
+{	// Generic complex loop with parameter access: calls fun on the real/imaginary pairs of each of p.frames frames; always returns true.
+	int n;
+	if(p.rds != 1 || p.ids != 1 || p.rss != 1 || p.iss != 1)	// some stride differs from 1: index through the strides
+		_DF_LOOP(n,p.frames, ( fun(p.rddt[p.rds*n],p.iddt[p.ids*n],p.rsdt[p.rss*n],p.isdt[p.iss*n],p) ) )
+	else	// all four strides are 1: plain indexing with the _DE_LOOP variant
+		_DE_LOOP(n,p.frames, ( fun(p.rddt[n],p.iddt[n],p.rsdt[n],p.isdt[n],p) ) )
+	return true;
+}
 
-template<class T,class CL> inline BL f__run(OpParam &p) { return _d__run(CL::run,p); }
-template<class T,class CL> inline BL f__cun(OpParam &p) { return _d__cun(CL::cun,p); }
-template<class T,class CL> inline BL f__rbin(OpParam &p) { return _d__rbin(CL::rbin,p); }
-template<class T,class CL> inline BL f__cbin(OpParam &p) { return _d__cbin(CL::cbin,p); }
-template<class T,class CL> inline BL f__rop(OpParam &p) { return _d__rop(CL::rop,p); }
-template<class T,class CL> inline BL f__cop(OpParam &p) { return _d__cop(CL::cop,p); }
 
 
 #ifdef VASP_COMPACT
-template<class T,class CL> inline BL D__run(OpParam &p) { return _d__run<T>(CL::run,p); }
-template<class T,class CL> inline BL D__cun(OpParam &p) { return _d__cun<T>(CL::cun,p); }
-template<class T,class CL> inline BL D__rbin(OpParam &p) { return _d__rbin<T>(CL::rbin,p); }
-template<class T,class CL> inline BL D__cbin(OpParam &p) { return _d__cbin<T>(CL::cbin,p); }
-template<class T,class CL> inline BL D__rop(OpParam &p) { return _d__rop<T>(CL::rop,p); }
-template<class T,class CL> inline BL D__cop(OpParam &p) { return _d__cop<T>(CL::cop,p); }
-template<class T,class CL> inline BL F__run(OpParam &p) { return _d__run(CL::run,p); }
-template<class T,class CL> inline BL F__cun(OpParam &p) { return _d__cun<T>(CL::cun,p); }
-template<class T,class CL> inline BL F__rbin(OpParam &p) { return _d__rbin<T>(CL::rbin,p); }
-template<class T,class CL> inline BL F__cbin(OpParam &p) { return _d__cbin<T>(CL::cbin,p); }
-template<class T,class CL> inline BL F__rop(OpParam &p) { return _d__rop<T>(CL::rop,p); }
-template<class T,class CL> inline BL F__cop(OpParam &p) { return _d__cop<T>(CL::cop,p); }
+    template<class T,class CL> inline BL D__run(OpParam &p) { return _d__run<T>(CL::run,p); }	// VASP_COMPACT: every D__ dispatcher forwards to the function-pointer based _d__ loop
+    template<class T,class CL> inline BL D__cun(OpParam &p) { return _d__cun<T>(CL::cun,p); }	// complex unary
+    template<class T,class CL> inline BL D__rbin(OpParam &p) { return _d__rbin<T>(CL::rbin,p); }	// real binary
+    template<class T,class CL> inline BL D__cbin(OpParam &p) { return _d__cbin<T>(CL::cbin,p); }	// complex binary
+	template<class T,class CL> inline BL D__rop(OpParam &p) { return _d__rop<T>(CL::rop,p); }	// real, functor also receives the OpParam
+	template<class T,class CL> inline BL D__cop(OpParam &p) { return _d__cop<T>(CL::cop,p); }	// complex, functor also receives the OpParam
 #else
-template<class T,class CL> inline BL D__run(OpParam &p) { return _D__run<T,CL>(p); }
-template<class T,class CL> inline BL D__cun(OpParam &p) { return _D__cun<T,CL>(p); }
-template<class T,class CL> inline BL D__rbin(OpParam &p) { return _D__rbin<T,CL>(p); }
-template<class T,class CL> inline BL D__cbin(OpParam &p) { return _D__cbin<T,CL>(p); }
-template<class T,class CL> inline BL D__rop(OpParam &p) { return _D__rop<T,CL>(p); }
-template<class T,class CL> inline BL D__cop(OpParam &p) { return _D__cop<T,CL>(p); }
-template<class T,class CL> inline BL F__run(OpParam &p) { return V__run<T,CL>(p.rsdt,p.rss,p.rddt,p.rds,p.frames); }
-template<class T,class CL> inline BL F__cun(OpParam &p) { return V__cun<T,CL>(p.rsdt,p.isdt,p.rss,p.iss,p.rddt,p.iddt,p.rds,p.ids,p.frames); }
-template<class T,class CL> inline BL F__rbin(OpParam &p) { return _F__rbin<T,CL>(p); }
-template<class T,class CL> inline BL V__cbin(OpParam &p) { return _F__cbin<T,CL>(p); }
-template<class T,class CL> inline BL F__rop(OpParam &p) { return V__rop<T,OpParam &,CL>(p,p.rsdt,p.rss,p.rddt,p.rds,p.frames); }
-template<class T,class CL> inline BL F__cop(OpParam &p) { return V__cop<T,OpParam &,CL>(p,p.rsdt,p.isdt,p.rss,p.iss,p.rddt,p.iddt,p.rds,p.ids,p.frames); }
+	template<class T,class CL> inline BL D__run(OpParam &p) { if(CL::run_opt()) return _D__run<T,CL>(p); return _d__run<T>(CL::run,p); }	// non-zero CL::<kind>_opt() picks the _D__ template variant, zero the function-pointer based _d__ loop
+	template<class T,class CL> inline BL D__cun(OpParam &p) { if(CL::cun_opt()) return _D__cun<T,CL>(p); return _d__cun<T>(CL::cun,p); }	// complex unary
+	template<class T,class CL> inline BL D__rbin(OpParam &p) { if(CL::rbin_opt()) return _D__rbin<T,CL>(p); return _d__rbin<T>(CL::rbin,p); }	// real binary
+	template<class T,class CL> inline BL D__cbin(OpParam &p) { if(CL::cbin_opt()) return _D__cbin<T,CL>(p); return _d__cbin<T>(CL::cbin,p); }	// complex binary
+	template<class T,class CL> inline BL D__rop(OpParam &p) { if(CL::rop_opt()) return _D__rop<T,CL>(p); return _d__rop<T>(CL::rop,p); }	// real, functor also receives the OpParam
+	template<class T,class CL> inline BL D__cop(OpParam &p) { if(CL::cop_opt()) return _D__cop<T,CL>(p); return _d__cop<T>(CL::cop,p); }	// complex, functor also receives the OpParam
 #endif
 
+
+// process multi-dimensional data
+
+template<class T> inline BL V__vmulti(BL vbin(I layers,const T *sr,T *dr,const T *ar,I len),I layers,const T *sr,T *dr,const T *ar,I dim,const I *dims)
+{	// Applies vbin to multi-dimensional data by recursing over the last dimension; returns false if dim < 1 or if any vbin call reported failure.
+	if(dim == 1 || !dims) {
+		return vbin(layers,sr,dr,ar,dims?dims[0]:dim);	// innermost level: length is dims[0], or dim itself when no dims array is given
+	}
+	else if(dim > 1) {
+		// offset between consecutive slices of the last dimension: layers*dims[0]*...*dims[dim-2]
+		I i,s,str = layers*dims[0];
+		for(i = 1; i < dim-1; ++i) str *= dims[i];
+		const I dimn = dims[i];	// i == dim-1 after the loop: extent of the last dimension
+		BL ok = true;
+		for(s = i = 0; i < dimn; ++i,s += str)
+			if(!V__vmulti(vbin,layers,sr+s,dr+s,ar+s,dim-1,dims)) ok = false;	// still process the remaining slices, but report the failure
+		return ok;
+	}
+	else
+		return false;
+}
+
+
+
 } // namespace VecOp
 
 #endif
diff --git a/externals/grill/vasp/source/opfuns.h b/externals/grill/vasp/source/opfuns.h
index 5ab8d48a..2228641d 100644
--- a/externals/grill/vasp/source/opfuns.h
+++ b/externals/grill/vasp/source/opfuns.h
@@ -12,56 +12,27 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 #define __VASP_OPFUNS_H
 
 #include "opdefs.h"
+#include <math.h>
+#include "util.h"
 
 
 namespace VecOp {
 
-    // multi-layer templates
-
-    template<class T,V FUN(T &v,T a),I N>
-    static V vec_un(T *v,const T *a,I n = 0) { 
-        const I _n = N?N:n;
-        for(I i = 0; i < _n; ++i) FUN(v[i],a[i]); 
-    }
-
-    template<class T,V FUN(T &v,T a),I N>
-    static V vec_un(T *v,T a,I n = 0) { 
-        const I _n = N?N:n;
-        for(I i = 0; i < _n; ++i) FUN(v[i],a); 
-    }
-
-    template<class T,V FUN(T &v,T a,T b),I N>
-    static V vec_bin(T *v,const T *a,const T *b,I n = 0) { 
-        const I _n = N?N:n;
-        for(I i = 0; i < _n; ++i) FUN(v[i],a[i],b[i]); 
-    }
-
-    template<class T,V FUN(T &v,T a,T b),I N>
-    static V vec_bin(T *v,const T *a,T b,I n = 0) { 
-        const I _n = N?N:n;
-        for(I i = 0; i < _n; ++i) FUN(v[i],a[i],b); 
-    }
-
-
-    template<class T,class CL,I N>
-    static V cvec_un(T *v,const T *a,I n = 0) { vec_un<T,CL::run,N>(v,a,n); }
-
-    template<class T,class CL,I N>
-    static V cvec_bin(T *v,const T *a,const T *b,I n = 0) { vec_bin<T,n,CL::rbin>(v,a,b,n); }
-
-
-
     // assignment
 
     template<class T> class f_copy { 
     public: 
+    	static I run_opt() { return 3; }
         static V run(T &v,T a) { v = a; }
+    	static I cun_opt() { return 2; }
         static V cun(T &rv,T &iv,T ra,T ia) { rv = ra,iv = ia; } 
     };
 
     template<class T> class f_set { 
     public: 
+    	static I rbin_opt() { return 3; }
         static V rbin(T &v,T,T b) { v = b; }
+    	static I cbin_opt() { return 2; }
         static V cbin(T &rv,T &iv,T,T,T rb,T ib) { rv = rb,iv = ib; } 
     };
 
@@ -69,32 +40,42 @@ namespace VecOp {
 
     template<class T> class f_add {
     public: 
+    	static I rbin_opt() { return 3; }
         static V rbin(T &v,T a,T b) { v = a+b; }
+    	static I cbin_opt() { return 2; }
         static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { rv = ra+rb,iv = ia+ib; }
     };
 
     template<class T> class f_sub {
     public: 
+    	static I rbin_opt() { return 3; }
         static V rbin(T &v,T a,T b) { v = a-b; }
+    	static I cbin_opt() { return 2; }
         static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { rv = ra-rb,iv = ia-ib; }
     };
 
     template<class T> class f_subr {
     public: 
+    	static I rbin_opt() { return 2; }
         static V rbin(T &v,T a,T b) { v = b-a; }
+    	static I cbin_opt() { return 2; }
         static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { rv = rb-ra,iv = ib-ia; }
     };
 
     template<class T> class f_mul {
     public: 
+    	static I rbin_opt() { return 3; }
         static V rbin(T &v,T a,T b) { v = a*b; }
+    	static I cbin_opt() { return 1; }
         static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { rv = ra*rb-ia*ib, iv = ra*ib+rb*ia; }
     };
 
     template<class T> class f_div {
     public: 
+    	static I rbin_opt() { return 2; }
         static V rbin(T &v,T a,T b) { v = a/b; }
 
+    	static I cbin_opt() { return 0; }
         static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) 
         { 
 	        register const T den = sqabs(rb,ib);
@@ -105,8 +86,10 @@ namespace VecOp {
 
     template<class T> class f_divr {
     public: 
+    	static I rbin_opt() { return 2; }
         static V rbin(T &v,T a,T b) { v = b/a; }
 
+    	static I cbin_opt() { return 0; }
         static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib)
         { 
 	        register const T den = sqabs(ra,ia);
@@ -117,28 +100,35 @@ namespace VecOp {
 
     template<class T> class f_mod { 
     public: 
+    	static I rbin_opt() { return 0; }
         static V rbin(T &v,T a,T b) { v = fmod(a,b); } 
     };
 
     template<class T> class f_abs {
     public: 
+    	static I run_opt() { return 0; }
         static V run(T &v,T a) { v = fabs(a); }
+    	static I cun_opt() { return 0; }
         static V cun(T &rv,T &iv,T ra,T ia) { rv = sqrt(ra*ra+ia*ia),iv = 0; }
     };
 
     template<class T> class f_sign { 
     public: 
+    	static I run_opt() { return 0; }
         static V run(T &v,T a) { v = (a == 0?0:(a < 0?-1.:1.)); } 
     };
 
     template<class T> class f_sqr {
     public: 
+    	static I run_opt() { return 3; }
         static V run(T &v,T a) { v = a*a; } 
+    	static I cun_opt() { return 1; }
         static V cun(T &rv,T &iv,T ra,T ia) { rv = ra*ra-ia*ia; iv = ra*ia*2; }
     };
 
     template<class T> class f_ssqr { 
     public: 
+    	static I run_opt() { return 0; }
         static V run(T &v,T a) { v = a*fabs(a); } 
     };
 
@@ -147,19 +137,22 @@ namespace VecOp {
 
     template<class T> class f_powi {
     public: 
+    	static I cop_opt() { return 0; }
         static V cop(T &rv,T &iv,T ra,T ia,OpParam &p) 
         { 
 	        register const I powi = p.ibin.arg;
-            register T rt,it; f_sqr<T>::cun(rt,it,ra,ia);
-            for(I i = 2; i < powi; ++i) f_mul<T>::cbin(rt,it,rt,it,ra,ia);
+            register T rt,it; VecOp::f_sqr<T>::cun(rt,it,ra,ia);
+            for(I i = 2; i < powi; ++i) VecOp::f_mul<T>::cbin(rt,it,rt,it,ra,ia);
 	        rv = rt,iv = it;
         } 
     };
 
     template<class T> class f_pow {
     public: 
+    	static I rbin_opt() { return 0; }
         static V rbin(T &v,T a,T b) { v = pow(fabs(a),b)*sgn(a); } 
 
+    	static I cbin_opt() { return 0; }
         static V cbin(T &rv,T &iv,T ra,T ia,T rb,T) 
         { 
 	        register const T _abs = sqrt(sqabs(ra,ia));
@@ -170,26 +163,32 @@ namespace VecOp {
 	        else
 		        rv = iv = 0;
         } 
+    protected:
+        static T sgn(T x) { return x?(x > 0?1:-1):0; }
     };
 
     template<class T> class f_sqrt {
     public: 
+    	static I run_opt() { return 0; }
         static V run(T &v,T a) { v = sqrt(fabs(a)); } 
     };
 
     template<class T> class f_ssqrt {
     public:
+    	static I run_opt() { return 0; }
         static V run(T &v,T a) { v = sqrt(fabs(a))*sgn(a); } 
     };
 
 
     template<class T> class f_exp {
     public: 
+    	static I run_opt() { return 0; }
         static V run(T &v,T a) { v = exp(a); } 
     };
 
     template<class T> class f_log {
     public: 
+    	static I run_opt() { return 0; }
         static V run(T &v,T a) { v = log(a); }  // \todo detect NANs
     };
 
@@ -197,51 +196,61 @@ namespace VecOp {
 
     template<class T> class f_lwr {
     public: 
+    	static I rbin_opt() { return 1; }
         static V rbin(T &v,T a,T b) { v = a < b?1:0; }
     };
 
     template<class T> class f_gtr {
     public: 
+    	static I rbin_opt() { return 1; }
         static V rbin(T &v,T a,T b) { v = a > b?1:0; }
     };
 
     template<class T> class f_alwr {
     public: 
+    	static I rbin_opt() { return 0; }
         static V rbin(T &v,T a,T b) { v = fabs(a) < fabs(b)?1:0; }
     };
 
     template<class T> class f_agtr {
     public: 
+    	static I rbin_opt() { return 0; }
         static V rbin(T &v,T a,T b) { v = fabs(a) > fabs(b)?1:0; }
     };
 
     template<class T> class f_leq {
     public: 
+    	static I rbin_opt() { return 1; }
         static V rbin(T &v,T a,T b) { v = a <= b?1:0; }
     };
 
     template<class T> class f_geq {
     public: 
+    	static I rbin_opt() { return 1; }
         static V rbin(T &v,T a,T b) { v = a >= b?1:0; }
     };
 
     template<class T> class f_aleq {
     public: 
+    	static I rbin_opt() { return 0; }
         static V rbin(T &v,T a,T b) { v = fabs(a) <= fabs(b)?1:0; }
     };
 
     template<class T> class f_ageq {
     public: 
+    	static I rbin_opt() { return 0; }
         static V rbin(T &v,T a,T b) { v = fabs(a) >= fabs(b)?1:0; }
     };
 
     template<class T> class f_equ {
     public: 
+    	static I rbin_opt() { return 1; }
         static V rbin(T &v,T a,T b) { v = a == b?1:0; }
     };
 
     template<class T> class f_neq {
     public: 
+    	static I rbin_opt() { return 1; }
         static V rbin(T &v,T a,T b) { v = a != b?1:0; }
     };
 
@@ -249,8 +258,10 @@ namespace VecOp {
 
     template<class T> class f_min {
     public: 
+    	static I rbin_opt() { return 1; }
         static V rbin(T &v,T a,T b) { v = a < b?a:b; }
 
+    	static I cbin_opt() { return 0; }
         static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) 
         { 
 	        if(sqabs(ra,ia) < sqabs(rb,ib))	rv = ra,iv = ia; 
@@ -260,8 +271,10 @@ namespace VecOp {
 
     template<class T> class f_max {
     public: 
+    	static I rbin_opt() { return 1; }
         static V rbin(T &v,T a,T b) { v = a > b?a:b; }
 
+    	static I cbin_opt() { return 0; }
         static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) 
         { 
 	        if(sqabs(ra,ia) > sqabs(rb,ib))	rv = ra,iv = ia; 
@@ -271,6 +284,7 @@ namespace VecOp {
 
     template<class T> class f_minmax {
     public:
+    	static I cun_opt() { return 0; }
         static V cun(T &rv,T &iv,T ra,T ia) 
         { 
 	        if(ra < ia)	rv = ra,iv = ia; 
@@ -280,11 +294,13 @@ namespace VecOp {
 
     template<class T> class f_minq {
     public: 
+    	static I rop_opt() { return 0; }
         static V rop(T &,T ra,OpParam &p) 
         { 
 	        if(ra < p.norm.minmax) p.norm.minmax = ra; 
         } 
 
+    	static I cop_opt() { return 0; }
         static V cop(T &,T &,T ra,T ia,OpParam &p) 
         { 
 	        register T s = sqabs(ra,ia); 
@@ -294,11 +310,13 @@ namespace VecOp {
 
     template<class T> class f_maxq {
     public: 
+    	static I rop_opt() { return 0; }
         static V rop(T &,T ra,OpParam &p) 
         { 
 	        if(ra > p.norm.minmax) p.norm.minmax = ra; 
         } 
 
+    	static I cop_opt() { return 0; }
         static V cop(T &,T &,T ra,T ia,OpParam &p) 
         { 
 	        register T s = sqabs(ra,ia); 
@@ -308,6 +326,7 @@ namespace VecOp {
 
     template<class T> class f_aminq {
     public: 
+    	static I rop_opt() { return 0; }
         static V rop(T &,T ra,OpParam &p) 
         { 
 	        register T s = fabs(ra); 
@@ -317,6 +336,7 @@ namespace VecOp {
 
     template<class T> class f_amaxq {
     public: 
+    	static I rop_opt() { return 0; }
         static V rop(T &,T ra,OpParam &p) 
         { 
 	        register T s = fabs(ra); 
@@ -329,8 +349,10 @@ namespace VecOp {
 
     template<class T> class f_gate {
     public:
+    	static I rbin_opt() { return 0; }
         static V rbin(T &rv,T ra,T rb) { rv = fabs(ra) >= rb?ra:0; } 
 
+    	static I cbin_opt() { return 0; }
         static V cbin(T &rv,T &iv,T ra,T ia,T rb,T) 
         { 
 	        register const T _abs = sqabs(ra,ia);
@@ -342,8 +364,10 @@ namespace VecOp {
 
     template<class T> class f_igate {
     public:
+    	static I rbin_opt() { return 0; }
         static V rbin(T &rv,T ra,T rb) { rv = fabs(ra) <= rb?ra:0; } 
 
+    	static I cbin_opt() { return 0; }
         static V cbin(T &rv,T &iv,T ra,T ia,T rb,T) 
         { 
 	        register const T _abs = sqabs(ra,ia);
@@ -357,6 +381,7 @@ namespace VecOp {
 
     template<class T> class f_norm {
     public:
+    	static I cun_opt() { return 0; }
         static V cun(T &rv,T &iv,T ra,T ia) 
         { 
 	        register T f = sqabs(ra,ia);
@@ -367,21 +392,25 @@ namespace VecOp {
 
     template<class T> class f_conj {
     public:
+    	static I cun_opt() { return 2; }
         static V cun(T &,T &iv,T,T ia) { iv = -ia; }
     };
 
     template<class T> class f_polar {
     public:
+    	static I cun_opt() { return 0; }
         static V cun(T &rv,T &iv,T ra,T ia) { rv = sqrt(sqabs(ra,ia)),iv = arg(ra,ia); }
     };
 
     template<class T> class f_rect {
     public:
+    	static I cun_opt() { return 0; }
         static V cun(T &rv,T &iv,T ra,T ia) { rv = ra*cos(ia),iv = ra*sin(ia); }
     };
 
     template<class T> class f_radd {
     public:
+    	static I cbin_opt() { return 0; }
         static V cbin(T &rv,T &iv,T ra,T ia,T rb,T) 
         { 
 	        register const T _abs = sqrt(sqabs(ra,ia))+rb;
@@ -401,6 +430,7 @@ namespace VecOp {
 	        \param v destination vasp (NULL for in-place operation)
 	        \return normalized destination vasp
         */
+    	static I run_opt() { return 0; }
         static V run(T &v,T a) 
         { 
 	        if(a != a) // NAN
@@ -415,30 +445,75 @@ namespace VecOp {
 	        }
         } 
     };
+
 }
 
 
+// DEFOP(T,FUN,OP,KIND): defines inline VecOp::FUN(OpParam &) returning D__KIND<T,f_OP<T> >(p); it opens namespace VecOp itself.
+#define DEFOP(T,FUN,OP,KIND) \
+namespace VecOp { inline BL FUN(OpParam &p) { return D__##KIND<T,f_##OP<T> >(p); } }
+
+// DEFVEC_R(T,OP): static members r_OP (forwards to V__rbin), v_OP_ (forwards to V__vbin) and v_OP (runs v_OP_ through V__vmulti over dim/dims), all using f_OP<T>.
+#define DEFVEC_R(T,OP) \
+    static BL r_##OP (I len,T *dr,I rds,const T *sr,I rss) { return VecOp::V__rbin<T,VecOp::f_##OP <T> >(sr,rss,dr,rds,len); } \
+    static BL v_##OP##_(I layers,const T *sr,T *dr,const T *ar,I len) { return VecOp::V__vbin<T,VecOp::f_##OP <T> >(layers,sr,dr,ar,len); } \
+    static BL v_##OP (I dim,const I *dims,I layers,T *dr,const T *sr,const T *ar) { return VecOp::V__vmulti<T>(v_##OP##_,layers,sr,dr,ar,dim,dims); }
+// DEFVEC_C(T,OP): static member c_OP forwarding to V__cbin. NOTE(review): di is accepted but never forwarded and no imaginary source pointer is taken -- confirm against the V__cbin signature.
+#define DEFVEC_C(T,OP) \
+    static BL c_##OP (I len,T *dr,T *di,I rds,I ids,const T *sr,I rss,I iss) { return VecOp::V__cbin<T,VecOp::f_##OP <T> >(sr,rss,iss,dr,rds,ids,len); }
+// DEFVEC_B(T,OP): both of the above for one operation.
+#define DEFVEC_B(T,OP) DEFVEC_R(T,OP) DEFVEC_C(T,OP)
+
 template<class T>
 class VecFun {
 public:
-    // strided real data
-    static BL r_add(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin<T,VecOp::f_add<T>::rbin>(sr,rss,dr,rds,len); }
-    static BL r_sub(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin<T,VecOp::f_sub<T>::rbin>(sr,rss,dr,rds,len); }
-    static BL r_subr(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin<T,VecOp::f_subr<T>::rbin>(sr,rss,dr,rds,len); }
-    static BL r_mul(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin<T,VecOp::f_mul<T>::rbin>(sr,rss,dr,rds,len); }
-    static BL r_div(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin<T,VecOp::f_div<T>::rbin>(sr,rss,dr,rds,len); }
-    static BL r_divr(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin<T,VecOp::f_divr<T>::rbin>(sr,rss,dr,rds,len); }
-    static BL r_mod(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin<T,VecOp::f_mod<T>::rbin>(sr,rss,dr,rds,len); }
-
-    // multi-layer data (non-strided)
-    static BL v_add(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin<T,VecOp::f_add<T>::rbin>(layers,sr,dr,ar,len); }
-    static BL v_sub(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin<T,VecOp::f_sub<T>::rbin>(layers,sr,dr,ar,len); }
-    static BL v_subr(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin<T,VecOp::f_subr<T>::rbin>(layers,sr,dr,ar,len); }
-    static BL v_mul(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin<T,VecOp::f_mul<T>::rbin>(layers,sr,dr,ar,len); }
-    static BL v_div(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin<T,VecOp::f_div<T>::rbin>(layers,sr,dr,ar,len); }
-    static BL v_divr(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin<T,VecOp::f_divr<T>::rbin>(layers,sr,dr,ar,len); }
-    static BL v_mod(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin<T,VecOp::f_mod<T>::rbin>(layers,sr,dr,ar,len); }
+	// Static kernels generated per operation: DEFVEC_R gives r_<op>, v_<op>_ and v_<op>; DEFVEC_C gives c_<op>; DEFVEC_B gives all of them.
+	DEFVEC_B(T,copy)	
+	DEFVEC_B(T,add)
+	DEFVEC_B(T,sub)
+	DEFVEC_B(T,subr)
+	DEFVEC_B(T,mul)
+	DEFVEC_B(T,div)
+	DEFVEC_B(T,divr)
+	DEFVEC_R(T,mod)
+	DEFVEC_B(T,abs)
+	DEFVEC_R(T,sign)
+	DEFVEC_B(T,sqr)
+	DEFVEC_R(T,ssqr)
+	// NOTE(review): copy, abs, sign, sqr, ssqr and powi name functor classes that declare run/cun/cop but no rbin/cbin in this header; presumably these members only compile while they stay uninstantiated -- confirm before calling them.
+	DEFVEC_C(T,powi)
+	DEFVEC_B(T,pow)
+	DEFVEC_R(T,sqrt)
+	DEFVEC_R(T,ssqrt)
+	DEFVEC_R(T,exp)
+	DEFVEC_R(T,log)
+
+	DEFVEC_R(T,lwr)
+	DEFVEC_R(T,gtr)
+	DEFVEC_R(T,alwr)
+	DEFVEC_R(T,agtr)
+	DEFVEC_R(T,leq)
+	DEFVEC_R(T,geq)
+	DEFVEC_R(T,aleq)
+	DEFVEC_R(T,ageq)
+	DEFVEC_R(T,equ)
+	DEFVEC_R(T,neq)
+
+	DEFVEC_B(T,min)
+	DEFVEC_B(T,max)
+	DEFVEC_C(T,minmax)
+	DEFVEC_C(T,gate)
+	DEFVEC_C(T,igate)
+
+	DEFVEC_C(T,norm)
+	DEFVEC_C(T,conj)
+	DEFVEC_C(T,polar)
+	DEFVEC_C(T,rect)
+	DEFVEC_C(T,radd)
+
+	DEFVEC_R(T,fix)
 };
 
 
+
 #endif
diff --git a/externals/grill/vasp/source/oploop.h b/externals/grill/vasp/source/oploop.h
index 888a3f84..603cd7ce 100755
--- a/externals/grill/vasp/source/oploop.h
+++ b/externals/grill/vasp/source/oploop.h
@@ -11,7 +11,7 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 #ifndef __VASP_OPLOOP_H
 #define __VASP_OPLOOP_H
 
-#define _D_BLOCK 1024
+#define _D_BLOCK 4096
 
 #define _D_MIN(a,b) ((a) < (b)?(a):(b))
 
@@ -97,4 +97,6 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 
 #endif
 
+// _DQ_LOOP: braced statement that runs BODY over VAR/LEN with _DE_LOOP when UNROLL is true and with _DF_LOOP otherwise.
+#define _DQ_LOOP(UNROLL,VAR,LEN,BODY) { if(UNROLL) _DE_LOOP(VAR,LEN,BODY) else _DF_LOOP(VAR,LEN,BODY) }
 #endif
diff --git a/externals/grill/vasp/source/ops_arith.cpp b/externals/grill/vasp/source/ops_arith.cpp
index 5eb11e7d..fb8ad4b7 100644
--- a/externals/grill/vasp/source/ops_arith.cpp
+++ b/externals/grill/vasp/source/ops_arith.cpp
@@ -2,7 +2,7 @@
 
 VASP modular - vector assembling signal processor / objects for Max/MSP and PD
 
-Copyright (c) 2002 Thomas Grill (xovo@gmx.net)
+Copyright (c) 2002-2003 Thomas Grill (xovo@gmx.net)
 For information on usage and redistribution, and for a DISCLAIMER OF ALL
 WARRANTIES, see the file, "license.txt," in this distribution.  
 
@@ -31,5 +31,5 @@ VASP_UNARY("vasp.ssqr",ssqr,true,"Calculates the square with preservation of the
 // -----------------------------------------------------
 
 VASP_UNARY("vasp.sign",sign,true,"Calculates the sign (signum function)") 
-VASP_UNARY("vasp.abs",abs,true,"Calulates the absolute value") 
+VASP_UNARY("vasp.abs",abs,true,"Calculates the absolute value") 
 
diff --git a/externals/grill/vasp/source/ops_arith.h b/externals/grill/vasp/source/ops_arith.h
index 8d54d960..064ecb0e 100644
--- a/externals/grill/vasp/source/ops_arith.h
+++ b/externals/grill/vasp/source/ops_arith.h
@@ -15,35 +15,36 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 
 // Arithmetic math functions
 
+// Real arithmetic kernels on sample type S: each DEFOP line defines VecOp::d_<op>(OpParam &) (a = source value, b = argument value).
+DEFOP(S,d_add,add,rbin)	// v = a+b
+DEFOP(S,d_sub,sub,rbin)	// v = a-b
+DEFOP(S,d_subr,subr,rbin)	// v = b-a
+DEFOP(S,d_mul,mul,rbin)	// v = a*b
+DEFOP(S,d_div,div,rbin)	// v = a/b
+DEFOP(S,d_divr,divr,rbin)	// v = b/a
+DEFOP(S,d_mod,mod,rbin)	// v = fmod(a,b)
+
+DEFOP(S,d_sqr,sqr,run)	// v = a*a
+DEFOP(S,d_ssqr,ssqr,run)	// v = a*fabs(a), keeps the sign
+
+DEFOP(S,d_sign,sign,run)	// v = -1, 0 or 1
+DEFOP(S,d_abs,abs,run)	// v = fabs(a)
+
 namespace VaspOp {
-    inline BL d_add(OpParam &p) { return VecOp::D__rbin<S,VecOp::f_add<S> >(p); }
-    inline BL d_sub(OpParam &p) { return VecOp::D__rbin<S,VecOp::f_sub<S> >(p); }
-    inline BL d_subr(OpParam &p) { return VecOp::D__rbin<S,VecOp::f_subr<S> >(p); }
-    inline BL d_mul(OpParam &p) { return VecOp::D__rbin<S,VecOp::f_mul<S> >(p); }
-    inline BL d_div(OpParam &p) { return VecOp::D__rbin<S,VecOp::f_div<S> >(p); }
-    inline BL d_divr(OpParam &p) { return VecOp::D__rbin<S,VecOp::f_divr<S> >(p); }
-    inline BL d_mod(OpParam &p) { return VecOp::D__rbin<S,VecOp::f_mod<S> >(p); }
-
-    inline BL d_sqr(OpParam &p) { return VecOp::D__run<S,VecOp::f_sqr<S> >(p); }
-    inline BL d_ssqr(OpParam &p) { return VecOp::d__run<S,VecOp::f_ssqr<S> >(p); }
-
-    inline BL d_sign(OpParam &p) { return VecOp::D__run<S,VecOp::f_sign<S> >(p); }
-    inline BL d_abs(OpParam &p) { return VecOp::D__run<S,VecOp::f_abs<S> >(p); }
-
-
-    inline Vasp *m_add(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_add); } // add to (one vec or real)
-	inline Vasp *m_sub(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_sub); } // sub from (one vec or real)
-	inline Vasp *m_subr(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_subr); } // reverse sub from (one vec or real)
-	inline Vasp *m_mul(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_mul); } // mul with (one vec or real)
-	inline Vasp *m_div(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_div); } // div by (one vec or real)
-	inline Vasp *m_divr(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_divr); } // reverse div by (one vec or real)
-	inline Vasp *m_mod(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,d_mod); } // modulo by (one vec or real)
-
-	inline Vasp *m_sqr(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,d_sqr); }    // unsigned square 
-	inline Vasp *m_ssqr(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,d_ssqr); }   // signed square 
-
-	inline Vasp *m_sign(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,d_sign); }  // sign function 
-	inline Vasp *m_abs(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,d_abs); }  // absolute values
+	// Vasp-level entry points: each one forwards to m_rbin (operations with an argument) or m_run (unary) with the matching VecOp kernel.
+    inline Vasp *m_add(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_add); } // add: src + arg (arg is one vector or a real value)
+	inline Vasp *m_sub(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_sub); } // subtract: src - arg (one vector or real)
+	inline Vasp *m_subr(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_subr); } // reverse subtract: arg - src (one vector or real)
+	inline Vasp *m_mul(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_mul); } // multiply: src * arg (one vector or real)
+	inline Vasp *m_div(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_div); } // divide: src / arg (one vector or real)
+	inline Vasp *m_divr(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_divr); } // reverse divide: arg / src (one vector or real)
+	inline Vasp *m_mod(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_mod); } // modulo: fmod(src,arg) (one vector or real)
+
+	inline Vasp *m_sqr(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,VecOp::d_sqr); }    // square, result is never negative
+	inline Vasp *m_ssqr(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,VecOp::d_ssqr); }   // signed square, keeps the sign of the input
+
+	inline Vasp *m_sign(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,VecOp::d_sign); }  // sign function (-1, 0 or 1)
+	inline Vasp *m_abs(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_run(p,src,dst,VecOp::d_abs); }  // absolute value
 }
 
 #endif
diff --git a/externals/grill/vasp/source/ops_assign.h b/externals/grill/vasp/source/ops_assign.h
index a495eb3b..78a3392e 100644
--- a/externals/grill/vasp/source/ops_assign.h
+++ b/externals/grill/vasp/source/ops_assign.h
@@ -15,13 +15,12 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 
 // Assignment functions
 
-namespace VecOp {
-    inline BL d_copy(OpParam &p) { return D__run<S,f_copy<S> >(p); }
-    inline BL d_ccopy(OpParam &p) { return D__cun<S,f_copy<S> >(p); }
+// Assignment kernels on sample type S: each DEFOP line defines VecOp::<name>(OpParam &).
+DEFOP(S,d_copy,copy,run)	// real copy: v = a
+DEFOP(S,d_ccopy,copy,cun)	// complex copy: rv = ra, iv = ia
+
+DEFOP(S,d_set,set,rbin)	// real set: v = b (the argument)
+DEFOP(S,d_cset,set,cbin)	// complex set: rv = rb, iv = ib
-    inline BL d_set(OpParam &p) { return D__rbin<S,f_set<S> >(p); }
-    inline BL d_cset(OpParam &p) { return D__cbin<S,f_set<S> >(p); }
-}
 
 namespace VaspOp {
 	inline Vasp *m_set(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_set); } // copy to (one vec or real)
diff --git a/externals/grill/vasp/source/ops_carith.h b/externals/grill/vasp/source/ops_carith.h
index 0b049fb4..04514a10 100644
--- a/externals/grill/vasp/source/ops_carith.h
+++ b/externals/grill/vasp/source/ops_carith.h
@@ -15,20 +15,19 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 
 // Arithmetic math functions
 
-namespace VecOp {
-    inline BL d_cadd(OpParam &p) { return D__cbin<S,f_add<S> >(p); }
-    inline BL d_csub(OpParam &p) { return D__cbin<S,f_sub<S> >(p); }
-    inline BL d_csubr(OpParam &p) { return D__cbin<S,f_subr<S> >(p); }
-    inline BL d_cmul(OpParam &p) { return D__cbin<S,f_mul<S> >(p); }
-    inline BL d_cdiv(OpParam &p) { return d__cbin<S,f_div<S> >(p); }
-    inline BL d_cdivr(OpParam &p) { return d__cbin<S,f_divr<S> >(p); }
+DEFOP(S,d_cadd,add,cbin)	// complex kernels on sample type S, each defined as VecOp::<name>(OpParam &); this one adds real and imaginary parts
+DEFOP(S,d_csub,sub,cbin)	// complex a-b
+DEFOP(S,d_csubr,subr,cbin)	// complex b-a
+DEFOP(S,d_cmul,mul,cbin)	// complex product
+DEFOP(S,d_cdiv,div,cbin)	// complex division; f_div::cbin_opt() is 0, so the default D__cbin uses the generic _d__cbin loop
+DEFOP(S,d_cdivr,divr,cbin)	// reverse complex division; f_divr::cbin_opt() is 0 as well
 
-    inline BL d_csqr(OpParam &p) { return D__cun<S,f_sqr<S> >(p); }
+DEFOP(S,d_csqr,sqr,cun)
 
-    inline BL d_cpowi(OpParam &p) { return d__cop<S,f_powi<S> >(p); }
+DEFOP(S,d_cpowi,powi,cop)
+
+DEFOP(S,d_cabs,abs,cun)
 
-    inline BL d_cabs(OpParam &p) { return D__cun<S,f_abs<S> >(p); }
-}
 
 namespace VaspOp {
 	inline Vasp *m_cadd(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_cbin(p,src,arg,dst,VecOp::d_cadd); }  // complex add (pairs of vecs or complex)
diff --git a/externals/grill/vasp/source/ops_cmp.h b/externals/grill/vasp/source/ops_cmp.h
index 1302120f..ae445b94 100644
--- a/externals/grill/vasp/source/ops_cmp.h
+++ b/externals/grill/vasp/source/ops_cmp.h
@@ -15,37 +15,36 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 
 // Comparison functions
 
-namespace VecOp {
-    inline BL d_lwr(OpParam &p) { return D__rbin<S,f_lwr<S> >(p); }
-    inline BL d_gtr(OpParam &p) { return D__rbin<S,f_gtr<S> >(p); }
-    inline BL d_alwr(OpParam &p) { return D__rbin<S,f_alwr<S> >(p); }
-    inline BL d_agtr(OpParam &p) { return D__rbin<S,f_agtr<S> >(p); }
-    inline BL d_leq(OpParam &p) { return D__rbin<S,f_leq<S> >(p); }
-    inline BL d_geq(OpParam &p) { return D__rbin<S,f_geq<S> >(p); }
-    inline BL d_aleq(OpParam &p) { return D__rbin<S,f_aleq<S> >(p); }
-    inline BL d_ageq(OpParam &p) { return D__rbin<S,f_ageq<S> >(p); }
-    inline BL d_equ(OpParam &p) { return D__rbin<S,f_equ<S> >(p); }
-    inline BL d_neq(OpParam &p) { return D__rbin<S,f_neq<S> >(p); }
-
-    inline BL d_min(OpParam &p) { return D__rbin<S,f_min<S> >(p); }
-    inline BL d_max(OpParam &p) { return D__rbin<S,f_max<S> >(p); }
-    inline BL d_rmin(OpParam &p) { return d__cbin<S,f_min<S> >(p); }
-    inline BL d_rmax(OpParam &p) { return d__cbin<S,f_max<S> >(p); }
-
-    inline BL d_minmax(OpParam &p) { return d__cun<S,f_minmax<S> >(p); }
-
-    inline BL d_minq(OpParam &p) { return D__rop<S,f_minq<S> >(p); }
-    inline BL d_maxq(OpParam &p) { return D__rop<S,f_maxq<S> >(p); }
-    inline BL d_rminq(OpParam &p) { return d__cop<S,f_minq<S> >(p); }
-    inline BL d_rmaxq(OpParam &p) { return d__cop<S,f_maxq<S> >(p); }
-    inline BL d_aminq(OpParam &p) { return d__rop<S,f_aminq<S> >(p); }
-    inline BL d_amaxq(OpParam &p) { return d__rop<S,f_amaxq<S> >(p); }
-
-    inline BL d_gate(OpParam &p) { return D__rbin<S,f_gate<S> >(p); }
-    inline BL d_igate(OpParam &p) { return d__rbin<S,f_igate<S> >(p); }
-    inline BL d_rgate(OpParam &p) { return d__cbin<S,f_gate<S> >(p); }
-    inline BL d_rigate(OpParam &p) { return d__cbin<S,f_igate<S> >(p); }
-}
+DEFOP(S,d_lwr,lwr,rbin) // lower than; this and the nine ops below formerly wrapped D__rbin<S,f_xxx<S> >
+DEFOP(S,d_gtr,gtr,rbin)
+DEFOP(S,d_alwr,alwr,rbin)
+DEFOP(S,d_agtr,agtr,rbin)
+DEFOP(S,d_leq,leq,rbin)
+DEFOP(S,d_geq,geq,rbin)
+DEFOP(S,d_aleq,aleq,rbin)
+DEFOP(S,d_ageq,ageq,rbin)
+DEFOP(S,d_equ,equ,rbin)
+DEFOP(S,d_neq,neq,rbin)
+
+DEFOP(S,d_min,min,rbin) // formerly D__rbin<S,f_min<S> >
+DEFOP(S,d_max,max,rbin) // formerly D__rbin<S,f_max<S> >
+DEFOP(S,d_rmin,min,cbin) // same f_min functor via cbin; NOTE(review): formerly d__cbin (lowercase d) - confirm DEFOP keeps that variant
+DEFOP(S,d_rmax,max,cbin) // same f_max functor via cbin; NOTE(review): formerly d__cbin (lowercase d)
+
+DEFOP(S,d_minmax,minmax,cun) // NOTE(review): formerly d__cun<S,f_minmax<S> > (lowercase d)
+
+DEFOP(S,d_minq,minq,rop) // formerly D__rop<S,f_minq<S> >
+DEFOP(S,d_maxq,maxq,rop) // formerly D__rop<S,f_maxq<S> >
+DEFOP(S,d_rminq,minq,cop) // NOTE(review): formerly d__cop<S,f_minq<S> > (lowercase d)
+DEFOP(S,d_rmaxq,maxq,cop) // NOTE(review): formerly d__cop<S,f_maxq<S> > (lowercase d)
+DEFOP(S,d_aminq,aminq,rop) // NOTE(review): formerly d__rop (lowercase d), unlike d_minq/d_maxq above
+DEFOP(S,d_amaxq,amaxq,rop) // NOTE(review): formerly d__rop (lowercase d), unlike d_minq/d_maxq above
+
+DEFOP(S,d_gate,gate,rbin) // formerly D__rbin<S,f_gate<S> >
+DEFOP(S,d_igate,igate,rbin) // NOTE(review): formerly d__rbin (lowercase d), unlike d_gate above
+DEFOP(S,d_rgate,gate,cbin) // NOTE(review): formerly d__cbin<S,f_gate<S> > (lowercase d)
+DEFOP(S,d_rigate,igate,cbin) // NOTE(review): formerly d__cbin<S,f_igate<S> > (lowercase d)
+
 
 namespace VaspOp {
 	inline Vasp *m_lwr(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_lwr); } // lower than
diff --git a/externals/grill/vasp/source/ops_cplx.h b/externals/grill/vasp/source/ops_cplx.h
index c77de675..696aae55 100644
--- a/externals/grill/vasp/source/ops_cplx.h
+++ b/externals/grill/vasp/source/ops_cplx.h
@@ -15,16 +15,15 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 
 // Complex functions
 
-namespace VecOp {
-    inline BL d_polar(OpParam &p) { return d__cun<S,f_polar<S> >(p); }
-    inline BL d_rect(OpParam &p) { return d__cun<S,f_rect<S> >(p); }
+DEFOP(S,d_polar,polar,cun) // cartesian -> polar; formerly the inline around d__cun<S,f_polar<S> > (lowercase d - confirm DEFOP still resolves to it)
+DEFOP(S,d_rect,rect,cun) // formerly d__cun<S,f_rect<S> > (lowercase d as well)
 
-    inline BL d_radd(OpParam &p) { return d__cbin<S,f_radd<S> >(p); }
+DEFOP(S,d_radd,radd,cbin) // formerly the inline around d__cbin<S,f_radd<S> > (lowercase d - confirm DEFOP still resolves to it)
 
-    inline BL d_cnorm(OpParam &p) { return d__cun<S,f_norm<S> >(p); }
+DEFOP(S,d_cnorm,norm,cun) // note the functor is f_norm, not f_cnorm; formerly d__cun<S,f_norm<S> > (lowercase d)
+
+DEFOP(S,d_cconj,conj,cun) // NOTE(review): formerly D__cun (uppercase D) while d_cnorm used d__cun - confirm DEFOP distinguishes the two
 
-    inline BL d_cconj(OpParam &p) { return D__cun<S,f_conj<S> >(p); }
-}
 
 namespace VaspOp {
 	inline Vasp *m_polar(OpParam &p,CVasp &src,CVasp *dst = NULL) { return m_cun(p,src,dst,VecOp::d_polar); } // cartesian -> polar (each two)
diff --git a/externals/grill/vasp/source/ops_flt.h b/externals/grill/vasp/source/ops_flt.h
index ea7fe5ce..debb4039 100644
--- a/externals/grill/vasp/source/ops_flt.h
+++ b/externals/grill/vasp/source/ops_flt.h
@@ -21,10 +21,10 @@ namespace VecOp {
 
 	BL d_int(OpParam &p);
 	BL d_dif(OpParam &p); 
-
-    inline BL d_fix(OpParam &p) { return D__run<S,f_fix<S> >(p); }
 }
 
+DEFOP(S,d_fix,fix,run) // moved out of the VecOp braces above - presumably DEFOP opens namespace VecOp itself (TODO confirm); formerly D__run<S,f_fix<S> >
+
 namespace VaspOp {
 	// passive filters
 	Vasp *m_fhp(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL,BL hp = true); //! hi pass
diff --git a/externals/grill/vasp/source/ops_trnsc.h b/externals/grill/vasp/source/ops_trnsc.h
index 73617d9a..408aeb9b 100644
--- a/externals/grill/vasp/source/ops_trnsc.h
+++ b/externals/grill/vasp/source/ops_trnsc.h
@@ -15,16 +15,14 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 
 // Transcendent math functions
 
-namespace VecOp {
-    inline BL d_pow(OpParam &p) { return d__rbin<S,f_pow<S> >(p); }
-    inline BL d_rpow(OpParam &p) { return d__cbin<S,f_pow<S> >(p); }
+DEFOP(S,d_pow,pow,rbin) // power; formerly the inline around d__rbin<S,f_pow<S> > (lowercase d - confirm DEFOP still resolves to it)
+DEFOP(S,d_rpow,pow,cbin) // same f_pow functor via cbin; formerly d__cbin<S,f_pow<S> > (lowercase d)
 
-    inline BL d_sqrt(OpParam &p) { return d__run<S,f_sqrt<S> >(p); }
-    inline BL d_ssqrt(OpParam &p) { return d__run<S,f_ssqrt<S> >(p); }
+DEFOP(S,d_sqrt,sqrt,run) // formerly the inline around d__run<S,f_sqrt<S> > (lowercase d - confirm DEFOP still resolves to it)
+DEFOP(S,d_ssqrt,ssqrt,run) // formerly d__run<S,f_ssqrt<S> > (lowercase d)
 
-    inline BL d_exp(OpParam &p) { return d__run<S,f_exp<S> >(p); }
-    inline BL d_log(OpParam &p) { return d__run<S,f_log<S> >(p); }
-}
+DEFOP(S,d_exp,exp,run) // formerly the inline around d__run<S,f_exp<S> > (lowercase d - confirm DEFOP still resolves to it)
+DEFOP(S,d_log,log,run) // formerly d__run<S,f_log<S> > (lowercase d)
 
 namespace VaspOp {
 	inline Vasp *m_pow(OpParam &p,CVasp &src,const Argument &arg,CVasp *dst = NULL) { return m_rbin(p,src,arg,dst,VecOp::d_pow); } // power
diff --git a/externals/grill/vasp/source/opvecs.cpp b/externals/grill/vasp/source/opvecs.cpp
index e29f39bc..f78210be 100644
--- a/externals/grill/vasp/source/opvecs.cpp
+++ b/externals/grill/vasp/source/opvecs.cpp
@@ -418,7 +418,7 @@ CVecBlock *VaspOp::GetCVecs(const C *op,CVasp &src,const CVasp &arg,CVasp *dst,I
 
 	\remark operative function must be capable of handling reversed direction
 */
-Vasp *VaspOp::DoOp(RVecBlock *vecs,VecOp::opfun *fun,OpParam &p,BL symm)
+Vasp *VaspOp::DoOp(RVecBlock *vecs,opfun *fun,OpParam &p,BL symm)
 {
 	BL ok = true;
 
@@ -521,7 +521,7 @@ Vasp *VaspOp::DoOp(RVecBlock *vecs,VecOp::opfun *fun,OpParam &p,BL symm)
 
 	\remark operative function must be capable of handling reversed direction
 */
-Vasp *VaspOp::DoOp(CVecBlock *vecs,VecOp::opfun *fun,OpParam &p,BL symm)
+Vasp *VaspOp::DoOp(CVecBlock *vecs,opfun *fun,OpParam &p,BL symm)
 {
 	BL ok = true;
 
diff --git a/externals/grill/vasp/source/util.cpp b/externals/grill/vasp/source/util.cpp
index 417b74c2..0b1050c1 100644
--- a/externals/grill/vasp/source/util.cpp
+++ b/externals/grill/vasp/source/util.cpp
@@ -12,6 +12,7 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 #include "util.h"
 #include <math.h>
 
+/*
 R arg(R re,R im)
 {
 	if(re) 
@@ -20,4 +21,4 @@ R arg(R re,R im)
 		if(im || re) return im > 0?PI/2:-PI/2;
 		else return 0;
 }
-
+*/
diff --git a/externals/grill/vasp/source/util.h b/externals/grill/vasp/source/util.h
index 1b41c447..859f37e7 100644
--- a/externals/grill/vasp/source/util.h
+++ b/externals/grill/vasp/source/util.h
@@ -20,14 +20,32 @@ WARRANTIES, see the file, "license.txt," in this distribution.
 #define BIG 1.e10
 
 
-R arg(R re,R im);
-inline R arg(const CX &c) { return arg(c.real,c.imag); }
-inline F sqabs(F re,F im) { return re*re+im*im; }
-inline F sqabs(const CX &c) { return sqabs(c.real,c.imag); }
-inline F sgn(F x) { return x < 0.?-1.F:1.F; }
-inline V swap(F &a,F &b) { F c = a; a = b; b = c; }
-
-inline I min(I a,I b) { return a < b?a:b; }
-inline I max(I a,I b) { return a > b?a:b; }
+template<class T>
+inline V swap(T &a,T &b) { T tmp = a; a = b; b = tmp; } // exchange a and b in place through one temporary copy
+
+template<class T>
+inline T min(T a,T b) { if(a < b) return a; return b; } // yields a only when a < b holds, otherwise b
+
+template<class T>
+inline T max(T a,T b) { if(a > b) return a; return b; } // yields a only when a > b holds, otherwise b
+
+
+template<class T>
+T arg(T re,T im)
+{
+    // off the imaginary axis: atan(im/re), shifted by PI or 2*PI depending on the sign of re, wrapped by fmod
+    if(re) return (T)(fmod(atan(im/re)+(re < 0?2*PI:PI),2*PI)-PI);
+    // re is zero from here on, so im alone decides: +PI/2 or -PI/2 on the imaginary axis ...
+    if(im) return (T)(im > 0?PI/2:-PI/2);
+    return 0; // ... and 0 for the origin itself
+}
+
+template<class T>
+inline T sgn(T x) { if(!x) return (T)(0); return (T)(x < 0?-1:1); } // -1 below zero, 0 at zero, 1 otherwise
+
+template<class T>
+inline T sqabs(T re,T im) { return re*re+im*im; } // squared magnitude re^2+im^2; no square root is taken
+
+
 
 #endif
diff --git a/externals/grill/vasp/vasp.cw b/externals/grill/vasp/vasp.cw
index 212243fb..1601dbcb 100644
--- a/externals/grill/vasp/vasp.cw
+++ b/externals/grill/vasp/vasp.cw