aboutsummaryrefslogtreecommitdiff
path: root/externals/grill/vasp/source/opfuns.h
diff options
context:
space:
mode:
Diffstat (limited to 'externals/grill/vasp/source/opfuns.h')
-rw-r--r--externals/grill/vasp/source/opfuns.h183
1 files changed, 129 insertions, 54 deletions
diff --git a/externals/grill/vasp/source/opfuns.h b/externals/grill/vasp/source/opfuns.h
index 5ab8d48a..2228641d 100644
--- a/externals/grill/vasp/source/opfuns.h
+++ b/externals/grill/vasp/source/opfuns.h
@@ -12,56 +12,27 @@ WARRANTIES, see the file, "license.txt," in this distribution.
#define __VASP_OPFUNS_H
#include "opdefs.h"
+#include <math.h>
+#include "util.h"
namespace VecOp {
- // multi-layer templates
-
- template<class T,V FUN(T &v,T a),I N>
- static V vec_un(T *v,const T *a,I n = 0) {
- const I _n = N?N:n;
- for(I i = 0; i < _n; ++i) FUN(v[i],a[i]);
- }
-
- template<class T,V FUN(T &v,T a),I N>
- static V vec_un(T *v,T a,I n = 0) {
- const I _n = N?N:n;
- for(I i = 0; i < _n; ++i) FUN(v[i],a);
- }
-
- template<class T,V FUN(T &v,T a,T b),I N>
- static V vec_bin(T *v,const T *a,const T *b,I n = 0) {
- const I _n = N?N:n;
- for(I i = 0; i < _n; ++i) FUN(v[i],a[i],b[i]);
- }
-
- template<class T,V FUN(T &v,T a,T b),I N>
- static V vec_bin(T *v,const T *a,T b,I n = 0) {
- const I _n = N?N:n;
- for(I i = 0; i < _n; ++i) FUN(v[i],a[i],b);
- }
-
-
- template<class T,class CL,I N>
- static V cvec_un(T *v,const T *a,I n = 0) { vec_un<T,CL::run,N>(v,a,n); }
-
- template<class T,class CL,I N>
- static V cvec_bin(T *v,const T *a,const T *b,I n = 0) { vec_bin<T,n,CL::rbin>(v,a,b,n); }
-
-
-
// assignment
template<class T> class f_copy {
public:
+ static I run_opt() { return 3; }
static V run(T &v,T a) { v = a; }
+ static I cun_opt() { return 2; }
static V cun(T &rv,T &iv,T ra,T ia) { rv = ra,iv = ia; }
};
template<class T> class f_set {
public:
+ static I rbin_opt() { return 3; }
static V rbin(T &v,T,T b) { v = b; }
+ static I cbin_opt() { return 2; }
static V cbin(T &rv,T &iv,T,T,T rb,T ib) { rv = rb,iv = ib; }
};
@@ -69,32 +40,42 @@ namespace VecOp {
template<class T> class f_add {
public:
+ static I rbin_opt() { return 3; }
static V rbin(T &v,T a,T b) { v = a+b; }
+ static I cbin_opt() { return 2; }
static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { rv = ra+rb,iv = ia+ib; }
};
template<class T> class f_sub {
public:
+ static I rbin_opt() { return 3; }
static V rbin(T &v,T a,T b) { v = a-b; }
+ static I cbin_opt() { return 2; }
static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { rv = ra-rb,iv = ia-ib; }
};
template<class T> class f_subr {
public:
+ static I rbin_opt() { return 2; }
static V rbin(T &v,T a,T b) { v = b-a; }
+ static I cbin_opt() { return 2; }
static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { rv = rb-ra,iv = ib-ia; }
};
template<class T> class f_mul {
public:
+ static I rbin_opt() { return 3; }
static V rbin(T &v,T a,T b) { v = a*b; }
+ static I cbin_opt() { return 1; }
static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { rv = ra*rb-ia*ib, iv = ra*ib+rb*ia; }
};
template<class T> class f_div {
public:
+ static I rbin_opt() { return 2; }
static V rbin(T &v,T a,T b) { v = a/b; }
+ static I cbin_opt() { return 0; }
static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib)
{
register const T den = sqabs(rb,ib);
@@ -105,8 +86,10 @@ namespace VecOp {
template<class T> class f_divr {
public:
+ static I rbin_opt() { return 2; }
static V rbin(T &v,T a,T b) { v = b/a; }
+ static I cbin_opt() { return 0; }
static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib)
{
register const T den = sqabs(ra,ia);
@@ -117,28 +100,35 @@ namespace VecOp {
template<class T> class f_mod {
public:
+ static I rbin_opt() { return 0; }
static V rbin(T &v,T a,T b) { v = fmod(a,b); }
};
template<class T> class f_abs {
public:
+ static I run_opt() { return 0; }
static V run(T &v,T a) { v = fabs(a); }
+ static I cun_opt() { return 0; }
static V cun(T &rv,T &iv,T ra,T ia) { rv = sqrt(ra*ra+ia*ia),iv = 0; }
};
template<class T> class f_sign {
public:
+ static I run_opt() { return 0; }
static V run(T &v,T a) { v = (a == 0?0:(a < 0?-1.:1.)); }
};
template<class T> class f_sqr {
public:
+ static I run_opt() { return 3; }
static V run(T &v,T a) { v = a*a; }
+ static I cun_opt() { return 1; }
static V cun(T &rv,T &iv,T ra,T ia) { rv = ra*ra-ia*ia; iv = ra*ia*2; }
};
template<class T> class f_ssqr {
public:
+ static I run_opt() { return 0; }
static V run(T &v,T a) { v = a*fabs(a); }
};
@@ -147,19 +137,22 @@ namespace VecOp {
template<class T> class f_powi {
public:
+ static I cop_opt() { return 0; }
static V cop(T &rv,T &iv,T ra,T ia,OpParam &p)
{
register const I powi = p.ibin.arg;
- register T rt,it; f_sqr<T>::cun(rt,it,ra,ia);
- for(I i = 2; i < powi; ++i) f_mul<T>::cbin(rt,it,rt,it,ra,ia);
+ register T rt,it; VecOp::f_sqr<T>::cun(rt,it,ra,ia);
+ for(I i = 2; i < powi; ++i) VecOp::f_mul<T>::cbin(rt,it,rt,it,ra,ia);
rv = rt,iv = it;
}
};
template<class T> class f_pow {
public:
+ static I rbin_opt() { return 0; }
static V rbin(T &v,T a,T b) { v = pow(fabs(a),b)*sgn(a); }
+ static I cbin_opt() { return 0; }
static V cbin(T &rv,T &iv,T ra,T ia,T rb,T)
{
register const T _abs = sqrt(sqabs(ra,ia));
@@ -170,26 +163,32 @@ namespace VecOp {
else
rv = iv = 0;
}
+ protected:
+ static T sgn(T x) { return x?(x > 0?1:-1):0; }
};
template<class T> class f_sqrt {
public:
+ static I run_opt() { return 0; }
static V run(T &v,T a) { v = sqrt(fabs(a)); }
};
template<class T> class f_ssqrt {
public:
+ static I run_opt() { return 0; }
static V run(T &v,T a) { v = sqrt(fabs(a))*sgn(a); }
};
template<class T> class f_exp {
public:
+ static I run_opt() { return 0; }
static V run(T &v,T a) { v = exp(a); }
};
template<class T> class f_log {
public:
+ static I run_opt() { return 0; }
static V run(T &v,T a) { v = log(a); } // \todo detect NANs
};
@@ -197,51 +196,61 @@ namespace VecOp {
template<class T> class f_lwr {
public:
+ static I rbin_opt() { return 1; }
static V rbin(T &v,T a,T b) { v = a < b?1:0; }
};
template<class T> class f_gtr {
public:
+ static I rbin_opt() { return 1; }
static V rbin(T &v,T a,T b) { v = a > b?1:0; }
};
template<class T> class f_alwr {
public:
+ static I rbin_opt() { return 0; }
static V rbin(T &v,T a,T b) { v = fabs(a) < fabs(b)?1:0; }
};
template<class T> class f_agtr {
public:
+ static I rbin_opt() { return 0; }
static V rbin(T &v,T a,T b) { v = fabs(a) > fabs(b)?1:0; }
};
template<class T> class f_leq {
public:
+ static I rbin_opt() { return 1; }
static V rbin(T &v,T a,T b) { v = a <= b?1:0; }
};
template<class T> class f_geq {
public:
+ static I rbin_opt() { return 1; }
static V rbin(T &v,T a,T b) { v = a >= b?1:0; }
};
template<class T> class f_aleq {
public:
+ static I rbin_opt() { return 0; }
static V rbin(T &v,T a,T b) { v = fabs(a) <= fabs(b)?1:0; }
};
template<class T> class f_ageq {
public:
+ static I rbin_opt() { return 0; }
static V rbin(T &v,T a,T b) { v = fabs(a) >= fabs(b)?1:0; }
};
template<class T> class f_equ {
public:
+ static I rbin_opt() { return 1; }
static V rbin(T &v,T a,T b) { v = a == b?1:0; }
};
template<class T> class f_neq {
public:
+ static I rbin_opt() { return 1; }
static V rbin(T &v,T a,T b) { v = a != b?1:0; }
};
@@ -249,8 +258,10 @@ namespace VecOp {
template<class T> class f_min {
public:
+ static I rbin_opt() { return 1; }
static V rbin(T &v,T a,T b) { v = a < b?a:b; }
+ static I cbin_opt() { return 0; }
static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib)
{
if(sqabs(ra,ia) < sqabs(rb,ib)) rv = ra,iv = ia;
@@ -260,8 +271,10 @@ namespace VecOp {
template<class T> class f_max {
public:
+ static I rbin_opt() { return 1; }
static V rbin(T &v,T a,T b) { v = a > b?a:b; }
+ static I cbin_opt() { return 0; }
static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib)
{
if(sqabs(ra,ia) > sqabs(rb,ib)) rv = ra,iv = ia;
@@ -271,6 +284,7 @@ namespace VecOp {
template<class T> class f_minmax {
public:
+ static I cun_opt() { return 0; }
static V cun(T &rv,T &iv,T ra,T ia)
{
if(ra < ia) rv = ra,iv = ia;
@@ -280,11 +294,13 @@ namespace VecOp {
template<class T> class f_minq {
public:
+ static I rop_opt() { return 0; }
static V rop(T &,T ra,OpParam &p)
{
if(ra < p.norm.minmax) p.norm.minmax = ra;
}
+ static I cop_opt() { return 0; }
static V cop(T &,T &,T ra,T ia,OpParam &p)
{
register T s = sqabs(ra,ia);
@@ -294,11 +310,13 @@ namespace VecOp {
template<class T> class f_maxq {
public:
+ static I rop_opt() { return 0; }
static V rop(T &,T ra,OpParam &p)
{
if(ra > p.norm.minmax) p.norm.minmax = ra;
}
+ static I cop_opt() { return 0; }
static V cop(T &,T &,T ra,T ia,OpParam &p)
{
register T s = sqabs(ra,ia);
@@ -308,6 +326,7 @@ namespace VecOp {
template<class T> class f_aminq {
public:
+ static I rop_opt() { return 0; }
static V rop(T &,T ra,OpParam &p)
{
register T s = fabs(ra);
@@ -317,6 +336,7 @@ namespace VecOp {
template<class T> class f_amaxq {
public:
+ static I rop_opt() { return 0; }
static V rop(T &,T ra,OpParam &p)
{
register T s = fabs(ra);
@@ -329,8 +349,10 @@ namespace VecOp {
template<class T> class f_gate {
public:
+ static I rbin_opt() { return 0; }
static V rbin(T &rv,T ra,T rb) { rv = fabs(ra) >= rb?ra:0; }
+ static I cbin_opt() { return 0; }
static V cbin(T &rv,T &iv,T ra,T ia,T rb,T)
{
register const T _abs = sqabs(ra,ia);
@@ -342,8 +364,10 @@ namespace VecOp {
template<class T> class f_igate {
public:
+ static I rbin_opt() { return 0; }
static V rbin(T &rv,T ra,T rb) { rv = fabs(ra) <= rb?ra:0; }
+ static I cbin_opt() { return 0; }
static V cbin(T &rv,T &iv,T ra,T ia,T rb,T)
{
register const T _abs = sqabs(ra,ia);
@@ -357,6 +381,7 @@ namespace VecOp {
template<class T> class f_norm {
public:
+ static I cun_opt() { return 0; }
static V cun(T &rv,T &iv,T ra,T ia)
{
register T f = sqabs(ra,ia);
@@ -367,21 +392,25 @@ namespace VecOp {
template<class T> class f_conj {
public:
+ static I cun_opt() { return 2; }
static V cun(T &,T &iv,T,T ia) { iv = -ia; }
};
template<class T> class f_polar {
public:
+ static I cun_opt() { return 0; }
static V cun(T &rv,T &iv,T ra,T ia) { rv = sqrt(sqabs(ra,ia)),iv = arg(ra,ia); }
};
template<class T> class f_rect {
public:
+ static I cun_opt() { return 0; }
static V cun(T &rv,T &iv,T ra,T ia) { rv = ra*cos(ia),iv = ra*sin(ia); }
};
template<class T> class f_radd {
public:
+ static I cbin_opt() { return 0; }
static V cbin(T &rv,T &iv,T ra,T ia,T rb,T)
{
register const T _abs = sqrt(sqabs(ra,ia))+rb;
@@ -401,6 +430,7 @@ namespace VecOp {
\param v destination vasp (NULL for in-place operation)
\return normalized destination vasp
*/
+ static I run_opt() { return 0; }
static V run(T &v,T a)
{
if(a != a) // NAN
@@ -415,30 +445,75 @@ namespace VecOp {
}
}
};
+
}
+#define DEFOP(T,FUN,OP,KIND) \
+namespace VecOp { inline BL FUN(OpParam &p) { return D__##KIND<T,f_##OP<T> >(p); } }
+
+
+#define DEFVEC_R(T,OP) \
+ static BL r_##OP (I len,T *dr,I rds,const T *sr,I rss) { return VecOp::V__rbin<T,VecOp::f_##OP <T> >(sr,rss,dr,rds,len); } \
+ static BL v_##OP##_(I layers,const T *sr,T *dr,const T *ar,I len) { return VecOp::V__vbin<T,VecOp::f_##OP <T> >(layers,sr,dr,ar,len); } \
+ static BL v_##OP (I dim,const I *dims,I layers,T *dr,const T *sr,const T *ar) { return VecOp::V__vmulti<T>(v_##OP##_,layers,sr,dr,ar,dim,dims); }
+
+#define DEFVEC_C(T,OP) \
+ static BL c_##OP (I len,T *dr,T *di,I rds,I ids,const T *sr,I rss,I iss) { return VecOp::V__cbin<T,VecOp::f_##OP <T> >(sr,rss,iss,dr,rds,ids,len); }
+
+#define DEFVEC_B(T,OP) DEFVEC_R(T,OP) DEFVEC_C(T,OP)
+
+
template<class T>
class VecFun {
public:
- // strided real data
- static BL r_add(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin<T,VecOp::f_add<T>::rbin>(sr,rss,dr,rds,len); }
- static BL r_sub(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin<T,VecOp::f_sub<T>::rbin>(sr,rss,dr,rds,len); }
- static BL r_subr(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin<T,VecOp::f_subr<T>::rbin>(sr,rss,dr,rds,len); }
- static BL r_mul(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin<T,VecOp::f_mul<T>::rbin>(sr,rss,dr,rds,len); }
- static BL r_div(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin<T,VecOp::f_div<T>::rbin>(sr,rss,dr,rds,len); }
- static BL r_divr(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin<T,VecOp::f_divr<T>::rbin>(sr,rss,dr,rds,len); }
- static BL r_mod(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin<T,VecOp::f_mod<T>::rbin>(sr,rss,dr,rds,len); }
-
- // multi-layer data (non-strided)
- static BL v_add(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin<T,VecOp::f_add<T>::rbin>(layers,sr,dr,ar,len); }
- static BL v_sub(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin<T,VecOp::f_sub<T>::rbin>(layers,sr,dr,ar,len); }
- static BL v_subr(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin<T,VecOp::f_subr<T>::rbin>(layers,sr,dr,ar,len); }
- static BL v_mul(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin<T,VecOp::f_mul<T>::rbin>(layers,sr,dr,ar,len); }
- static BL v_div(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin<T,VecOp::f_div<T>::rbin>(layers,sr,dr,ar,len); }
- static BL v_divr(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin<T,VecOp::f_divr<T>::rbin>(layers,sr,dr,ar,len); }
- static BL v_mod(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin<T,VecOp::f_mod<T>::rbin>(layers,sr,dr,ar,len); }
+ DEFVEC_B(T,copy)
+
+ DEFVEC_B(T,add)
+ DEFVEC_B(T,sub)
+ DEFVEC_B(T,subr)
+ DEFVEC_B(T,mul)
+ DEFVEC_B(T,div)
+ DEFVEC_B(T,divr)
+ DEFVEC_R(T,mod)
+ DEFVEC_B(T,abs)
+ DEFVEC_R(T,sign)
+ DEFVEC_B(T,sqr)
+ DEFVEC_R(T,ssqr)
+
+ DEFVEC_C(T,powi)
+ DEFVEC_B(T,pow)
+ DEFVEC_R(T,sqrt)
+ DEFVEC_R(T,ssqrt)
+ DEFVEC_R(T,exp)
+ DEFVEC_R(T,log)
+
+ DEFVEC_R(T,lwr)
+ DEFVEC_R(T,gtr)
+ DEFVEC_R(T,alwr)
+ DEFVEC_R(T,agtr)
+ DEFVEC_R(T,leq)
+ DEFVEC_R(T,geq)
+ DEFVEC_R(T,aleq)
+ DEFVEC_R(T,ageq)
+ DEFVEC_R(T,equ)
+ DEFVEC_R(T,neq)
+
+ DEFVEC_B(T,min)
+ DEFVEC_B(T,max)
+ DEFVEC_C(T,minmax)
+ DEFVEC_C(T,gate)
+ DEFVEC_C(T,igate)
+
+ DEFVEC_C(T,norm)
+ DEFVEC_C(T,conj)
+ DEFVEC_C(T,polar)
+ DEFVEC_C(T,rect)
+ DEFVEC_C(T,radd)
+
+ DEFVEC_R(T,fix)
};
+
#endif