From bc6f43fbe1b22b1c2c63a32372126e0eaaaa08b0 Mon Sep 17 00:00:00 2001 From: Thomas Grill Date: Sat, 15 Mar 2003 04:35:48 +0000 Subject: "" svn path=/trunk/; revision=475 --- externals/grill/vasp/source/opfuns.h | 102 ++++++++++++++++++++++++++++------- 1 file changed, 84 insertions(+), 18 deletions(-) (limited to 'externals/grill/vasp/source/opfuns.h') diff --git a/externals/grill/vasp/source/opfuns.h b/externals/grill/vasp/source/opfuns.h index 99fca899..5ab8d48a 100644 --- a/externals/grill/vasp/source/opfuns.h +++ b/externals/grill/vasp/source/opfuns.h @@ -13,8 +13,44 @@ WARRANTIES, see the file, "license.txt," in this distribution. #include "opdefs.h" + namespace VecOp { + // multi-layer templates + + template + static V vec_un(T *v,const T *a,I n = 0) { + const I _n = N?N:n; + for(I i = 0; i < _n; ++i) FUN(v[i],a[i]); + } + + template + static V vec_un(T *v,T a,I n = 0) { + const I _n = N?N:n; + for(I i = 0; i < _n; ++i) FUN(v[i],a); + } + + template + static V vec_bin(T *v,const T *a,const T *b,I n = 0) { + const I _n = N?N:n; + for(I i = 0; i < _n; ++i) FUN(v[i],a[i],b[i]); + } + + template + static V vec_bin(T *v,const T *a,T b,I n = 0) { + const I _n = N?N:n; + for(I i = 0; i < _n; ++i) FUN(v[i],a[i],b); + } + + + template + static V cvec_un(T *v,const T *a,I n = 0) { vec_un(v,a,n); } + + template + static V cvec_bin(T *v,const T *a,const T *b,I n = 0) { vec_bin(v,a,b,n); } + + + // assignment template class f_copy { @@ -61,7 +97,7 @@ namespace VecOp { static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { - register const R den = sqabs(rb,ib); + register const T den = sqabs(rb,ib); rv = (ra*rb+ia*ib)/den; iv = (ia*rb-ra*ib)/den; } @@ -73,7 +109,7 @@ namespace VecOp { static V cbin(T &rv,T &iv,T ra,T ia,T rb,T ib) { - register const R den = sqabs(ra,ia); + register const T den = sqabs(ra,ia); rv = (rb*ra+ib*ia)/den; iv = (ib*ra-rb*ia)/den; } @@ -114,7 +150,7 @@ namespace VecOp { static V cop(T &rv,T &iv,T ra,T ia,OpParam &p) { register const I powi = p.ibin.arg; - register S rt,it; f_sqr::cun(rt,it,ra,ia); + register T rt,it; f_sqr::cun(rt,it,ra,ia); for(I i = 2; i < powi; ++i) f_mul::cbin(rt,it,rt,it,ra,ia); rv = rt,iv = it; } @@ -126,9 +162,9 @@ namespace VecOp { static V cbin(T &rv,T &iv,T ra,T ia,T rb,T) { - register const R _abs = sqrt(sqabs(ra,ia)); + register const T _abs = sqrt(sqabs(ra,ia)); if(_abs) { - register const R _p = pow(_abs,rb)/_abs; + register const T _p = pow(_abs,rb)/_abs; rv = _p*ra,iv = _p*ia; } else @@ -270,17 +306,23 @@ namespace VecOp { } }; - template V f_aminq(T &,T ra,OpParam &p) - { - register T s = fabs(ra); - if(s < p.norm.minmax) p.norm.minmax = s; - } + template class f_aminq { + public: + static V rop(T &,T ra,OpParam &p) + { + register T s = fabs(ra); + if(s < p.norm.minmax) p.norm.minmax = s; + } + }; - template V f_amaxq(T &,T ra,OpParam &p) - { - register T s = fabs(ra); - if(s > p.norm.minmax) p.norm.minmax = s; - } + template class f_amaxq { + public: + static V rop(T &,T ra,OpParam &p) + { + register T s = fabs(ra); + if(s > p.norm.minmax) p.norm.minmax = s; + } + }; // gating @@ -342,8 +384,8 @@ namespace VecOp { public: static V cbin(T &rv,T &iv,T ra,T ia,T rb,T) { - register const R _abs = sqrt(sqabs(ra,ia))+rb; - register const R _phi = arg(ra,ia); + register const T _abs = sqrt(sqabs(ra,ia))+rb; + register const T _phi = arg(ra,ia); rv = _abs*cos(_phi),iv = _abs*sin(_phi); } @@ -366,7 +408,7 @@ namespace VecOp { else { // denormal bashing (doesn't propagate to the next stage) - static const F anti_denormal = 1e-18F; + static const T anti_denormal = (T)1.e-18; a += anti_denormal; a -= anti_denormal; v = a; @@ -375,4 +417,28 @@ namespace VecOp { }; } + +template +class VecFun { +public: + // strided real data + static BL r_add(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } + static BL r_sub(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } + static BL r_subr(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } + static BL r_mul(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } + static BL r_div(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } + static BL r_divr(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } + static BL r_mod(I len,register T *dr,register const T *sr,I rds = 1,I rss = 1) { return VecOp::V__rbin::rbin>(sr,rss,dr,rds,len); } + + // multi-layer data (non-strided) + static BL v_add(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } + static BL v_sub(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } + static BL v_subr(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } + static BL v_mul(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } + static BL v_div(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } + static BL v_divr(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } + static BL v_mod(I len,I layers,register T *dr,register const T *sr,register const T *ar) { return VecOp::V__vbin::rbin>(layers,sr,dr,ar,len); } +}; + + #endif -- cgit v1.2.1