From be03cee9cac9d22b3bda35a8204a40684f7b4068 Mon Sep 17 00:00:00 2001 From: Benjamin Glaessle Date: Wed, 12 Nov 2014 14:23:34 +0100 Subject: [PATCH 1/2] fixed missing subtype.field() bug * the bug appeared because field() member functions were missing in template instantiations of copymask, random, gaussian ... * the fix mostly allows code to be compiled against qdp++, the result should be ok for scalar and parscalar builds without threading, but I haven't run any tests! For threaded builds, it is at least slower, if not wrong * I cannot predict the behaviour for jit, parvecscalar, etc. builds; additional fixes might be necessary ... * two issues worth mentioning: 1. it is an architectural choice how threaded RNG is supposed to behave (same result as non-threaded? how is this supposed to be enforced?) 2. currently "random(ferm[subset])" and "random(ferm,subset)" might behave differently (same for gaussian) * I am unsure about the OSubScalar behaviour, is it correct that this is simply supposed to be OScalar again, with no effective Subset? --- include/qdp_globalfuncs_subtype.h | 21 +++++++++++++++++++++ include/qdp_outersubtype.h | 10 +++++----- include/qdp_parscalar_specific.h | 19 ++++++------------- include/qdp_parscalarvec_specific.h | 18 ++++-------------- include/qdp_scalar_specific.h | 21 +++++++-------------- include/qdp_scalarvec_specific.h | 18 ++++-------------- 6 files changed, 47 insertions(+), 60 deletions(-) diff --git a/include/qdp_globalfuncs_subtype.h b/include/qdp_globalfuncs_subtype.h index d1accaa0a..26cc1fa2a 100644 --- a/include/qdp_globalfuncs_subtype.h +++ b/include/qdp_globalfuncs_subtype.h @@ -131,6 +131,27 @@ sum( const OSubLattice& s1 ) } } + template + inline + void random_F( T* dest, const Subset& s) + { + // omp - or is this in conflict with RNG functionality? + for(int j=0; j < s.numSiteTable(); ++j) + { + random( dest[j] ); + } + } + + template + inline + void gaussian_F( T* dest, const Subset& s) + { + // omp - see above? 
+ for(int j=0; j < s.numSiteTable(); ++j) + { + gaussian( dest[j] ); + } + } //! dest = 0 template diff --git a/include/qdp_outersubtype.h b/include/qdp_outersubtype.h index 6ef19c3c4..d740c1157 100644 --- a/include/qdp_outersubtype.h +++ b/include/qdp_outersubtype.h @@ -305,14 +305,14 @@ struct DoublePrecType > template void zero_rep(OScalar& dest, const Subset& s) { - zero_rep(dest.field().elem()); + zero_rep(dest.elem()); } //! dest = 0 template void zero_rep(OSubScalar dest) { - zero_rep(dest.field().elem()); + zero_rep(*dest.getF()); } //! dest = (mask) ? s1 : dest @@ -320,7 +320,7 @@ template void copymask(OSubScalar dest, const OScalar& mask, const OScalar& s1) { - copymask(dest.field().elem(), mask.elem(), s1.elem()); + copymask(*dest.getF(), mask.elem(), s1.elem()); } @@ -335,7 +335,7 @@ void random(OSubScalar d); template void gaussian(OSubScalar dd) { - OLattice& d = dd.field(); + T& d = *(dd.getF()); const Subset& s = dd.subset(); OScalar r1, r2; @@ -343,7 +343,7 @@ void gaussian(OSubScalar dd) random(r1(s)); random(r2(s)); - fill_gaussian(d.elem(), r1.elem(), r2.elem()); + fill_gaussian(d, r1.elem(), r2.elem()); } } // namespace QDP diff --git a/include/qdp_parscalar_specific.h b/include/qdp_parscalar_specific.h index e5842f124..5c517d3ea 100644 --- a/include/qdp_parscalar_specific.h +++ b/include/qdp_parscalar_specific.h @@ -463,7 +463,6 @@ void evaluate_F(T* dest, const Op& op, const QDPExpr >& rhs, template void copymask(OSubLattice d, const OLattice& mask, const OLattice& s1) { - OLattice& dest = d.field(); const Subset& s = d.subset(); const int *tab = s.siteTable().slice(); @@ -471,7 +470,7 @@ void copymask(OSubLattice d, const OLattice& mask, const OLattice& s for(int j=0; j < s.numSiteTable(); ++j) { int i = tab[j]; - copymask(dest.elem(i), mask.elem(i), s1.elem(i)); + copymask(d.getF()[i], mask.elem(i), s1.elem(i)); } } @@ -547,10 +546,7 @@ random(OLattice& d, const Subset& s) template void random(OSubLattice dd) { - OLattice& d = dd.field(); 
- const Subset& s = dd.subset(); - - random(d,s); + random_F(dd.getF(),dd.subset()); } @@ -586,10 +582,7 @@ void gaussian(OLattice& d, const Subset& s) template void gaussian(OSubLattice dd) { - OLattice& d = dd.field(); - const Subset& s = dd.subset(); - - gaussian(d,s); + gaussian_F(dd.getF(),dd.subset()); } @@ -2558,9 +2551,9 @@ void writeOLattice(BinaryWriter& bin, template void write(BinaryWriter& bin, OSubLattice dd) { - const OLattice& d = dd.field(); + T* d = dd.getF(); - writeOLattice(bin, (const char *)&(d.elem(0)), + writeOLattice(bin, (const char *)d, sizeof(typename WordType::Type_t), sizeof(T) / sizeof(typename WordType::Type_t), dd.subset()); @@ -2610,7 +2603,7 @@ void readOLattice(BinaryReader& bin, template void read(BinaryReader& bin, OSubLattice d) { - readOLattice(bin, (char *)(d.field().getF()), + readOLattice(bin, (char *)(d.getF()), sizeof(typename WordType::Type_t), sizeof(T) / sizeof(typename WordType::Type_t), d.subset()); diff --git a/include/qdp_parscalarvec_specific.h b/include/qdp_parscalarvec_specific.h index 4fe3af7bd..aec8e9641 100644 --- a/include/qdp_parscalarvec_specific.h +++ b/include/qdp_parscalarvec_specific.h @@ -208,7 +208,6 @@ template void copymask(OSubLattice d, const OLattice& mask, const OLattice& s1) { - OLattice& dest = d.field(); const Subset& s = d.subset(); #if ! 
defined(QDP_NOT_IMPLEMENTED) @@ -216,7 +215,7 @@ copymask(OSubLattice d, const OLattice& mask, const OLattice& for(int j=0; j < s.numSiteTable(); ++j) { int i = tab[j]; - copymask(dest.elem(i), mask.elem(i), s1.elem(i)); + copymask(d.getF()[i], mask.elem(i), s1.elem(i)); } #else QDP_error("copymask_Subset not implemented"); @@ -292,10 +291,7 @@ random(OLattice& d, const Subset& s) template void random(const OSubLattice& dd) { - OLattice& d = const_cast&>(dd).field(); - const S& s = dd.subset(); - - random(d,s); + random_F(const_cast&>(dd).getF(),dd.subset()); } @@ -334,10 +330,7 @@ void gaussian(OLattice& d, const Subset& s) template void gaussian(const OSubLattice& dd) { - OLattice& d = const_cast&>(dd).field(); - const S& s = dd.subset(); - - gaussian(d,s); + gaussian_F(const_cast&>(dd).getF(),dd.subset()); } @@ -374,10 +367,7 @@ void zero_rep(OLattice& dest, const Subset& s) template void zero_rep(OSubLattice dd) { - OLattice& d = dd.field(); - const S& s = dd.subset(); - - zero_rep(d,s); + zero_rep_F(dd.getF(),dd.subset()); } diff --git a/include/qdp_scalar_specific.h b/include/qdp_scalar_specific.h index 372a90308..a5a9f007a 100644 --- a/include/qdp_scalar_specific.h +++ b/include/qdp_scalar_specific.h @@ -280,7 +280,6 @@ template void copymask(OSubLattice d, const OLattice& mask, const OLattice& s1) { - OLattice& dest = d.field(); const Subset& s = d.subset(); const int *tab = s.siteTable().slice(); @@ -288,7 +287,7 @@ copymask(OSubLattice d, const OLattice& mask, const OLattice& s1) for(int j=0; j < s.numSiteTable(); ++j) { int i = tab[j]; - copymask(dest.elem(i), mask.elem(i), s1.elem(i)); + copymask(dd.getF()[i], mask.elem(i), s1.elem(i)); } } @@ -367,10 +366,7 @@ random(OLattice& d, const Subset& s) template void random(OSubLattice dd) { - OLattice& d = dd.field(); - const Subset& s = dd.subset(); - - random(d,s); + random_F(dd.getF(),dd.subset()); } @@ -407,10 +403,7 @@ void gaussian(OLattice& d, const Subset& s) template void gaussian(OSubLattice dd) { 
- OLattice& d = dd.field(); - const Subset& s = dd.subset(); - - gaussian(d,s); + gaussian_F(dd.getF(),dd.subset()); } @@ -2120,7 +2113,7 @@ void write(BinaryWriter& bin, OSubLattice dd) const Subset& sub = dd.subset(); const Set& set = sub.getSet(); - const OLattice& d = dd.field(); + const T* d = dd.getF(); const multi1d& lat_color = set.latticeColoring(); const int color = sub.color(); @@ -2132,7 +2125,7 @@ void write(BinaryWriter& bin, OSubLattice dd) int i = Layout::linearSiteIndex(site); if (lat_color[i] == color) { - bin.writeArray((const char*)&(d.elem(i)), + bin.writeArray((const char*)&(d[i]), sizeof(typename WordType::Type_t), sizeof(T) / sizeof(typename WordType::Type_t)); } @@ -2195,7 +2188,7 @@ void read(BinaryReader& bin, OSubLattice dd) const Subset& sub = dd.subset(); const Set& set = sub.getSet(); - OLattice& d = dd.field(); + T* d = dd.getF(); const multi1d& lat_color = set.latticeColoring(); const int color = sub.color(); @@ -2207,7 +2200,7 @@ void read(BinaryReader& bin, OSubLattice dd) int i = Layout::linearSiteIndex(site); if (lat_color[i] == color) { - bin.readArray((char*)&(d.elem(i)), + bin.readArray((char*)&(d[i]), sizeof(typename WordType::Type_t), sizeof(T) / sizeof(typename WordType::Type_t)); } diff --git a/include/qdp_scalarvec_specific.h b/include/qdp_scalarvec_specific.h index 922449067..792f9c648 100644 --- a/include/qdp_scalarvec_specific.h +++ b/include/qdp_scalarvec_specific.h @@ -142,7 +142,6 @@ template void copymask(OSubLattice d, const OLattice& mask, const OLattice& s1) { - OLattice& dest = d.field(); const Subset& s = d.subset(); #if ! 
defined(QDP_NOT_IMPLEMENTED) @@ -150,7 +149,7 @@ copymask(OSubLattice d, const OLattice& mask, const OLattice& for(int j=0; j < s.numSiteTable(); ++j) { int i = tab[j]; - copymask(dest.elem(i), mask.elem(i), s1.elem(i)); + copymask(d.getF()[i], mask.elem(i), s1.elem(i)); } #else QDP_error_exit("copymask_Subset not implemented"); @@ -228,10 +227,7 @@ random(OLattice& d, const Subset& s) template void random(const OSubLattice& dd) { - OLattice& d = const_cast&>(dd).field(); - const S& s = dd.subset(); - - random(d,s); + random_F(const_cast&>(dd).getF(),dd.subset()); } @@ -269,10 +265,7 @@ void gaussian(OLattice& d, const Subset& s) template void gaussian(const OSubLattice& dd) { - OLattice& d = const_cast&>(dd).field(); - const S& s = dd.subset(); - - gaussian(d,s); + gaussian_F(const_cast&>(dd).getF(),dd.subset()); } @@ -309,10 +302,7 @@ void zero_rep(OLattice& dest, const Subset& s) template void zero_rep(OSubLattice dd) { - OLattice& d = dd.field(); - const S& s = dd.subset(); - - zero_rep(d,s); + zero_rep_F(dd.getF(),dd.subset()); } From 36e5ddee882a1dedf74f46a6d76a89b307287947 Mon Sep 17 00:00:00 2001 From: Benjamin Glaessle Date: Fri, 19 Dec 2014 18:20:30 +0100 Subject: [PATCH 2/2] fixed typo that forbid scalar build --- include/qdp_scalar_specific.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/qdp_scalar_specific.h b/include/qdp_scalar_specific.h index a5a9f007a..019965acd 100644 --- a/include/qdp_scalar_specific.h +++ b/include/qdp_scalar_specific.h @@ -287,7 +287,7 @@ copymask(OSubLattice d, const OLattice& mask, const OLattice& s1) for(int j=0; j < s.numSiteTable(); ++j) { int i = tab[j]; - copymask(dd.getF()[i], mask.elem(i), s1.elem(i)); + copymask(d.getF()[i], mask.elem(i), s1.elem(i)); } }