From be03cee9cac9d22b3bda35a8204a40684f7b4068 Mon Sep 17 00:00:00 2001
From: Benjamin Glaessle <benjamin.glaessle@physik.uni-r.de>
Date: Wed, 12 Nov 2014 14:23:34 +0100
Subject: [PATCH 1/2] fixed missing subtype.field() bug

* the bug appeared because field() member functions were missing
  in template instantiations of copymask, random, gaussian ...

* the fix mostly allows code to be compiled against qdp++; the result should be ok
  for scalar and parscalar builds without threading, but I haven't
  run any tests! For threaded builds, it is at least slower, if not wrong

* I cannot predict the behaviour for jit, parscalarvec, etc. builds;
  additional fixes might be necessary ...

* two issues worth mentioning:
  1. it is an architectural choice how threaded RNG is supposed to behave
     (same result as non-threaded? how is this supposed to be enforced?)
  2. currently "random(ferm[subset])" and "random(ferm,subset)" might
     behave differently (same for gaussian)

* I am unsure about the OSubScalar behaviour: is it correct that
  this is simply supposed to be OScalar again, with no effective Subset?
---
 include/qdp_globalfuncs_subtype.h   | 21 +++++++++++++++++++++
 include/qdp_outersubtype.h          | 10 +++++-----
 include/qdp_parscalar_specific.h    | 19 ++++++-------------
 include/qdp_parscalarvec_specific.h | 18 ++++--------------
 include/qdp_scalar_specific.h       | 21 +++++++--------------
 include/qdp_scalarvec_specific.h    | 18 ++++--------------
 6 files changed, 47 insertions(+), 60 deletions(-)
diff --git a/include/qdp_globalfuncs_subtype.h b/include/qdp_globalfuncs_subtype.h
index d1accaa0a..26cc1fa2a 100644
--- a/include/qdp_globalfuncs_subtype.h
+++ b/include/qdp_globalfuncs_subtype.h
@@ -131,6 +131,27 @@ sum( const OSubLattice<T>& s1 )
       }
   }
 
+  template<class T>
+  inline
+  void random_F( T* dest, const Subset& s)
+  {
+    // TODO: OpenMP candidate -- unless parallel iteration conflicts with the RNG?
+    for(int j=0; j < s.numSiteTable(); ++j)
+      {
+	random( dest[j] );
+      }
+  }
+
+  template<class T>
+  inline
+  void gaussian_F( T* dest, const Subset& s)
+  {
+    // TODO: OpenMP candidate -- unless parallel iteration conflicts with the RNG?
+    for(int j=0; j < s.numSiteTable(); ++j)
+      {
+	gaussian( dest[j] );
+      }
+  }
 
   //! dest  = 0 
   template<class T>
diff --git a/include/qdp_outersubtype.h b/include/qdp_outersubtype.h
index 6ef19c3c4..d740c1157 100644
--- a/include/qdp_outersubtype.h
+++ b/include/qdp_outersubtype.h
@@ -305,14 +305,14 @@ struct DoublePrecType<OSubLattice<T> >
 template<class T> 
 void zero_rep(OScalar<T>& dest, const Subset& s) 
 {
-  zero_rep(dest.field().elem());
+  zero_rep(dest.elem());
 }
 
 //! dest = 0
 template<class T>
 void zero_rep(OSubScalar<T> dest) 
 {
-  zero_rep(dest.field().elem());
+  zero_rep(*dest.getF());
 }
 
 //! dest = (mask) ? s1 : dest
@@ -320,7 +320,7 @@ template<class T1, class T2>
 void copymask(OSubScalar<T2> dest, const OScalar<T1>& mask, 
 	      const OScalar<T2>& s1) 
 {
-  copymask(dest.field().elem(), mask.elem(), s1.elem());
+  copymask(*dest.getF(), mask.elem(), s1.elem());
 }
 
 
@@ -335,7 +335,7 @@ void random(OSubScalar<T> d);
 template<class T>
 void gaussian(OSubScalar<T> dd)
 {
-  OLattice<T>& d = dd.field();
+  T& d = *(dd.getF());
   const Subset& s = dd.subset();
 
   OScalar<T>  r1, r2;
@@ -343,7 +343,7 @@ void gaussian(OSubScalar<T> dd)
   random(r1(s));
   random(r2(s));
 
-  fill_gaussian(d.elem(), r1.elem(), r2.elem());
+  fill_gaussian(d, r1.elem(), r2.elem());
 }
 
 } // namespace QDP
diff --git a/include/qdp_parscalar_specific.h b/include/qdp_parscalar_specific.h
index e5842f124..5c517d3ea 100644
--- a/include/qdp_parscalar_specific.h
+++ b/include/qdp_parscalar_specific.h
@@ -463,7 +463,6 @@ void evaluate_F(T* dest, const Op& op, const QDPExpr<RHS,OLattice<T1> >& rhs,
 template<class T1, class T2>
 void copymask(OSubLattice<T2> d, const OLattice<T1>& mask, const OLattice<T2>& s1) 
 {
-	OLattice<T2>& dest = d.field();
 	const Subset& s = d.subset();
 
 	const int *tab = s.siteTable().slice();
@@ -471,7 +470,7 @@ void copymask(OSubLattice<T2> d, const OLattice<T1>& mask, const OLattice<T2>& s
 	for(int j=0; j < s.numSiteTable(); ++j) 
 	{
 		int i = tab[j];
-		copymask(dest.elem(i), mask.elem(i), s1.elem(i));
+		copymask(d.getF()[i], mask.elem(i), s1.elem(i));
 	}
 }
 
@@ -547,10 +546,7 @@ random(OLattice<T>& d, const Subset& s)
 template<class T>
 void random(OSubLattice<T> dd)
 {
-	OLattice<T>& d = dd.field();
-	const Subset& s = dd.subset();
-
-	random(d,s);
+	random_F(dd.getF(),dd.subset());
 }
 
 
@@ -586,10 +582,7 @@ void gaussian(OLattice<T>& d, const Subset& s)
 template<class T>
 void gaussian(OSubLattice<T> dd)
 {
-	OLattice<T>& d = dd.field();
-	const Subset& s = dd.subset();
-
-	gaussian(d,s);
+	gaussian_F(dd.getF(),dd.subset());
 }
 
 
@@ -2558,9 +2551,9 @@ void writeOLattice(BinaryWriter& bin,
 template<class T>
 void write(BinaryWriter& bin, OSubLattice<T> dd)
 {
-	const OLattice<T>& d = dd.field();
+	T* d = dd.getF();
 
-	writeOLattice(bin, (const char *)&(d.elem(0)), 
+	writeOLattice(bin, (const char *)d,
 		sizeof(typename WordType<T>::Type_t), 
 		sizeof(T) / sizeof(typename WordType<T>::Type_t),
 		dd.subset());
@@ -2610,7 +2603,7 @@ void readOLattice(BinaryReader& bin,
 template<class T>
 void read(BinaryReader& bin, OSubLattice<T> d)
 {
-	readOLattice(bin, (char *)(d.field().getF()),
+	readOLattice(bin, (char *)(d.getF()),
 				 sizeof(typename WordType<T>::Type_t), 
 				 sizeof(T) / sizeof(typename WordType<T>::Type_t),
 				 d.subset());
diff --git a/include/qdp_parscalarvec_specific.h b/include/qdp_parscalarvec_specific.h
index 4fe3af7bd..aec8e9641 100644
--- a/include/qdp_parscalarvec_specific.h
+++ b/include/qdp_parscalarvec_specific.h
@@ -208,7 +208,6 @@ template<class T1, class T2>
 void 
 copymask(OSubLattice<T2,Subset> d, const OLattice<T1>& mask, const OLattice<T2>& s1) 
 {
-  OLattice<T2>& dest = d.field();
   const Subset& s = d.subset();
 
 #if ! defined(QDP_NOT_IMPLEMENTED)
@@ -216,7 +215,7 @@ copymask(OSubLattice<T2,Subset> d, const OLattice<T1>& mask, const OLattice<T2>&
   for(int j=0; j < s.numSiteTable(); ++j) 
   {
     int i = tab[j];
-    copymask(dest.elem(i), mask.elem(i), s1.elem(i));
+    copymask(d.getF()[i], mask.elem(i), s1.elem(i));
   }
 #else
   QDP_error("copymask_Subset not implemented");
@@ -292,10 +291,7 @@ random(OLattice<T>& d, const Subset& s)
 template<class T, class S>
 void random(const OSubLattice<T,S>& dd)
 {
-  OLattice<T>& d = const_cast<OSubLattice<T,S>&>(dd).field();
-  const S& s = dd.subset();
-
-  random(d,s);
+  random_F(const_cast<OSubLattice<T,S>&>(dd).getF(),dd.subset());
 }
 
 
@@ -334,10 +330,7 @@ void gaussian(OLattice<T>& d, const Subset& s)
 template<class T, class S>
 void gaussian(const OSubLattice<T,S>& dd)
 {
-  OLattice<T>& d = const_cast<OSubLattice<T,S>&>(dd).field();
-  const S& s = dd.subset();
-
-  gaussian(d,s);
+  gaussian_F(const_cast<OSubLattice<T,S>&>(dd).getF(),dd.subset());
 }
 
 
@@ -374,10 +367,7 @@ void zero_rep(OLattice<T>& dest, const Subset& s)
 template<class T, class S>
 void zero_rep(OSubLattice<T,S> dd) 
 {
-  OLattice<T>& d = dd.field();
-  const S& s = dd.subset();
-  
-  zero_rep(d,s);
+  zero_rep_F(dd.getF(),dd.subset());
 }
 
 
diff --git a/include/qdp_scalar_specific.h b/include/qdp_scalar_specific.h
index 372a90308..a5a9f007a 100644
--- a/include/qdp_scalar_specific.h
+++ b/include/qdp_scalar_specific.h
@@ -280,7 +280,6 @@ template<class T1, class T2>
 void 
 copymask(OSubLattice<T2> d, const OLattice<T1>& mask, const OLattice<T2>& s1) 
 {
-  OLattice<T2>& dest = d.field();
   const Subset& s = d.subset();
 
   const int *tab = s.siteTable().slice();
@@ -288,7 +287,7 @@ copymask(OSubLattice<T2> d, const OLattice<T1>& mask, const OLattice<T2>& s1)
   for(int j=0; j < s.numSiteTable(); ++j) 
   {
     int i = tab[j];
-    copymask(dest.elem(i), mask.elem(i), s1.elem(i));
+    copymask(dd.getF()[i], mask.elem(i), s1.elem(i));
   }
 }
 
@@ -367,10 +366,7 @@ random(OLattice<T>& d, const Subset& s)
 template<class T>
 void random(OSubLattice<T> dd)
 {
-  OLattice<T>& d = dd.field();
-  const Subset& s = dd.subset();
-
-  random(d,s);
+  random_F(dd.getF(),dd.subset());
 }
 
 
@@ -407,10 +403,7 @@ void gaussian(OLattice<T>& d, const Subset& s)
 template<class T>
 void gaussian(OSubLattice<T> dd)
 {
-  OLattice<T>& d = dd.field();
-  const Subset& s = dd.subset();
-
-  gaussian(d,s);
+  gaussian_F(dd.getF(),dd.subset());
 }
 
 
@@ -2120,7 +2113,7 @@ void write(BinaryWriter& bin, OSubLattice<T> dd)
   const Subset& sub = dd.subset();
   const Set& set    = sub.getSet();
 
-  const OLattice<T>& d = dd.field();
+  const T* d = dd.getF();
 
   const multi1d<int>& lat_color = set.latticeColoring();
   const int color = sub.color();
@@ -2132,7 +2125,7 @@ void write(BinaryWriter& bin, OSubLattice<T> dd)
     int i = Layout::linearSiteIndex(site);
     if (lat_color[i] == color)
     {
-      bin.writeArray((const char*)&(d.elem(i)), 
+      bin.writeArray((const char*)&(d[i]),
 		     sizeof(typename WordType<T>::Type_t), 
 		     sizeof(T) / sizeof(typename WordType<T>::Type_t));
     }
@@ -2195,7 +2188,7 @@ void read(BinaryReader& bin, OSubLattice<T> dd)
   const Subset& sub = dd.subset();
   const Set& set    = sub.getSet();
 
-  OLattice<T>& d = dd.field();
+  T* d = dd.getF();
 
   const multi1d<int>& lat_color = set.latticeColoring();
   const int color = sub.color();
@@ -2207,7 +2200,7 @@ void read(BinaryReader& bin, OSubLattice<T> dd)
     int i = Layout::linearSiteIndex(site);
     if (lat_color[i] == color)
     {
-      bin.readArray((char*)&(d.elem(i)), 
+      bin.readArray((char*)&(d[i]),
 		    sizeof(typename WordType<T>::Type_t), 
 		    sizeof(T) / sizeof(typename WordType<T>::Type_t));
     }
diff --git a/include/qdp_scalarvec_specific.h b/include/qdp_scalarvec_specific.h
index 922449067..792f9c648 100644
--- a/include/qdp_scalarvec_specific.h
+++ b/include/qdp_scalarvec_specific.h
@@ -142,7 +142,6 @@ template<class T1, class T2>
 void 
 copymask(OSubLattice<T2,Subset> d, const OLattice<T1>& mask, const OLattice<T2>& s1) 
 {
-  OLattice<T2>& dest = d.field();
   const Subset& s = d.subset();
 
 #if ! defined(QDP_NOT_IMPLEMENTED)
@@ -150,7 +149,7 @@ copymask(OSubLattice<T2,Subset> d, const OLattice<T1>& mask, const OLattice<T2>&
   for(int j=0; j < s.numSiteTable(); ++j) 
   {
     int i = tab[j];
-    copymask(dest.elem(i), mask.elem(i), s1.elem(i));
+    copymask(d.getF()[i], mask.elem(i), s1.elem(i));
   }
 #else
   QDP_error_exit("copymask_Subset not implemented");
@@ -228,10 +227,7 @@ random(OLattice<T>& d, const Subset& s)
 template<class T, class S>
 void random(const OSubLattice<T,S>& dd)
 {
-  OLattice<T>& d = const_cast<OSubLattice<T,S>&>(dd).field();
-  const S& s = dd.subset();
-
-  random(d,s);
+  random_F(const_cast<OSubLattice<T,S>&>(dd).getF(),dd.subset());
 }
 
 
@@ -269,10 +265,7 @@ void gaussian(OLattice<T>& d, const Subset& s)
 template<class T, class S>
 void gaussian(const OSubLattice<T,S>& dd)
 {
-  OLattice<T>& d = const_cast<OSubLattice<T,S>&>(dd).field();
-  const S& s = dd.subset();
-
-  gaussian(d,s);
+  gaussian_F(const_cast<OSubLattice<T,S>&>(dd).getF(),dd.subset());
 }
 
 
@@ -309,10 +302,7 @@ void zero_rep(OLattice<T>& dest, const Subset& s)
 template<class T, class S>
 void zero_rep(OSubLattice<T,S> dd) 
 {
-  OLattice<T>& d = dd.field();
-  const S& s = dd.subset();
-  
-  zero_rep(d,s);
+  zero_rep_F(dd.getF(),dd.subset());
 }
 
 

From 36e5ddee882a1dedf74f46a6d76a89b307287947 Mon Sep 17 00:00:00 2001
From: Benjamin Glaessle <benjamin.glaessle@physik.uni-r.de>
Date: Fri, 19 Dec 2014 18:20:30 +0100
Subject: [PATCH 2/2] fixed typo that broke the scalar build

---
 include/qdp_scalar_specific.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/qdp_scalar_specific.h b/include/qdp_scalar_specific.h
index a5a9f007a..019965acd 100644
--- a/include/qdp_scalar_specific.h
+++ b/include/qdp_scalar_specific.h
@@ -287,7 +287,7 @@ copymask(OSubLattice<T2> d, const OLattice<T1>& mask, const OLattice<T2>& s1)
   for(int j=0; j < s.numSiteTable(); ++j) 
   {
     int i = tab[j];
-    copymask(dd.getF()[i], mask.elem(i), s1.elem(i));
+    copymask(d.getF()[i], mask.elem(i), s1.elem(i));
   }
 }