From 5fd8cf6fb863839a981eeb010ced040b4517b3bb Mon Sep 17 00:00:00 2001
From: "Steven G. Johnson" <stevenj@alum.mit.edu>
Date: Fri, 9 Oct 2020 22:09:14 -0400
Subject: [PATCH 01/29] subpixel averaging for boundaries of variable-material
 objects

---
 src/material_data.hpp |   5 +-
 src/meepgeom.cpp      | 111 +++++++++++++++++++++++++-----------------
 2 files changed, 71 insertions(+), 45 deletions(-)

diff --git a/src/material_data.hpp b/src/material_data.hpp
index b219112c6..863641648 100644
--- a/src/material_data.hpp
+++ b/src/material_data.hpp
@@ -122,7 +122,6 @@ struct material_data {
   // these fields used only if which_subclass==MATERIAL_USER
   user_material_func user_func;
   void *user_data;
-  bool do_averaging;
 
   // these fields used only if which_subclass==MATERIAL_FILE
   double *epsilon_data;
@@ -159,6 +158,10 @@ struct material_data {
   */
   enum { U_MIN = 0, U_PROD = 1, U_MEAN = 2, U_DEFAULT = 3 } material_grid_kinds;
 
+  // used for any variable material (USER, FILE, or GRID):
+  // indicates whether we should use the fallback subpixel averaging via quadrature
+  bool do_averaging;
+
   material_data();
 
   void copy_from(const material_data& from);
diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index f15468ebf..ae6217d14 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -283,15 +283,14 @@ bool is_material_grid(material_type mt) {
 }
 bool is_material_grid(void *md) { return is_material_grid((material_type)md); }
 
+// return whether mt is spatially varying
 bool is_variable(material_type mt) {
-  return (mt->which_subclass == material_data::MATERIAL_USER) ||
-         (mt->which_subclass == material_data::MATERIAL_GRID);
+  return (mt && ((mt->which_subclass == material_data::MATERIAL_USER) ||
+                 (mt->which_subclass == material_data::MATERIAL_GRID) ||
+                 (mt->which_subclass == material_data::MATERIAL_FILE)));
 }
 bool is_variable(void *md) { return is_variable((material_type)md); }
 
-bool is_file(material_type md) { return (md->which_subclass == material_data::MATERIAL_FILE); }
-bool is_file(void *md) { return is_file((material_type)md); }
-
 bool is_medium(material_type md, medium_struct **m) {
   if (md->which_subclass == material_data::MEDIUM) {
     *m = &(md->medium);
@@ -302,24 +301,29 @@ bool is_medium(material_type md, medium_struct **m) {
 
 bool is_medium(void *md, medium_struct **m) { return is_medium((material_type)md, m); }
 
+// note: is_metal assumes that eval_material_pt has already been called for variable materials
 bool is_metal(meep::field_type ft, const material_type *material) {
   material_data *md = *material;
   if (ft == meep::E_stuff) switch (md->which_subclass) {
       case material_data::MEDIUM:
+      case material_data::MATERIAL_USER:
+      case material_data::MATERIAL_FILE:
       case material_data::MATERIAL_GRID:
         return (md->medium.epsilon_diag.x < 0 || md->medium.epsilon_diag.y < 0 ||
                 md->medium.epsilon_diag.z < 0);
       case material_data::PERFECT_METAL: return true;
-      default: meep::abort("unknown material type"); return false;
+      default: meep::abort("unknown material type in is_metal"); return false;
     }
   else
     switch (md->which_subclass) {
       case material_data::MEDIUM:
+      case material_data::MATERIAL_USER:
+      case material_data::MATERIAL_FILE:
       case material_data::MATERIAL_GRID:
         return (md->medium.mu_diag.x < 0 || md->medium.mu_diag.y < 0 || md->medium.mu_diag.z < 0);
       case material_data::PERFECT_METAL:
         return false; // is an electric conductor, but not a magnetic conductor
-      default: meep::abort("unknown material type"); return false;
+      default: meep::abort("unknown material type in is_metal"); return false;
     }
 }
 
@@ -408,7 +412,7 @@ meep::vec material_grid_grad(vector3 p, material_data *md, const geometric_objec
 #undef D
 
   // [du_dx,du_dy,du_dz] is the gradient ∇u with respect to the transformed coordinate
-  // r1 of the matgrid_val function but what we want is the gradient of u(g(r2)) with 
+  // r1 of the matgrid_val function but what we want is the gradient of u(g(r2)) with
   // respect to r2 where g(r2) is the to_geom_object_coords function (in libctl/utils/geom.c).
   // computing this quantity involves using the chain rule and thus the vector-Jacobian product
   // ∇u J where J is the Jacobian matrix of g.
@@ -689,6 +693,7 @@ class geom_epsilon : public meep::material_function {
 
 private:
   void get_material_pt(material_type &material, const meep::vec &r);
+  void eval_material_pt(material_type &material, vector3 p);
 
   material_type_list extra_materials;
   pol *current_pol;
@@ -815,7 +820,7 @@ static void material_epsmu(meep::field_type ft, material_type material, symmetri
         epsmu_inv->m01 = epsmu_inv->m02 = epsmu_inv->m12 = 0.0;
         break;
 
-      default: meep::abort("unknown material type");
+      default: meep::abort("unknown material type in epsmu");
     }
   else
     switch (md->which_subclass) {
@@ -843,7 +848,7 @@ static void material_epsmu(meep::field_type ft, material_type material, symmetri
         epsmu_inv->m01 = epsmu_inv->m02 = epsmu_inv->m12 = 0.0;
         break;
 
-      default: meep::abort("unknown material type");
+      default: meep::abort("unknown material type in epsmu");
     }
 }
 
@@ -855,26 +860,30 @@ void geom_epsilon::get_material_pt(material_type &material, const meep::vec &r)
   boolean inobject;
   material =
       (material_type)material_of_unshifted_point_in_tree_inobject(p, restricted_tree, &inobject);
-  material_data *md = material;
+  eval_material_pt(material, p);
+}
 
-  switch (md->which_subclass) {
+// evaluate the material at the given point p if necessary — this is needed if
+// the material is variable (a grid, function, or file); otherwise it is a no-op.
+void geom_epsilon::eval_material_pt(material_type &material, vector3 p) {
+  switch (material->which_subclass) {
     // material grid: interpolate onto user specified material grid to get properties at r
-    case material_data::MATERIAL_GRID:
+    case material_data::MATERIAL_GRID: {
       double u;
       int oi;
       geom_box_tree tp;
-
       tp = geom_tree_search(p, restricted_tree, &oi);
       // interpolate and project onto material grid
-      u = tanh_projection(matgrid_val(p, tp, oi, md), md->beta, md->eta);
+      u = tanh_projection(matgrid_val(p, tp, oi, material), material->beta, material->eta);
       // interpolate material from material grid point
-      epsilon_material_grid(md, u);
+      epsilon_material_grid(material, u);
 
       return;
+    }
     // material read from file: interpolate to get properties at r
     case material_data::MATERIAL_FILE:
-      if (md->epsilon_data)
-        epsilon_file_material(md, p);
+      if (material->epsilon_data)
+        epsilon_file_material(material, p);
       else
         material = (material_type)default_material;
       return;
@@ -885,16 +894,16 @@ void geom_epsilon::get_material_pt(material_type &material, const meep::vec &r)
     // the user's function only needs to fill in whatever is
     // different from vacuum.
     case material_data::MATERIAL_USER:
-      md->medium = medium_struct();
-      md->user_func(p, md->user_data, &(md->medium));
-      md->medium.check_offdiag_im_zero_or_abort();
+      material->medium = medium_struct();
+      material->user_func(p, material->user_data, &(material->medium));
+      material->medium.check_offdiag_im_zero_or_abort();
       return;
 
     // position-independent material or metal: there is nothing to do
     case material_data::MEDIUM:
     case material_data::PERFECT_METAL: return;
 
-    default: meep::abort("unknown material type");
+    default: meep::abort("unknown material type in eval_material_pt");
   };
 }
 
@@ -919,16 +928,17 @@ double geom_epsilon::chi1p1(meep::field_type ft, const meep::vec &r) {
 }
 
 /* Find frontmost object in v, along with the constant material behind it.
-   Returns false if material behind the object is not constant.
+   Returns false if more than two objects & materials intersect the pixel.
 
    Requires moderately horrifying logic to figure things out properly,
    stolen from MPB. */
 static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree, vector3 &pcenter,
                              const geometric_object **o_front, vector3 &shiftby_front,
-                             material_type &mat_front, material_type &mat_behind) {
+                             material_type &mat_front, material_type &mat_behind,
+                             vector3 &p_front, vector3 &p_behind) {
   vector3 p;
   const geometric_object *o1 = 0, *o2 = 0;
-  vector3 shiftby1 = {0, 0, 0}, shiftby2 = {0, 0, 0};
+  vector3 shiftby1 = {0, 0, 0}, shiftby2 = {0, 0, 0}, p1 = {0,0,0}, p2 = {0,0,0};
   geom_box pixel;
   material_type mat1 = vacuum, mat2 = vacuum;
   int id1 = -1, id2 = -1;
@@ -990,6 +1000,7 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
       shiftby1 = shiftby;
       id1 = id;
       mat1 = mat;
+      p1 = q;
     }
     else if (id2 == -1 ||
              ((id >= id1 && id >= id2) && (id1 == id2 || material_type_equal(mat1, mat2)))) {
@@ -997,6 +1008,7 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
       shiftby2 = shiftby;
       id2 = id;
       mat2 = mat;
+      p2 = q;
     }
     else if (!(id1 < id2 && (id1 == id || material_type_equal(mat1, mat))) &&
              !(id2 < id1 && (id2 == id || material_type_equal(mat2, mat))))
@@ -1008,28 +1020,31 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
     id2 = id1;
     o2 = o1;
     mat2 = mat1;
+    p2 = p1;
     shiftby2 = shiftby1;
   }
 
-  if ((o1 && is_variable(o1->material)) || (o2 && is_variable(o2->material)) ||
-      ((is_variable(default_material) || is_file(default_material)) &&
-       (!o1 || is_file(o1->material) || !o2 || is_file(o2->material))))
-    return false;
-
   if (id1 >= id2) {
     *o_front = o1;
     shiftby_front = shiftby1;
     mat_front = mat1;
-    if (id1 == id2)
+    p_front = p1;
+    if (id1 == id2) {
       mat_behind = mat1;
-    else
+      p_behind = p1;
+    }
+    else {
       mat_behind = mat2;
+      p_behind = p2;
+    }
   }
   if (id2 > id1) {
     *o_front = o2;
     shiftby_front = shiftby2;
     mat_front = mat2;
+    p_front = p2;
     mat_behind = mat1;
+    p_behind = p1;
   }
   return true;
 }
@@ -1070,11 +1085,13 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symmetric_matrix *chi1i
                                       bool &fallback) {
   const geometric_object *o;
   material_type mat, mat_behind;
+  vector3 p_mat, p_mat_behind;
   symmetric_matrix meps;
   vector3 p, shiftby, normal;
   fallback = false;
 
-  if (maxeval == 0) {
+  if (maxeval == 0 ||
+      !get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind, p_mat, p_mat_behind)) {
   noavg:
     get_material_pt(mat, v.center());
   trivial:
@@ -1083,20 +1100,25 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symmetric_matrix *chi1i
     return;
   }
 
-  if (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind)) {
-    get_material_pt(mat, v.center());
-    if (mat && (mat->which_subclass == material_data::MATERIAL_USER ||
-                mat->which_subclass == material_data::MATERIAL_GRID)
-        && mat->do_averaging) {
-      fallback = true;
-      return;
-    }
-    else {
-      goto trivial;
-    }
+  // for variable materials with do_averaging == true, switch over to slow fallback integration method
+  if ((is_variable(mat) && mat->do_averaging) || (is_variable(mat_behind) && mat_behind->do_averaging)) {
+    fallback = true;
+    return;
   }
 
   /* check for trivial case of only one object/material */
+  if (material_type_equal(mat, mat_behind)) {
+    if (is_variable(mat)) eval_material_pt(mat, p_mat);
+    goto trivial;
+  }
+
+  /* Evaluate materials in case they are variable.  This allows us to do fast subpixel averaging
+     at the boundary of an object with a variable material, while remaining accurate enough if the
+     material is continuous over the pixel.  (We make a first-order error by averaging as if the material
+     were constant, but only in a boundary layer of thickness 1/resolution, so the net effect should
+     still be second-order.) */
+  eval_material_pt(mat, p_mat);
+  eval_material_pt(mat_behind, p_mat_behind);
   if (material_type_equal(mat, mat_behind)) goto trivial;
 
   // it doesn't make sense to average metals (electric or magnetic)
@@ -2037,6 +2059,7 @@ material_type make_user_material(user_material_func user_func, void *user_data,
 material_type make_file_material(const char *eps_input_file) {
   material_data *md = new material_data();
   md->which_subclass = material_data::MATERIAL_FILE;
+  md->do_averaging = false;
 
   md->epsilon_dims[0] = md->epsilon_dims[1] = md->epsilon_dims[2] = 1;
   if (eps_input_file && eps_input_file[0]) { // file specified

From d67b1f225c0c4605140b6efce9443b03fc9544e6 Mon Sep 17 00:00:00 2001
From: "Steven G. Johnson" <stevenj@alum.mit.edu>
Date: Wed, 14 Oct 2020 16:15:14 -0400
Subject: [PATCH 02/29] rm obsolete references to meepgeom::is_file

---
 libpympb/pympb.cpp | 7 ++-----
 python/meep.i      | 2 --
 src/meepgeom.hpp   | 2 --
 3 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/libpympb/pympb.cpp b/libpympb/pympb.cpp
index 0344f7b85..05b59bf08 100644
--- a/libpympb/pympb.cpp
+++ b/libpympb/pympb.cpp
@@ -357,12 +357,9 @@ int mode_solver::mean_epsilon(symmetric_matrix *meps, symmetric_matrix *meps_inv
 
   bool o1_is_var = o1 && meep_geom::is_variable(o1->material);
   bool o2_is_var = o2 && meep_geom::is_variable(o2->material);
-  bool default_is_var_or_file =
-      meep_geom::is_variable(default_material) || meep_geom::is_file(default_material);
+  bool default_is_var = meep_geom::is_variable(default_material);
 
-  if (o1_is_var || o2_is_var ||
-      (default_is_var_or_file &&
-       (!o1 || !o2 || meep_geom::is_file(o1->material) || meep_geom::is_file(o2->material)))) {
+  if (o1_is_var || o2_is_var || (default_is_var && (!o1 || !o2))) {
     return 0; /* arbitrary material functions are non-analyzable */
   }
 
diff --git a/python/meep.i b/python/meep.i
index 80ef75024..66d2ea8c1 100644
--- a/python/meep.i
+++ b/python/meep.i
@@ -1464,8 +1464,6 @@ void _get_gradient(PyObject *grad, PyObject *fields_a, PyObject *fields_f, PyObj
 %ignore material_type_equal;
 %ignore is_variable;
 %ignore is_variable;
-%ignore is_file;
-%ignore is_file;
 %ignore is_medium;
 %ignore is_medium;
 %ignore is_metal;
diff --git a/src/meepgeom.hpp b/src/meepgeom.hpp
index 990c537f1..a90e375c6 100644
--- a/src/meepgeom.hpp
+++ b/src/meepgeom.hpp
@@ -183,8 +183,6 @@ bool is_material_grid(material_type mt);
 bool is_material_grid(void *md);
 bool is_variable(material_type mt);
 bool is_variable(void *md);
-bool is_file(material_type md);
-bool is_file(void *md);
 bool is_medium(material_type md, medium_struct **m);
 bool is_medium(void *md, medium_struct **m);
 bool is_metal(meep::field_type ft, const material_type *material);

From 6a9f7dd662afd5014c4b1bc0bed0339b56ff28d8 Mon Sep 17 00:00:00 2001
From: "Steven G. Johnson" <stevenj@alum.mit.edu>
Date: Tue, 20 Oct 2020 17:27:46 -0400
Subject: [PATCH 03/29] bugfix

---
 src/meepgeom.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index ae6217d14..99a82569d 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -1108,7 +1108,7 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symmetric_matrix *chi1i
 
   /* check for trivial case of only one object/material */
   if (material_type_equal(mat, mat_behind)) {
-    if (is_variable(mat)) eval_material_pt(mat, p_mat);
+    if (is_variable(mat)) eval_material_pt(mat, vec_to_vector3(v.center()));
     goto trivial;
   }
 

From bf5296c2ceca06f7c60ad8d2020aed97a57db4d2 Mon Sep 17 00:00:00 2001
From: "Steven G. Johnson" <stevenj@alum.mit.edu>
Date: Tue, 22 Jun 2021 14:06:17 -0400
Subject: [PATCH 04/29] undo spurious change

---
 src/material_data.hpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/material_data.hpp b/src/material_data.hpp
index 863641648..6aa32c898 100644
--- a/src/material_data.hpp
+++ b/src/material_data.hpp
@@ -123,6 +123,10 @@ struct material_data {
   user_material_func user_func;
   void *user_data;
 
+  // used for any variable material (USER, FILE, or GRID):
+  // indicates whether we should use the fallback subpixel averaging via quadrature
+  bool do_averaging;
+
   // these fields used only if which_subclass==MATERIAL_FILE
   double *epsilon_data;
   size_t epsilon_dims[3];
@@ -158,10 +162,6 @@ struct material_data {
   */
   enum { U_MIN = 0, U_PROD = 1, U_MEAN = 2, U_DEFAULT = 3 } material_grid_kinds;
 
-  // used for any variable material (USER, FILE, or GRID):
-  // indicates whether we should use the fallback subpixel averaging via quadrature
-  bool do_averaging;
-
   material_data();
 
   void copy_from(const material_data& from);

From a480ffd9c08cfbbaf2badfa32871b08e3ff16817 Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Wed, 16 Feb 2022 11:30:54 -0500
Subject: [PATCH 05/29] add proper kernels for sphere caps

---
 src/meepgeom.cpp | 81 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 71 insertions(+), 10 deletions(-)

diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 8d5b262f6..47ccf9972 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -1021,6 +1021,43 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
   return true;
 }
 
+double get_material_grid_fill(meep::ndim dim, double d, double r, double u, double eta,
+  material_type &mat, material_type &mat_behind){
+  /* Lets assume that the "background material" is void (u=0) and that the
+  "foreground material is solid (u=1)". The fill fraction then describes
+  the amount of foreground material inhabits the cell.
+
+  The analytic volume expressions for the end cap only specify the fill fraction
+  of the cap relative to the whole sphere... but we don't know what the cap 
+  actually is (solid or void). So we need a little extra logic to sort that out.
+  
+  Occasionally, our distance to the nearest interface is outside the current
+  sphere, which means we don't need to do any averaging (the fill is 1). Again,
+  we don't know if that we means we are in void or solid, however, until we look
+  at u. Then we can determine whether the fill is 0 or 1. To make things easy, we'll
+  always assume the cap is solid.
+  */
+  double rel_fill;
+  if (abs(d) > abs(r)){
+    mat_behind = mat;
+    return -1.0; // garbage fill
+  } else {
+    if (dim == meep::D1)
+      rel_fill = (r-d)/(2*r);
+    else if (dim == meep::D2 || dim == meep::Dcyl){
+      rel_fill = (1/(r*r*meep::pi)) * (r*r*std::acos(d/r)-d*std::sqrt(r*r-d*d));
+    }
+    else if (dim == meep::D3)
+      rel_fill = (((r-d)*(r-d))/(4*meep::pi*r*r*r))*(2*r+d);
+  }
+
+  if (u <= eta){
+    return rel_fill;
+  }else{
+    return 1-rel_fill;
+  }
+}
+
 void geom_epsilon::eff_chi1inv_row(meep::component c, double chi1inv_row[3], const meep::volume &v,
                                    double tol, int maxeval) {
   symm_matrix meps_inv;
@@ -1059,6 +1096,7 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
   material_type mat, mat_behind;
   symm_matrix meps;
   vector3 p, shiftby, normal;
+  double fill;
   fallback = false;
 
   if (maxeval == 0) {
@@ -1072,31 +1110,54 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
 
   if (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind)) {
     get_material_pt(mat, v.center());
-    if (mat && (mat->which_subclass == material_data::MATERIAL_USER ||
-                mat->which_subclass == material_data::MATERIAL_GRID)
-        && mat->do_averaging) {
+    if (mat && (mat->which_subclass == material_data::MATERIAL_USER)) {
       fallback = true;
       return;
     }
+    else if(mat && (mat->which_subclass == material_data::MATERIAL_GRID)){
+      // debug
+      //if (mat_behind)
+      //  printf("c %d | mat %d | behind %d | (x,y) (%f,%f) \n",c,mat->which_subclass,mat_behind->which_subclass,v.center().x(),v.center().y());
+      // proceed
+    }
     else {
       goto trivial;
     }
   }
 
+  if (mat->which_subclass == material_data::MATERIAL_GRID){
+    int oi;
+    geom_box_tree tp = geom_tree_search(p, restricted_tree, &oi);
+    
+    meep::vec normal_vec(matgrid_grad(p, tp, oi, mat));
+    double nabsinv = 1.0 / meep::abs(normal_vec);
+    LOOP_OVER_DIRECTIONS(normal_vec.dim, k) { normal_vec.set_direction(k,normal_vec.in_direction(k)*nabsinv); }
+    
+    double uval = matgrid_val(p, tp, oi, mat)+this->u_p;
+    double d = (mat->eta-uval) * nabsinv;
+    double r = v.diameter()/2;
+    
+    fill = get_material_grid_fill(normal_vec.dim,d,r,uval,mat->eta,mat,mat_behind);
+    normal = vec_to_vector3(normal_vec);
+  }
+
   /* check for trivial case of only one object/material */
   if (material_type_equal(mat, mat_behind)) goto trivial;
 
   // it doesn't make sense to average metals (electric or magnetic)
   if (is_metal(meep::type(c), &mat) || is_metal(meep::type(c), &mat_behind)) goto noavg;
 
-  normal = unit_vector3(normal_to_fixed_object(vector3_minus(p, shiftby), *o));
-  if (normal.x == 0 && normal.y == 0 && normal.z == 0)
-    goto noavg; // couldn't get normal vector for this point, punt
-  geom_box pixel = gv2box(v);
-  pixel.low = vector3_minus(pixel.low, shiftby);
-  pixel.high = vector3_minus(pixel.high, shiftby);
+  if (mat->which_subclass != material_data::MATERIAL_GRID) {
+    normal = unit_vector3(normal_to_fixed_object(vector3_minus(p, shiftby), *o));
+    if (normal.x == 0 && normal.y == 0 && normal.z == 0)
+      goto noavg; // couldn't get normal vector for this point, punt
+    
+    geom_box pixel = gv2box(v);
+    pixel.low = vector3_minus(pixel.low, shiftby);
+    pixel.high = vector3_minus(pixel.high, shiftby);
 
-  double fill = box_overlap_with_object(pixel, *o, tol, maxeval);
+    fill = box_overlap_with_object(pixel, *o, tol, maxeval);
+  }
 
   material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix);
   symm_matrix eps2, epsinv2;

From 5c38aa7c7b9e502e7c75c60454eb99a39e423094 Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Wed, 16 Feb 2022 11:53:48 -0500
Subject: [PATCH 06/29] clean up comments

---
 src/meepgeom.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 47ccf9972..780c2ae56 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -1031,15 +1031,15 @@ double get_material_grid_fill(meep::ndim dim, double d, double r, double u, doub
   of the cap relative to the whole sphere... but we don't know what the cap 
   actually is (solid or void). So we need a little extra logic to sort that out.
   
-  Occasionally, our distance to the nearest interface is outside the current
-  sphere, which means we don't need to do any averaging (the fill is 1). Again,
-  we don't know if that we means we are in void or solid, however, until we look
-  at u. Then we can determine whether the fill is 0 or 1. To make things easy, we'll
-  always assume the cap is solid.
+  Occasionally, the distance to the nearest interface is outside the current
+  sphere, which means we don't need to do any averaging (the fill is 0 or 1). Again,
+  we don't know if that means we are in void or solid, however, until we look
+  at u. To make things easy, we'll assume the cap is solid until the end, when we
+  can verify.
   */
   double rel_fill;
   if (abs(d) > abs(r)){
-    mat_behind = mat;
+    mat_behind = mat; // set materials to be equal so that no averaging is performed later
     return -1.0; // garbage fill
   } else {
     if (dim == meep::D1)
@@ -1052,9 +1052,9 @@ double get_material_grid_fill(meep::ndim dim, double d, double r, double u, doub
   }
 
   if (u <= eta){
-    return rel_fill;
+    return rel_fill;   // center is void, so cap must be sold
   }else{
-    return 1-rel_fill;
+    return 1-rel_fill; // center is solid, so cap must be void
   }
 }
 

From bb07eaa4d2a48dcd7a21c4ef24d4d413b80c3013 Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Fri, 18 Feb 2022 10:09:53 -0500
Subject: [PATCH 07/29] checkpoint

---
 src/meepgeom.cpp | 47 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 780c2ae56..32d081e21 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -912,10 +912,11 @@ double geom_epsilon::chi1p1(meep::field_type ft, const meep::vec &r) {
    stolen from MPB. */
 static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree, vector3 &pcenter,
                              const geometric_object **o_front, vector3 &shiftby_front,
-                             material_type &mat_front, material_type &mat_behind) {
+                             material_type &mat_front, material_type &mat_behind,
+                             vector3 &p_front, vector3 &p_behind) {
   vector3 p;
   const geometric_object *o1 = 0, *o2 = 0;
-  vector3 shiftby1 = {0, 0, 0}, shiftby2 = {0, 0, 0};
+  vector3 shiftby1 = {0, 0, 0}, shiftby2 = {0, 0, 0}, p1 = {0,0,0}, p2 = {0,0,0};
   geom_box pixel;
   material_type mat1 = vacuum, mat2 = vacuum;
   int id1 = -1, id2 = -1;
@@ -977,6 +978,7 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
       shiftby1 = shiftby;
       id1 = id;
       mat1 = mat;
+      p1 = q;
     }
     else if (id2 == -1 ||
              ((id >= id1 && id >= id2) && (id1 == id2 || material_type_equal(mat1, mat2)))) {
@@ -984,6 +986,7 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
       shiftby2 = shiftby;
       id2 = id;
       mat2 = mat;
+      p2 = q;
     }
     else if (!(id1 < id2 && (id1 == id || material_type_equal(mat1, mat))) &&
              !(id2 < id1 && (id2 == id || material_type_equal(mat2, mat))))
@@ -995,34 +998,36 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
     id2 = id1;
     o2 = o1;
     mat2 = mat1;
+    p2 = p1;
     shiftby2 = shiftby1;
   }
 
-  if ((o1 && is_variable(o1->material)) || (o2 && is_variable(o2->material)) ||
-      ((is_variable(default_material) || is_file(default_material)) &&
-       (!o1 || is_file(o1->material) || !o2 || is_file(o2->material))))
-    return false;
-
   if (id1 >= id2) {
     *o_front = o1;
     shiftby_front = shiftby1;
     mat_front = mat1;
-    if (id1 == id2)
+    p_front = p1;
+    if (id1 == id2) {
       mat_behind = mat1;
-    else
+      p_behind = p1;
+    } else {
       mat_behind = mat2;
+      p_behind = p2;
+    }
   }
   if (id2 > id1) {
     *o_front = o2;
     shiftby_front = shiftby2;
     mat_front = mat2;
+    p_front = p2;
     mat_behind = mat1;
+    p_behind = p1;
   }
   return true;
 }
 
 double get_material_grid_fill(meep::ndim dim, double d, double r, double u, double eta,
-  material_type &mat, material_type &mat_behind){
+  bool &mg_averaging){
   /* Lets assume that the "background material" is void (u=0) and that the
   "foreground material is solid (u=1)". The fill fraction then describes
   the amount of foreground material inhabits the cell.
@@ -1039,7 +1044,7 @@ double get_material_grid_fill(meep::ndim dim, double d, double r, double u, doub
   */
   double rel_fill;
   if (abs(d) > abs(r)){
-    mat_behind = mat; // set materials to be equal so that no averaging is performed later
+    mg_averaging = false;
     return -1.0; // garbage fill
   } else {
     if (dim == meep::D1)
@@ -1051,6 +1056,7 @@ double get_material_grid_fill(meep::ndim dim, double d, double r, double u, doub
       rel_fill = (((r-d)*(r-d))/(4*meep::pi*r*r*r))*(2*r+d);
   }
 
+  mg_averaging = true;
   if (u <= eta){
     return rel_fill;   // center is void, so cap must be sold
   }else{
@@ -1094,10 +1100,12 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
                                       bool &fallback) {
   const geometric_object *o;
   material_type mat, mat_behind;
+  vector3 p_mat, p_mat_behind;
   symm_matrix meps;
   vector3 p, shiftby, normal;
   double fill;
   fallback = false;
+  bool mg_averaging = false;
 
   if (maxeval == 0) {
   noavg:
@@ -1125,6 +1133,9 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
     }
   }
 
+  // it doesn't make sense to average metals (electric or magnetic)
+  if (is_metal(meep::type(c), &mat) || is_metal(meep::type(c), &mat_behind)) goto noavg;
+
   if (mat->which_subclass == material_data::MATERIAL_GRID){
     int oi;
     geom_box_tree tp = geom_tree_search(p, restricted_tree, &oi);
@@ -1142,10 +1153,18 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
   }
 
   /* check for trivial case of only one object/material */
-  if (material_type_equal(mat, mat_behind)) goto trivial;
+  if (material_type_equal(mat, mat_behind)){
+    // we are at an interface within the grid and need to average
+    if ((mat->which_subclass != material_data::MATERIAL_GRID) && (mg_averaging)) {
 
-  // it doesn't make sense to average metals (electric or magnetic)
-  if (is_metal(meep::type(c), &mat) || is_metal(meep::type(c), &mat_behind)) goto noavg;
+    // we are far from an interface, so we can just sample the grid at this point
+    }else if ((mat->which_subclass != material_data::MATERIAL_GRID) && (!mg_averaging)) {
+
+    }else {
+      // no material grids, so no averaging
+      goto trivial;
+    }
+  } 
 
   if (mat->which_subclass != material_data::MATERIAL_GRID) {
     normal = unit_vector3(normal_to_fixed_object(vector3_minus(p, shiftby), *o));

From 1652a624fb0ffbff039e5df5318cbd145dcdbb65 Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Sat, 19 Feb 2022 13:17:26 -0500
Subject: [PATCH 08/29] minor tweaks to kill errors

---
 src/meepgeom.cpp | 67 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 24 deletions(-)

diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 578c865cc..62ad51c77 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -468,7 +468,6 @@ meep::vec matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md) {
   // iterate through object tree at current point
   if (tp) {
     do {
-      printf("entered %d\n",is_material_grid((material_data *)tp->objects[oi].o->material));
       gradient += material_grid_grad(to_geom_box_coords(p, &tp->objects[oi]),
                                      (material_data *)tp->objects[oi].o->material,
                                      tp->objects[oi].o);
@@ -1008,8 +1007,9 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
       p2 = q;
     }
     else if (!(id1 < id2 && (id1 == id || material_type_equal(mat1, mat))) &&
-             !(id2 < id1 && (id2 == id || material_type_equal(mat2, mat))))
+             !(id2 < id1 && (id2 == id || material_type_equal(mat2, mat)))){
       return false;
+      }
   }
 
   // CHECK(id1 > -1, "bug in object_of_point_in_tree?");
@@ -1020,8 +1020,6 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
     p2 = p1;
     shiftby2 = shiftby1;
   }
-    //if ((o1 && is_variable(o1->material)) || (o2 && is_variable(o2->material)))
-    //return false;
 
   if (id1 >= id2) {
     *o_front = o1;
@@ -1128,10 +1126,8 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
   double fill;
   fallback = false;
   bool mg_averaging = false;
-
   if (maxeval == 0 ||
-      (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind, p_mat, p_mat_behind) &&
-      !is_material_grid(mat))
+      (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind, p_mat, p_mat_behind))
       ) {
   noavg:
     get_material_pt(mat, v.center());
@@ -1141,34 +1137,56 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
     return;
   }
 
-  // for variable materials with do_averaging == true, switch over to slow fallback integration method
-  // for material grids, however, we have to do some more logic first... so let's exclude them from
-  // our check (the false in is_variable)
-  if ((is_variable(mat,false) && mat->do_averaging) || (is_variable(mat_behind,false) && mat_behind->do_averaging)) {
-    fallback = true;
-    return;
+  // For variable materials with do_averaging == true, switch over to slow fallback integration method.
+  // For material grids, however, we have to do some more logic first...
+  if ((is_variable(mat) && mat->do_averaging) || (is_variable(mat_behind) && mat_behind->do_averaging)) {
+    if ((!is_material_grid(mat)) && (!is_material_grid(mat_behind))){
+      fallback = true;
+      return;
+    }
   }
 
   /* if we have a material grid, we need to calculate the fill fraction,
   normal vector, etc. we also need to determine if we really need
   to do any averaging, or if the current pixel is smooth enough
-  to just evaluate */
+  to just evaluate.
+  
+  Keep in mind that if both the front and back materials are material grids,
+  we assume they are the same material grid. (all of the complicated overlapping
+  symmetry conditions are taken care of elsewhere).
+   */
+  double fill_front, fill_back;
+  vector3 normal_front, normal_back;
   if (is_material_grid(mat)){
     int oi;
-    printf("start\n");
-    geom_box_tree tp = geom_tree_search(p, restricted_tree, &oi);
-    printf("o %d\n",oi);
+    geom_box_tree tp = geom_tree_search(p_mat, restricted_tree, &oi);
     
-    meep::vec normal_vec(matgrid_grad(p, tp, oi, mat));
-    double nabsinv = 1.0 / meep::abs(normal_vec);
-    LOOP_OVER_DIRECTIONS(normal_vec.dim, k) { normal_vec.set_direction(k,normal_vec.in_direction(k)*nabsinv); }
+    meep::vec normal_vec_front(matgrid_grad(p_mat, tp, oi, mat));
     
-    double uval = matgrid_val(p, tp, oi, mat)+this->u_p;
+    double nabsinv = 1.0 / meep::abs(normal_vec_front);
+    LOOP_OVER_DIRECTIONS(normal_vec_front.dim, k) { normal_vec_front.set_direction(k,normal_vec_front.in_direction(k)*nabsinv); }
+    
+    double uval = matgrid_val(p_mat, tp, oi, mat)+this->u_p;
     double d = (mat->eta-uval) * nabsinv;
     double r = v.diameter()/2;
     
-    fill = get_material_grid_fill(normal_vec.dim,d,r,uval,mat->eta,&mg_averaging);
-    normal = vec_to_vector3(normal_vec);
+    fill = get_material_grid_fill(normal_vec_front.dim,d,r,uval,mat->eta,&mg_averaging);
+    normal = vec_to_vector3(normal_vec_front);
+  }
+  if (is_material_grid(mat_behind)){
+    int oi;
+    geom_box_tree tp = geom_tree_search(p_mat_behind, restricted_tree, &oi);
+    meep::vec normal_vec_behind(matgrid_grad(p_mat_behind, tp, oi, mat_behind));
+    
+    double nabsinv = 1.0 / meep::abs(normal_vec_behind);
+    LOOP_OVER_DIRECTIONS(normal_vec_behind.dim, k) { normal_vec_behind.set_direction(k,normal_vec_behind.in_direction(k)*nabsinv); }
+    
+    double uval = matgrid_val(p_mat_behind, tp, oi, mat_behind)+this->u_p;
+    double d = (mat_behind->eta-uval) * nabsinv;
+    double r = v.diameter()/2;
+    
+    fill = get_material_grid_fill(normal_vec_behind.dim,d,r,uval,mat_behind->eta,&mg_averaging);
+    normal = vec_to_vector3(normal_vec_behind);
   }
 
   /* check for trivial case of only one object/material
@@ -1198,7 +1216,7 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
   // it doesn't make sense to average metals (electric or magnetic)
   if (is_metal(meep::type(c), &mat) || is_metal(meep::type(c), &mat_behind)) goto noavg;
 
-  if (!is_material_grid(mat)) {
+  if (!is_material_grid(mat) || (!material_type_equal(mat, mat_behind))) {
     normal = unit_vector3(normal_to_fixed_object(vector3_minus(p, shiftby), *o));
     if (normal.x == 0 && normal.y == 0 && normal.z == 0)
       goto noavg; // couldn't get normal vector for this point, punt
@@ -3080,6 +3098,7 @@ void material_grids_addgradient(double *v, size_t ng, std::complex<meep::realnum
                 adjoint_c, forward_c, fwd, adj, frequencies[f_i], gv, du);
           /* anisotropic materials require interpolation/restriction */
           } else if (md->do_averaging ||
+                     !is_material_grid(md) ||
                      md->medium_1.epsilon_offdiag.x.re != 0 ||
                      md->medium_1.epsilon_offdiag.y.re != 0 ||
                      md->medium_1.epsilon_offdiag.z.re != 0 ||

From ee071dec1d1fca6dbeb9b63796ae4b5d960ef30e Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Mon, 21 Feb 2022 16:18:24 -0500
Subject: [PATCH 09/29] enable smoothing

---
 src/meepgeom.cpp | 43 +++++++++++++++++++++++++------------------
 src/meepgeom.hpp |  4 ++--
 2 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 62ad51c77..798bbe02c 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -16,7 +16,6 @@
 
 #include <algorithm>
 #include <vector>
-#include <csignal>
 #include "meepgeom.hpp"
 #include "meep_internals.hpp"
 
@@ -289,20 +288,12 @@ bool is_material_grid(material_type mt) {
 bool is_material_grid(void *md) { return is_material_grid((material_type)md); }
 
 // return whether mt is spatially varying
-bool is_variable(material_type mt, bool include_mg) {
-  /* sometimes we don't want to consider a material grid as a 
-  "variable material", so we can use the include_mg flag
-  to exclude that corner case. The defauly behavior (include_mg=true)
-  assumes that a material grid is indeed a variable material. */
-
-  if (!include_mg && is_material_grid(mt))
-    return false;
-  else
+bool is_variable(material_type mt) {
     return (mt && ((mt->which_subclass == material_data::MATERIAL_USER) ||
                  (mt->which_subclass == material_data::MATERIAL_GRID) ||
                  (mt->which_subclass == material_data::MATERIAL_FILE)));
 }
-bool is_variable(void *md, bool include_mg) { return is_variable((material_type)md, include_mg); }
+bool is_variable(void *md) { return is_variable((material_type)md); }
 
 bool is_medium(material_type md, medium_struct **m) {
   if (md->which_subclass == material_data::MEDIUM) {
@@ -1047,7 +1038,7 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
 }
 
 double get_material_grid_fill(meep::ndim dim, double d, double r, double u, double eta,
-  bool *mg_averaging){
+  bool *mg_averaging, bool *center_is_void){
   /* Lets assume that the "background material" is void (u=0) and that the
   "foreground material is solid (u=1)". The fill fraction then describes
   the amount of foreground material inhabits the cell.
@@ -1063,7 +1054,7 @@ double get_material_grid_fill(meep::ndim dim, double d, double r, double u, doub
   can verify.
   */
   double rel_fill;
-  if (abs(d) > abs(r)){
+  if (abs(d) >= abs(r)){
     *mg_averaging = false;
     return -1.0; // garbage fill
   } else {
@@ -1078,8 +1069,10 @@ double get_material_grid_fill(meep::ndim dim, double d, double r, double u, doub
 
   *mg_averaging = true;
   if (u <= eta){
+    *center_is_void = true;
     return rel_fill;   // center is void, so cap must be sold
   }else{
+    *center_is_void = false;
     return 1-rel_fill; // center is solid, so cap must be void
   }
 }
@@ -1126,9 +1119,12 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
   double fill;
   fallback = false;
   bool mg_averaging = false;
+  bool center_is_void = true;
+
   if (maxeval == 0 ||
-      (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind, p_mat, p_mat_behind))
-      ) {
+      (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind, p_mat, p_mat_behind)
+      //&& !is_material_grid(mat) && !is_material_grid(mat_behind)
+  )) {
   noavg:
     get_material_pt(mat, v.center());
   trivial:
@@ -1170,7 +1166,7 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
     double d = (mat->eta-uval) * nabsinv;
     double r = v.diameter()/2;
     
-    fill = get_material_grid_fill(normal_vec_front.dim,d,r,uval,mat->eta,&mg_averaging);
+    fill = get_material_grid_fill(normal_vec_front.dim,d,r,uval,mat->eta,&mg_averaging,&center_is_void);
     normal = vec_to_vector3(normal_vec_front);
   }
   if (is_material_grid(mat_behind)){
@@ -1185,7 +1181,7 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
     double d = (mat_behind->eta-uval) * nabsinv;
     double r = v.diameter()/2;
     
-    fill = get_material_grid_fill(normal_vec_behind.dim,d,r,uval,mat_behind->eta,&mg_averaging);
+    fill = get_material_grid_fill(normal_vec_behind.dim,d,r,uval,mat_behind->eta,&mg_averaging,&center_is_void);
     normal = vec_to_vector3(normal_vec_behind);
   }
 
@@ -1194,11 +1190,22 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
    to do any interface averaging within.
    */
   if (material_type_equal(mat, mat_behind)) {
+    // no averaging is needed
     if (is_variable(mat) && !mg_averaging) {
       eval_material_pt(mat, vec_to_vector3(v.center()));
       goto trivial;
+    /* we have a material grid interface within our pixel -- let's set our foreground
+    and background materials accordingly. Based on our earlier calculations,
+    we know which material corresponds to void and which corresponds to solid. */
     } else if(is_material_grid(mat) && mg_averaging){
-
+        if (center_is_void){
+          epsilon_material_grid(mat, 0.0);
+          epsilon_material_grid(mat_behind, 1.0);
+        }else{
+          epsilon_material_grid(mat, 1.0);
+          epsilon_material_grid(mat_behind, 0.0);
+        }
+    // materials are non variable and uniform -- no need to average
     } else{
       goto trivial;
     }
diff --git a/src/meepgeom.hpp b/src/meepgeom.hpp
index 5c641a09d..6174ffb95 100644
--- a/src/meepgeom.hpp
+++ b/src/meepgeom.hpp
@@ -265,8 +265,8 @@ void material_free(material_type m);
 bool material_type_equal(const material_type m1, const material_type m2);
 bool is_material_grid(material_type mt);
 bool is_material_grid(void *md);
-bool is_variable(material_type mt, bool include_mg = true);
-bool is_variable(void *md, bool include_mg = true);
+bool is_variable(material_type mt);
+bool is_variable(void *md);
 bool is_medium(material_type md, medium_struct **m);
 bool is_medium(void *md, medium_struct **m);
 bool is_metal(meep::field_type ft, const material_type *material);

From b544afd87c752f87ae99cefb133f565943eb6863 Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Wed, 23 Feb 2022 14:28:31 -0500
Subject: [PATCH 10/29] get gradients working both with and without averaging

---
 python/meep.i    |   2 +-
 src/meepgeom.cpp | 164 +++++++++++++++++++++--------------------------
 src/meepgeom.hpp |   4 +-
 3 files changed, 76 insertions(+), 94 deletions(-)

diff --git a/python/meep.i b/python/meep.i
index 0a0218569..cb3a3f974 100644
--- a/python/meep.i
+++ b/python/meep.i
@@ -1992,7 +1992,7 @@ meep_geom::geom_epsilon* _set_materials(meep::structure * s,
         geps = existing_geps;
     } else {
         geps = meep_geom::make_geom_epsilon(s, &gobj_list, center, _ensure_periodicity, _default_material,
-                                                extra_materials);
+                                                extra_materials, use_anisotropic_averaging);
     }
     if (set_materials) {
         meep_geom::set_materials_from_geom_epsilon(s, geps, use_anisotropic_averaging, tol,
diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 798bbe02c..9fad7d1c0 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -1037,11 +1037,8 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
   return true;
 }
 
-double get_material_grid_fill(meep::ndim dim, double d, double r, double u, double eta,
-  bool *mg_averaging, bool *center_is_void){
-  /* Lets assume that the "background material" is void (u=0) and that the
-  "foreground material is solid (u=1)". The fill fraction then describes
-  the amount of foreground material inhabits the cell.
+double get_material_grid_fill(meep::ndim dim, double d, double r, double u, double eta){
+  /* The fill fraction should describe the amount of u=1 material in the current pixel.
 
   The analytic volume expressions for the end cap only specify the fill fraction
   of the cap relative to the whole sphere... but we don't know what the cap 
@@ -1055,7 +1052,6 @@ double get_material_grid_fill(meep::ndim dim, double d, double r, double u, doub
   */
   double rel_fill;
   if (abs(d) >= abs(r)){
-    *mg_averaging = false;
     return -1.0; // garbage fill
   } else {
     if (dim == meep::D1)
@@ -1067,13 +1063,10 @@ double get_material_grid_fill(meep::ndim dim, double d, double r, double u, doub
       rel_fill = (((r-d)*(r-d))/(4*meep::pi*r*r*r))*(2*r+d);
   }
 
-  *mg_averaging = true;
   if (u <= eta){
-    *center_is_void = true;
-    return rel_fill;   // center is void, so cap must be sold
+    return rel_fill;   // center is void, so cap must be solid
   }else{
-    *center_is_void = false;
-    return 1-rel_fill; // center is solid, so cap must be void
+    return 1-rel_fill; // center is solid, so cap must be void (and we need complement)
   }
 }
 
@@ -1118,8 +1111,8 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
   vector3 p, shiftby, normal;
   double fill;
   fallback = false;
-  bool mg_averaging = false;
-  bool center_is_void = true;
+  bool needs_cleanup=false;
+  medium_struct *medium_1, *medium_0;
 
   if (maxeval == 0 ||
       (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind, p_mat, p_mat_behind)
@@ -1142,99 +1135,79 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
     }
   }
 
-  /* if we have a material grid, we need to calculate the fill fraction,
-  normal vector, etc. we also need to determine if we really need
-  to do any averaging, or if the current pixel is smooth enough
-  to just evaluate.
-  
-  Keep in mind that if both the front and back materials are material grids,
-  we assume they are the same material grid. (all of the complicated overlapping
-  symmetry conditions are taken care of elsewhere).
-   */
-  double fill_front, fill_back;
-  vector3 normal_front, normal_back;
-  if (is_material_grid(mat)){
-    int oi;
-    geom_box_tree tp = geom_tree_search(p_mat, restricted_tree, &oi);
-    
-    meep::vec normal_vec_front(matgrid_grad(p_mat, tp, oi, mat));
-    
-    double nabsinv = 1.0 / meep::abs(normal_vec_front);
-    LOOP_OVER_DIRECTIONS(normal_vec_front.dim, k) { normal_vec_front.set_direction(k,normal_vec_front.in_direction(k)*nabsinv); }
-    
-    double uval = matgrid_val(p_mat, tp, oi, mat)+this->u_p;
-    double d = (mat->eta-uval) * nabsinv;
-    double r = v.diameter()/2;
-    
-    fill = get_material_grid_fill(normal_vec_front.dim,d,r,uval,mat->eta,&mg_averaging,&center_is_void);
-    normal = vec_to_vector3(normal_vec_front);
-  }
-  if (is_material_grid(mat_behind)){
-    int oi;
-    geom_box_tree tp = geom_tree_search(p_mat_behind, restricted_tree, &oi);
-    meep::vec normal_vec_behind(matgrid_grad(p_mat_behind, tp, oi, mat_behind));
-    
-    double nabsinv = 1.0 / meep::abs(normal_vec_behind);
-    LOOP_OVER_DIRECTIONS(normal_vec_behind.dim, k) { normal_vec_behind.set_direction(k,normal_vec_behind.in_direction(k)*nabsinv); }
-    
-    double uval = matgrid_val(p_mat_behind, tp, oi, mat_behind)+this->u_p;
-    double d = (mat_behind->eta-uval) * nabsinv;
-    double r = v.diameter()/2;
-    
-    fill = get_material_grid_fill(normal_vec_behind.dim,d,r,uval,mat_behind->eta,&mg_averaging,&center_is_void);
-    normal = vec_to_vector3(normal_vec_behind);
-  }
-
   /* check for trivial case of only one object/material
    in the case of the material grid, make sure we don't need
    to do any interface averaging within.
    */
   if (material_type_equal(mat, mat_behind)) {
-    // no averaging is needed
-    if (is_variable(mat) && !mg_averaging) {
+    /* if we have a material grid, we need to calculate the fill fraction,
+    normal vector, etc. we also need to determine if we really need
+    to do any averaging, or if the current pixel is smooth enough
+    to just evaluate.
+    */
+    if (is_material_grid(mat)){
+      int oi;
+      geom_box_tree tp = geom_tree_search(p_mat, restricted_tree, &oi);
+      
+      meep::vec normal_vec_front(matgrid_grad(p_mat, tp, oi, mat));
+      
+      double nabsinv = 1.0 / meep::abs(normal_vec_front);
+      LOOP_OVER_DIRECTIONS(normal_vec_front.dim, k) { normal_vec_front.set_direction(k,normal_vec_front.in_direction(k)*nabsinv); }
+      
+      double uval = matgrid_val(p_mat, tp, oi, mat)+this->u_p;
+      double d = (mat->eta-uval) * nabsinv;
+      double r = v.diameter()/2;
+      
+      fill = get_material_grid_fill(normal_vec_front.dim,d,r,uval,mat->eta);
+      normal = vec_to_vector3(normal_vec_front);
+
+      if (fill < 0){
+        // no averaging is needed
+        eval_material_pt(mat, vec_to_vector3(v.center()));
+        goto trivial;
+      } else{
+        /* we have a material grid interface within our pixel */
+        needs_cleanup = true;
+        medium_1 = new medium_struct(mat->medium_2);
+        medium_0 = new medium_struct(mat->medium_1);
+        mat = new material_data();
+        mat_behind = new material_data();
+        mat->medium = *medium_1;
+        mat_behind->medium = *medium_0;
+      }
+    } else if(is_variable(mat)) {
+      // no averaging is needed
       eval_material_pt(mat, vec_to_vector3(v.center()));
       goto trivial;
-    /* we have a material grid interface within our pixel -- let's set our foreground
-    and background materials accordingly. Based on our earlier calculations,
-    we know which material corresponds to void and which corresponds to solid. */
-    } else if(is_material_grid(mat) && mg_averaging){
-        if (center_is_void){
-          epsilon_material_grid(mat, 0.0);
-          epsilon_material_grid(mat_behind, 1.0);
-        }else{
-          epsilon_material_grid(mat, 1.0);
-          epsilon_material_grid(mat_behind, 0.0);
-        }
     // materials are non variable and uniform -- no need to average
-    } else{
+    } else {
       goto trivial;
     }
-  /* Evaluate materials in case they are variable.  This allows us to do fast subpixel averaging
-    at the boundary of an object with a variable material, while remaining accurate enough if the
-    material is continuous over the pixel.  (We make a first-order error by averaging as if the material
-    were constant, but only in a boundary layer of thickness 1/resolution, so the net effect should
-    still be second-order.) */   
-  } else{
-    eval_material_pt(mat, p_mat);
-    eval_material_pt(mat_behind, p_mat_behind);
-    if (material_type_equal(mat, mat_behind)) goto trivial;
-  }
-
-  // it doesn't make sense to average metals (electric or magnetic)
-  if (is_metal(meep::type(c), &mat) || is_metal(meep::type(c), &mat_behind)) goto noavg;
-
-  if (!is_material_grid(mat) || (!material_type_equal(mat, mat_behind))) {
+  } else {
     normal = unit_vector3(normal_to_fixed_object(vector3_minus(p, shiftby), *o));
     if (normal.x == 0 && normal.y == 0 && normal.z == 0)
       goto noavg; // couldn't get normal vector for this point, punt
-    
     geom_box pixel = gv2box(v);
     pixel.low = vector3_minus(pixel.low, shiftby);
     pixel.high = vector3_minus(pixel.high, shiftby);
-
     fill = box_overlap_with_object(pixel, *o, tol, maxeval);
+    /* Evaluate materials in case they are variable.  This allows us to do fast subpixel averaging
+      at the boundary of an object with a variable material, while remaining accurate enough if the
+      material is continuous over the pixel.  (We make a first-order error by averaging as if the material
+      were constant, but only in a boundary layer of thickness 1/resolution, so the net effect should
+      still be second-order.) */
+    if (is_variable(mat)){
+      eval_material_pt(mat, p_mat);
+      eval_material_pt(mat_behind, p_mat_behind);
+      if (material_type_equal(mat, mat_behind)) goto trivial;
+    }
   }
 
+  // it doesn't make sense to average metals (electric or magnetic)
+  if (is_metal(meep::type(c), &mat) || is_metal(meep::type(c), &mat_behind)) goto noavg;
+
+  /*     kotkke algorithm      */
+
   material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix);
   symm_matrix eps2, epsinv2;
   symm_matrix eps1, delta;
@@ -1322,6 +1295,10 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
 #endif
 
   sym_matrix_invert(chi1inv_matrix, &meps);
+
+  if (needs_cleanup){
+    delete medium_0, medium_1, mat, mat_behind;
+  }
 }
 
 static int eps_ever_negative = 0;
@@ -2064,7 +2041,7 @@ void add_absorbing_layer(absorber_list alist, double thickness, int direction, i
 if needed */
 geom_epsilon* make_geom_epsilon(meep::structure *s, geometric_object_list *g, vector3 center,
                                 bool _ensure_periodicity, material_type _default_material,
-                                material_type_list extra_materials) {
+                                material_type_list extra_materials, bool _use_anisotropic_averaging) {
   // set global variables in libctlgeom based on data fields in s
   geom_initialize();
   geometry_center = center;
@@ -2115,6 +2092,7 @@ geom_epsilon* make_geom_epsilon(meep::structure *s, geometric_object_list *g, ve
   }
 
   geom_epsilon *geps = new geom_epsilon(*g, extra_materials, gv.pad().surroundings());
+  geps->use_anisotropic_averaging = _use_anisotropic_averaging;
   return geps;
 }
 
@@ -2125,7 +2103,8 @@ void set_materials_from_geometry(meep::structure *s, geometric_object_list g, ve
                                  bool _ensure_periodicity, material_type _default_material,
                                  absorber_list alist, material_type_list extra_materials) {
   meep_geom::geom_epsilon *geps = meep_geom::make_geom_epsilon(s, &g, center, _ensure_periodicity,
-                                                               _default_material, extra_materials);
+                                                               _default_material, extra_materials,
+                                                               use_anisotropic_averaging);
   set_materials_from_geom_epsilon(s, geps, use_anisotropic_averaging, tol,
                                   maxeval, alist);
   delete geps;
@@ -2833,10 +2812,11 @@ std::complex<double> get_material_gradient(
     }
     double row_1[3], row_2[3], dA_du[3];
     double orig = u[idx];
+    double maxevals = geps->use_anisotropic_averaging ? geps->maxeval : 0; // punt if no smoothing
     u[idx] -= du;
-    geps->eff_chi1inv_row(adjoint_c, row_1, v, geps->tol, geps->maxeval);
+    geps->eff_chi1inv_row(adjoint_c, row_1, v, geps->tol, maxevals);
     u[idx] += 2*du;
-    geps->eff_chi1inv_row(adjoint_c, row_2, v, geps->tol, geps->maxeval);
+    geps->eff_chi1inv_row(adjoint_c, row_2, v, geps->tol, maxevals);
     u[idx] = orig;
 
     for (int i=0;i<3;i++) dA_du[i] = (row_1[i] - row_2[i])/(2*du);
@@ -3104,7 +3084,7 @@ void material_grids_addgradient(double *v, size_t ng, std::complex<meep::realnum
                 v+ng*f_i, vec_to_vector3(p), scalegrad*cyl_scale, geps,
                 adjoint_c, forward_c, fwd, adj, frequencies[f_i], gv, du);
           /* anisotropic materials require interpolation/restriction */
-          } else if (md->do_averaging ||
+          } else if (geps->use_anisotropic_averaging ||
                      !is_material_grid(md) ||
                      md->medium_1.epsilon_offdiag.x.re != 0 ||
                      md->medium_1.epsilon_offdiag.y.re != 0 ||
diff --git a/src/meepgeom.hpp b/src/meepgeom.hpp
index 6174ffb95..ab962615e 100644
--- a/src/meepgeom.hpp
+++ b/src/meepgeom.hpp
@@ -173,6 +173,7 @@ class geom_epsilon : public meep::material_function {
 
 public:
   double u_p = 0;
+  bool use_anisotropic_averaging;
   geom_box_tree geometry_tree;
   geom_box_tree restricted_tree;
   geometric_object_list geometry;
@@ -229,7 +230,8 @@ geom_epsilon* make_geom_epsilon(meep::structure *s, geometric_object_list *g,
                                  vector3 center = make_vector3(),
                                  bool ensure_periodicity = false,
                                  material_type _default_material = vacuum,
-                                 material_type_list extra_materials = material_type_list());
+                                 material_type_list extra_materials = material_type_list(),
+                                 bool use_anisotropic_averaging = false);
 //, geometric_object_list g, material_type_list extra_materials
 void set_materials_from_geometry(meep::structure *s, geometric_object_list g,
                                  vector3 center = make_vector3(),

From 5f5901440e5e8ef9b12dec28b70eec3d8fc8b6ac Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Sat, 26 Feb 2022 13:50:02 -0500
Subject: [PATCH 11/29] adjust FD step

---
 python/adjoint/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/adjoint/utils.py b/python/adjoint/utils.py
index e6a5d1581..328576311 100644
--- a/python/adjoint/utils.py
+++ b/python/adjoint/utils.py
@@ -19,7 +19,7 @@
 _Z_AXIS = 1
 
 # default finite difference step size when calculating Aᵤ
-FD_DEFAULT = 1e-3
+FD_DEFAULT = 1e-6
 
 class DesignRegion(object):
     def __init__(

From 8bfbb7741d8dfb904df2e250a08f17f3301e05ae Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Wed, 2 Mar 2022 17:00:30 -0500
Subject: [PATCH 12/29] fix boundary case when spatial gradient doesn't exist

---
 src/meepgeom.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 9fad7d1c0..898b92101 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -1150,6 +1150,8 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
       geom_box_tree tp = geom_tree_search(p_mat, restricted_tree, &oi);
       
       meep::vec normal_vec_front(matgrid_grad(p_mat, tp, oi, mat));
+      if (normal_vec_front.x() == 0 && normal_vec_front.y() == 0 && normal_vec_front.z() == 0)
+        goto noavg; // couldn't get normal vector for this point, punt
       
       double nabsinv = 1.0 / meep::abs(normal_vec_front);
       LOOP_OVER_DIRECTIONS(normal_vec_front.dim, k) { normal_vec_front.set_direction(k,normal_vec_front.in_direction(k)*nabsinv); }
@@ -1297,7 +1299,8 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
   sym_matrix_invert(chi1inv_matrix, &meps);
 
   if (needs_cleanup){
-    delete medium_0, medium_1, mat, mat_behind;
+    delete medium_0, medium_1;
+    material_free(mat); material_free(mat_behind);
   }
 }
 

From 1880b0dce0f637a8ebf64995ebb5c1595c819083 Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Tue, 8 Mar 2022 11:14:43 -0500
Subject: [PATCH 13/29] refactor

---
 src/meepgeom.cpp | 194 ++++++++++++++++++++++++-----------------------
 1 file changed, 101 insertions(+), 93 deletions(-)

diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 898b92101..9c870cb2a 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -16,9 +16,12 @@
 
 #include <algorithm>
 #include <vector>
+#include <duals/dual>
 #include "meepgeom.hpp"
 #include "meep_internals.hpp"
 
+namespace dlit = duals::literals;
+
 namespace meep_geom {
 
 #define master_printf meep::master_printf
@@ -1101,6 +1104,101 @@ void geom_epsilon::eff_chi1inv_row(meep::component c, double chi1inv_row[3], con
   }
 }
 
+void kottke_algorithm(meep::component c, symm_matrix *chi1inv_matrix, symm_matrix meps, material_type mat, 
+  /* 
+
+  */
+  
+  material_type mat_behind, vector3 normal, double fill){
+  material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix);
+  symm_matrix eps2, epsinv2;
+  symm_matrix eps1, delta;
+  double Rot[3][3];
+  material_epsmu(meep::type(c), mat_behind, &eps2, &epsinv2);
+  eps1 = meps;
+
+  Rot[0][0] = normal.x;
+  Rot[1][0] = normal.y;
+  Rot[2][0] = normal.z;
+  if (fabs(normal.x) > 1e-2 || fabs(normal.y) > 1e-2) {
+    Rot[0][2] = normal.y;
+    Rot[1][2] = -normal.x;
+    Rot[2][2] = 0;
+  }
+  else { /* n is not parallel to z direction, use (x x n) instead */
+    Rot[0][2] = 0;
+    Rot[1][2] = -normal.z;
+    Rot[2][2] = normal.y;
+  }
+  { /* normalize second column */
+    double s = Rot[0][2] * Rot[0][2] + Rot[1][2] * Rot[1][2] + Rot[2][2] * Rot[2][2];
+    s = 1.0 / sqrt(s);
+    Rot[0][2] *= s;
+    Rot[1][2] *= s;
+    Rot[2][2] *= s;
+  }
+  /* 1st column is 2nd column x 0th column */
+  Rot[0][1] = Rot[1][2] * Rot[2][0] - Rot[2][2] * Rot[1][0];
+  Rot[1][1] = Rot[2][2] * Rot[0][0] - Rot[0][2] * Rot[2][0];
+  Rot[2][1] = Rot[0][2] * Rot[1][0] - Rot[1][2] * Rot[0][0];
+
+  /* rotate epsilon tensors to surface parallel/perpendicular axes */
+  sym_matrix_rotate(&eps1, &eps1, Rot);
+  sym_matrix_rotate(&eps2, &eps2, Rot);
+
+#define AVG (fill * (EXPR(eps1)) + (1 - fill) * (EXPR(eps2)))
+#define SQR(x) ((x) * (x))
+
+#define EXPR(eps) (-1 / eps.m00)
+  delta.m00 = AVG;
+#undef EXPR
+#define EXPR(eps) (eps.m11 - SQR(eps.m01) / eps.m00)
+  delta.m11 = AVG;
+#undef EXPR
+#define EXPR(eps) (eps.m22 - SQR(eps.m02) / eps.m00)
+  delta.m22 = AVG;
+#undef EXPR
+
+#define EXPR(eps) (eps.m01 / eps.m00)
+  delta.m01 = AVG;
+#undef EXPR
+#define EXPR(eps) (eps.m02 / eps.m00)
+  delta.m02 = AVG;
+#undef EXPR
+#define EXPR(eps) (eps.m12 - eps.m02 * eps.m01 / eps.m00)
+  delta.m12 = AVG;
+#undef EXPR
+
+  meps.m00 = -1 / delta.m00;
+  meps.m11 = delta.m11 - SQR(delta.m01) / delta.m00;
+  meps.m22 = delta.m22 - SQR(delta.m02) / delta.m00;
+  meps.m01 = -delta.m01 / delta.m00;
+  meps.m02 = -delta.m02 / delta.m00;
+  meps.m12 = delta.m12 - (delta.m02 * delta.m01) / delta.m00;
+
+#undef SQR
+
+#define SWAP(a, b)                                                                                 \
+  {                                                                                                \
+    double xxx = a;                                                                                \
+    a = b;                                                                                         \
+    b = xxx;                                                                                       \
+  }
+  /* invert rotation matrix = transpose */
+  SWAP(Rot[0][1], Rot[1][0]);
+  SWAP(Rot[0][2], Rot[2][0]);
+  SWAP(Rot[2][1], Rot[1][2]);
+  sym_matrix_rotate(&meps, &meps, Rot); /* rotate back */
+#undef SWAP
+
+#ifdef DEBUG
+  if (!sym_matrix_positive_definite(&meps))
+    meep::abort("negative mean epsilon from Kottke algorithm");
+#endif
+
+  sym_matrix_invert(chi1inv_matrix, &meps);
+}
+
 void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_matrix,
                                       const meep::volume &v, double tol, int maxeval,
                                       bool &fallback) {
@@ -1115,9 +1213,7 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
   medium_struct *medium_1, *medium_0;
 
   if (maxeval == 0 ||
-      (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind, p_mat, p_mat_behind)
-      //&& !is_material_grid(mat) && !is_material_grid(mat_behind)
-  )) {
+      (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind, p_mat, p_mat_behind))) {
   noavg:
     get_material_pt(mat, v.center());
   trivial:
@@ -1151,7 +1247,7 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
       
       meep::vec normal_vec_front(matgrid_grad(p_mat, tp, oi, mat));
       if (normal_vec_front.x() == 0 && normal_vec_front.y() == 0 && normal_vec_front.z() == 0)
-        goto noavg; // couldn't get normal vector for this point, punt
+        goto trivial; // couldn't get normal vector for this point
       
       double nabsinv = 1.0 / meep::abs(normal_vec_front);
       LOOP_OVER_DIRECTIONS(normal_vec_front.dim, k) { normal_vec_front.set_direction(k,normal_vec_front.in_direction(k)*nabsinv); }
@@ -1208,95 +1304,7 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
   // it doesn't make sense to average metals (electric or magnetic)
   if (is_metal(meep::type(c), &mat) || is_metal(meep::type(c), &mat_behind)) goto noavg;
 
-  /*     kotkke algorithm      */
-
-  material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix);
-  symm_matrix eps2, epsinv2;
-  symm_matrix eps1, delta;
-  double Rot[3][3];
-  material_epsmu(meep::type(c), mat_behind, &eps2, &epsinv2);
-  eps1 = meps;
-
-  Rot[0][0] = normal.x;
-  Rot[1][0] = normal.y;
-  Rot[2][0] = normal.z;
-  if (fabs(normal.x) > 1e-2 || fabs(normal.y) > 1e-2) {
-    Rot[0][2] = normal.y;
-    Rot[1][2] = -normal.x;
-    Rot[2][2] = 0;
-  }
-  else { /* n is not parallel to z direction, use (x x n) instead */
-    Rot[0][2] = 0;
-    Rot[1][2] = -normal.z;
-    Rot[2][2] = normal.y;
-  }
-  { /* normalize second column */
-    double s = Rot[0][2] * Rot[0][2] + Rot[1][2] * Rot[1][2] + Rot[2][2] * Rot[2][2];
-    s = 1.0 / sqrt(s);
-    Rot[0][2] *= s;
-    Rot[1][2] *= s;
-    Rot[2][2] *= s;
-  }
-  /* 1st column is 2nd column x 0th column */
-  Rot[0][1] = Rot[1][2] * Rot[2][0] - Rot[2][2] * Rot[1][0];
-  Rot[1][1] = Rot[2][2] * Rot[0][0] - Rot[0][2] * Rot[2][0];
-  Rot[2][1] = Rot[0][2] * Rot[1][0] - Rot[1][2] * Rot[0][0];
-
-  /* rotate epsilon tensors to surface parallel/perpendicular axes */
-  sym_matrix_rotate(&eps1, &eps1, Rot);
-  sym_matrix_rotate(&eps2, &eps2, Rot);
-
-#define AVG (fill * (EXPR(eps1)) + (1 - fill) * (EXPR(eps2)))
-#define SQR(x) ((x) * (x))
-
-#define EXPR(eps) (-1 / eps.m00)
-  delta.m00 = AVG;
-#undef EXPR
-#define EXPR(eps) (eps.m11 - SQR(eps.m01) / eps.m00)
-  delta.m11 = AVG;
-#undef EXPR
-#define EXPR(eps) (eps.m22 - SQR(eps.m02) / eps.m00)
-  delta.m22 = AVG;
-#undef EXPR
-
-#define EXPR(eps) (eps.m01 / eps.m00)
-  delta.m01 = AVG;
-#undef EXPR
-#define EXPR(eps) (eps.m02 / eps.m00)
-  delta.m02 = AVG;
-#undef EXPR
-#define EXPR(eps) (eps.m12 - eps.m02 * eps.m01 / eps.m00)
-  delta.m12 = AVG;
-#undef EXPR
-
-  meps.m00 = -1 / delta.m00;
-  meps.m11 = delta.m11 - SQR(delta.m01) / delta.m00;
-  meps.m22 = delta.m22 - SQR(delta.m02) / delta.m00;
-  meps.m01 = -delta.m01 / delta.m00;
-  meps.m02 = -delta.m02 / delta.m00;
-  meps.m12 = delta.m12 - (delta.m02 * delta.m01) / delta.m00;
-
-#undef SQR
-
-#define SWAP(a, b)                                                                                 \
-  {                                                                                                \
-    double xxx = a;                                                                                \
-    a = b;                                                                                         \
-    b = xxx;                                                                                       \
-  }
-  /* invert rotation matrix = transpose */
-  SWAP(Rot[0][1], Rot[1][0]);
-  SWAP(Rot[0][2], Rot[2][0]);
-  SWAP(Rot[2][1], Rot[1][2]);
-  sym_matrix_rotate(&meps, &meps, Rot); /* rotate back */
-#undef SWAP
-
-#ifdef DEBUG
-  if (!sym_matrix_positive_definite(&meps))
-    meep::abort("negative mean epsilon from Kottke algorithm");
-#endif
-
-  sym_matrix_invert(chi1inv_matrix, &meps);
+  kottke_algorithm(c,chi1inv_matrix,meps,mat,mat_behind,normal,fill);
 
   if (needs_cleanup){
     delete medium_0, medium_1;

From fa35e16f464867f19919b6b58cbbca7cddefeb48 Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Wed, 9 Mar 2022 15:29:42 -0500
Subject: [PATCH 14/29] add duals

---
 python/typemap_utils.cpp |    8 +-
 src/material_data.cpp    |    6 +-
 src/material_data.hpp    |    5 +-
 src/meepgeom.cpp         |  536 ++++++++------
 src/meepgeom.hpp         |   22 +-
 src/support/dual.hpp     | 1471 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 1836 insertions(+), 212 deletions(-)
 create mode 100644 src/support/dual.hpp

diff --git a/python/typemap_utils.cpp b/python/typemap_utils.cpp
index 3c1fa1b66..3053a4787 100644
--- a/python/typemap_utils.cpp
+++ b/python/typemap_utils.cpp
@@ -529,8 +529,12 @@ static int pymaterial_grid_to_material_grid(PyObject *po, material_data *md) {
   if (!PyArray_ISCARRAY(pao)) {
     meep::abort("Numpy array weights must be C-style contiguous.");
   }
-  md->weights = new double[PyArray_SIZE(pao)];
-  memcpy(md->weights, (double *)PyArray_DATA(pao), PyArray_SIZE(pao) * sizeof(double));
+  md->weights.clear();
+  for (size_t i=0;i<PyArray_SIZE(pao);i++){
+    md->weights.push_back(((double *)PyArray_DATA(pao))[i]);
+  }
+  //md->weights = new double[PyArray_SIZE(pao)];
+  //memcpy(md->weights, (double *)PyArray_DATA(pao), PyArray_SIZE(pao) * sizeof(double));
 
   // if needed, combine sus structs to main object
   PyObject *py_e_sus_m1 = PyObject_GetAttrString(po_medium1, "E_susceptibilities");
diff --git a/src/material_data.cpp b/src/material_data.cpp
index 774f2a3d3..81b998410 100644
--- a/src/material_data.cpp
+++ b/src/material_data.cpp
@@ -76,10 +76,10 @@ void material_data::copy_from(const material_data &from) {
   }
 
   grid_size = from.grid_size;
-  if (from.weights) {
+  if (from.weights.size() != 0) {
     size_t N = from.grid_size.x * from.grid_size.y * from.grid_size.z;
-    weights = new double[N];
-    std::copy_n(from.weights, N, weights);
+    weights.clear();
+    weights.insert(weights.end(), from.weights.begin(), from.weights.end());
   }
 
   medium_1 = from.medium_1;
diff --git a/src/material_data.hpp b/src/material_data.hpp
index af754ffa1..df9886bcc 100644
--- a/src/material_data.hpp
+++ b/src/material_data.hpp
@@ -25,6 +25,7 @@
 
 #include <meep.hpp>
 #include <ctlgeom.h>
+#include "support/dual.hpp"
 
 #include "meep.hpp"
 
@@ -133,7 +134,7 @@ struct material_data {
 
   // these fields used only if which_subclass==MATERIAL_GRID
   vector3 grid_size;
-  double *weights;
+  std::vector<duals::duald> weights;
   medium_struct medium_1;
   medium_struct medium_2;
   double beta;
@@ -187,7 +188,7 @@ material_type make_user_material(user_material_func user_func, void *user_data);
 material_type make_file_material(char *epsilon_input_file);
 material_type make_material_grid(bool do_averaging, double beta, double eta, double damping);
 void read_epsilon_file(const char *eps_input_file);
-void update_weights(material_type matgrid, double *weights);
+void update_weights(material_type matgrid, duals::duald *weights);
 
 }; // namespace meep_geom
 
diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 9c870cb2a..23fc9650b 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -14,13 +14,14 @@
 %  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
 
+#include <iostream> 
 #include <algorithm>
 #include <vector>
-#include <duals/dual>
 #include "meepgeom.hpp"
 #include "meep_internals.hpp"
 
 namespace dlit = duals::literals;
+using namespace duals::literals;
 
 namespace meep_geom {
 
@@ -124,9 +125,6 @@ void material_free(material_type m) {
   // object so will assume that the caller keeps track of its lifetime.
   delete[] m->epsilon_data;
   m->epsilon_data = NULL;
-
-  delete[] m->weights;
-  m->weights = NULL;
   delete m;
 }
 
@@ -151,9 +149,9 @@ bool material_type_equal(const material_type m1, const material_type m2) {
 /* rotate A by a unitary (real) rotation matrix R:
       RAR = transpose(R) * A * R
 */
-void sym_matrix_rotate(symm_matrix *RAR, const symm_matrix *A_, const double R[3][3]) {
+void sym_matrix_rotate(symm_matrix *RAR, const symm_matrix *A_, const duals::duald R[3][3]) {
   int i, j;
-  double A[3][3], AR[3][3];
+  duals::duald A[3][3], AR[3][3];
   A[0][0] = A_->m00;
   A[1][1] = A_->m11;
   A[2][2] = A_->m22;
@@ -176,8 +174,8 @@ void sym_matrix_rotate(symm_matrix *RAR, const symm_matrix *A_, const double R[3
 
 /* Set Vinv = inverse of V, where both V and Vinv are real-symmetric matrices.*/
 void sym_matrix_invert(symm_matrix *Vinv, const symm_matrix *V) {
-  double m00 = V->m00, m11 = V->m11, m22 = V->m22;
-  double m01 = V->m01, m02 = V->m02, m12 = V->m12;
+  duals::duald m00 = V->m00, m11 = V->m11, m22 = V->m22;
+  duals::duald m01 = V->m01, m02 = V->m02, m12 = V->m12;
 
   if (m01 == 0.0 && m02 == 0.0 && m12 == 0.0) {
     /* optimize common case of a diagonal matrix: */
@@ -187,13 +185,13 @@ void sym_matrix_invert(symm_matrix *Vinv, const symm_matrix *V) {
     Vinv->m01 = Vinv->m02 = Vinv->m12 = 0.0;
   }
   else {
-    double detinv;
+    duals::duald detinv;
 
     /* compute the determinant: */
     detinv = m00 * m11 * m22 - m02 * m11 * m02 + 2.0 * m01 * m12 * m02 - m01 * m01 * m22 -
              m12 * m12 * m00;
 
-    if (detinv == 0.0) meep::abort("singular 3x3 matrix");
+    if (detinv.rpart() == 0.0) meep::abort("singular 3x3 matrix");
 
     detinv = 1.0 / detinv;
 
@@ -209,18 +207,19 @@ void sym_matrix_invert(symm_matrix *Vinv, const symm_matrix *V) {
 
 /* Returns whether or not V is positive-definite. */
 int sym_matrix_positive_definite(symm_matrix *V) {
-  double det2, det3;
-  double m00 = V->m00, m11 = V->m11, m22 = V->m22;
+  duals::duald det2, det3;
+  duals::duald m00 = V->m00, m11 = V->m11, m22 = V->m22;
 
 #if defined(WITH_HERMITIAN_EPSILON)
-  scalar_complex m01 = V->m01, m02 = V->m02, m12 = V->m12;
+  // FIXME
+  scalar_complex m01 = V->m01.rpart(), m02 = V->m02.rpart(), m12 = V->m12.rpart();
 
   det2 = m00 * m11 - CSCALAR_NORMSQR(m01);
   det3 = det2 * m22 - m11 * CSCALAR_NORMSQR(m02) - CSCALAR_NORMSQR(m12) * m00 +
          2.0 * ((m01.re * m12.re - m01.im * m12.im) * m02.re +
                 (m01.re * m12.im + m01.im * m12.re) * m02.im);
 #else  /* real matrix */
-  double m01 = V->m01, m02 = V->m02, m12 = V->m12;
+  duals::duald m01 = V->m01, m02 = V->m02, m12 = V->m12;
 
   det2 = m00 * m11 - m01 * m01;
   det3 = det2 * m22 - m02 * m11 * m02 + 2.0 * m01 * m12 * m02 - m12 * m12 * m00;
@@ -336,37 +335,41 @@ bool is_metal(meep::field_type ft, const material_type *material) {
 
 // computes the vector-Jacobian product of the gradient of the matgrid_val function v
 // with the Jacobian of the to_geom_box_coords function for geometric_object o
-vector3 to_geom_object_coords_VJP(vector3 v, const geometric_object *o) {
+cvector3 to_geom_object_coords_VJP(cvector3 v, const geometric_object *o) {
   if (!o) { meep::abort("must pass a geometric_object to to_geom_object_coords_VJP.\n"); }
 
   switch (o->which_subclass) {
     default: {
-      vector3 po = {0, 0, 0};
+      cvector3 po = cvector_zero();
       return po;
     }
     case geometric_object::SPHERE: {
       number radius = o->subclass.sphere_data->radius;
-      return vector3_scale(0.5 / radius, v);
+      return cvector3_scale(0.5 / radius, v);
     }
     /* case geometric_object::CYLINDER:
        NOT YET IMPLEMENTED */
     case geometric_object::BLOCK: {
+      vector3 v_real = cvector3_re(v);
+      vector3 v_imag = cvector3_im(v);
+
       vector3 size = o->subclass.block_data->size;
-      if (size.x != 0.0) v.x /= size.x;
-      if (size.y != 0.0) v.y /= size.y;
-      if (size.z != 0.0) v.z /= size.z;
-      return matrix3x3_transpose_vector3_mult(o->subclass.block_data->projection_matrix, v);
+      if (size.x != 0.0) {v_real.x /= size.x; v_imag.x /= size.x;}
+      if (size.y != 0.0) {v_real.y /= size.y; v_imag.y /= size.y;}
+      if (size.z != 0.0) {v_real.z /= size.z; v_imag.z /= size.z;}
+      v_real =  matrix3x3_transpose_vector3_mult(o->subclass.block_data->projection_matrix, v_real);
+      v_imag =  matrix3x3_transpose_vector3_mult(o->subclass.block_data->projection_matrix, v_imag);
+      return make_cvector3(v_real,v_imag);
     }
     /* case geometric_object::PRISM:
        NOT YET IMPLEMENTED */
   }
 }
 
-meep::vec material_grid_grad(vector3 p, material_data *md, const geometric_object *o) {
+cvector3 material_grid_grad(vector3 p, material_data *md, const geometric_object *o) {
   if (!is_material_grid(md)) {meep::abort("Invalid material grid detected.\n"); }
 
-  meep::vec gradient(zero_vec(dim));
-  double *data = md->weights;
+  cvector3 gradient = cvector_zero();
   int nx = md->grid_size.x;
   int ny = md->grid_size.y;
   int nz = md->grid_size.z;
@@ -398,19 +401,19 @@ meep::vec material_grid_grad(vector3 p, material_data *md, const geometric_objec
 
   /* define a macro to give us data(x,y,z) on the grid,
      in row-major order: */
-#define D(x, y, z) (data[(((x)*ny + (y)) * nz + (z)) * stride])
+#define D(x, y, z) ((md->weights)[(((x)*ny + (y)) * nz + (z)) * stride])
 
-  double du_dx = (signflip_dx ? -1.0 : 1.0) *
+  duals::duald du_dx = (signflip_dx ? -1.0 : 1.0) *
     (((-D(x1, y1, z1) + D(x2, y1, z1)) * (1.0 - dy) +
       (-D(x1, y2, z1) + D(x2, y2, z1)) * dy) * (1.0 - dz) +
      ((-D(x1, y1, z2) + D(x2, y1, z2)) * (1.0 - dy) +
       (-D(x1, y2, z2) + D(x2, y2, z2)) * dy) * dz);
-  double du_dy = (signflip_dy ? -1.0 : 1.0) *
+  duals::duald du_dy = (signflip_dy ? -1.0 : 1.0) *
     ((-(D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) +
       (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx)) * (1.0 - dz) +
      (-(D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) +
       (D(x1, y2, z2) * (1.0 - dx) + D(x2, y2, z2) * dx)) * dz);
-  double du_dz = (signflip_dz ? -1.0 : 1.0) *
+  duals::duald du_dz = (signflip_dz ? -1.0 : 1.0) *
     (-((D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) * (1.0 - dy) +
        (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx) * dy) +
      ((D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) * (1.0 - dy) +
@@ -423,22 +426,21 @@ meep::vec material_grid_grad(vector3 p, material_data *md, const geometric_objec
   // respect to r2 where g(r2) is the to_geom_object_coords function (in libctl/utils/geom.c).
   // computing this quantity involves using the chain rule and thus the vector-Jacobian product
   // ∇u J where J is the Jacobian matrix of g.
-  vector3 grad_u;
-  grad_u.x = du_dx * nx;
-  grad_u.y = du_dy * ny;
-  grad_u.z = du_dz * nz;
+  vector3 g_real = make_vector3(nx*du_dx.rpart(),ny*du_dy.rpart(),nz*du_dz.rpart());
+  vector3 g_imag = make_vector3(nx*du_dx.dpart(),ny*du_dy.dpart(),nz*du_dz.dpart());
+  cvector3 grad_u = make_cvector3(g_real,g_imag);
+
   if (o != NULL) {
-    vector3 grad_u_J = to_geom_object_coords_VJP(grad_u, o);
-    gradient.set_direction(meep::X, grad_u_J.x);
-    gradient.set_direction(meep::Y, grad_u_J.y);
-    gradient.set_direction(meep::Z, grad_u_J.z);
-  }
-  else {
-    gradient.set_direction(meep::X, geometry_lattice.size.x == 0 ? 0 : grad_u.x / geometry_lattice.size.x);
-    gradient.set_direction(meep::Y, geometry_lattice.size.y == 0 ? 0 : grad_u.y / geometry_lattice.size.y);
-    gradient.set_direction(meep::Z, geometry_lattice.size.z == 0 ? 0 : grad_u.z / geometry_lattice.size.z);
+    gradient = to_geom_object_coords_VJP(grad_u, o);
+  } else {
+    g_real = make_vector3(nx*du_dx.rpart()/ geometry_lattice.size.x,
+      ny*du_dy.rpart()/ geometry_lattice.size.y,
+      nz*du_dz.rpart()/ geometry_lattice.size.z);
+    g_imag = make_vector3(nx*du_dx.dpart()/ geometry_lattice.size.x,
+      ny*du_dy.dpart()/ geometry_lattice.size.y,
+      nz*du_dz.dpart()/ geometry_lattice.size.z);
+    gradient = make_cvector3(g_real,g_imag);
   }
-
   return gradient;
 }
 
@@ -451,20 +453,20 @@ void map_lattice_coordinates(double &px, double &py, double &pz) {
     : 0.5 + (pz - geometry_center.z) / geometry_lattice.size.z;
 }
 
-meep::vec matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md) {
+cvector3 matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md) {
   if (md->material_grid_kinds == material_data::U_MIN ||
       md->material_grid_kinds == material_data::U_PROD)
     meep::abort("%s:%i:matgrid_grad does not support overlapping grids with U_MIN or U_PROD\n",__FILE__,__LINE__);
 
-  meep::vec gradient(zero_vec(dim));
+  cvector3 gradient = cvector_zero();
   int matgrid_val_count = 0;
 
   // iterate through object tree at current point
   if (tp) {
     do {
-      gradient += material_grid_grad(to_geom_box_coords(p, &tp->objects[oi]),
+      gradient = cvector_add(gradient,material_grid_grad(to_geom_box_coords(p, &tp->objects[oi]),
                                      (material_data *)tp->objects[oi].o->material,
-                                     tp->objects[oi].o);
+                                     tp->objects[oi].o));
       if (md->material_grid_kinds == material_data::U_DEFAULT) break;
       ++matgrid_val_count;
       tp = geom_tree_search_next(p, tp, &oi);
@@ -478,30 +480,54 @@ meep::vec matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md) {
   }
 
   if (md->material_grid_kinds == material_data::U_MEAN)
-    gradient = gradient * 1.0/matgrid_val_count;
+    gradient = cvector3_scale(1.0/matgrid_val_count,gradient);
 
   return gradient;
 }
+duals::duald dual_linear_interpolate(double rx, double ry, double rz, std::vector<duals::duald> &data,
+                          int nx, int ny, int nz, int stride) {
+
+  int x1, y1, z1, x2, y2, z2;
+  double dx, dy, dz;
+
+  meep::map_coordinates(rx, ry, rz, nx, ny, nz,
+                  x1, y1, z1, x2, y2, z2,
+                  dx, dy, dz);
+
+  /* define a macro to give us data(x,y,z) on the grid,
+     in row-major order (the order used by HDF5): */
+#define D(x, y, z) (data[(((x)*ny + (y)) * nz + (z)) * stride])
+
+  return (((D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) * (1.0 - dy) +
+           (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx) * dy) *
+              (1.0 - dz) +
+          ((D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) * (1.0 - dy) +
+           (D(x1, y2, z2) * (1.0 - dx) + D(x2, y2, z2) * dx) * dy) *
+              dz);
 
-double material_grid_val(vector3 p, material_data *md) {
+#undef D
+}
+duals::duald material_grid_val(vector3 p, material_data *md) {
   // given the relative location, p, interpolate the material grid point.
 
   if (!is_material_grid(md)) { meep::abort("Invalid material grid detected.\n"); }
-  return meep::linear_interpolate(p.x, p.y, p.z, md->weights, md->grid_size.x,
+  return dual_linear_interpolate(p.x, p.y, p.z, md->weights, md->grid_size.x,
                                   md->grid_size.y, md->grid_size.z, 1);
 
 }
 
-static double tanh_projection(double u, double beta, double eta) {
+static duals::duald tanh_projection(duals::duald u, double beta, double eta) {
   if (beta == 0) return u;
   if (u == eta) return 0.5; // avoid NaN when beta is Inf
-  double tanh_beta_eta = tanh(beta*eta);
-  return (tanh_beta_eta + tanh(beta*(u-eta))) /
+  duals::duald tanh_beta_eta = tanh(beta*eta);
+  duals::duald temp = (tanh_beta_eta + tanh(beta*(u-eta))) /
     (tanh_beta_eta + tanh(beta*(1-eta)));
+    //master_printf("temp %f %f\n",temp.rpart(),temp.dpart());
+  return temp;
 }
 
-double matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md) {
-  double uprod = 1.0, umin = 1.0, usum = 0.0, udefault = 0.0, u;
+duals::duald matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md) {
+  duals::duald uprod = 1.0, umin = 1.0, usum = 0.0, udefault = 0.0, u;
   int matgrid_val_count = 0;
 
   // iterate through object tree at current point
@@ -568,7 +594,7 @@ static void interp_tensors(vector3 diag_in_1, vector3 offdiag_in_1, vector3 diag
 void epsilon_material_grid(material_data *md, double u) {
   // NOTE: assume p lies on normalized grid within (0,1)
 
-  if (!(md->weights)) meep::abort("material params were not initialized!");
+  if (md->weights.size()==0) meep::abort("material params were not initialized!");
 
   medium_struct *mm = &(md->medium);
   medium_struct *m1 = &(md->medium_1);
@@ -858,15 +884,15 @@ void geom_epsilon::eval_material_pt(material_type &material, vector3 p) {
   switch (material->which_subclass) {
     // material grid: interpolate onto user specified material grid to get properties at r
     case material_data::MATERIAL_GRID: {
-      double u;
+      duals::duald u;
       int oi;
       geom_box_tree tp;
 
       tp = geom_tree_search(p, restricted_tree, &oi);
       // interpolate and project onto material grid
-      u = tanh_projection(matgrid_val(p, tp, oi, material)+this->u_p, material->beta, material->eta);
+      u = tanh_projection(matgrid_val(p, tp, oi, material), material->beta, material->eta);
       // interpolate material from material grid point
-      epsilon_material_grid(material, u);
+      epsilon_material_grid(material, u.rpart());
 
       return;
     }
@@ -914,7 +940,7 @@ double geom_epsilon::chi1p1(meep::field_type ft, const meep::vec &r) {
   material_epsmu(ft, material, &chi1p1, &chi1p1_inv);
   material_gc(material);
 
-  return (chi1p1.m00 + chi1p1.m11 + chi1p1.m22) / 3;
+  return (chi1p1.m00.rpart() + chi1p1.m11.rpart() + chi1p1.m22.rpart()) / 3;
 }
 
 /* Find frontmost object in v, along with the constant material behind it.
@@ -1040,98 +1066,97 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
   return true;
 }
 
-double get_material_grid_fill(meep::ndim dim, double d, double r, double u, double eta){
-  /* The fill fraction should describe the amount of u=1 material in the current pixel.
-
-  The analytic volume expressions for the end cap only specify the fill fraction
-  of the cap relative to the whole sphere... but we don't know what the cap 
-  actually is (solid or void). So we need a little extra logic to sort that out.
-  
-  Occasionally, the distance to the nearest interface is outside the current
-  sphere, which means we don't need to do any averaging (the fill is 0 or 1). Again,
-  we don't know if that means we are in void or solid, however, until we look
-  at u. To make things easy, we'll assume the cap is solid until the end, when we
-  can verify.
-  */
-  double rel_fill;
-  if (abs(d) >= abs(r)){
-    return -1.0; // garbage fill
-  } else {
-    if (dim == meep::D1)
-      rel_fill = (r-d)/(2*r);
-    else if (dim == meep::D2 || dim == meep::Dcyl){
-      rel_fill = (1/(r*r*meep::pi)) * (r*r*std::acos(d/r)-d*std::sqrt(r*r-d*d));
-    }
-    else if (dim == meep::D3)
-      rel_fill = (((r-d)*(r-d))/(4*meep::pi*r*r*r))*(2*r+d);
-  }
-
-  if (u <= eta){
-    return rel_fill;   // center is void, so cap must be solid
-  }else{
-    return 1-rel_fill; // center is solid, so cap must be void (and we need complement)
-  }
-}
-
 void geom_epsilon::eff_chi1inv_row(meep::component c, double chi1inv_row[3], const meep::volume &v,
                                    double tol, int maxeval) {
+  /* for speed reasons, we need to "wrap" the 
+  actual eff_chi1inv_row_grad function below, since
+  the original eff_chi1inv_row is a virtual function
+  and doesn't play well with default arguments */
+  eff_chi1inv_row_grad(c,chi1inv_row,v,tol,maxeval,false);
+}
+void geom_epsilon::eff_chi1inv_row_grad(meep::component c, double chi1inv_row[3], const meep::volume &v,
+                                   double tol, int maxeval, bool needs_grad) {
   symm_matrix meps_inv;
   bool fallback;
   eff_chi1inv_matrix(c, &meps_inv, v, tol, maxeval, fallback);
 
   if (fallback) { fallback_chi1inv_row(c, chi1inv_row, v, tol, maxeval); }
   else {
-    switch (component_direction(c)) {
+    /* for gradient calculations, we need to return
+    the dual part of the calcuation, which corresponds 
+    to the first derivative w.r.t. u_i */
+    if (needs_grad) {
+      switch (component_direction(c)) {
+      case meep::X:
+      case meep::R:
+          chi1inv_row[0] = meps_inv.m00.dpart();
+          chi1inv_row[1] = meps_inv.m01.dpart();
+          chi1inv_row[2] = meps_inv.m02.dpart();
+        break;
+      case meep::Y:
+      case meep::P:
+        chi1inv_row[0] = meps_inv.m01.dpart();
+        chi1inv_row[1] = meps_inv.m11.dpart();
+        chi1inv_row[2] = meps_inv.m12.dpart();
+        break;
+      case meep::Z:
+        chi1inv_row[0] = meps_inv.m02.dpart();
+        chi1inv_row[1] = meps_inv.m12.dpart();
+        chi1inv_row[2] = meps_inv.m22.dpart();
+        break;
+      case meep::NO_DIRECTION: chi1inv_row[0] = chi1inv_row[1] = chi1inv_row[2] = 0; break;
+      }
+    } else { // no gradient needed; standard epsilon evaluations
+      switch (component_direction(c)) {
       case meep::X:
       case meep::R:
-        chi1inv_row[0] = meps_inv.m00;
-        chi1inv_row[1] = meps_inv.m01;
-        chi1inv_row[2] = meps_inv.m02;
+          chi1inv_row[0] = meps_inv.m00.rpart();
+          chi1inv_row[1] = meps_inv.m01.rpart();
+          chi1inv_row[2] = meps_inv.m02.rpart();
         break;
       case meep::Y:
       case meep::P:
-        chi1inv_row[0] = meps_inv.m01;
-        chi1inv_row[1] = meps_inv.m11;
-        chi1inv_row[2] = meps_inv.m12;
+        chi1inv_row[0] = meps_inv.m01.rpart();
+        chi1inv_row[1] = meps_inv.m11.rpart();
+        chi1inv_row[2] = meps_inv.m12.rpart();
         break;
       case meep::Z:
-        chi1inv_row[0] = meps_inv.m02;
-        chi1inv_row[1] = meps_inv.m12;
-        chi1inv_row[2] = meps_inv.m22;
+        chi1inv_row[0] = meps_inv.m02.rpart();
+        chi1inv_row[1] = meps_inv.m12.rpart();
+        chi1inv_row[2] = meps_inv.m22.rpart();
         break;
       case meep::NO_DIRECTION: chi1inv_row[0] = chi1inv_row[1] = chi1inv_row[2] = 0; break;
+      }
     }
   }
 }
 
-void kottke_algorithm(meep::component c, symm_matrix *chi1inv_matrix, symm_matrix meps, material_type mat, 
-  /* 
-
+void kottke_algorithm(meep::component c, symm_matrix *chi1inv_matrix, symm_matrix &meps, 
+  symm_matrix &eps2, cvector3 &cnormal, duals::duald fill){
+  /* Ref: " Perturbation theory for anisotropic dielectric interfaces, 
+    and application to subpixel smoothing of discretized numerical methods",
+    https://math.mit.edu/~stevenj/papers/KottkeFa08.pdf
   */
-  
-  material_type mat_behind, vector3 normal, double fill){
-  material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix);
-  symm_matrix eps2, epsinv2;
+
   symm_matrix eps1, delta;
-  double Rot[3][3];
-  material_epsmu(meep::type(c), mat_behind, &eps2, &epsinv2);
+  duals::duald Rot[3][3];
   eps1 = meps;
 
-  Rot[0][0] = normal.x;
-  Rot[1][0] = normal.y;
-  Rot[2][0] = normal.z;
-  if (fabs(normal.x) > 1e-2 || fabs(normal.y) > 1e-2) {
-    Rot[0][2] = normal.y;
-    Rot[1][2] = -normal.x;
+  Rot[0][0] = cnormal.x.re + 1_e*cnormal.x.im;
+  Rot[1][0] = cnormal.y.re + 1_e*cnormal.y.im;
+  Rot[2][0] = cnormal.z.re + 1_e*cnormal.z.im;
+  if (fabs(cnormal.x.re) > 1e-2 || fabs(cnormal.y.re) > 1e-2) {
+    Rot[0][2] = cnormal.y.re + 1_e*cnormal.y.im;
+    Rot[1][2] = -(cnormal.x.re + 1_e*cnormal.x.im);
     Rot[2][2] = 0;
   }
   else { /* n is not parallel to z direction, use (x x n) instead */
     Rot[0][2] = 0;
-    Rot[1][2] = -normal.z;
-    Rot[2][2] = normal.y;
+    Rot[1][2] = -(cnormal.z.re + 1_e*cnormal.z.im);
+    Rot[2][2] = cnormal.y.re + 1_e*cnormal.y.im;
   }
   { /* normalize second column */
-    double s = Rot[0][2] * Rot[0][2] + Rot[1][2] * Rot[1][2] + Rot[2][2] * Rot[2][2];
+    duals::duald s = Rot[0][2] * Rot[0][2] + Rot[1][2] * Rot[1][2] + Rot[2][2] * Rot[2][2];
     s = 1.0 / sqrt(s);
     Rot[0][2] *= s;
     Rot[1][2] *= s;
@@ -1180,7 +1205,7 @@ void kottke_algorithm(meep::component c, symm_matrix *chi1inv_matrix, symm_matri
 
 #define SWAP(a, b)                                                                                 \
   {                                                                                                \
-    double xxx = a;                                                                                \
+    duals::duald xxx = a;                                                                          \
     a = b;                                                                                         \
     b = xxx;                                                                                       \
   }
@@ -1199,21 +1224,94 @@ void kottke_algorithm(meep::component c, symm_matrix *chi1inv_matrix, symm_matri
   sym_matrix_invert(chi1inv_matrix, &meps);
 }
 
+duals::duald get_material_grid_fill(meep::ndim dim, duals::duald d, double r, duals::duald u, double eta){
+  /* The fill fraction should describe the amount of u=1 material in the current pixel.
+
+  The analytic volume expressions for the end cap only specify the fill fraction
+  of the cap relative to the whole sphere... but we don't know what the cap 
+  actually is (solid or void). So we need a little extra logic to sort that out.
+  
+  Occasionally, the distance to the nearest interface is outside the current
+  sphere, which means we don't need to do any averaging (the fill is 0 or 1). Again,
+  we don't know if that means we are in void or solid, however, until we look
+  at u. To make things easy, we'll assume the cap is solid until the end, when we
+  can verify.
+  */
+  duals::duald rel_fill;
+  if (abs(d) >= abs(r)){
+    return -1.0; // garbage fill
+  } else {
+    if (dim == meep::D1)
+      rel_fill = (r-d)/(2*r);
+    else if (dim == meep::D2 || dim == meep::Dcyl){
+      rel_fill = (1/(r*r*meep::pi)) * (r*r*acos(d/r)-d*sqrt(r*r-d*d));
+    }
+    else if (dim == meep::D3)
+      rel_fill = (((r-d)*(r-d))/(4*meep::pi*r*r*r))*(2*r+d);
+  }
+
+  if (u <= eta){
+    return rel_fill;   // center is void, so cap must be solid
+  }else{
+    return 1-rel_fill; // center is solid, so cap must be void (and we need complement)
+  }
+}
+
+void normalize_dual(cvector3 &cv){
+  duals::duald d[3], norm;
+  d[0] = cv.x.re + 1_e*cv.x.im;
+  d[1] = cv.y.re + 1_e*cv.y.im;
+  d[2] = cv.z.re + 1_e*cv.z.im;
+
+  norm = sqrt(abs(d[0])*abs(d[0]) + abs(d[1])*abs(d[1]) + abs(d[2])*abs(d[2]));
+
+  d[0] = d[0] / norm;
+  d[1] = d[1] / norm;
+  d[2] = d[2] / norm;
+
+  cv.x.re = d[0].rpart(); cv.x.im = d[0].dpart();
+  cv.y.re = d[1].rpart(); cv.y.im = d[1].dpart();
+  cv.z.re = d[2].rpart(); cv.z.im = d[2].dpart();
+}
+
+duals::duald get_distance(cvector3 &cv, duals::duald uval, double eta){
+  duals::duald d[3], norm;
+  d[0] = cv.x.re + 1_e*cv.x.im;
+  d[1] = cv.y.re + 1_e*cv.y.im;
+  d[2] = cv.z.re + 1_e*cv.z.im;
+
+  norm = sqrt(abs(d[0])*abs(d[0]) + abs(d[1])*abs(d[1]) + abs(d[2])*abs(d[2]));
+  return (eta - uval) / norm;
+}
+
+void dual_interpolate_mat(material_type mat,symm_matrix &eps, duals::duald u) {
+  duals::duald u_bar = tanh_projection(u, mat->beta, mat->eta); // projection
+  if (std::isnan(u_bar.dpart())) u_bar = u_bar.rpart(); // when beta=inf, we need to correct when gradient breaks down
+  eps.m00 = mat->medium_1.epsilon_diag.x + u_bar * (mat->medium_2.epsilon_diag.x-mat->medium_1.epsilon_diag.x);
+  eps.m11 = mat->medium_1.epsilon_diag.y + u_bar * (mat->medium_2.epsilon_diag.y-mat->medium_1.epsilon_diag.y);
+  eps.m22 = mat->medium_1.epsilon_diag.z + u_bar * (mat->medium_2.epsilon_diag.z-mat->medium_1.epsilon_diag.z);
+  eps.m01 = mat->medium_1.epsilon_offdiag.x.re + u_bar * (mat->medium_2.epsilon_offdiag.x.re-mat->medium_1.epsilon_offdiag.x.re);
+  eps.m02 = mat->medium_1.epsilon_offdiag.y.re + u_bar * (mat->medium_2.epsilon_offdiag.y.re-mat->medium_1.epsilon_offdiag.y.re);
+  eps.m12 = mat->medium_1.epsilon_offdiag.z.re + u_bar * (mat->medium_2.epsilon_offdiag.z.re-mat->medium_1.epsilon_offdiag.z.re);
+}
+
 void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_matrix,
                                       const meep::volume &v, double tol, int maxeval,
                                       bool &fallback) {
   const geometric_object *o;
   material_type mat, mat_behind;
   vector3 p_mat, p_mat_behind;
-  symm_matrix meps;
-  vector3 p, shiftby, normal;
-  double fill;
+  symm_matrix meps, eps2, epsinv2;
+  vector3 p, shiftby;
+  cvector3 normal;
+  duals::duald fill, uval;
   fallback = false;
-  bool needs_cleanup=false;
-  medium_struct *medium_1, *medium_0;
+  
+  int oi;
+  geom_box_tree tp;
 
-  if (maxeval == 0 ||
-      (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind, p_mat, p_mat_behind))) {
+  if (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind, p_mat, p_mat_behind)
+      && (!is_material_grid(mat))) {
   noavg:
     get_material_pt(mat, v.center());
   trivial:
@@ -1222,6 +1320,20 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
     return;
   }
 
+  if (maxeval == 0){
+    if (is_material_grid(mat)){
+      tp = geom_tree_search(vec_to_vector3(v.center()), restricted_tree, &oi);
+      uval = matgrid_val(vec_to_vector3(v.center()), tp, oi, mat);
+    mgavg:
+      dual_interpolate_mat(mat,meps,uval);
+      sym_matrix_invert(chi1inv_matrix, &meps);
+      material_gc(mat);
+      return;
+    }else{
+      goto noavg;
+    }
+  }
+
   // For variable materials with do_averaging == true, switch over to slow fallback integration method.
   // For material grids, however, we have to do some more logic first...
   if ((is_variable(mat) && mat->do_averaging) || (is_variable(mat_behind) && mat_behind->do_averaging)) {
@@ -1242,53 +1354,57 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
     to just evaluate.
     */
     if (is_material_grid(mat)){
-      int oi;
-      geom_box_tree tp = geom_tree_search(p_mat, restricted_tree, &oi);
+      tp = geom_tree_search(p_mat, restricted_tree, &oi);
+      uval = matgrid_val(p_mat, tp, oi, mat);
       
-      meep::vec normal_vec_front(matgrid_grad(p_mat, tp, oi, mat));
-      if (normal_vec_front.x() == 0 && normal_vec_front.y() == 0 && normal_vec_front.z() == 0)
-        goto trivial; // couldn't get normal vector for this point
+      normal = matgrid_grad(p_mat, tp, oi, mat);
+      if (cvector3_re(normal).x == 0 && cvector3_re(normal).y == 0 && cvector3_re(normal).z == 0)
+        /* couldn't get normal vector for this point; no averaging is needed,
+        but we still need to interpolate onto our grid using duals */
+        goto mgavg;
       
-      double nabsinv = 1.0 / meep::abs(normal_vec_front);
-      LOOP_OVER_DIRECTIONS(normal_vec_front.dim, k) { normal_vec_front.set_direction(k,normal_vec_front.in_direction(k)*nabsinv); }
+      duals::duald d = get_distance(normal, uval, mat->eta);
+      normalize_dual(normal);
       
-      double uval = matgrid_val(p_mat, tp, oi, mat)+this->u_p;
-      double d = (mat->eta-uval) * nabsinv;
       double r = v.diameter()/2;
-      
-      fill = get_material_grid_fill(normal_vec_front.dim,d,r,uval,mat->eta);
-      normal = vec_to_vector3(normal_vec_front);
-
-      if (fill < 0){
-        // no averaging is needed
-        eval_material_pt(mat, vec_to_vector3(v.center()));
-        goto trivial;
+      fill = get_material_grid_fill(v.dim,d,r,uval,mat->eta);
+      if (fill.rpart() < 0){
+        /* we are far enough away from an interface that
+        we don't need any averaging -- but we still need
+        to interpolate from the material grid */
+        goto mgavg;
       } else{
-        /* we have a material grid interface within our pixel */
-        needs_cleanup = true;
-        medium_1 = new medium_struct(mat->medium_2);
-        medium_0 = new medium_struct(mat->medium_1);
-        mat = new material_data();
-        mat_behind = new material_data();
-        mat->medium = *medium_1;
-        mat_behind->medium = *medium_0;
+        /* we have a material grid interface within our pixel.
+        we therefore need to set the two materials used for
+        averaging to our corresponding solid and voide materials.
+        */
+        medium_struct medium_1 = medium_struct(mat->medium_2);
+        medium_struct medium_0 = medium_struct(mat->medium_1);
+        material_data mat_temp = material_data();
+        material_data mat_behind_temp = material_data();
+        mat_temp.medium = medium_1;
+        mat_behind_temp.medium = medium_0;
+        material_epsmu(meep::type(c), &mat_temp, &meps, chi1inv_matrix);
+        material_epsmu(meep::type(c), &mat_behind_temp, &eps2, &epsinv2);
       }
     } else if(is_variable(mat)) {
-      // no averaging is needed
+      // no averaging is needed (not a material grid)
       eval_material_pt(mat, vec_to_vector3(v.center()));
       goto trivial;
     // materials are non variable and uniform -- no need to average
     } else {
       goto trivial;
     }
-  } else {
-    normal = unit_vector3(normal_to_fixed_object(vector3_minus(p, shiftby), *o));
-    if (normal.x == 0 && normal.y == 0 && normal.z == 0)
+  } else { // Two different materials (perhaps a m.g. and a geom object)
+    
+    vector3 normal_re = unit_vector3(normal_to_fixed_object(vector3_minus(p, shiftby), *o));
+    if (normal_re.x == 0 && normal_re.y == 0 && normal_re.z == 0)
       goto noavg; // couldn't get normal vector for this point, punt
+    normal = make_cvector3(normal_re,make_vector3());
     geom_box pixel = gv2box(v);
     pixel.low = vector3_minus(pixel.low, shiftby);
     pixel.high = vector3_minus(pixel.high, shiftby);
-    fill = box_overlap_with_object(pixel, *o, tol, maxeval);
+    fill = duals::duald(box_overlap_with_object(pixel, *o, tol, maxeval),0);
     /* Evaluate materials in case they are variable.  This allows us to do fast subpixel averaging
       at the boundary of an object with a variable material, while remaining accurate enough if the
       material is continuous over the pixel.  (We make a first-order error by averaging as if the material
@@ -1299,17 +1415,35 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
       eval_material_pt(mat_behind, p_mat_behind);
       if (material_type_equal(mat, mat_behind)) goto trivial;
     }
+    
+    /* to properly determine the tensors of a material 
+      that has a material grid, we need to be careful.
+      Specifically, we need to include the effects of u_i
+      in the dual part of the material tensor */
+    if (is_material_grid(mat)){
+      int oi;
+      geom_box_tree tp = geom_tree_search(p_mat, restricted_tree, &oi);
+      duals::duald uval = matgrid_val(p_mat, tp, oi, mat);
+      dual_interpolate_mat(mat,meps,uval);
+      sym_matrix_invert(chi1inv_matrix, &meps);    
+    } else {
+      material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix);
+    }
+    if (is_material_grid(mat_behind)){
+      int oi;
+      geom_box_tree tp = geom_tree_search(p_mat_behind, restricted_tree, &oi);
+      duals::duald uval = matgrid_val(p_mat_behind, tp, oi, mat);
+      dual_interpolate_mat(mat_behind,eps2,uval);
+      sym_matrix_invert(&epsinv2, &eps2); // not really needed
+    } else {
+      material_epsmu(meep::type(c), mat_behind, &eps2, &epsinv2);
+    }
   }
 
   // it doesn't make sense to average metals (electric or magnetic)
   if (is_metal(meep::type(c), &mat) || is_metal(meep::type(c), &mat_behind)) goto noavg;
 
-  kottke_algorithm(c,chi1inv_matrix,meps,mat,mat_behind,normal,fill);
-
-  if (needs_cleanup){
-    delete medium_0, medium_1;
-    material_free(mat); material_free(mat_behind);
-  }
+  kottke_algorithm(c,chi1inv_matrix,meps,eps2,normal,fill);
 }
 
 static int eps_ever_negative = 0;
@@ -1328,7 +1462,7 @@ struct matgrid_volavg {
 
 static void get_uproj_w(const matgrid_volavg *mgva, double x0, double &u_proj, double &w) {
   // use a linear approximation for the material grid weights around the Yee grid point
-  u_proj = tanh_projection(mgva->uval + mgva->ugrad_abs*x0, mgva->beta, mgva->eta);
+  //u_proj = tanh_projection(mgva->uval + mgva->ugrad_abs*x0, mgva->beta, mgva->eta);
   if (mgva->dim == meep::D1)
     w = 1/(2*mgva->rad);
   else if (mgva->dim == meep::D2 || mgva->dim == meep::Dcyl)
@@ -1436,13 +1570,13 @@ void geom_epsilon::fallback_chi1inv_row(meep::component c, double chi1inv_row[3]
   material_data *md = material;
   meep::vec gradient(zero_vec(v.dim));
   double uval = 0;
-
+  // TODO cleanup and remove
   if (md->which_subclass == material_data::MATERIAL_GRID) {
     geom_box_tree tp;
     int oi;
     tp = geom_tree_search(p, restricted_tree, &oi);
-    gradient = matgrid_grad(p, tp, oi, md);
-    uval = matgrid_val(p, tp, oi, md)+this->u_p;
+    //gradient = matgrid_grad(p, tp, oi, md);
+    //uval = matgrid_val(p, tp, oi, md)+this->u_p;
   }
   else {
     gradient = normal_vector(meep::type(c), v);
@@ -1455,19 +1589,19 @@ void geom_epsilon::fallback_chi1inv_row(meep::component c, double chi1inv_row[3]
       chi1p1.m11 != chi1p1.m22 || chi1p1.m00 != chi1p1.m22 || meep::abs(gradient) < 1e-8) {
     int rownum = meep::component_direction(c) % 3;
     if (rownum == 0) {
-      chi1inv_row[0] = chi1p1_inv.m00;
-      chi1inv_row[1] = chi1p1_inv.m01;
-      chi1inv_row[2] = chi1p1_inv.m02;
+      chi1inv_row[0] = chi1p1_inv.m00.rpart();
+      chi1inv_row[1] = chi1p1_inv.m01.rpart();
+      chi1inv_row[2] = chi1p1_inv.m02.rpart();
     }
     else if (rownum == 1) {
-      chi1inv_row[0] = chi1p1_inv.m01;
-      chi1inv_row[1] = chi1p1_inv.m11;
-      chi1inv_row[2] = chi1p1_inv.m12;
+      chi1inv_row[0] = chi1p1_inv.m01.rpart();
+      chi1inv_row[1] = chi1p1_inv.m11.rpart();
+      chi1inv_row[2] = chi1p1_inv.m12.rpart();
     }
     else {
-      chi1inv_row[0] = chi1p1_inv.m02;
-      chi1inv_row[1] = chi1p1_inv.m12;
-      chi1inv_row[2] = chi1p1_inv.m22;
+      chi1inv_row[0] = chi1p1_inv.m02.rpart();
+      chi1inv_row[1] = chi1p1_inv.m12.rpart();
+      chi1inv_row[2] = chi1p1_inv.m22.rpart();
     }
     return;
   }
@@ -1783,7 +1917,7 @@ void geom_epsilon::sigma_row(meep::component c, double sigrow[3], const meep::ve
 
     tp = geom_tree_search(p, restricted_tree, &oi);
     // interpolate and project onto material grid
-    u = tanh_projection(matgrid_val(p, tp, oi, mat)+this->u_p, mat->beta, mat->eta);
+    u = tanh_projection(matgrid_val(p, tp, oi, mat), mat->beta, mat->eta).rpart();
     epsilon_material_grid(mat, u);   // interpolate material from material grid point
     mat->medium.check_offdiag_im_zero_or_abort();
   }
@@ -2215,7 +2349,9 @@ material_type make_material_grid(bool do_averaging, double beta, double eta, dou
 
 void update_weights(material_type matgrid, double *weights) {
   size_t N = matgrid->grid_size.x * matgrid->grid_size.y * matgrid->grid_size.z;
-  memcpy(matgrid->weights, weights, N * sizeof(double));
+  //memcpy(matgrid->weights, weights, N * sizeof(double));
+  for (size_t i=0;i<N;i++)
+    matgrid->weights.push_back(weights[i]);
 }
 
 /******************************************************************************/
@@ -2778,7 +2914,7 @@ std::complex<double> get_material_gradient(
     geom_epsilon *geps,               // material
     meep::grid_volume &gv,            // simulation grid volume
     double du,                        // step size
-    double *u,                        // matgrid
+    std::vector<duals::duald> &u,     // matgrid
     int idx                           // matgrid index
 ) {
   /*Compute the Aᵤx product from the -λᵀAᵤx calculation.
@@ -2821,25 +2957,26 @@ std::complex<double> get_material_gradient(
       v.set_direction_min(d, r.in_direction(d) - 0.5*gv.inva*sd);
       v.set_direction_max(d, r.in_direction(d) + 0.5*gv.inva*sd);
     }
-    double row_1[3], row_2[3], dA_du[3];
-    double orig = u[idx];
+    double dA_du[3];
     double maxevals = geps->use_anisotropic_averaging ? geps->maxeval : 0; // punt if no smoothing
-    u[idx] -= du;
-    geps->eff_chi1inv_row(adjoint_c, row_1, v, geps->tol, maxevals);
-    u[idx] += 2*du;
-    geps->eff_chi1inv_row(adjoint_c, row_2, v, geps->tol, maxevals);
-    u[idx] = orig;
-
-    for (int i=0;i<3;i++) dA_du[i] = (row_1[i] - row_2[i])/(2*du);
-    return dA_du[dir_idx] * fields_f;
+    /* we can calculate the gradient w.r.t. u_i using
+    a dual method. We just need to "flag" the index
+    within our weights array with a dual value
+    of "1", and the duals library will take care of the
+    rest. */
+    u[idx] += 1_e; // add dual component
+    geps->eff_chi1inv_row_grad(adjoint_c, dA_du, v, geps->tol, maxevals, true);
+    u[idx] -= 1_e; // remove dual component
+    
+    return -dA_du[dir_idx] * fields_f; // note the minus sign! (from Steven's adjoint notes)
+  
   } else {
-    double orig = u[idx];
     std::complex<double> row_1[3], row_2[3], dA_du[3];
     u[idx] -= du;
     eff_chi1inv_row_disp(adjoint_c,row_1,r,freq,geps);
     u[idx] += 2*du;
     eff_chi1inv_row_disp(adjoint_c,row_2,r,freq,geps);
-    u[idx] = orig;
+    u[idx] -= du;
 
     for (int i=0;i<3;i++) dA_du[i] = (row_1[i] - row_2[i])/(2*du);
     return dA_du[dir_idx] * fields_f  * cond_cmp(forward_c,r,freq,geps);
@@ -2855,13 +2992,14 @@ using finite differences (at the cost of slightly less accurate gradients
 due to floating-point roundoff errors). */
 void add_interpolate_weights(double rx, double ry, double rz,
                              double *data, int nx, int ny, int nz, int stride,
-                             double scaleby, double *udata, int ukind, double uval,
+                             double scaleby, std::vector<duals::duald> &udata, int ukind, duals::duald uval,
                              meep::vec r, geom_epsilon *geps,
                              meep::component adjoint_c, meep::component forward_c,
                              std::complex<double> fwd, std::complex<double> adj,
                              double freq, meep::grid_volume &gv, double du) {
   int x1, y1, z1, x2, y2, z2;
-  double dx, dy, dz, u;
+  double dx, dy, dz;
+  duals::duald u;
 
   meep::map_coordinates(rx, ry, rz, nx, ny, nz,
                         x1, y1, z1, x2, y2, z2,
@@ -2885,7 +3023,7 @@ in row-major order (the order used by HDF5): */
            dz);
 
   if (ukind == material_data::U_MIN && u != uval) return; // TODO look into this
-  if (ukind == material_data::U_PROD) scaleby *= uval / u;
+  if (ukind == material_data::U_PROD) scaleby *= uval.rpart() / u.rpart();
 
   for (int xi=0;xi<lx;xi++){
     for (int yi=0;yi<ly;yi++){
@@ -2911,7 +3049,7 @@ void material_grids_addgradient_point(double *v, vector3 p, double scalegrad, ge
   geom_box_tree tp;
   int oi, ois;
   material_data *mg, *mg_sum;
-  double uval;
+  duals::duald uval;
   int kind;
   tp = geom_tree_search(p, geps->geometry_tree, &oi);
 
@@ -2956,12 +3094,11 @@ void material_grids_addgradient_point(double *v, vector3 p, double scalegrad, ge
     it's up to the user to only have one unique design grid in this volume.*/
     vector3 sz = mg->grid_size;
     double *vcur = v;
-    double *ucur = mg->weights;
     uval = tanh_projection(matgrid_val(p, tp, oi, mg), mg->beta, mg->eta);
     do {
       vector3 pb = to_geom_box_coords(p, &tp->objects[oi]);
       add_interpolate_weights(pb.x, pb.y, pb.z, vcur, sz.x, sz.y, sz.z, 1,
-                              scalegrad, ucur, kind, uval,
+                              scalegrad, mg->weights, kind, uval,
                               vector3_to_vec(p), geps, adjoint_c, forward_c,
                               fwd, adj, freq, gv, tol);
       if (kind == material_data::U_DEFAULT) break;
@@ -2973,10 +3110,9 @@ void material_grids_addgradient_point(double *v, vector3 p, double scalegrad, ge
     map_lattice_coordinates(p.x,p.y,p.z);
     vector3 sz = mg->grid_size;
     double *vcur = v;
-    double *ucur = mg->weights;
     uval = tanh_projection(material_grid_val(p, mg), mg->beta, mg->eta);
     add_interpolate_weights(p.x, p.y, p.z, vcur, sz.x, sz.y, sz.z, 1,
-                            scalegrad, ucur, kind, uval,
+                            scalegrad, mg->weights, kind, uval,
                             vector3_to_vec(p), geps, adjoint_c, forward_c,
                             fwd, adj, freq, gv, tol);
   }
diff --git a/src/meepgeom.hpp b/src/meepgeom.hpp
index ab962615e..1e87dbf20 100644
--- a/src/meepgeom.hpp
+++ b/src/meepgeom.hpp
@@ -153,8 +153,20 @@ inline vector3 make_vector3(double x = 0.0, double y = 0.0, double z = 0.0) {
   return v;
 }
 
+inline cvector3 cvector3_scale(number s, cvector3 v) {
+  return make_cvector3(vector3_scale(s,cvector3_re(v)),vector3_scale(s,cvector3_im(v)));
+}
+
+inline cvector3 cvector_zero(){
+  return make_cvector3(make_vector3(),make_vector3());
+}
+
+inline cvector3 cvector_add(cvector3 cv1, cvector3 cv2){
+  return make_cvector3(vector3_plus(cvector3_re(cv1),cvector3_re(cv2)),vector3_plus(cvector3_im(cv1),cvector3_im(cv2)));
+}
+
 typedef struct {
-  double m00, m01, m02, m11, m12, m22;
+  duals::duald m00, m01, m02, m11, m12, m22;
 } symm_matrix;
 
 struct pol {
@@ -207,6 +219,8 @@ class geom_epsilon : public meep::material_function {
   virtual double chi1p1(meep::field_type ft, const meep::vec &r);
   virtual void eff_chi1inv_row(meep::component c, double chi1inv_row[3], const meep::volume &v,
                                double tol, int maxeval);
+  void eff_chi1inv_row_grad(meep::component c, double chi1inv_row[3], const meep::volume &v,
+                               double tol, int maxeval, bool needs_grad);
 
   void eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_matrix,
                           const meep::volume &v, double tol, int maxeval, bool &fallback);
@@ -293,10 +307,8 @@ void init_libctl(material_type default_mat, bool ensure_per,
 // material grid functions
 /***************************************************************/
 void update_weights(material_type matgrid, double *weights);
-meep::vec matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md);
-meep::vec material_grid_grad(vector3 p, material_data *md, const geometric_object *o);
-double matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md);
-double material_grid_val(vector3 p, material_data *md);
+duals::duald matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md);
+duals::duald material_grid_val(vector3 p, material_data *md);
 geom_box_tree calculate_tree(const meep::volume &v, geometric_object_list g);
 void material_grids_addgradient(double *v, size_t ng, std::complex<meep::realnum> *fields_a,
                                 std::complex<meep::realnum> *fields_f, size_t fields_shapes[12],
diff --git a/src/support/dual.hpp b/src/support/dual.hpp
new file mode 100644
index 000000000..58f476dea
--- /dev/null
+++ b/src/support/dual.hpp
@@ -0,0 +1,1471 @@
+//===-- duals/dual - Dual number class --------------------------*- C++ -*-===//
+//
+// Part of the cppduals project.
+// https://tesch1.gitlab.io/cppduals
+//
+// (c)2019 Michael Tesch. tesch1@gmail.com
+//
+// See https://gitlab.com/tesch1/cppduals/blob/master/LICENSE.txt for
+// license information.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef CPPDUALS_DUAL
+#define CPPDUALS_DUAL
+
+#ifndef PARSED_BY_DOXYGEN
+#include <cmath>
+#include <ctgmath>
+#include <limits>
+#include <type_traits>
+#include <complex>
+#include <random>
+#include <iostream>
+#endif
+
+#if !defined(CPPDUALS_IGNORE_COMPILER_VERSION) && !defined(_WIN32)
+#if __cplusplus < 201103L
+  #error CPPDUALS needs at least a C++11 compliant compiler
+#endif
+#endif
+
+/// Configure whether system has POSIX extern int signgam;
+#ifndef CPPDUALS_HAVE_SIGNGAM
+#ifndef _WIN32
+#define CPPDUALS_HAVE_SIGNGAM 1
+#endif
+#endif
+namespace duals {
+
+/**
+\file       dual
+\brief      Dual number class.
+
+\mainpage   cppduals
+
+\ref duals/dual is a single-header [Dual
+number](https://en.wikipedia.org/wiki/Dual_number) C++ template
+library that implements numbers of the type \f$(a + b \cdot
+\epsilon)\f$ where \f$ \epsilon \ne 0\f$, and \f$\epsilon^2 = 0\f$.
+
+`duals::dual<>` can be used for "automatic" differentiation, it can
+also recursively nest with itself for higher orders of differentiation
+(ie for the second derivative use `duals::dual<duals::dual<T>>`).  It
+can also be used to differentiate parts of complex functions as
+`std::complex<duals::dual<T>>`.  This file can be used stand-alone for
+dual number support, but for Eigen vectorization support rather the
+file \ref duals/dual_eigen should be included.
+
+```
+#include <duals/dual>
+
+using namespace duals::literals;
+
+template <class T> T   f(T x) { return pow(x,pow(x,x)); }
+template <class T> T  df(T x) { return pow(x,-1. + x + pow(x,x)) * (1. + x*log(x) + x*pow(log(x),2.)); }
+template <class T> T ddf(T x) { return (pow(x,pow(x,x)) * pow(pow(x,x - 1.) + pow(x,x)*log(x)*(log(x) + 1.), 2.) +
+                                        pow(x,pow(x,x)) * (pow(x,x - 1.) * log(x) +
+                                                           pow(x,x - 1.) * (log(x) + 1.) +
+                                                           pow(x,x - 1.) * ((x - 1.)/x + log(x)) +
+                                                           pow(x,x) * log(x) * pow(log(x) + 1., 2.) )); }
+
+int main()
+{
+  std::cout << "  f(2.)            = " << f(2.)    << "\n";
+  std::cout << " df(2.)            = " << df(2.)   << "\n";
+  std::cout << "ddf(2.)            = " << ddf(2.)  << "\n";
+  std::cout << "  f(2+1_e)         = " << f(2+1_e) << "\n";
+  std::cout << "  f(2+1_e).dpart() = " << f(2+1_e).dpart() << "\n";
+  duals::hyperduald x(2+1_e,1+0_e);
+  std::cout << "  f((2+1_e) + (1+0_e)_e).dpart().dpart() = " << f(x).dpart().dpart() << "\n";
+}
+```
+Produces (notice the derivative in the dual-part):
+```
+  f(2.)            = 16
+ df(2.)            = 107.11
+ddf(2.)            = 958.755
+  f(2+1_e)         = (16+107.11_e)
+  f(2+1_e).dpart() = 107.11
+  f((2+1_e) + (1+0_e)_e).dpart().dpart() = 958.755
+```
+
+How this works can be seen by inspecting the infinite [Taylor
+series](https://en.wikipedia.org/wiki/Taylor_series) expansion of a
+function \f$ f(x) \f$ at \f$ a \f$ with \f$ x = a + b\epsilon \f$.
+The series truncates itself due to the property \f$ \epsilon^2 = 0
+\f$, leaving the function's derivative at \f$ a \f$ in the "dual part"
+of the result (when \f$ b = 1 \f$):
+
+\f[
+\begin{split}
+f(a + b \epsilon) &= f(a) + f'(a)(b \epsilon) + \frac{f''(a)}{2!}(b \epsilon)^2
++ \frac{f'''(a)}{3!}(b \epsilon)^3 + \ldots   \\        \
+&= f(a) + f'(a)(b \epsilon)
+\end{split}
+\f]
+
+The class is contained in a single c++11 header `#include
+<duals/dual>`, for extended [Eigen](http://eigen.tuxfamily.org)
+support, instead include the header \ref duals/dual_eigen "#include <duals/dual_eigen>".
+
+Type X in the templates below can be any value which can be
+assigned to value_type.
+
+Type X also indicates a limitation to dual numbers of the same depth
+but (possibly) different value_type as `duals::dual<T>`.  For example,
+you can assign (or add/sub/mul/div) `duals::dual<float>` and
+`duals::dual<double>`, but you can not assign
+`duals::dual<duals::dual<float>>` to `duals::dual<float>`.
+
+Here is a synopsis of the class:
+
+```
+namespace duals {
+
+template<class T> class dual {
+
+    typedef T value_type;
+
+    dual(const & re = T(), const & du = T());
+    dual(const dual &);
+    template<class X> dual(const dual<X> &);
+
+    T rpart() const;
+    T dpart() const;
+
+    void rpart(T);
+    void dpart(T);
+
+    dual<T> operator-() const;
+    dual<T> operator+() const;
+
+    dual<T> & operator= (const T &);
+    dual<T> & operator+=(const T &);
+    dual<T> & operator-=(const T &);
+    dual<T> & operator*=(const T &);
+    dual<T> & operator/=(const T &);
+
+    dual<T> & operator=(const dual<T> &);
+    template<class X> dual<T> & operator= (const dual<X> &);
+    template<class X> dual<T> & operator+=(const dual<X> &);
+    template<class X> dual<T> & operator-=(const dual<X> &);
+    template<class X> dual<T> & operator*=(const dual<X> &);
+    template<class X> dual<T> & operator/=(const dual<X> &);
+
+    // The comparison operators are not strictly well-defined,
+    // they are implemented as comparison of the real part.
+
+    bool operator ==(const X &b) const;
+    bool operator !=(const X &b) const;
+
+    bool operator <(const X &b) const;
+    bool operator >(const X &b) const;
+    bool operator <=(const X &b) const;
+    bool operator >=(const X &b) const;
+    bool operator <(const dual<X> &b) const;
+    bool operator >(const dual<X> &b) const;
+    bool operator <=(const dual<X> &b) const;
+    bool operator >=(const dual<X> &b) const;
+
+};
+
+// Non-member functions:
+
+T rpart(dual<T>)        // Real part
+T dpart(dual<T>)        // Dual part
+dual<T> dconj(dual<T>)  // Dual-conjugate
+
+dual<T> random(dual<T> low = {0,0}, dual<T> high = {1,0})
+
+dual<T> exp(dual<T>)
+dual<T> log(dual<T>)
+dual<T> log10(dual<T>)
+dual<T> pow(dual<T>, U)
+dual<T> pow(U, dual<T>)
+dual<T> pow(dual<T>, dual<T>)
+dual<T> sqrt(dual<T>)
+dual<T> cbrt(dual<T>)
+dual<T> sin(dual<T>)
+dual<T> cos(dual<T>)
+dual<T> tan(dual<T>)
+dual<T> asin(dual<T>)
+dual<T> acos(dual<T>)
+dual<T> atan(dual<T>)
+
+// TODO:
+dual<T> sinh(dual<T>)
+dual<T> cosh(dual<T>)
+dual<T> tanh(dual<T>)
+dual<T> asinh(dual<T>)
+dual<T> acosh(dual<T>)
+dual<T> atanh(dual<T>)
+
+// Non-differentiable operations on the real part.
+T frexp(duals::dual<T> arg, int* exp );
+duals::dual<T> ldexp(duals::dual<T> arg, int exp );
+T trunc(duals::dual<T> d);
+T floor(duals::dual<T> d);
+T ceil(duals::dual<T> d);
+T round(duals::dual<T> d);
+int fpclassify(duals::dual<T> d);
+bool isfinite(duals::dual<T> d);
+bool isnormal(duals::dual<T> d);
+bool isinf(duals::dual<T> d);
+bool isnan(duals::dual<T> d);
+
+// Stream IO
+template<T> operator>>(basic_istream<charT, traits> &, dual<T> &);
+template<T> operator<<(basic_ostream<charT, traits> &, const dual<T> &);
+
+}
+```
+
+Some useful typedefs:
+
+```
+typedef dual<float> dualf;
+typedef dual<double> duald;
+typedef dual<long double> dualld;
+typedef dual<dualf> hyperdualf;
+typedef dual<duald> hyperduald;
+typedef dual<dualld> hyperdualld;
+typedef std::complex<dualf> cdualf;
+typedef std::complex<duald> cduald;
+typedef std::complex<dualld> cdualld;
+```
+
+There are also literals for dual parts defined in the inline
+namespace duals::literals.  They are available with `using
+namespace duals;` or `using namespace duals::literals`.
+
+```
+using namespace duals::literals;
+dualf  x = 3 + 4_ef;
+duald  y = 3 + 4_e;
+dualld z = 3 + 4_el;
+```
+
+And in case you dislike iostreams, there are some formatters for the
+[`{fmt}`](https://github.com/fmtlib/fmt) formatting library.  These
+are disabled by default, but can be enabled by `#define
+CPPDUALS_LIBFMT` for the `dual<>` formatter, and/or `#define
+CPPDUALS_LIBFMT_COMPLEX` for the std::complex<> formatter. There are
+three custom formatting flags that control how the dual numbers are
+printed, these must come before the normal `{fmt}` formatting spec:
+'$', '*', ','.  For me these are about 3x faster than iostreams.
+
+```
+#define CPPDUALS_LIBFMT
+#define CPPDUALS_LIBFMT_COMPLEX
+#inlcude <duals/dual>
+using namespace duals::literals;
+  ...
+  string s = fmt::format("{:}",  1 + 2_e); // s = "(1.0+2.0_e)"
+  string s = fmt::format("{:g}", 1 + 2_e); // s = "(1+2_e)"
+  string s = fmt::format("{:*}", 1 + 2_e); // s = "(1.0+2.0*e)"
+  string s = fmt::format("{:,}", 1 + 2_e); // s = "(1.0,2.0)"
+  string s = fmt::format("{:*,g}", complexd(1,2_e)); // s = "((1)+(0,2)*i)"
+```
+
+The "archaic Greek epsilon" logo is from [Wikimedia
+commons](https://commons.wikimedia.org/wiki/File:Greek_Epsilon_archaic.svg)
+
+Some casual reading material:
+
+- [ON QUATERNIONS, William Rowan Hamilton, Proceedings of the Royal Irish Academy, 3 (1847), pp. 1–16.](https://www.maths.tcd.ie/pub/HistMath/People/Hamilton/Quatern2/)
+- [Basic Space-Time Transformations Expressed by Means of Two-Component Number Systems](https://doi.org/10.12693%2Faphyspola.86.291)
+
+ */
+
+#ifndef PARSED_BY_DOXYGEN
+template<class T> class dual;
+#endif
+
+/// Check if T is dual<>, match non-duals.
+template <class T> struct is_dual : std::false_type {};
+
+#ifndef PARSED_BY_DOXYGEN
+
+/// Check if T is dual<>, match dual<>.
+template <class T> struct is_dual<dual<T> > : std::true_type {};
+
+#endif
+
+/// Check if T is std::complex<>, match non- std::complex<>.
+template <class T> struct is_complex : std::false_type {};
+
+#ifndef PARSED_BY_DOXYGEN
+
+/// Check if T is std::complex<>, match std::complex<>.
+template <class T> struct is_complex<std::complex<T> > : std::true_type {};
+
+#endif
+
+/// Dual_traits helper class.
+template <class T> struct dual_traits
+{
+  /// Depth of T - for T=scalar this is 0. for dual_traits<double> it
+  /// is 1.
+  enum { depth = 0 }; // -Wenum-compare
+
+  /// The real storage type.
+  typedef T real_type;
+};
+
+#ifndef PARSED_BY_DOXYGEN
+
+/// dual_traits for dual<> types
+template <class T> struct dual_traits<dual<T>>
+{
+  /// Depth to which this dual<> type is nested.  One (1) is a
+  /// first-level dual, whereas non-duals have a depth of 0.
+  enum { depth = dual_traits<T>::depth + 1 };
+
+  /// The real storage type.
+  typedef typename dual_traits<T>::real_type real_type;
+};
+
+template <class T> struct dual_traits<std::complex<dual<T>>>
+{
+  /// complex<dual<T>> have the same 'depth' as their dual.
+  enum { depth = dual_traits<T>::depth };
+
+  /// The real storage type.
+  typedef typename dual_traits<T>::real_type real_type;
+};
+
+namespace detail {
+
+template<class T>
+struct Void { typedef void type; };
+template<class T, class U = void>
+struct has_member_type : std::false_type {};
+template<class T>
+struct has_member_type<T, typename Void<typename T::type>::type > : std::true_type {
+  struct wrap {
+    typedef typename T::type type;
+    typedef typename T::type ReturnType;
+  };
+};
+
+} // namespace detail
+
+/// Promote two types - default according to common_type
+template <class T, class U, class V = void> struct promote : std::common_type<T,U> {};
+
+/// Can types A and B be promoted to a common type?
+template<class A, class B> using can_promote = detail::has_member_type<promote<A,B>>;
+
+template <class T, class U>
+struct promote<dual<T>, dual<U>,
+               typename std::enable_if<(can_promote<T,U>::value
+                                        && (int)dual_traits<T>::depth == (int)dual_traits<U>::depth)>::type>
+{
+  typedef dual<typename promote<U,T>::type> type;
+};
+template <class T, class U>
+struct promote<dual<T>, U,
+               typename std::enable_if<(can_promote<T,U>::value
+                                        && (int)dual_traits<T>::depth >= (int)dual_traits<U>::depth
+                                        && !is_complex<U>::value)>::type>
+{
+  typedef dual<typename promote<U,T>::type> type;
+};
+template <class T, class U>
+struct promote<U, dual<T>,
+               typename std::enable_if<(can_promote<T,U>::value
+                                        && (int)dual_traits<T>::depth >= (int)dual_traits<U>::depth
+                                        && !is_complex<U>::value)>::type>
+{
+  typedef dual<typename promote<U,T>::type> type;
+};
+// /////////////////////////////////////////////////
+template <class T, class U>
+struct promote<std::complex<T>, std::complex<U>,
+               typename std::enable_if<(can_promote<T,U>::value
+                                        && (is_dual<T>::value || is_dual<U>::value))>::type>
+{
+  typedef std::complex<typename promote<T,U>::type> type;
+};
+template <class T, class U>
+struct promote<std::complex<T>, U,
+               typename std::enable_if<(can_promote<T,U>::value
+                                        && (is_dual<T>::value || is_dual<U>::value)
+                                        && !is_complex<U>::value)>::type>
+{
+  typedef std::complex<typename promote<T,U>::type> type;
+};
+template <class T, class U>
+struct promote<U, std::complex<T>,
+               typename std::enable_if<(can_promote<T,U>::value
+                                        && (is_dual<T>::value || is_dual<U>::value)
+                                        && !is_complex<U>::value)>::type>
+{
+  typedef std::complex<typename promote<T,U>::type> type;
+};
+// /////////////////////////////////////////////////
+
+#endif // PARSED_BY_DOXYGEN
+
+} // namespace duals
+
+#ifndef PARSED_BY_DOXYGEN
+
+#define NOMACRO // thwart macroification
+#ifdef EIGEN_PI
+#define MY_PI EIGEN_PI
+#else
+#define MY_PI M_PI
+#endif
+
+#endif // PARSED_BY_DOXYGEN
+
+#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
+_LIBCPP_BEGIN_NAMESPACE_STD
+#else
+namespace std {
+#endif
+
+#ifdef CPPDUALS_ENABLE_STD_IS_ARITHMETIC
+
+/// Duals are as arithmetic as their value_type is arithmetic.
+template <class T>
+struct is_arithmetic<duals::dual<T>> : is_arithmetic<T> {};
+
+#endif // CPPDUALS_ENABLE_IS_ARITHMETIC
+
+/// Duals are compound types.
+template <class T>
+struct is_compound<duals::dual<T>> : true_type {};
+
+// Modification of std::numeric_limits<> per
+// C++03 17.4.3.1/1, and C++11 18.3.2.3/1.
+template <class T>
+struct numeric_limits<duals::dual<T>> : numeric_limits<T> {
+  static constexpr bool is_specialized = true;
+  static constexpr duals::dual<T> min NOMACRO ()  { return numeric_limits<T>::min NOMACRO (); }
+  static constexpr duals::dual<T> lowest()        { return numeric_limits<T>::lowest(); }
+  static constexpr duals::dual<T> max NOMACRO ()  { return numeric_limits<T>::max NOMACRO (); }
+  static constexpr duals::dual<T> epsilon()       { return numeric_limits<T>::epsilon(); }
+  static constexpr duals::dual<T> round_error()   { return numeric_limits<T>::round_error(); }
+  static constexpr duals::dual<T> infinity()      { return numeric_limits<T>::infinity(); }
+  static constexpr duals::dual<T> quiet_NaN()     { return numeric_limits<T>::quiet_NaN(); }
+  static constexpr duals::dual<T> signaling_NaN() { return numeric_limits<T>::signaling_NaN(); }
+  static constexpr duals::dual<T> denorm_min()    { return numeric_limits<T>::denorm_min(); }
+};
+
+#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
+_LIBCPP_END_NAMESPACE_STD
+#else
+} // namespace std
+#endif
+
+namespace duals {
+
+#ifndef PARSED_BY_DOXYGEN
+
+// T and X are wrapped in a dual<>
+#define CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X)                              \
+  typename std::enable_if<(int)duals::dual_traits<X>::depth ==          \
+                          (int)duals::dual_traits<T>::depth, int>::type = 0,   \
+    typename std::enable_if<can_promote<T,X>::value,int>::type = 0
+
+// Both T and U are wrapped in a dual<>
+#define CPPDUALS_ENABLE_SAME_DEPTH_AND_COMMON_T(T,U)                    \
+  typename std::enable_if<(int)duals::dual_traits<T>::depth ==          \
+                          (int)duals::dual_traits<U>::depth, int>::type = 0, \
+    typename std::enable_if<can_promote<T,U>::value,int>::type = 0,     \
+    typename common_t = dual<typename duals::promote<T,U>::type>
+
+// T is wrapped in a dual<>, U may or may not be.
+#define CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U)                     \
+  typename std::enable_if<((int)duals::dual_traits<T>::depth >=         \
+                           (int)duals::dual_traits<U>::depth), int>::type = 0, \
+    typename std::enable_if<can_promote<dual<T>,U>::value,int>::type = 0, \
+    typename common_t = typename duals::promote<dual<T>, U>::type
+
+// T is wrapped in complex<dual<>>
+#define CPPDUALS_ENABLE_LEQ_DEPTH_AND_CX_COMMON_T(T,U)                  \
+  typename std::enable_if<((int)duals::dual_traits<T>::depth >=         \
+                           (int)duals::dual_traits<U>::depth), int>::type = 0, \
+  typename std::enable_if<can_promote<std::complex<dual<T>>,U>::value,int>::type = 0, \
+    typename common_t = typename duals::promote<std::complex<dual<T> >,U>::type
+
+#define CPPDUALS_ENABLE_IF(...) typename std::enable_if< (__VA_ARGS__) , int>::type = 0
+
+#endif
+
+/*! \page user Background
+  TODO: Add text here...
+*/
+
+/// Abstract dual number class.  Can nest with other dual numbers and
+/// complex numbers.
+template<class T>
+class dual
+{
+public:
+  /// Class type of rpart() and dpart().  This type can be nested
+  /// dual<> or std::complex<>.
+  typedef T value_type;
+
+private:
+  /// The real part.
+  value_type _real;
+  /// The dual part.
+  value_type _dual;
+
+public:
+
+  /// Construct dual from optional real and dual parts.
+  constexpr
+  dual(const value_type re = value_type(), const value_type du = value_type())
+    : _real(re), _dual(du) {}
+
+  /// Copy construct from a dual of equal depth.
+  template<class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X),
+           CPPDUALS_ENABLE_IF(!is_complex<X>::value)>
+  dual(const dual<X> & x)
+    : _real(x.rpart()), _dual(x.dpart()) {}
+
+  /// Cast to a complex<dual<>> with real part equal to *this.
+  operator std::complex<dual<T>>() { return std::complex<dual<T>>(*this); }
+
+  /// Explicit cast to an arithmetic type retains the rpart()
+  template <class X,
+            CPPDUALS_ENABLE_IF(std::is_arithmetic<X>::value && !is_dual<X>::value)>
+  explicit operator X() const { return X(_real); }
+
+  /// Get the real part.
+  T rpart() const { return _real; }
+
+  /// Get the dual part.
+  T dpart() const { return _dual; }
+
+  /// Set the real part.
+  void rpart(value_type re) { _real = re; }
+
+  /// Get the dual part.
+  void dpart(value_type du) { _dual = du; }
+
+  /// Unary negation
+  dual<T> operator-() const { return dual<T>(-_real, -_dual); }
+
+  /// Unary nothing
+  dual<T> operator+() const { return *this; }
+
+  /// Assignment of `value_type` assigns the real part and zeros the dual part.
+  dual<T> & operator= (const T & x) { _real =  x; _dual = value_type(); return *this; }
+
+  /// Add a relatively-scalar to this dual.
+  dual<T> & operator+=(const T & x) { _real += x; return *this; }
+
+  /// Subtract a relatively-scalar from this dual.
+  dual<T> & operator-=(const T & x) { _real -= x; return *this; }
+
+  /// Multiply a relatively-scalar with this dual.
+  dual<T> & operator*=(const T & x) { _real *= x; _dual *= x; return *this; }
+
+  /// Divide this dual by relatively-scalar.
+  dual<T> & operator/=(const T & x) { _real /= x; _dual /= x; return *this; }
+
+  //dua & operator=(const dual & x)  { _real =  x.rpart(); _dual =  x.dpart(); }
+  template<class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X)>
+  dual<T> & operator= (const dual<X> & x) { _real =  x.rpart(); _dual =  x.dpart(); return *this; }
+
+  /// Add a dual of the same depth to this dual.
+  template<class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X)>
+  dual<T> & operator+=(const dual<X> & x) { _real += x.rpart(); _dual += x.dpart(); return *this; }
+
+  /// Subtract a dual of the same depth from this dual.
+  template<class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X)>
+  dual<T> & operator-=(const dual<X> & x) { _real -= x.rpart(); _dual -= x.dpart(); return *this; }
+
+  /// Multiply this dual with a dual of same depth.
+  template<class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X)>
+  dual<T> & operator*=(const dual<X> & x) {
+    _dual = _real * x.dpart() + _dual * x.rpart();
+    _real = _real * x.rpart();
+    return *this;
+  }
+
+  /// Divide this dual by another dual of the same or lower depth.
+  template<class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X)>
+  dual<T> & operator/=(const dual<X> & x) {
+    _dual = (_dual * x.rpart() - _real * x.dpart()) / (x.rpart() * x.rpart());
+    _real = _real / x.rpart();
+    return *this;
+  }
+
+  //The following comparison operators are not strictly well-defined,
+  //they are implemented as comparison of the real parts.
+#if 0
+  /// Compare this dual with another dual, comparing real parts for equality.
+  template<class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X)>
+  bool operator ==(const dual<X> &b) const { return _real == b.rpart(); }
+
+  /// Compare this dual with another dual, comparing real parts for inequality.
+  template<class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X)>
+  bool operator !=(const dual<X> &b) const { return _real != b.rpart(); }
+
+  /// Compare real part against real part of b
+  template<class X, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,X)>
+  bool operator <(const dual<X> &b) const { return _real < b.rpart(); }
+
+  /// Compare real part against real part of b
+  template<class X, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,X)>
+  bool operator >(const dual<X> &b) const { return _real > b.rpart(); }
+
+  /// Compare real part against real part of b
+  template<class X, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,X)>
+  bool operator <=(const dual<X> &b) const { return _real <= b.rpart(); }
+
+  /// Compare real part against real part of b
+  template<class X, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,X)>
+  bool operator >=(const dual<X> &b) const { return _real >= b.rpart(); }
+#endif
+};
+
+/// Get the dual's real part.
+template <class T> T rpart(const dual<T> & x) { return x.rpart(); }
+
+/// Get the dual's dual part.
+template <class T> T dpart(const dual<T> & x) { return x.dpart(); }
+
+/// R-part of complex<dual<>> is non-dual complex<> (not to be confused with real())
+template <class T> std::complex<T> rpart(const std::complex<dual<T>> & x)
+{ return std::complex<T>(x.real().rpart(), x.imag().rpart()); }
+
+/// Dual part of complex<dual<>> is complex<>
+template <class T> std::complex<T> dpart(const std::complex<dual<T>> & x)
+{ return std::complex<T>(x.real().dpart(), x.imag().dpart()); }
+
+/// Get a non-dual's real part.
+template <class T,
+          CPPDUALS_ENABLE_IF((std::is_arithmetic<T>::value &&
+                              !std::is_compound<T>::value) || is_complex<T>::value)>
+T rpart(const T & x) { return x; }
+
+/// Get a non-dual's dual part := zero.
+template <class T,
+          CPPDUALS_ENABLE_IF((std::is_arithmetic<T>::value &&
+                              !std::is_compound<T>::value) || is_complex<T>::value)>
+T dpart(const T & ) { return T(0); }
+
+#ifndef PARSED_BY_DOXYGEN
+
+/// Dual +-*/ ops with another entity
+#define CPPDUALS_BINARY_OP(op)                                          \
+  template<class T, class U, CPPDUALS_ENABLE_SAME_DEPTH_AND_COMMON_T(T,U)> \
+  common_t                                                              \
+  operator op(const dual<T> & z, const dual<U> & w) {                   \
+    common_t x(z);                                                      \
+    return x op##= w;                                                   \
+  }                                                                     \
+  template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U), \
+    CPPDUALS_ENABLE_IF(!std::is_same<U,std::complex<dual<T>>>::value)>  \
+  common_t                                                              \
+  operator op(const dual<T> & z, const U & w) {                         \
+    common_t x(z);                                                      \
+    return x op##= w;                                                   \
+  }                                                                     \
+  template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U), \
+    CPPDUALS_ENABLE_IF(!std::is_same<U,std::complex<dual<T>>>::value)>  \
+  common_t                                                              \
+  operator op(const U & z, const dual<T> & w) {                         \
+    common_t x(z);                                                      \
+    return x op##= w;                                                   \
+  }                                                                     \
+  template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_CX_COMMON_T(T,U), \
+    CPPDUALS_ENABLE_IF(!std::is_same<U,std::complex<dual<T>>>::value)>  \
+  common_t                                                              \
+  operator op(const std::complex<dual<T>> & z, const U & w) {           \
+    common_t x(z);                                                      \
+    return x op##= w;                                                   \
+  }                                                                     \
+  template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_CX_COMMON_T(T,U), \
+    CPPDUALS_ENABLE_IF(!std::is_same<U,std::complex<dual<T>>>::value)>  \
+  common_t                                                              \
+  operator op(const U & z, const std::complex<dual<T>> & w) {           \
+    common_t x(z);                                                      \
+    return x op##= w;                                                   \
+  }
+
+CPPDUALS_BINARY_OP(+)
+CPPDUALS_BINARY_OP(-)
+CPPDUALS_BINARY_OP(*)
+CPPDUALS_BINARY_OP(/)
+
+/// Dual compared to a non-complex lower rank thing
+#define CPPDUALS_LHS_COMPARISON(op)                                     \
+  template<class T, class U, CPPDUALS_ENABLE_SAME_DEPTH_AND_COMMON_T(T,U)> \
+  bool                                                                  \
+  operator op(const dual<T> & a, const dual<U> & b) {                   \
+    return a.rpart() op b.rpart();                                      \
+  }                                                                     \
+  template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U), \
+           CPPDUALS_ENABLE_IF(!is_complex<U>::value)>                   \
+  bool                                                                  \
+  operator op(const U & a, const dual<T> & b) {                         \
+    return a op b.rpart();                                              \
+  }                                                                     \
+  template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U), \
+           CPPDUALS_ENABLE_IF(!is_complex<U>::value)>                   \
+  bool                                                                  \
+  operator op(const dual<T> & a, const U & b) {                         \
+    return a.rpart() op b;                                              \
+  }
+
+CPPDUALS_LHS_COMPARISON(<)
+CPPDUALS_LHS_COMPARISON(>)
+CPPDUALS_LHS_COMPARISON(<=)
+CPPDUALS_LHS_COMPARISON(>=)
+CPPDUALS_LHS_COMPARISON(==)
+CPPDUALS_LHS_COMPARISON(!=)
+
+#endif // PARSED_BY_DOXYGEN
+
+} // namespace duals
+
+// some useful functions of solely the real part
+#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
+_LIBCPP_BEGIN_NAMESPACE_STD
+#else
+namespace std {
+#endif
+
+#ifndef PARSED_BY_DOXYGEN
+
+#define make_math(T)                                                    \
+  inline T (frexp)(const duals::dual<T> & arg, int* exp ) { return (frexp)(arg.rpart(), exp); } \
+  inline duals::dual<T> (ldexp)(const duals::dual<T> & arg, int exp ) { return arg * std::pow((T)2,exp); } \
+  inline T (trunc)(const duals::dual<T> & d) { return (trunc)(d.rpart()); } \
+  inline T (floor)(const duals::dual<T> & d) { return (floor)(d.rpart()); } \
+  inline T (ceil)(const duals::dual<T> & d)  { return (ceil)(d.rpart()); } \
+  inline T (round)(const duals::dual<T> & d) { return (round)(d.rpart()); } \
+  inline int (fpclassify)(const duals::dual<T> & d) { return (fpclassify)(d.rpart()); } \
+  inline bool (isfinite)(const duals::dual<T> & d) { return (isfinite)(d.rpart()); } \
+  inline bool (isnormal)(const duals::dual<T> & d) { return (isnormal)(d.rpart()); } \
+  inline bool (isinf)(const duals::dual<T> & d) { return (isinf)(d.rpart()); } \
+  inline bool (isnan)(const duals::dual<T> & d) { return (isnan)(d.rpart()); } \
+
+make_math(float)
+make_math(double)
+make_math(long double)
+
+#undef make_math
+
+#endif // PARSED_BY_DOXYGEN
+
+#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
+_LIBCPP_END_NAMESPACE_STD
+#else
+} // namespace std
+#endif
+namespace duals {
+
+namespace randos {
+
+// Random real value between a and b.
+template<class T,
+         typename dist = std::uniform_real_distribution<T>,
+         CPPDUALS_ENABLE_IF(!is_complex<T>::value && !is_dual<T>::value)>
+T random(T a = T(0), T b = T(1)) {
+  static std::default_random_engine generator;
+  dist distribution(a, b);
+  return distribution(generator);
+}
+
+template<class T,
+         typename dist = std::uniform_real_distribution<T>,
+         CPPDUALS_ENABLE_IF(!is_complex<T>::value && !is_dual<T>::value)>
+T random2(T a = T(0), T b = T(1)) { return random<T,dist>(a,b); }
+
+// Helper class for testing - also random value in dual part.
+template <class DT,
+          CPPDUALS_ENABLE_IF(is_dual<DT>::value)>
+DT random2(DT a = DT(0,0), DT b = DT(1,1)) {
+  using randos::random;
+  return DT(a.rpart() + random2<typename DT::value_type>() * (b.rpart() - a.rpart()),
+            a.dpart() + random2<typename DT::value_type>() * (b.dpart() - a.dpart()));
+}
+
+// Helper class for testing - also random value in dual part of the complex.
+template<class CT,
+         CPPDUALS_ENABLE_IF(is_complex<CT>::value)>
+CT random2(CT a = CT(0,0), CT b = CT(1,1)) {
+  return CT(a.real() + random2<typename CT::value_type>() * (b.real() - a.real()),
+            a.imag() + random2<typename CT::value_type>() * (b.imag() - a.imag()));
+}
+
+} // randos
+
+/// Random real and dual parts, used by Eigen's Random(), by default
+// the returned value has zero dual part and is in the range [0+0_e,
+// 1+0_e].
+template <class DT,
+          typename dist = std::uniform_real_distribution<typename DT::value_type>,
+          CPPDUALS_ENABLE_IF(is_dual<DT>::value)>
+DT random(DT a = DT(0,0), DT b = DT(1,0)) {
+  using randos::random;
+  return DT(random<typename DT::value_type,dist>(a.rpart(),b.rpart()),
+            random<typename DT::value_type,dist>(a.dpart(),b.dpart()));
+}
+
+/// Complex Conjugate of a dual is just the dual.
+template<class T> dual<T> conj(const dual<T> & x) { return x; }
+
+/// Conjugate a thing that's not dual and not complex -- it has no
+/// complex value so just return it.  This is different from
+/// std::conj() which promotes the T to a std::complex<T>.
+template<class T, CPPDUALS_ENABLE_IF(!is_dual<T>::value &&
+                                     !is_complex<T>::value &&
+                                     std::is_arithmetic<T>::value)>
+T conj(const T & x) { return x; }
+
+/// Dual Conjugate, such that x*dconj(x) = rpart(x)^2.  Different
+/// function name from complex conjugate conj().
+template<class T> dual<T> dconj(const dual<T> & x) {
+  return dual<T>(x.rpart(), - x.dpart());
+}
+
+/// Dual Conjugate of a complex, such that x*dconj(x) = rpart(x)^2.
+/// Different function name from complex conjugate conj().
+template<class T> std::complex<T> dconj(const std::complex<T> & x) {
+  return std::complex<T>(dconj(x.real()), dconj(x.imag()));
+}
+
+/// Conjugate a thing that's not dual and not complex.
+template<class T, CPPDUALS_ENABLE_IF(!is_dual<T>::value &&
+                                     !is_complex<T>::value &&
+                                     std::is_arithmetic<T>::value)>
+T dconj(const T & x) { return x; }
+
+/// Exponential e^x
+template<class T> dual<T> exp(const dual<T> & x) {
+  using std::exp;
+  T v = exp(x.rpart());
+  return dual<T>(v,
+                 v * x.dpart());
+}
+
+/// Natural log ln(x)
+template<class T> dual<T> log(const dual<T> & x) {
+  using std::log;
+  T v = log(x.rpart());
+  if (x.dpart() == T(0))
+    return v;
+  else
+    return dual<T>(v, x.dpart() / x.rpart());
+}
+
+template<class T> dual<T> log10(const dual<T> & x) {
+  using std::log;
+  return log(x) / log(static_cast<T>(10));
+}
+
+template<class T> dual<T> log2(const dual<T> & x) {
+  using std::log;
+  return log(x) / log(static_cast<T>(2));
+}
+
+template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U)>
+common_t
+pow(const dual<T> & x, const U & y) {
+  using std::pow;
+  typedef typename common_t::value_type V;
+
+  return common_t(pow(x.rpart(), y),
+                  x.dpart() == V(0) ? V(0)
+                  : x.dpart() * y * pow(x.rpart(), y - U(1)));
+}
+
+template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U)>
+common_t
+pow(const U & x, const dual<T> & y) {
+  return pow(common_t(x), y);
+}
+
+template<class T, class U, CPPDUALS_ENABLE_SAME_DEPTH_AND_COMMON_T(T,U)>
+common_t
+pow(const dual<T> & f, const dual<U> & g) {
+  using std::pow;
+  using std::log;
+#if 1
+  using std::floor;
+  typedef typename common_t::value_type V;
+  common_t result;
+
+ if (f.rpart() == T(0) && g.rpart() >= U(1)) {
+    if (g.rpart() > U(1)) {
+      result = common_t(0);
+    } else {
+      result = f;
+    }
+
+  } else {
+    if (f.rpart() < T(0) && g.rpart() == floor(g.rpart())) {
+      V const tmp = g.rpart() * pow(f.rpart(), g.rpart() - U(1.0));
+      result = common_t(pow(f.rpart(), g.rpart()),
+                        f.dpart() == T(0) ? T(0) : f.dpart() * tmp);
+      if (g.dpart() != U(0.0)) {
+        // Return a NaN when g.dpart() != 0.
+        result.dpart(std::numeric_limits<T>::quiet_NaN());
+      }
+    } else {
+      // Handle the remaining cases. For cases 4,5,6,9 we allow the log()
+      // function to generate -HUGE_VAL or NaN, since those cases result in a
+      // nonfinite derivative.
+      V const tmp1 = pow(f.rpart(), g.rpart());
+      V const tmp2 = g.rpart() * pow(f.rpart(), g.rpart() - T(1.0));
+      V const tmp3 = tmp1 * log(f.rpart());
+      result = common_t(tmp1,
+                        f.dpart() == T(0) && g.dpart() == U(0) ? V(0)
+                        : tmp2 * f.dpart() + tmp3 * g.dpart());
+    }
+  }
+
+  return result;
+#else
+  T v = pow(f.rpart(), g.rpart());
+  return common_t(v,
+                  pow(f.rpart(), g.rpart() - T(1)) *
+                  (g.rpart() * f.dpart()
+                   + f.rpart() * log(f.rpart()) * g.dpart()));
+#endif
+}
+
+namespace utils {
+template <typename T> int sgn(T val) {
+  return (T(0) < val) - (val < T(0));
+}
+}
+
+template<class T> dual<T> abs(const dual<T> & x) {
+  using std::abs;
+  return dual<T>(abs(x.rpart()),
+                 x.dpart() * utils::sgn(x.rpart()));
+}
+
+template<class T> dual<T> fabs(const dual<T> & x) {
+  using std::fabs;
+  return dual<T>(fabs(x.rpart()),
+                 x.dpart() * utils::sgn(x.rpart()));
+}
+
+#if 0
+// TODO?
+template<class T> dual<T> abs2(const dual<T> & x) {
+  using std::abs;
+  return dual<T>(x.rpart() * x.rpart(),
+                 xxx x.dpart() * utils::sgn(x.rpart()));
+}
+#endif
+
+template<class T> duals::dual<T> copysign(const duals::dual<T> & x, const duals::dual<T> & y) {
+  using std::copysign;
+  T r = copysign(x.rpart(), y.rpart());
+  return duals::dual<T>(r, r == x.rpart() ? x.dpart() : -x.dpart());
+}
+
+template<class T> duals::dual<T> hypot(const duals::dual<T> & x, const duals::dual<T> & y) {
+  return sqrt(x*x + y*y);
+}
+
+template<class T> duals::dual<T> scalbn(const duals::dual<T> & arg, int ex) {
+  return arg * std::pow((T)2, ex);
+}
+
+template<class T> duals::dual<T> (fmax)(const duals::dual<T> & x, const duals::dual<T> & y) {
+  return x.rpart() > y.rpart() ? x : y;
+}
+
+template<class T> duals::dual<T> (fmin)(const duals::dual<T> & x, const duals::dual<T> & y) {
+  return x.rpart() <= y.rpart() ? x : y;
+}
+
+template<class T> duals::dual<T> logb(const duals::dual<T> & x) {
+  return duals::log2(x);
+}
+
+template<class T> int (fpclassify)(const duals::dual<T> & d) { using std::fpclassify; return (fpclassify)(d.rpart()); }
+template<class T> bool (isfinite)(const duals::dual<T> & d) { using std::isfinite; return (isfinite)(d.rpart()); }
+template<class T> bool (isnormal)(const duals::dual<T> & d) { using std::isnormal; return (isnormal)(d.rpart()); }
+template<class T> bool (isinf)(const duals::dual<T> & d) { using std::isinf; return (isinf)(d.rpart()); }
+template<class T> bool (isnan)(const duals::dual<T> & d) { using std::isnan; return (isnan)(d.rpart()); }
+template<class T> bool (signbit)(const duals::dual<T> & d) { using std::signbit; return (signbit)(d.rpart()); }
+
+template<class T> dual<T> sqrt(const dual<T> & x) {
+  using std::sqrt;
+  T v = sqrt(x.rpart());
+  if (x.dpart() == T(0))
+    return v;
+  else
+    return dual<T>(v, x.dpart() / (T(2) * v) );
+}
+
+template<class T> dual<T> cbrt(const dual<T> & x) {
+  using std::cbrt;
+  T v = cbrt(x.rpart());
+  if (x.dpart() == T(0))
+    return v;
+  else
+    return dual<T>(v, x.dpart() / (T(3) * v * v) );
+}
+
+template<class T> dual<T> sin(const dual<T> & x) {
+  using std::sin;
+  using std::cos;
+  return dual<T>(sin(x.rpart()),
+                 x.dpart() * cos(x.rpart()));
+}
+
+template<class T> dual<T> cos(const dual<T> & x) {
+  using std::cos;
+  using std::sin;
+  return dual<T>(cos(x.rpart()),
+                 -sin(x.rpart()) * x.dpart());
+}
+
+template<class T> dual<T> tan(const dual<T> & x) {
+  using std::tan;
+  T v = tan(x.rpart());
+  return dual<T>(v, x.dpart() * (v*v + 1));
+}
+
+template<class T> dual<T> asin(const dual<T> & x) {
+  using std::asin;
+  using std::sqrt;
+  T v = asin(x.rpart());
+  if (x.dpart() == T(0))
+    return v;
+  else
+    return dual<T>(v, x.dpart() / sqrt(1 - x.rpart()*x.rpart()));
+}
+
+template<class T> dual<T> acos(const dual<T> & x) {
+  using std::acos;
+  using std::sqrt;
+  T v = acos(x.rpart());
+  if (x.dpart() == T(0))
+    return v;
+  else
+    return dual<T>(v, -x.dpart() / sqrt(1 - x.rpart()*x.rpart()));
+}
+
+template<class T> dual<T> atan(const dual<T> & x) {
+  using std::atan;
+  T v = atan(x.rpart());
+  if (x.dpart() == T(0))
+    return v;
+  else
+    return dual<T>(v, x.dpart() / (1 + x.rpart() * x.rpart()));
+}
+
+template<class T> dual<T> atan2(const dual<T> & y, const dual<T> & x) {
+  using std::atan2;
+  T v = atan2(y.rpart(), x.rpart());
+  return dual<T>(v,
+                 (x.rpart() * y.dpart() - y.rpart() * x.dpart()) /
+                 (y.rpart() * y.rpart() + x.rpart() * x.rpart()));
+}
+
+// TODO
+template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U)>
+common_t
+atan2(const dual<T> & y, const U & x) {
+  using std::atan2;
+  T v = atan2(y.rpart(), x);
+  return dual<T>(v,
+                 (x * y.dpart()) /
+                 (y.rpart() * y.rpart() + x * x));
+}
+
+// TODO
+template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U)>
+common_t
+atan2(const U & y, const dual<T> & x) {
+  using std::atan2;
+  T v = atan2(y, x.rpart());
+  return dual<T>(v,
+                 (-y * x.dpart()) /
+                 (y * y + x.rpart() * x.rpart()));
+}
+
+// TODO
+template<class T> dual<T> sinh(const dual<T> & x);
+template<class T> dual<T> cosh(const dual<T> & x);
+template<class T> dual<T> tanh(const dual<T> & x);
+template<class T> dual<T> asinh(const dual<T> & x);
+template<class T> dual<T> acosh(const dual<T> & x);
+template<class T> dual<T> atanh(const dual<T> & x);
+template<class T> dual<T> log1p(const dual<T> & x);
+template<class T> dual<T> expm1(const dual<T> & x);
+
+////////////////////////////////////////////////////////////////
+// added for meep usage
+////////////////////////////////////////////////////////////////
+
+
+/// The tanh function.  Make sure to `#include <math.h>` before
+/// `#include <duals/dual>` to use this function.
+template<class T> dual<T> tanh(const dual<T> & x) {
+  using std::tanh;
+  double sech = 1.0 / std::cosh(x.dpart());
+  return dual<T>(tanh(x.rpart()), x.dpart() * sech * sech);
+}
+
+////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////
+
+/// The error function.  Make sure to `#include <math.h>` before
+/// `#include <duals/dual>` to use this function.
+template<class T> dual<T> erf(const dual<T> & x) {
+  using std::erf;
+  using std::sqrt;
+  using std::pow;
+  using std::exp;
+  return dual<T>(erf(x.rpart()),
+                 x.dpart() * T(2)/sqrt(T(MY_PI)) * exp(-pow(x.rpart(),T(2))));
+}
+
+/// Error function complement (1 - erf()).
+template<class T> dual<T> erfc(const dual<T> & x) {
+  using std::erfc;
+  using std::sqrt;
+  using std::pow;
+  using std::exp;
+  return dual<T>(erfc(x.rpart()),
+                 x.dpart() * -T(2)/sqrt(T(MY_PI)) * exp(-pow(x.rpart(),T(2))));
+}
+
+/// Gamma function.  Approximation of the dual part.
+// TODO specialize for integers
+template<class T> dual<T> tgamma(const dual<T> & x) {
+  using std::tgamma;
+  T v = tgamma(x.rpart());
+  if (x.dpart() == T(0))
+    return v;
+  else {
+    int errno_saved = errno;
+    T h(T(1) / (1ull << (std::numeric_limits<T>::digits / 3)));
+    T w((tgamma(x.rpart()+h) - tgamma(x.rpart()-h))/(2*h));
+    errno = errno_saved;
+    return dual<T>(v, x.dpart() * w);
+  }
+}
+
+/// Log of absolute value of gamma function.  Approximation of the dual part.
+template<class T> dual<T> lgamma(const dual<T> & x) {
+  using std::lgamma;
+  T v = lgamma(x.rpart());
+  if (x.dpart() == T(0))
+    return v;
+  else {
+#if CPPDUALS_HAVE_SIGNGAM
+    int signgam_saved = signgam;
+#endif
+    int errno_saved = errno;
+    T h(T(1) / (1ull << (std::numeric_limits<T>::digits / 3)));
+    T w((lgamma(x.rpart()+h) - lgamma(x.rpart()-h))/(2*h));
+#if CPPDUALS_HAVE_SIGNGAM
+    signgam = signgam_saved;
+#endif
+    errno = errno_saved;
+    return dual<T>(v, x.dpart() * w);
+  }
+}
+
+/// Putto operator
+template<class T, class _CharT, class _Traits>
+std::basic_ostream<_CharT, _Traits> &
+operator<<(std::basic_ostream<_CharT, _Traits> & os, const dual<T> & x)
+{
+  std::basic_ostringstream<_CharT, _Traits> s;
+  s.flags(os.flags());
+  s.imbue(os.getloc());
+  s.precision(os.precision());
+  s << '(' << x.rpart()
+    << (x.dpart() < 0 ? "" : "+")
+    << x.dpart()
+    << "_e" << (std::is_same<typename std::decay<T>::type,float>::value ? "f" :
+                std::is_same<typename std::decay<T>::type,double>::value ? "" :
+                std::is_same<typename std::decay<T>::type,long double>::value ? "l" : "")
+    << ")";
+  return os << s.str();
+}
+
+/// Stream reader
+template<class T, class CharT, class Traits>
+std::basic_istream<CharT, Traits> &
+operator>>(std::basic_istream<CharT, Traits> & is, dual<T> & x)
+{
+  if (is.good()) {
+    ws(is);
+    if (is.peek() == CharT('(')) {
+      is.get();
+      T r;
+      is >> r;
+      if (!is.fail()) {
+        CharT c = is.peek();
+        if (c != CharT('_')) {
+          ws(is);
+          c = is.peek();
+        }
+        if (c == CharT('+') || c == CharT('-') || c == CharT('_')) {
+          if (c == CharT('+'))
+            is.get();
+          T d;
+          if (c == CharT('_')) {
+            d = r;
+            r = 0;
+          }
+          else
+            is >> d;
+          if (!is.fail()) {
+            ws(is);
+            c = is.peek();
+            if (c == CharT('_')) {
+              is.get();
+              c = is.peek();
+              if (c == CharT('e')) {
+                is.get();
+                c = is.peek();
+                if ((c == 'f' && !std::is_same<typename std::decay<T>::type,float>::value) ||
+                    (c == 'l' && !std::is_same<typename std::decay<T>::type,long double>::value))
+                  is.setstate(std::ios_base::failbit);
+                else {
+                  if (c == 'f' || c == 'l')
+                    is.get();
+                  ws(is);
+                  c = is.peek();
+                  if (c == ')') {
+                    is.get();
+                    x = dual<T>(r, d);
+                  }
+                  else
+                    is.setstate(std::ios_base::failbit);
+                }
+              }
+              else
+                is.setstate(std::ios_base::failbit);
+            }
+            else
+              is.setstate(std::ios_base::failbit);
+          }
+          else
+            is.setstate(std::ios_base::failbit);
+        }
+        else if (c == CharT(')')) {
+          is.get();
+          x = dual<T>(r, T(0));
+        }
+        else
+          is.setstate(std::ios_base::failbit);
+      }
+      else
+        is.setstate(std::ios_base::failbit);
+    }
+    else {
+      T r;
+      is >> r;
+      if (!is.fail())
+        x = dual<T>(r, T(0));
+      else
+        is.setstate(std::ios_base::failbit);
+    }
+  }
+  else
+    is.setstate(std::ios_base::failbit);
+  return is;
+}
+
+#if __cpp_user_defined_literals >= 200809
+/// Dual number literals in namespace duals::literals
+inline namespace literals
+{
+using duals::dual;
+constexpr dual<float> operator""_ef(long double du)
+{
+  return { 0.0f, static_cast<float>(du) };
+}
+constexpr dual<float> operator""_ef(unsigned long long du)
+{
+  return { 0.0f, static_cast<float>(du) };
+}
+constexpr dual<double> operator""_e(long double du)
+{
+  return { 0.0, static_cast<double>(du) };
+}
+constexpr dual<double> operator""_e(unsigned long long du)
+{
+  return { 0.0, static_cast<double>(du) };
+}
+constexpr dual<long double> operator""_el(long double du)
+{
+  return { 0.0l, du };
+}
+constexpr dual<long double> operator""_el(unsigned long long du)
+{
+  return { 0.0l, static_cast<long double>(du) };
+}
+}
+#endif
+
+typedef dual<float> dualf;
+typedef dual<double> duald;
+typedef dual<long double> dualld;
+typedef dual<dualf> hyperdualf;
+typedef dual<duald> hyperduald;
+typedef dual<dualld> hyperdualld;
+typedef std::complex<dualf> cdualf;
+typedef std::complex<duald> cduald;
+typedef std::complex<dualld> cdualld;
+
+} // namespace duals
+
+#ifdef CPPDUALS_LIBFMT
+#include <fmt/format.h>
+/// duals::dual<> Formatter for libfmt https://github.com/fmtlib/fmt
+///
+/// Formats a dual number (r,d) as (r+d_e), offering the same
+/// formatting options as the underlying type - with the addition of
+/// three optional format options, only one of which may appear
+/// directly after the ':' in the format spec: '$', '*', and ','.  The
+/// '*' flag changes the separating _ to a *, producing (r+d*e), where
+/// r and d are the formatted value_type values.  The ',' flag simply
+/// prints the real and dual parts separated by a comma.  As a
+/// concrete exmple, this formatter can produce either (3+5.4_e) or
+/// (3+5.4*e) or (3,5.4) for a dual<double> using the specs {:g},
+/// {:*g}, or {:,g}, respectively.  When the '*' is NOT specified, the
+/// output should be compatible with the input operator>> and the dual
+/// literals below. (this implementation is a bit hacky - glad for
+/// cleanups).
+template <typename T, typename Char>
+struct fmt::formatter<duals::dual<T>,Char> : public fmt::formatter<T,Char>
+{
+  typedef fmt::formatter<T,Char> base;
+  enum style { expr, star, pair } style_ = expr;
+  fmt::detail::dynamic_format_specs<Char> specs_;
+  FMT_CONSTEXPR auto parse(format_parse_context & ctx) -> decltype(ctx.begin()) {
+    using handler_type = fmt::detail::dynamic_specs_handler<format_parse_context>;
+    auto type = fmt::detail::type_constant<T, Char>::value;
+    fmt::detail::specs_checker<handler_type> handler(handler_type(specs_, ctx), type);
+    auto it = ctx.begin();
+    if (it != ctx.end())
+      switch (*it) {
+      case '$': style_ = style::expr; ctx.advance_to(++it); break;
+      case '*': style_ = style::star; ctx.advance_to(++it); break;
+      case ',': style_ = style::pair; ctx.advance_to(++it); break;
+      default: break;
+    }
+    parse_format_specs(ctx.begin(), ctx.end(), handler);
+    return base::parse(ctx);
+  }
+  template <typename FormatCtx>
+  auto format(const duals::dual<T> & x, FormatCtx & ctx) -> decltype(ctx.out()) {
+    format_to(ctx.out(), "(");
+    if (style_ == style::pair) {
+      base::format(x.rpart(), ctx);
+      format_to(ctx.out(), ",");
+      base::format(x.dpart(), ctx);
+      return format_to(ctx.out(), ")");
+    }
+    if (x.rpart() || !x.dpart())
+      base::format(x.rpart(), ctx);
+    if (x.dpart()) {
+      if (x.rpart() && x.dpart() >= 0 && specs_.sign != sign::plus)
+        format_to(ctx.out(), "+");
+      base::format(x.dpart(), ctx);
+      if (style_ == style::star)
+        format_to(ctx.out(), "*e");
+      else
+        format_to(ctx.out(), "_e");
+      if (std::is_same<typename std::decay<T>::type,float>::value)       format_to(ctx.out(), "f");
+      if (std::is_same<typename std::decay<T>::type,long double>::value) format_to(ctx.out(), "l");
+    }
+    return format_to(ctx.out(), ")");
+  }
+};
+#endif
+
+#ifdef CPPDUALS_LIBFMT_COMPLEX
+#ifndef CPPDUALS_LIBFMT
+#include <fmt/format.h>
+#endif
+/// std::complex<> Formatter for libfmt https://github.com/fmtlib/fmt
+///
+/// libfmt does not provide a formatter for std::complex<>, although
+/// one is proposed for c++20.  Anyway, at the expense of a k or two,
+/// you can define CPPDUALS_LIBFMT_COMPLEX and get this one.
+///
+/// The standard iostreams formatting of complex numbers is (a,b),
+/// where a and b are the real and imaginary parts.  This formats a
+/// complex number (a+bi) as (a+bi), offering the same formatting
+/// options as the underlying type - with the addition of three
+/// optional format options, only one of which may appear directly
+/// after the ':' in the format spec (before any fill or align): '$'
+/// (the default if no flag is specified), '*', and ','.  The '*' flag
+/// adds a * before the 'i', producing (a+b*i), where a and b are the
+/// formatted value_type values.  The ',' flag simply prints the real
+/// and complex parts separated by a comma (same as iostreams' format).
+/// As a concrete exmple, this formatter can produce either (3+5.4i)
+/// or (3+5.4*i) or (3,5.4) for a complex<double> using the specs {:g}
+/// | {:$g}, {:*g}, or {:,g}, respectively.  (this implementation is a
+/// bit hacky - glad for cleanups).
+///
+template <typename T, typename Char>
+struct fmt::formatter<std::complex<T>,Char> : public fmt::formatter<T,Char>
+{
+  typedef fmt::formatter<T,Char> base;
+  enum style { expr, star, pair } style_ = expr;
+  fmt::detail::dynamic_format_specs<Char> specs_;
+  FMT_CONSTEXPR auto parse(format_parse_context & ctx) -> decltype(ctx.begin()) {
+    using handler_type = fmt::detail::dynamic_specs_handler<format_parse_context>;
+    auto type = fmt::detail::type_constant<T, Char>::value;
+    fmt::detail::specs_checker<handler_type> handler(handler_type(specs_, ctx), type);
+    auto it = ctx.begin();
+    if (it != ctx.end()) {
+      switch (*it) {
+      case '$': style_ = style::expr; ctx.advance_to(++it); break;
+      case '*': style_ = style::star; ctx.advance_to(++it); break;
+      case ',': style_ = style::pair; ctx.advance_to(++it); break;
+      default: break;
+      }
+    }
+    parse_format_specs(ctx.begin(), ctx.end(), handler);
+    //todo: fixup alignment
+    return base::parse(ctx);
+  }
+  template <typename FormatCtx>
+  auto format(const std::complex<T> & x, FormatCtx & ctx) -> decltype(ctx.out()) {
+    format_to(ctx.out(), "(");
+    if (style_ == style::pair) {
+      base::format(x.real(), ctx);
+      format_to(ctx.out(), ",");
+      base::format(x.imag(), ctx);
+      return format_to(ctx.out(), ")");
+    }
+    if (x.real() || !x.imag())
+      base::format(x.real(), ctx);
+    if (x.imag()) {
+      if (x.real() && x.imag() >= 0 && specs_.sign != sign::plus)
+        format_to(ctx.out(), "+");
+      base::format(x.imag(), ctx);
+      if (style_ == style::star)
+        format_to(ctx.out(), "*i");
+      else
+        format_to(ctx.out(), "i");
+      if (std::is_same<typename std::decay<T>::type,float>::value)       format_to(ctx.out(), "f");
+      if (std::is_same<typename std::decay<T>::type,long double>::value) format_to(ctx.out(), "l");
+    }
+    return format_to(ctx.out(), ")");
+  }
+};
+#endif
+
+#endif // CPPDUALS_DUAL

From 3aa3c096c95a4b4ccf145c27be0d62685d04b882 Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Thu, 10 Mar 2022 11:49:50 -0500
Subject: [PATCH 15/29] fix interface artifacts and get gradients working

---
 src/meepgeom.cpp     | 62 +++++++++++++++++++++++++++-----------------
 src/meepgeom.hpp     | 33 +++++++++++++++++++++--
 src/support/dual.hpp |  6 ++---
 3 files changed, 72 insertions(+), 29 deletions(-)

diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 23fc9650b..f99eb3a21 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -510,7 +510,7 @@ duals::duald dual_linear_interpolate(double rx, double ry, double rz, std::vecto
 duals::duald material_grid_val(vector3 p, material_data *md) {
   // given the relative location, p, interpolate the material grid point.
 
-  if (!is_material_grid(md)) { meep::abort("Invalid material grid detected.\n"); }
+  if (!is_material_grid(md)) { abort(); }
   return dual_linear_interpolate(p.x, p.y, p.z, md->weights, md->grid_size.x,
                                   md->grid_size.y, md->grid_size.z, 1);
 
@@ -520,10 +520,8 @@ static duals::duald tanh_projection(duals::duald u, double beta, double eta) {
   if (beta == 0) return u;
   if (u == eta) return 0.5; // avoid NaN when beta is Inf
   duals::duald tanh_beta_eta = tanh(beta*eta);
-  duals::duald temp = (tanh_beta_eta + tanh(beta*(u-eta))) /
+  return (tanh_beta_eta + tanh(beta*(u-eta))) /
     (tanh_beta_eta + tanh(beta*(1-eta)));
-    //master_printf("temp %f %f\n",temp.rpart(),temp.dpart());
-  return temp;
 }
 
 duals::duald matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md) {
@@ -1257,21 +1255,24 @@ duals::duald get_material_grid_fill(meep::ndim dim, duals::duald d, double r, du
   }
 }
 
-void normalize_dual(cvector3 &cv){
+duals::duald normalize_dual(cvector3 &cv){
   duals::duald d[3], norm;
+  // compute the norm, u_0'
   d[0] = cv.x.re + 1_e*cv.x.im;
   d[1] = cv.y.re + 1_e*cv.y.im;
   d[2] = cv.z.re + 1_e*cv.z.im;
 
   norm = sqrt(abs(d[0])*abs(d[0]) + abs(d[1])*abs(d[1]) + abs(d[2])*abs(d[2]));
 
+  // normalize the normal vector
   d[0] = d[0] / norm;
   d[1] = d[1] / norm;
   d[2] = d[2] / norm;
-
   cv.x.re = d[0].rpart(); cv.x.im = d[0].dpart();
   cv.y.re = d[1].rpart(); cv.y.im = d[1].dpart();
   cv.z.re = d[2].rpart(); cv.z.im = d[2].dpart();
+
+  return norm;
 }
 
 duals::duald get_distance(cvector3 &cv, duals::duald uval, double eta){
@@ -1300,18 +1301,18 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
                                       bool &fallback) {
   const geometric_object *o;
   material_type mat, mat_behind;
-  vector3 p_mat, p_mat_behind;
+  vector3 p_mat, p_mat_behind, p, shiftby;
   symm_matrix meps, eps2, epsinv2;
-  vector3 p, shiftby;
   cvector3 normal;
   duals::duald fill, uval;
-  fallback = false;
-  
   int oi;
   geom_box_tree tp;
+  fallback = false;
 
-  if (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind, p_mat, p_mat_behind)
-      && (!is_material_grid(mat))) {
+  /* first let's check if our pixel has more than a single
+  interface (i.e. a corner) in which case we abandon any
+  smoothing efforts*/
+  if (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind, p_mat, p_mat_behind)) {
   noavg:
     get_material_pt(mat, v.center());
   trivial:
@@ -1320,17 +1321,25 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
     return;
   }
 
+  /* we may have entered this routine (e.g. because of a material grid
+  backprop routine) but don't want to actually do any averaging */
   if (maxeval == 0){
+    get_material_pt(mat, v.center());
     if (is_material_grid(mat)){
+      //boolean inobject;
+      //mat =
+      //(material_type)material_of_unshifted_point_in_tree_inobject(vec_to_vector3(v.center()), restricted_tree, &inobject);
       tp = geom_tree_search(vec_to_vector3(v.center()), restricted_tree, &oi);
       uval = matgrid_val(vec_to_vector3(v.center()), tp, oi, mat);
     mgavg:
       dual_interpolate_mat(mat,meps,uval);
       sym_matrix_invert(chi1inv_matrix, &meps);
+      //get_material_pt(mat, v.center());
+      //material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix);
       material_gc(mat);
       return;
     }else{
-      goto noavg;
+      goto trivial;
     }
   }
 
@@ -1362,9 +1371,8 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
         /* couldn't get normal vector for this point; no averaging is needed,
         but we still need to interpolate onto our grid using duals */
         goto mgavg;
-      
-      duals::duald d = get_distance(normal, uval, mat->eta);
-      normalize_dual(normal);
+      duals::duald u_prime = normalize_dual(normal);
+      duals::duald d = (mat->eta - uval) / u_prime;
       
       double r = v.diameter()/2;
       fill = get_material_grid_fill(v.dim,d,r,uval,mat->eta);
@@ -1378,14 +1386,20 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
         we therefore need to set the two materials used for
         averaging to our corresponding solid and voide materials.
         */
-        medium_struct medium_1 = medium_struct(mat->medium_2);
-        medium_struct medium_0 = medium_struct(mat->medium_1);
-        material_data mat_temp = material_data();
-        material_data mat_behind_temp = material_data();
-        mat_temp.medium = medium_1;
-        mat_behind_temp.medium = medium_0;
-        material_epsmu(meep::type(c), &mat_temp, &meps, chi1inv_matrix);
-        material_epsmu(meep::type(c), &mat_behind_temp, &eps2, &epsinv2);
+        symm_matrix eps_0, eps_plus, eps_minus;
+        dual_interpolate_mat(mat,eps_0,uval);
+        dual_interpolate_mat(mat,eps_plus,uval+r*u_prime);
+        dual_interpolate_mat(mat,eps_minus,uval-r*u_prime);
+        
+        if (d > 0) {
+          // Case 1: d > 0. Then let ε̃₊ = ε₊ and let ε̃₋ = [ε₀d + ε₋(Δx/2 – d)] / (Δx/2).
+          meps = eps_plus;
+          eps2 = (eps_0*d + eps_minus*(r-d)) / (r);
+        }else {
+          // Case 2: d < 0. Then let ε̃₋ = ε₋ and let ε̃₊ = [-ε₀d + ε₊(Δx/2 + d)] / (Δx/2).
+          eps2 = eps_minus;
+          meps = (-eps_0*d + eps_plus*(r+d)) / (r);
+        }
       }
     } else if(is_variable(mat)) {
       // no averaging is needed (not a material grid)
diff --git a/src/meepgeom.hpp b/src/meepgeom.hpp
index 1e87dbf20..f60e36c99 100644
--- a/src/meepgeom.hpp
+++ b/src/meepgeom.hpp
@@ -165,9 +165,38 @@ inline cvector3 cvector_add(cvector3 cv1, cvector3 cv2){
   return make_cvector3(vector3_plus(cvector3_re(cv1),cvector3_re(cv2)),vector3_plus(cvector3_im(cv1),cvector3_im(cv2)));
 }
 
-typedef struct {
+struct symm_matrix{
   duals::duald m00, m01, m02, m11, m12, m22;
-} symm_matrix;
+  struct symm_matrix& operator+=(const symm_matrix& rhs) { 
+    m00 += rhs.m00; m11 += rhs.m11; m22 += rhs.m22;
+    m01 += rhs.m01; m02 += rhs.m02; m12 += rhs.m12;
+    return *this; 
+  }
+  struct symm_matrix& operator+=(const duals::duald& k) { 
+    m00 += k; m11 += k; m22 += k;
+    return *this; 
+   }
+  struct symm_matrix& operator+(const symm_matrix& rhs) { 
+    m00 += rhs.m00; m11 += rhs.m11; m22 += rhs.m22;
+    m01 += rhs.m01; m02 += rhs.m02; m12 += rhs.m12;
+    return *this; 
+   }
+  struct symm_matrix& operator*( const duals::duald& k) { 
+    m00 *= k; m11 *= k; m22 *= k;
+    m01 *= k; m02 *= k; m12 *= k;
+    return *this; 
+   }
+  struct symm_matrix& operator/( const duals::duald& k) { 
+    m00 /= k; m11 /= k; m22 /= k;
+    m01 /= k; m02 /= k; m12 /= k;
+    return *this; 
+   }
+  struct symm_matrix& operator-() { 
+    m00 = -m00; m11 = -m11; m22 = -m22;
+    m01 = -m01; m02 = -m02; m12 = -m12;
+    return *this; 
+   }
+} ;
 
 struct pol {
   meep_geom::susceptibility user_s;
diff --git a/src/support/dual.hpp b/src/support/dual.hpp
index 58f476dea..ba17dbe94 100644
--- a/src/support/dual.hpp
+++ b/src/support/dual.hpp
@@ -1106,7 +1106,7 @@ template<class T> dual<T> log1p(const dual<T> & x);
 template<class T> dual<T> expm1(const dual<T> & x);
 
 ////////////////////////////////////////////////////////////////
-// added for meep usage
+// added for meep forward-diff operations
 ////////////////////////////////////////////////////////////////
 
 
@@ -1114,12 +1114,12 @@ template<class T> dual<T> expm1(const dual<T> & x);
 /// `#include <duals/dual>` to use this function.
 template<class T> dual<T> tanh(const dual<T> & x) {
   using std::tanh;
-  double sech = 1.0 / std::cosh(x.dpart());
+  T sech = 1.0 / std::cosh(x.rpart());
   return dual<T>(tanh(x.rpart()), x.dpart() * sech * sech);
 }
 
 ////////////////////////////////////////////////////////////////
-
+//
 ////////////////////////////////////////////////////////////////
 
 /// The error function.  Make sure to `#include <math.h>` before

From 5c07d7893c02b350924c78409ee12839a165c998 Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Mon, 21 Mar 2022 10:02:25 -0400
Subject: [PATCH 16/29] minor formatting

---
 src/meepgeom.cpp | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index f99eb3a21..46b179e4b 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -1326,6 +1326,7 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
   if (maxeval == 0){
     get_material_pt(mat, v.center());
     if (is_material_grid(mat)){
+      master_printf("point: %f %f \n",v.center().x(),v.center().y());
       //boolean inobject;
       //mat =
       //(material_type)material_of_unshifted_point_in_tree_inobject(vec_to_vector3(v.center()), restricted_tree, &inobject);
@@ -1352,7 +1353,10 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
     }
   }
 
-  /* check for trivial case of only one object/material
+  /* ------------------------------------------- */
+  // One material
+  /* ------------------------------------------- */
+  /* check for case of only one object/material
    in the case of the material grid, make sure we don't need
    to do any interface averaging within.
    */
@@ -1409,7 +1413,10 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
     } else {
       goto trivial;
     }
-  } else { // Two different materials (perhaps a m.g. and a geom object)
+  /* ------------------------------------------- */
+  // Two different materials (perhaps a m.g. and a geom object)
+  /* ------------------------------------------- */
+  } else {
     
     vector3 normal_re = unit_vector3(normal_to_fixed_object(vector3_minus(p, shiftby), *o));
     if (normal_re.x == 0 && normal_re.y == 0 && normal_re.z == 0)
@@ -1435,8 +1442,7 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
       Specifically, we need to include the effects of u_i
       in the dual part of the material tensor */
     if (is_material_grid(mat)){
-      int oi;
-      geom_box_tree tp = geom_tree_search(p_mat, restricted_tree, &oi);
+      tp = geom_tree_search(p_mat, restricted_tree, &oi);
       duals::duald uval = matgrid_val(p_mat, tp, oi, mat);
       dual_interpolate_mat(mat,meps,uval);
       sym_matrix_invert(chi1inv_matrix, &meps);    
@@ -1444,8 +1450,7 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
       material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix);
     }
     if (is_material_grid(mat_behind)){
-      int oi;
-      geom_box_tree tp = geom_tree_search(p_mat_behind, restricted_tree, &oi);
+      tp = geom_tree_search(p_mat_behind, restricted_tree, &oi);
       duals::duald uval = matgrid_val(p_mat_behind, tp, oi, mat);
       dual_interpolate_mat(mat_behind,eps2,uval);
       sym_matrix_invert(&epsinv2, &eps2); // not really needed
@@ -1458,6 +1463,7 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
   if (is_metal(meep::type(c), &mat) || is_metal(meep::type(c), &mat_behind)) goto noavg;
 
   kottke_algorithm(c,chi1inv_matrix,meps,eps2,normal,fill);
+  material_gc(mat);
 }
 
 static int eps_ever_negative = 0;

From 25f943795a5ebf37579c1f49f435ecff3d989a6d Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Thu, 7 Apr 2022 08:46:07 -0400
Subject: [PATCH 17/29] update makefile

---
 src/support/Makefile.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/support/Makefile.am b/src/support/Makefile.am
index f65ba7ddb..603b0c461 100644
--- a/src/support/Makefile.am
+++ b/src/support/Makefile.am
@@ -1,3 +1,3 @@
 noinst_LTLIBRARIES = libsupport.la
 
-libsupport_la_SOURCES = mt19937ar.c meep_mt.h
+libsupport_la_SOURCES = mt19937ar.c meep_mt.h dual.hpp

From b69a5d11b1bd182480c1c7f9a1c9024e4813fad1 Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Wed, 18 May 2022 15:12:50 -0400
Subject: [PATCH 18/29] fix bug in fill fraction

---
 src/meepgeom.cpp | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 46b179e4b..fef87d4bc 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -1224,10 +1224,6 @@ void kottke_algorithm(meep::component c, symm_matrix *chi1inv_matrix, symm_matri
 
 duals::duald get_material_grid_fill(meep::ndim dim, duals::duald d, double r, duals::duald u, double eta){
   /* The fill fraction should describe the amount of u=1 material in the current pixel.
-
-  The analytic volume expressions for the end cap only specify the fill fraction
-  of the cap relative to the whole sphere... but we don't know what the cap 
-  actually is (solid or void). So we need a little extra logic to sort that out.
   
   Occasionally, the distance to the nearest interface is outside the current
   sphere, which means we don't need to do any averaging (the fill is 0 or 1). Again,
@@ -1247,12 +1243,8 @@ duals::duald get_material_grid_fill(meep::ndim dim, duals::duald d, double r, du
     else if (dim == meep::D3)
       rel_fill = (((r-d)*(r-d))/(4*meep::pi*r*r*r))*(2*r+d);
   }
-
-  if (u <= eta){
-    return rel_fill;   // center is void, so cap must be solid
-  }else{
-    return 1-rel_fill; // center is solid, so cap must be void (and we need complement)
-  }
+  
+  return rel_fill;
 }
 
 duals::duald normalize_dual(cvector3 &cv){
@@ -1326,10 +1318,6 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
   if (maxeval == 0){
     get_material_pt(mat, v.center());
     if (is_material_grid(mat)){
-      master_printf("point: %f %f \n",v.center().x(),v.center().y());
-      //boolean inobject;
-      //mat =
-      //(material_type)material_of_unshifted_point_in_tree_inobject(vec_to_vector3(v.center()), restricted_tree, &inobject);
       tp = geom_tree_search(vec_to_vector3(v.center()), restricted_tree, &oi);
       uval = matgrid_val(vec_to_vector3(v.center()), tp, oi, mat);
     mgavg:
@@ -1388,7 +1376,7 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
       } else{
         /* we have a material grid interface within our pixel.
         we therefore need to set the two materials used for
-        averaging to our corresponding solid and voide materials.
+        averaging to our corresponding solid and void materials.
         */
         symm_matrix eps_0, eps_plus, eps_minus;
         dual_interpolate_mat(mat,eps_0,uval);

From 7a558fdd19d29416065718cf0e905300946e07ff Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Wed, 25 May 2022 16:03:54 -0400
Subject: [PATCH 19/29] fix matgrid test bug

---
 src/meepgeom.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 5b7ebcea2..63863b846 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -445,12 +445,14 @@ cvector3 material_grid_grad(vector3 p, material_data *md, const geometric_object
   if (o != NULL) {
     gradient = to_geom_object_coords_VJP(grad_u, o);
   } else {
-    g_real = make_vector3(nx*du_dx.rpart()/ geometry_lattice.size.x,
-      ny*du_dy.rpart()/ geometry_lattice.size.y,
-      nz*du_dz.rpart()/ geometry_lattice.size.z);
-    g_imag = make_vector3(nx*du_dx.dpart()/ geometry_lattice.size.x,
-      ny*du_dy.dpart()/ geometry_lattice.size.y,
-      nz*du_dz.dpart()/ geometry_lattice.size.z);
+    g_real = make_vector3(
+      geometry_lattice.size.x == 0 ? 0 : nx*du_dx.rpart()/ geometry_lattice.size.x,
+      geometry_lattice.size.y == 0 ? 0 : ny*du_dy.rpart()/ geometry_lattice.size.y,
+      geometry_lattice.size.z == 0 ? 0 : nz*du_dz.rpart()/ geometry_lattice.size.z);
+    g_imag = make_vector3(
+      geometry_lattice.size.x == 0 ? 0 : nx*du_dx.dpart()/ geometry_lattice.size.x,
+      geometry_lattice.size.y == 0 ? 0 : ny*du_dy.dpart()/ geometry_lattice.size.y,
+      geometry_lattice.size.z == 0 ? 0 : nz*du_dz.dpart()/ geometry_lattice.size.z);
     gradient = make_cvector3(g_real,g_imag);
   }
   return gradient;

From 9ac153e9e60662e7e3c856e8a8d630be01ead7ac Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Wed, 25 May 2022 16:29:22 -0400
Subject: [PATCH 20/29] fix damping test failure

---
 src/meepgeom.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 63863b846..eed88d58c 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -1369,6 +1369,9 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
     to just evaluate.
     */
     if (is_material_grid(mat)){
+      // if we have a damping term and β≠∞, we can't smooth
+      if ((mat->damping != 0) && (mat->eta != std::numeric_limits<double>::infinity())) goto noavg;
+
       tp = geom_tree_search(p_mat, restricted_tree, &oi);
       uval = matgrid_val(p_mat, tp, oi, mat);
       

From 3349aee3137f15b244113d4445a3371f3166cbde Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Wed, 25 May 2022 20:43:43 -0400
Subject: [PATCH 21/29] revamp matgrid test and add comments

---
 python/tests/test_material_grid.py | 116 ++++++++---------------------
 1 file changed, 30 insertions(+), 86 deletions(-)

diff --git a/python/tests/test_material_grid.py b/python/tests/test_material_grid.py
index c50d0dff2..e8bee3562 100644
--- a/python/tests/test_material_grid.py
+++ b/python/tests/test_material_grid.py
@@ -1,4 +1,13 @@
+'''
+Checks that a material grid works with symmetries, and that the
+new subpixel smoothing feature is accurate.
+
+TODO:
+    Create a 3D test that works well with the new smoothing
+'''
+
 import meep as mp
+from meep import mpb
 try:
     import meep.adjoint as mpa
 except:
@@ -7,6 +16,10 @@
 from scipy.ndimage import gaussian_filter
 import unittest
 
+rad = 0.301943
+k_point = mp.Vector3(0.3892,0.1597,0)
+Si = mp.Medium(index=3.5)
+cell_size = mp.Vector3(1,1,0)
 
 def compute_transmittance(matgrid_symmetry=False):
         resolution = 25
@@ -24,13 +37,16 @@ def compute_transmittance(matgrid_symmetry=False):
         rng = np.random.RandomState(2069588)
 
         w = rng.rand(Nx,Ny)
+        # for subpixel smoothing, the underlying design grid must be smoothly varying
+        w = mpa.tanh_projection(rng.rand(Nx,Ny),1e5,0.5)
+        w = mpa.conic_filter(w,0.25,matgrid_size.x,matgrid_size.y,matgrid_resolution)
         weights = 0.5 * (w + np.fliplr(w)) if not matgrid_symmetry else w
 
         matgrid = mp.MaterialGrid(mp.Vector3(Nx,Ny),
                                   mp.air,
                                   mp.Medium(index=3.5),
                                   weights=weights,
-                                  do_averaging=False,
+                                  beta=np.inf,
                                   grid_type='U_MEAN')
 
         geometry = [mp.Block(center=mp.Vector3(),
@@ -78,18 +94,12 @@ def compute_transmittance(matgrid_symmetry=False):
 
 
 def compute_resonant_mode_2d(res,default_mat=False):
-        cell_size = mp.Vector3(1,1,0)
-
-        rad = 0.301943
-
         fcen = 0.3
         df = 0.2*fcen
         sources = [mp.Source(mp.GaussianSource(fcen,fwidth=df),
                              component=mp.Hz,
                              center=mp.Vector3(-0.1057,0.2094,0))]
 
-        k_point = mp.Vector3(0.3892,0.1597,0)
-
         matgrid_size = mp.Vector3(1,1,0)
         matgrid_resolution = 1200
 
@@ -107,7 +117,7 @@ def compute_resonant_mode_2d(res,default_mat=False):
 
         matgrid = mp.MaterialGrid(mp.Vector3(Nx,Ny),
                                   mp.air,
-                                  mp.Medium(index=3.5),
+                                  Si,
                                   weights=filtered_weights,
                                   do_averaging=True,
                                   beta=1000,
@@ -138,84 +148,25 @@ def compute_resonant_mode_2d(res,default_mat=False):
         return freq
 
 
-def compute_resonant_mode_3d(use_matgrid=True):
-    resolution = 25
+def compute_resonant_mode_mpb(resolution=1024):
+    geometry = [mp.Cylinder(rad, material=Si)]
+    geometry_lattice = mp.Lattice(cell_size)
 
-    wvl = 1.27
-    fcen = 1/wvl
-    df = 0.02*fcen
+    ms = mpb.ModeSolver(num_bands=1,
+                        k_points=[k_point],
+                        geometry=geometry,
+                        geometry_lattice=geometry_lattice,
+                        resolution=resolution)
 
-    nSi = 3.45
-    Si = mp.Medium(index=nSi)
-    nSiO2 = 1.45
-    SiO2 = mp.Medium(index=nSiO2)
-
-    s = 1.0
-    cell_size = mp.Vector3(s,s,s)
-
-    rad = 0.34  # radius of sphere
-
-    if use_matgrid:
-        matgrid_resolution = 2*resolution
-        N = int(s*matgrid_resolution)
-        coord = np.linspace(-0.5*s,0.5*s,N)
-        xv, yv, zv = np.meshgrid(coord,coord,coord)
-
-        weights = np.sqrt(np.square(xv) + np.square(yv) + np.square(zv)) < rad
-        filtered_weights = gaussian_filter(weights,
-                                           sigma=4/resolution,
-                                           output=np.double)
-
-        matgrid = mp.MaterialGrid(mp.Vector3(N,N,N),
-                                  SiO2,
-                                  Si,
-                                  weights=filtered_weights,
-                                  do_averaging=True,
-                                  beta=1000,
-                                  eta=0.5)
-
-        geometry = [mp.Block(center=mp.Vector3(),
-                             size=cell_size,
-                             material=matgrid)]
-    else:
-        geometry = [mp.Sphere(center=mp.Vector3(),
-                              radius=rad,
-                              material=Si)]
-
-    sources = [mp.Source(src=mp.GaussianSource(fcen,fwidth=df),
-                         size=mp.Vector3(),
-                         center=mp.Vector3(0.13,0.25,0.06),
-                         component=mp.Ez)]
-
-    k_point = mp.Vector3(0.23,-0.17,0.35)
-
-    sim = mp.Simulation(resolution=resolution,
-                        cell_size=cell_size,
-                        sources=sources,
-                        default_material=SiO2,
-                        k_point=k_point,
-                        geometry=geometry)
-
-    h = mp.Harminv(mp.Ez, mp.Vector3(-0.2684,0.1185,0.0187), fcen, df)
-
-    sim.run(mp.after_sources(h),
-            until_after_sources=200)
-
-    try:
-        for m in h.modes:
-            print("harminv:, {}, {}, {}".format(resolution,m.freq,m.Q))
-        freq = h.modes[0].freq
-    except:
-        raise RuntimeError("No resonant modes found.")
-
-    return freq
 
+    ms.run_te()
+    return ms.freqs[0]
 
 class TestMaterialGrid(unittest.TestCase):
 
     def test_subpixel_smoothing(self):
-        # "exact" frequency computed using MaterialGrid at resolution = 300
-        freq_ref = 0.29826813873225283
+        # "exact" frequency computed using MPB
+        freq_ref = compute_resonant_mode_mpb()
 
         res = [25, 50]
         freq_matgrid = []
@@ -233,13 +184,6 @@ def test_subpixel_smoothing(self):
         freq_matgrid_default_mat = compute_resonant_mode_2d(res[0], True)
         self.assertAlmostEqual(freq_matgrid[0], freq_matgrid_default_mat)
 
-
-    def test_matgrid_3d(self):
-        freq_matgrid = compute_resonant_mode_3d(True)
-        freq_geomobj = compute_resonant_mode_3d(False)
-        self.assertAlmostEqual(freq_matgrid, freq_geomobj, places=2)
-
-
     def test_symmetry(self):
             tran_nosym = compute_transmittance(False)
             tran_sym = compute_transmittance(True)

From 35ec6d8ad9372eff7f5831625c19fff7cbe4ab6e Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Sat, 28 May 2022 20:24:26 -0400
Subject: [PATCH 22/29] update tests and add comments to code

---
 python/adjoint/filters.py          |  18 ++++
 python/requirements.txt            |   1 +
 python/tests/test_material_grid.py |  80 +++++++++++-----
 src/meepgeom.cpp                   | 144 ++++++++++++-----------------
 4 files changed, 139 insertions(+), 104 deletions(-)

diff --git a/python/adjoint/filters.py b/python/adjoint/filters.py
index c7eddd688..4e45dfb19 100644
--- a/python/adjoint/filters.py
+++ b/python/adjoint/filters.py
@@ -7,6 +7,8 @@
 import meep as mp
 from scipy import special
 from scipy import signal
+import skfmm
+from scipy import interpolate
 
 def _proper_pad(x,n):
     '''
@@ -854,3 +856,19 @@ def gray_indicator(x):
     density-based topology optimization. Archive of Applied Mechanics, 86(1-2), 189-218.
     '''
     return npa.mean(4 * x.flatten() * (1 - x.flatten())) * 100
+
+def make_sdf(data):
+    '''
+    Assume the input, data, is the desired output shape
+    (i.e. 1d, 2d, or 3d) and that it's values are between
+    0 and 1.
+    '''
+    # create signed distance function
+    sd = skfmm.distance(data- 0.5 , dx = 1)
+
+    # interpolate zero-levelset onto 0.5-levelset
+    x = [np.min(sd.flatten()), 0, np.max(sd.flatten())]
+    y = [0, 0.5, 1]
+    f = interpolate.interp1d(x, y, kind='linear')
+
+    return f(sd)
diff --git a/python/requirements.txt b/python/requirements.txt
index 772aac158..c43d34869 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -7,3 +7,4 @@ numpy
 parameterized
 pytest
 scipy
+scikit-fmm
diff --git a/python/tests/test_material_grid.py b/python/tests/test_material_grid.py
index e8bee3562..1c4c73bbe 100644
--- a/python/tests/test_material_grid.py
+++ b/python/tests/test_material_grid.py
@@ -13,7 +13,6 @@
 except:
     import adjoint as mpa
 import numpy as np
-from scipy.ndimage import gaussian_filter
 import unittest
 
 rad = 0.301943
@@ -92,8 +91,7 @@ def compute_transmittance(matgrid_symmetry=False):
 
         return tran
 
-
-def compute_resonant_mode_2d(res,default_mat=False):
+def compute_resonant_mode_2d(res,radius=rad,default_mat=False,cylinder=False,cylinder_matgrid=True,do_averaging=True):
         fcen = 0.3
         df = 0.2*fcen
         sources = [mp.Source(mp.GaussianSource(fcen,fwidth=df),
@@ -110,24 +108,37 @@ def compute_resonant_mode_2d(res,default_mat=False):
         x = np.linspace(-0.5*matgrid_size.x,0.5*matgrid_size.x,Nx)
         y = np.linspace(-0.5*matgrid_size.y,0.5*matgrid_size.y,Ny)
         xv, yv = np.meshgrid(x,y)
-        weights = np.sqrt(np.square(xv) + np.square(yv)) < rad
-        filtered_weights = gaussian_filter(weights,
-                                           sigma=3.0,
-                                           output=np.double)
-
+        weights = mpa.make_sdf(np.sqrt(np.square(xv) + np.square(yv)) < radius)
+        if cylinder:
+            weights = 0.0*weights+1.0          
+        
         matgrid = mp.MaterialGrid(mp.Vector3(Nx,Ny),
                                   mp.air,
                                   Si,
-                                  weights=filtered_weights,
-                                  do_averaging=True,
-                                  beta=1000,
+                                  weights=weights,
+                                  beta=np.inf,
                                   eta=0.5)
-
-        geometry = [mp.Block(center=mp.Vector3(),
+        
+        # Use a cylinder object, not a block object
+        if cylinder:
+            # within the cylinder, use a (uniform) material grid
+            if cylinder_matgrid:
+                geometry = [mp.Cylinder(center=mp.Vector3(),
+                             radius=radius,
+                             material=matgrid)]
+            # within the cylinder, just use a normal medium
+            else:
+                geometry = [mp.Cylinder(center=mp.Vector3(),
+                             radius=radius,
+                             material=Si)]
+        # use a block object
+        else:
+            geometry = [mp.Block(center=mp.Vector3(),
                              size=mp.Vector3(matgrid_size.x,matgrid_size.y,0),
                              material=matgrid)]
 
         sim = mp.Simulation(resolution=res,
+                            eps_averaging=do_averaging,
                             cell_size=cell_size,
                             default_material=matgrid if default_mat else mp.Medium(),
                             geometry=geometry if not default_mat else [],
@@ -136,7 +147,7 @@ def compute_resonant_mode_2d(res,default_mat=False):
 
         h = mp.Harminv(mp.Hz, mp.Vector3(0.3718,-0.2076), fcen, df)
         sim.run(mp.after_sources(h),
-                until_after_sources=200)
+                until_after_sources=300)
 
         try:
             for m in h.modes:
@@ -147,13 +158,12 @@ def compute_resonant_mode_2d(res,default_mat=False):
 
         return freq
 
-
-def compute_resonant_mode_mpb(resolution=1024):
+def compute_resonant_mode_mpb(resolution=512):
     geometry = [mp.Cylinder(rad, material=Si)]
     geometry_lattice = mp.Lattice(cell_size)
 
     ms = mpb.ModeSolver(num_bands=1,
-                        k_points=[k_point],
+                        k_points=[mp.cartesian_to_reciprocal(k_point, geometry_lattice)],
                         geometry=geometry,
                         geometry_lattice=geometry_lattice,
                         resolution=resolution)
@@ -165,23 +175,51 @@ def compute_resonant_mode_mpb(resolution=1024):
 class TestMaterialGrid(unittest.TestCase):
 
     def test_subpixel_smoothing(self):
+        res = 25
+
+        def subpixel_test_matrix(radius,do_averaging):
+            freq_cylinder = compute_resonant_mode_2d(res, radius, default_mat=False, cylinder=True, cylinder_matgrid=False, do_averaging=do_averaging)
+            freq_matgrid = compute_resonant_mode_2d(res, radius, default_mat=False, cylinder=False, do_averaging=do_averaging)
+            freq_matgrid_cylinder = compute_resonant_mode_2d(res, radius, default_mat=False, cylinder=True, cylinder_matgrid=True, do_averaging=do_averaging)
+            return [freq_cylinder,freq_matgrid,freq_matgrid_cylinder]
+
+        # when smoothing is off, all three tests should be identical to machine precision
+        no_smoothing = subpixel_test_matrix(rad,False)
+        self.assertEqual(no_smoothing[0], no_smoothing[1])
+        self.assertEqual(no_smoothing[1], no_smoothing[2])
+
+        # when we slightly perturb the radius, the results should be the same as before.
+        no_smoothing_perturbed = subpixel_test_matrix(rad+0.01/res,False)
+        self.assertEqual(no_smoothing, no_smoothing_perturbed)
+
+        # when smoothing is on, the simple material results should be different from the matgrid results
+        smoothing = subpixel_test_matrix(rad,True)
+        self.assertNotEqual(smoothing[0], smoothing[1])
+        self.assertAlmostEqual(smoothing[1], smoothing[2], 3)
+
+        # when we slighty perturb the radius, the results should all be different from before.
+        smoothing_perturbed = subpixel_test_matrix(rad+0.01/res,True)
+        self.assertNotEqual(smoothing, smoothing_perturbed)
+
         # "exact" frequency computed using MPB
         freq_ref = compute_resonant_mode_mpb()
-
-        res = [25, 50]
+        print(freq_ref)
+        res = [50, 100]
         freq_matgrid = []
         for r in res:
-            freq_matgrid.append(compute_resonant_mode_2d(r))
+            freq_matgrid.append(compute_resonant_mode_2d(r,cylinder=False))
             # verify that the frequency of the resonant mode is
             # approximately equal to the reference value
             self.assertAlmostEqual(freq_ref, freq_matgrid[-1], 2)
+        print("results:    ",freq_ref,freq_matgrid)
 
         # verify that the relative error is decreasing with increasing resolution
         # and is better than linear convergence because of subpixel smoothing
         self.assertLess(abs(freq_matgrid[1]-freq_ref)*(res[1]/res[0]),
                         abs(freq_matgrid[0]-freq_ref))
 
-        freq_matgrid_default_mat = compute_resonant_mode_2d(res[0], True)
+        # ensure that a material grid as a default material works
+        freq_matgrid_default_mat = compute_resonant_mode_2d(res[0], rad, True)
         self.assertAlmostEqual(freq_matgrid[0], freq_matgrid_default_mat)
 
     def test_symmetry(self):
diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index eed88d58c..9f08db4d1 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -362,8 +362,14 @@ cvector3 to_geom_object_coords_VJP(cvector3 v, const geometric_object *o) {
     /* case geometric_object::CYLINDER:
        NOT YET IMPLEMENTED */
     case geometric_object::BLOCK: {
-      vector3 v_real = cvector3_re(v);
-      vector3 v_imag = cvector3_im(v);
+      /* In order to leverage the underlying libctl infrastructure
+      *and* the dual library that makes computing derivatives so easy,
+      me perform a bit of a trick: we use the *complex* libctl vector,
+      storing the real and dual parts of the dual library and *manually*
+      propagate the dual through existing libraries as needed.
+      */
+      vector3 v_real = cvector3_re(v); // real part
+      vector3 v_imag = cvector3_im(v); // "dual" part
 
       vector3 size = o->subclass.block_data->size;
       if (size.x != 0.0) {v_real.x /= size.x; v_imag.x /= size.x;}
@@ -379,6 +385,9 @@ cvector3 to_geom_object_coords_VJP(cvector3 v, const geometric_object *o) {
 }
 
 cvector3 material_grid_grad(vector3 p, material_data *md, const geometric_object *o) {
+  /* computes the actual spatial gradient at point `p` 
+  for the specified material grid `md`. */
+  
   if (!is_material_grid(md)) {meep::abort("Invalid material grid detected.\n"); }
 
   cvector3 gradient = cvector_zero();
@@ -468,6 +477,14 @@ void map_lattice_coordinates(double &px, double &py, double &pz) {
 }
 
 cvector3 matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md) {
+  /* loops through all the material grids at a current point
+  and computes the final *spatial* gradient (w.r.t. x,y,z)
+  after all appropriate transformations (e.g. due to 
+  overlapping grids). Calls the helper function, `material_grid_grad`,
+  which is what actually computes the spatial gradient.
+  */
+
+  // check for proper overlapping grids
   if (md->material_grid_kinds == material_data::U_MIN ||
       md->material_grid_kinds == material_data::U_PROD)
     meep::abort("%s:%i:matgrid_grad does not support overlapping grids with U_MIN or U_PROD\n",__FILE__,__LINE__);
@@ -493,6 +510,7 @@ cvector3 matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md) {
     ++matgrid_val_count;
   }
 
+  // compensate for overlapping grids
   if (md->material_grid_kinds == material_data::U_MEAN)
     gradient = cvector3_scale(1.0/matgrid_val_count,gradient);
 
@@ -1250,12 +1268,11 @@ duals::duald get_material_grid_fill(meep::ndim dim, duals::duald d, double r, du
     return -1.0; // garbage fill
   } else {
     if (dim == meep::D1)
-      rel_fill = (r-d)/(2*r);
-    else if (dim == meep::D2 || dim == meep::Dcyl){
-      rel_fill = (1/(r*r*meep::pi)) * (r*r*acos(d/r)-d*sqrt(r*r-d*d));
-    }
+      return (r-d)/(2*r);
+    else if (dim == meep::D2 || dim == meep::Dcyl)
+      return (1/(r*r*meep::pi)) * (r*r*acos(d/r)-d*sqrt(r*r-d*d));
     else if (dim == meep::D3)
-      rel_fill = (((r-d)*(r-d))/(4*meep::pi*r*r*r))*(2*r+d);
+      return (((r-d)*(r-d))/(4*meep::pi*r*r*r))*(2*r+d);
   }
   
   return rel_fill;
@@ -1593,20 +1610,7 @@ void geom_epsilon::fallback_chi1inv_row(meep::component c, double chi1inv_row[3]
   material =
       (material_type)material_of_unshifted_point_in_tree_inobject(p, restricted_tree, &inobject);
   material_data *md = material;
-  meep::vec gradient(zero_vec(v.dim));
-  double uval = 0;
-  // TODO cleanup and remove
-  if (md->which_subclass == material_data::MATERIAL_GRID) {
-    geom_box_tree tp;
-    int oi;
-    tp = geom_tree_search(p, restricted_tree, &oi);
-    //gradient = matgrid_grad(p, tp, oi, md);
-    //uval = matgrid_val(p, tp, oi, md)+this->u_p;
-  }
-  else {
-    gradient = normal_vector(meep::type(c), v);
-  }
-
+  meep::vec gradient = normal_vector(meep::type(c), v);
   get_material_pt(material, v.center());
   material_epsmu(meep::type(c), material, &chi1p1, &chi1p1_inv);
   material_gc(material);
@@ -1634,73 +1638,47 @@ void geom_epsilon::fallback_chi1inv_row(meep::component c, double chi1inv_row[3]
   integer errflag;
   double meps, minveps;
 
-  if (md->which_subclass == material_data::MATERIAL_GRID) {
-    number xmin[1], xmax[1];
-    matgrid_volavg mgva;
-    mgva.dim = v.dim;
-    mgva.ugrad_abs = meep::abs(gradient);
-    mgva.uval = uval;
-    mgva.rad = v.diameter()/2;
-    mgva.beta = md->beta;
-    mgva.eta = md->eta;
-    mgva.eps1 = (md->medium_1.epsilon_diag.x+md->medium_1.epsilon_diag.y+md->medium_1.epsilon_diag.z)/3;
-    mgva.eps2 = (md->medium_2.epsilon_diag.x+md->medium_2.epsilon_diag.y+md->medium_2.epsilon_diag.z)/3;
-    xmin[0] = -v.diameter()/2;
-    xmax[0] = v.diameter()/2;
-#ifdef CTL_HAS_COMPLEX_INTEGRATION
-    cnumber ret = cadaptive_integration(matgrid_ceps_func, xmin, xmax, 1, (void *)&mgva, 0, tol, maxeval,
-                                        &esterr, &errflag);
-    meps = ret.re;
-    minveps = ret.im;
-#else
-    meps = adaptive_integration(matgrid_eps_func, xmin, xmax, 1, (void *)&mgva, 0, tol, maxeval, &esterr,
-                                &errflag);
-    minveps = adaptive_integration(matgrid_inveps_func, xmin, xmax, 1, (void *)&mgva, 0, tol, maxeval, &esterr,
-                                   &errflag);
-#endif
+  integer n;
+  number xmin[3], xmax[3];
+  vector3 gvmin, gvmax;
+  gvmin = vec_to_vector3(v.get_min_corner());
+  gvmax = vec_to_vector3(v.get_max_corner());
+  xmin[0] = gvmin.x;
+  xmax[0] = gvmax.x;
+  if (dim == meep::Dcyl) {
+    xmin[1] = gvmin.z;
+    xmin[2] = gvmin.y;
+    xmax[1] = gvmax.z;
+    xmax[2] = gvmax.y;
   }
   else {
-    integer n;
-    number xmin[3], xmax[3];
-    vector3 gvmin, gvmax;
-    gvmin = vec_to_vector3(v.get_min_corner());
-    gvmax = vec_to_vector3(v.get_max_corner());
-    xmin[0] = gvmin.x;
-    xmax[0] = gvmax.x;
-    if (dim == meep::Dcyl) {
-      xmin[1] = gvmin.z;
-      xmin[2] = gvmin.y;
-      xmax[1] = gvmax.z;
-      xmax[2] = gvmax.y;
-    }
-    else {
-      xmin[1] = gvmin.y;
-      xmin[2] = gvmin.z;
-      xmax[1] = gvmax.y;
-      xmax[2] = gvmax.z;
-    }
-    if (xmin[2] == xmax[2])
-      n = xmin[1] == xmax[1] ? 1 : 2;
-    else
-      n = 3;
-    double vol = 1;
-    for (int i = 0; i < n; ++i)
-      vol *= xmax[i] - xmin[i];
-    if (dim == meep::Dcyl) vol *= (xmin[0] + xmax[0]) * 0.5;
-    eps_ever_negative = 0;
-    func_ft = meep::type(c);
+    xmin[1] = gvmin.y;
+    xmin[2] = gvmin.z;
+    xmax[1] = gvmax.y;
+    xmax[2] = gvmax.z;
+  }
+  if (xmin[2] == xmax[2])
+    n = xmin[1] == xmax[1] ? 1 : 2;
+  else
+    n = 3;
+  double vol = 1;
+  for (int i = 0; i < n; ++i)
+    vol *= xmax[i] - xmin[i];
+  if (dim == meep::Dcyl) vol *= (xmin[0] + xmax[0]) * 0.5;
+  eps_ever_negative = 0;
+  func_ft = meep::type(c);
 #ifdef CTL_HAS_COMPLEX_INTEGRATION
-    cnumber ret = cadaptive_integration(ceps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval,
-                                        &esterr, &errflag);
-    meps = ret.re / vol;
-    minveps = ret.im / vol;
+  cnumber ret = cadaptive_integration(ceps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval,
+                                      &esterr, &errflag);
+  meps = ret.re / vol;
+  minveps = ret.im / vol;
 #else
-    meps = adaptive_integration(eps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval, &esterr,
-                                &errflag) / vol;
-    minveps = adaptive_integration(inveps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval, &esterr,
-                                   &errflag) / vol;
+  meps = adaptive_integration(eps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval, &esterr,
+                              &errflag) / vol;
+  minveps = adaptive_integration(inveps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval, &esterr,
+                                  &errflag) / vol;
 #endif
-  }
+
   if (eps_ever_negative) // averaging negative eps causes instability
     minveps = 1.0 / (meps = eps(v.center()));
   {

From fabd531b8d6cbb8a930787fad8560507663f2737 Mon Sep 17 00:00:00 2001
From: "Steven G. Johnson" <stevenj@alum.mit.edu>
Date: Fri, 29 Jul 2022 16:55:17 -0400
Subject: [PATCH 23/29] approximate material-grid smoothing

---
 libpympb/pympb.cpp                 |   7 +-
 python/adjoint/filters.py          |  20 +
 python/adjoint/utils.py            |   2 +-
 python/meep.i                      |   4 +-
 python/requirements.txt            |   1 +
 python/tests/test_material_grid.py | 446 +++++++-------
 python/typemap_utils.cpp           |  12 +-
 src/material_data.cpp              |   6 +-
 src/material_data.hpp              |   8 +-
 src/meepgeom.cpp                   | 912 ++++++++++++++++++-----------
 src/meepgeom.hpp                   |  69 ++-
 src/support/Makefile.am            |   2 +-
 12 files changed, 879 insertions(+), 610 deletions(-)

diff --git a/libpympb/pympb.cpp b/libpympb/pympb.cpp
index c6033cb99..5bee3941d 100644
--- a/libpympb/pympb.cpp
+++ b/libpympb/pympb.cpp
@@ -369,12 +369,9 @@ int mode_solver::mean_epsilon(symmetric_matrix *meps, symmetric_matrix *meps_inv
 
   bool o1_is_var = o1 && meep_geom::is_variable(o1->material);
   bool o2_is_var = o2 && meep_geom::is_variable(o2->material);
-  bool default_is_var_or_file =
-      meep_geom::is_variable(default_material) || meep_geom::is_file(default_material);
+  bool default_is_var = meep_geom::is_variable(default_material);
 
-  if (o1_is_var || o2_is_var ||
-      (default_is_var_or_file &&
-       (!o1 || !o2 || meep_geom::is_file(o1->material) || meep_geom::is_file(o2->material)))) {
+  if (o1_is_var || o2_is_var || (default_is_var && (!o1 || !o2))) {
     return 0; /* arbitrary material functions are non-analyzable */
   }
 
diff --git a/python/adjoint/filters.py b/python/adjoint/filters.py
index 262f37937..c095ad7d0 100644
--- a/python/adjoint/filters.py
+++ b/python/adjoint/filters.py
@@ -6,6 +6,10 @@
 from scipy import signal, special
 
 import meep as mp
+from scipy import special
+from scipy import signal
+import skfmm
+from scipy import interpolate
 
 
 def _proper_pad(x, n):
@@ -874,3 +878,19 @@ def gray_indicator(x):
     density-based topology optimization. Archive of Applied Mechanics, 86(1-2), 189-218.
     """
     return npa.mean(4 * x.flatten() * (1 - x.flatten())) * 100
+
+def make_sdf(data):
+    '''
+    Assume the input, data, is the desired output shape
+    (i.e. 1d, 2d, or 3d) and that it's values are between
+    0 and 1.
+    '''
+    # create signed distance function
+    sd = skfmm.distance(data- 0.5 , dx = 1)
+
+    # interpolate zero-levelset onto 0.5-levelset
+    x = [np.min(sd.flatten()), 0, np.max(sd.flatten())]
+    y = [0, 0.5, 1]
+    f = interpolate.interp1d(x, y, kind='linear')
+
+    return f(sd)
diff --git a/python/adjoint/utils.py b/python/adjoint/utils.py
index fb37d2b0b..9683348e1 100644
--- a/python/adjoint/utils.py
+++ b/python/adjoint/utils.py
@@ -20,7 +20,7 @@
 _Z_AXIS = 1
 
 # default finite difference step size when calculating Aᵤ
-FD_DEFAULT = 1e-3
+FD_DEFAULT = 1e-6
 
 
 class DesignRegion:
diff --git a/python/meep.i b/python/meep.i
index 66a4703cf..0cae141dd 100644
--- a/python/meep.i
+++ b/python/meep.i
@@ -1504,8 +1504,6 @@ void _get_gradient(PyObject *grad, double scalegrad,
 %ignore material_type_equal;
 %ignore is_variable;
 %ignore is_variable;
-%ignore is_file;
-%ignore is_file;
 %ignore is_medium;
 %ignore is_medium;
 %ignore is_metal;
@@ -1986,7 +1984,7 @@ meep_geom::geom_epsilon* _set_materials(meep::structure * s,
         geps = existing_geps;
     } else {
         geps = meep_geom::make_geom_epsilon(s, &gobj_list, center, _ensure_periodicity, _default_material,
-                                                extra_materials);
+                                                extra_materials, use_anisotropic_averaging);
     }
     if (set_materials) {
         meep_geom::set_materials_from_geom_epsilon(s, geps, use_anisotropic_averaging, tol,
diff --git a/python/requirements.txt b/python/requirements.txt
index 772aac158..c43d34869 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -7,3 +7,4 @@ numpy
 parameterized
 pytest
 scipy
+scikit-fmm
diff --git a/python/tests/test_material_grid.py b/python/tests/test_material_grid.py
index 284b5c4ed..d64b38348 100644
--- a/python/tests/test_material_grid.py
+++ b/python/tests/test_material_grid.py
@@ -1,5 +1,13 @@
-import meep as mp
+'''
+Checks that a material grid works with symmetries, and that the
+new subpixel smoothing feature is accurate.
+
+TODO:
+    Create a 3D test that works well with the new smoothing
+'''
 
+import meep as mp
+from meep import mpb
 try:
     import meep.adjoint as mpa
 except:
@@ -8,258 +16,204 @@
 import unittest
 
 import numpy as np
-from scipy.ndimage import gaussian_filter
+import unittest
 
+rad = 0.301943
+k_point = mp.Vector3(0.3892,0.1597,0)
+Si = mp.Medium(index=3.5)
+cell_size = mp.Vector3(1,1,0)
 
 def compute_transmittance(matgrid_symmetry=False):
-    resolution = 25
-
-    cell_size = mp.Vector3(6, 6, 0)
-
-    boundary_layers = [mp.PML(thickness=1.0)]
-
-    matgrid_size = mp.Vector3(2, 2, 0)
-    matgrid_resolution = 2 * resolution
-
-    Nx, Ny = int(matgrid_size.x * matgrid_resolution), int(
-        matgrid_size.y * matgrid_resolution
-    )
-
-    # ensure reproducible results
-    rng = np.random.RandomState(2069588)
-
-    w = rng.rand(Nx, Ny)
-    weights = w if matgrid_symmetry else 0.5 * (w + np.fliplr(w))
-
-    matgrid = mp.MaterialGrid(
-        mp.Vector3(Nx, Ny),
-        mp.air,
-        mp.Medium(index=3.5),
-        weights=weights,
-        do_averaging=False,
-        grid_type="U_MEAN",
-    )
-
-    geometry = [
-        mp.Block(
-            center=mp.Vector3(),
-            size=mp.Vector3(mp.inf, 1.0, mp.inf),
-            material=mp.Medium(index=3.5),
-        ),
-        mp.Block(
-            center=mp.Vector3(),
-            size=mp.Vector3(matgrid_size.x, matgrid_size.y, 0),
-            material=matgrid,
-        ),
-    ]
-
-    if matgrid_symmetry:
-        geometry.append(
-            mp.Block(
-                center=mp.Vector3(),
-                size=mp.Vector3(matgrid_size.x, matgrid_size.y, 0),
-                material=matgrid,
-                e2=mp.Vector3(y=-1),
-            )
-        )
-
-    eig_parity = mp.ODD_Y + mp.EVEN_Z
-
-    fcen = 0.65
-    df = 0.2 * fcen
-    sources = [
-        mp.EigenModeSource(
-            src=mp.GaussianSource(fcen, fwidth=df),
-            center=mp.Vector3(-2.0, 0),
-            size=mp.Vector3(0, 4.0),
-            eig_parity=eig_parity,
-        )
-    ]
-
-    sim = mp.Simulation(
-        resolution=resolution,
-        cell_size=cell_size,
-        boundary_layers=boundary_layers,
-        sources=sources,
-        geometry=geometry,
-    )
-
-    mode_mon = sim.add_flux(
-        fcen, 0, 1, mp.FluxRegion(center=mp.Vector3(2.0, 0), size=mp.Vector3(0, 4.0))
-    )
-
-    sim.run(until_after_sources=mp.stop_when_dft_decayed())
-
-    mode_coeff = sim.get_eigenmode_coefficients(mode_mon, [1], eig_parity).alpha[
-        0, :, 0
-    ][0]
-
-    tran = np.power(np.abs(mode_coeff), 2)
-    print(f'tran:, {"sym" if matgrid_symmetry else "nosym"}, {tran}')
-
-    return tran
-
-
-def compute_resonant_mode_2d(res, default_mat=False):
-    cell_size = mp.Vector3(1, 1, 0)
-
-    rad = 0.301943
-
-    fcen = 0.3
-    df = 0.2 * fcen
-    sources = [
-        mp.Source(
-            mp.GaussianSource(fcen, fwidth=df),
-            component=mp.Hz,
-            center=mp.Vector3(-0.1057, 0.2094, 0),
-        )
-    ]
-
-    k_point = mp.Vector3(0.3892, 0.1597, 0)
-
-    matgrid_size = mp.Vector3(1, 1, 0)
-    matgrid_resolution = 1200
-
-    # for a fixed resolution, compute the number of grid points
-    # necessary which are defined on the corners of the voxels
-    Nx, Ny = int(matgrid_size.x * matgrid_resolution), int(
-        matgrid_size.y * matgrid_resolution
-    )
-
-    x = np.linspace(-0.5 * matgrid_size.x, 0.5 * matgrid_size.x, Nx)
-    y = np.linspace(-0.5 * matgrid_size.y, 0.5 * matgrid_size.y, Ny)
-    xv, yv = np.meshgrid(x, y)
-    weights = np.sqrt(np.square(xv) + np.square(yv)) < rad
-    filtered_weights = gaussian_filter(weights, sigma=3.0, output=np.double)
-
-    matgrid = mp.MaterialGrid(
-        mp.Vector3(Nx, Ny),
-        mp.air,
-        mp.Medium(index=3.5),
-        weights=filtered_weights,
-        do_averaging=True,
-        beta=1000,
-        eta=0.5,
-    )
-
-    geometry = [
-        mp.Block(
-            center=mp.Vector3(),
-            size=mp.Vector3(matgrid_size.x, matgrid_size.y, 0),
-            material=matgrid,
-        )
-    ]
-
-    sim = mp.Simulation(
-        resolution=res,
-        cell_size=cell_size,
-        default_material=matgrid if default_mat else mp.Medium(),
-        geometry=[] if default_mat else geometry,
-        sources=sources,
-        k_point=k_point,
-    )
-
-    h = mp.Harminv(mp.Hz, mp.Vector3(0.3718, -0.2076), fcen, df)
-    sim.run(mp.after_sources(h), until_after_sources=200)
-
-    try:
-        for m in h.modes:
-            print(f"harminv:, {res}, {m.freq}, {m.Q}")
-        freq = h.modes[0].freq
-    except:
-        raise RuntimeError("No resonant modes found.")
-
-    return freq
-
-
-def compute_resonant_mode_3d(use_matgrid=True):
-    resolution = 25
-
-    wvl = 1.27
-    fcen = 1 / wvl
-    df = 0.02 * fcen
-
-    nSi = 3.45
-    Si = mp.Medium(index=nSi)
-    nSiO2 = 1.45
-    SiO2 = mp.Medium(index=nSiO2)
-
-    s = 1.0
-    cell_size = mp.Vector3(s, s, s)
-
-    rad = 0.34  # radius of sphere
-
-    if use_matgrid:
-        matgrid_resolution = 2 * resolution
-        N = int(s * matgrid_resolution)
-        coord = np.linspace(-0.5 * s, 0.5 * s, N)
-        xv, yv, zv = np.meshgrid(coord, coord, coord)
-
-        weights = np.sqrt(np.square(xv) + np.square(yv) + np.square(zv)) < rad
-        filtered_weights = gaussian_filter(
-            weights, sigma=4 / resolution, output=np.double
-        )
-
-        matgrid = mp.MaterialGrid(
-            mp.Vector3(N, N, N),
-            SiO2,
-            Si,
-            weights=filtered_weights,
-            do_averaging=True,
-            beta=1000,
-            eta=0.5,
-        )
-
-        geometry = [mp.Block(center=mp.Vector3(), size=cell_size, material=matgrid)]
-    else:
-        geometry = [mp.Sphere(center=mp.Vector3(), radius=rad, material=Si)]
-
-    sources = [
-        mp.Source(
-            src=mp.GaussianSource(fcen, fwidth=df),
-            size=mp.Vector3(),
-            center=mp.Vector3(0.13, 0.25, 0.06),
-            component=mp.Ez,
-        )
-    ]
-
-    k_point = mp.Vector3(0.23, -0.17, 0.35)
-
-    sim = mp.Simulation(
-        resolution=resolution,
-        cell_size=cell_size,
-        sources=sources,
-        default_material=SiO2,
-        k_point=k_point,
-        geometry=geometry,
-    )
-
-    h = mp.Harminv(mp.Ez, mp.Vector3(-0.2684, 0.1185, 0.0187), fcen, df)
-
-    sim.run(mp.after_sources(h), until_after_sources=200)
-
-    try:
-        for m in h.modes:
-            print(f"harminv:, {resolution}, {m.freq}, {m.Q}")
-        freq = h.modes[0].freq
-    except:
-        raise RuntimeError("No resonant modes found.")
-
-    return freq
-
+        resolution = 25
+
+        cell_size = mp.Vector3(6,6,0)
+
+        boundary_layers = [mp.PML(thickness=1.0)]
+
+        matgrid_size = mp.Vector3(2,2,0)
+        matgrid_resolution = 2*resolution
+
+        Nx, Ny = int(matgrid_size.x*matgrid_resolution), int(matgrid_size.y*matgrid_resolution)
+
+        # ensure reproducible results
+        rng = np.random.RandomState(2069588)
+
+        w = rng.rand(Nx,Ny)
+        # for subpixel smoothing, the underlying design grid must be smoothly varying
+        w = mpa.tanh_projection(rng.rand(Nx,Ny),1e5,0.5)
+        w = mpa.conic_filter(w,0.25,matgrid_size.x,matgrid_size.y,matgrid_resolution)
+        weights = 0.5 * (w + np.fliplr(w)) if not matgrid_symmetry else w
+
+        matgrid = mp.MaterialGrid(mp.Vector3(Nx,Ny),
+                                  mp.air,
+                                  mp.Medium(index=3.5),
+                                  weights=weights,
+                                  beta=np.inf,
+                                  grid_type='U_MEAN')
+
+        geometry = [mp.Block(center=mp.Vector3(),
+                             size=mp.Vector3(mp.inf,1.0,mp.inf),
+                             material=mp.Medium(index=3.5)),
+                    mp.Block(center=mp.Vector3(),
+                             size=mp.Vector3(matgrid_size.x,matgrid_size.y,0),
+                             material=matgrid)]
+
+        if matgrid_symmetry:
+                geometry.append(mp.Block(center=mp.Vector3(),
+                                         size=mp.Vector3(matgrid_size.x,matgrid_size.y,0),
+                                         material=matgrid,
+                                         e2=mp.Vector3(y=-1)))
+
+        eig_parity = mp.ODD_Y + mp.EVEN_Z
+
+        fcen = 0.65
+        df = 0.2*fcen
+        sources = [mp.EigenModeSource(src=mp.GaussianSource(fcen,fwidth=df),
+                                      center=mp.Vector3(-2.0,0),
+                                      size=mp.Vector3(0,4.0),
+                                      eig_parity=eig_parity)]
+
+        sim = mp.Simulation(resolution=resolution,
+                            cell_size=cell_size,
+                            boundary_layers=boundary_layers,
+                            sources=sources,
+                            geometry=geometry)
+
+        mode_mon = sim.add_flux(fcen,
+                                0,
+                                1,
+                                mp.FluxRegion(center=mp.Vector3(2.0,0),
+                                              size=mp.Vector3(0,4.0)))
+
+        sim.run(until_after_sources=mp.stop_when_dft_decayed())
+
+        mode_coeff = sim.get_eigenmode_coefficients(mode_mon,[1],eig_parity).alpha[0,:,0][0]
+
+        tran = np.power(np.abs(mode_coeff),2)
+        print('tran:, {}, {}'.format("sym" if matgrid_symmetry else "nosym", tran))
+
+        return tran
+
+def compute_resonant_mode_2d(res,radius=rad,default_mat=False,cylinder=False,cylinder_matgrid=True,do_averaging=True):
+        fcen = 0.3
+        df = 0.2*fcen
+        sources = [mp.Source(mp.GaussianSource(fcen,fwidth=df),
+                             component=mp.Hz,
+                             center=mp.Vector3(-0.1057,0.2094,0))]
+
+        matgrid_size = mp.Vector3(1,1,0)
+        matgrid_resolution = 1200
+
+        # for a fixed resolution, compute the number of grid points
+        # necessary which are defined on the corners of the voxels
+        Nx, Ny = int(matgrid_size.x*matgrid_resolution), int(matgrid_size.y*matgrid_resolution)
+
+        x = np.linspace(-0.5*matgrid_size.x,0.5*matgrid_size.x,Nx)
+        y = np.linspace(-0.5*matgrid_size.y,0.5*matgrid_size.y,Ny)
+        xv, yv = np.meshgrid(x,y)
+        weights = mpa.make_sdf(np.sqrt(np.square(xv) + np.square(yv)) < radius)
+        if cylinder:
+            weights = 0.0*weights+1.0          
+        
+        matgrid = mp.MaterialGrid(mp.Vector3(Nx,Ny),
+                                  mp.air,
+                                  Si,
+                                  weights=weights,
+                                  beta=np.inf,
+                                  eta=0.5)
+        
+        # Use a cylinder object, not a block object
+        if cylinder:
+            # within the cylinder, use a (uniform) material grid
+            if cylinder_matgrid:
+                geometry = [mp.Cylinder(center=mp.Vector3(),
+                             radius=radius,
+                             material=matgrid)]
+            # within the cylinder, just use a normal medium
+            else:
+                geometry = [mp.Cylinder(center=mp.Vector3(),
+                             radius=radius,
+                             material=Si)]
+        # use a block object
+        else:
+            geometry = [mp.Block(center=mp.Vector3(),
+                             size=mp.Vector3(matgrid_size.x,matgrid_size.y,0),
+                             material=matgrid)]
+
+        sim = mp.Simulation(resolution=res,
+                            eps_averaging=do_averaging,
+                            cell_size=cell_size,
+                            default_material=matgrid if default_mat else mp.Medium(),
+                            geometry=geometry if not default_mat else [],
+                            sources=sources,
+                            k_point=k_point)
+
+        h = mp.Harminv(mp.Hz, mp.Vector3(0.3718,-0.2076), fcen, df)
+        sim.run(mp.after_sources(h),
+                until_after_sources=300)
+
+        try:
+            for m in h.modes:
+                print("harminv:, {}, {}, {}".format(res,m.freq,m.Q))
+            freq = h.modes[0].freq
+        except:
+            raise RuntimeError("No resonant modes found.")
+
+        return freq
+
+def compute_resonant_mode_mpb(resolution=512):
+    geometry = [mp.Cylinder(rad, material=Si)]
+    geometry_lattice = mp.Lattice(cell_size)
+
+    ms = mpb.ModeSolver(num_bands=1,
+                        k_points=[mp.cartesian_to_reciprocal(k_point, geometry_lattice)],
+                        geometry=geometry,
+                        geometry_lattice=geometry_lattice,
+                        resolution=resolution)
+
+
+    ms.run_te()
+    return ms.freqs[0]
 
 class TestMaterialGrid(unittest.TestCase):
     def test_subpixel_smoothing(self):
-        # "exact" frequency computed using MaterialGrid at resolution = 300
-        freq_ref = 0.29826813873225283
-
-        res = [25, 50]
+        res = 25
+
+        def subpixel_test_matrix(radius,do_averaging):
+            freq_cylinder = compute_resonant_mode_2d(res, radius, default_mat=False, cylinder=True, cylinder_matgrid=False, do_averaging=do_averaging)
+            freq_matgrid = compute_resonant_mode_2d(res, radius, default_mat=False, cylinder=False, do_averaging=do_averaging)
+            freq_matgrid_cylinder = compute_resonant_mode_2d(res, radius, default_mat=False, cylinder=True, cylinder_matgrid=True, do_averaging=do_averaging)
+            return [freq_cylinder,freq_matgrid,freq_matgrid_cylinder]
+
+        # when smoothing is off, all three tests should be identical to machine precision
+        no_smoothing = subpixel_test_matrix(rad,False)
+        self.assertEqual(no_smoothing[0], no_smoothing[1])
+        self.assertEqual(no_smoothing[1], no_smoothing[2])
+
+        # when we slightly perturb the radius, the results should be the same as before.
+        no_smoothing_perturbed = subpixel_test_matrix(rad+0.01/res,False)
+        self.assertEqual(no_smoothing, no_smoothing_perturbed)
+
+        # when smoothing is on, the simple material results should be different from the matgrid results
+        smoothing = subpixel_test_matrix(rad,True)
+        self.assertNotEqual(smoothing[0], smoothing[1])
+        self.assertAlmostEqual(smoothing[1], smoothing[2], 3)
+
+        # when we slighty perturb the radius, the results should all be different from before.
+        smoothing_perturbed = subpixel_test_matrix(rad+0.01/res,True)
+        self.assertNotEqual(smoothing, smoothing_perturbed)
+
+        # "exact" frequency computed using MPB
+        freq_ref = compute_resonant_mode_mpb()
+        print(freq_ref)
+        res = [50, 100]
         freq_matgrid = []
         for r in res:
-            freq_matgrid.append(compute_resonant_mode_2d(r))
+            freq_matgrid.append(compute_resonant_mode_2d(r,cylinder=False))
             # verify that the frequency of the resonant mode is
             # approximately equal to the reference value
             self.assertAlmostEqual(freq_ref, freq_matgrid[-1], 2)
+        print("results:    ",freq_ref,freq_matgrid)
 
         # verify that the relative error is decreasing with increasing resolution
         # and is better than linear convergence because of subpixel smoothing
@@ -268,14 +222,10 @@ def test_subpixel_smoothing(self):
             abs(freq_matgrid[0] - freq_ref),
         )
 
-        freq_matgrid_default_mat = compute_resonant_mode_2d(res[0], True)
+        # ensure that a material grid as a default material works
+        freq_matgrid_default_mat = compute_resonant_mode_2d(res[0], rad, True)
         self.assertAlmostEqual(freq_matgrid[0], freq_matgrid_default_mat)
 
-    def test_matgrid_3d(self):
-        freq_matgrid = compute_resonant_mode_3d(True)
-        freq_geomobj = compute_resonant_mode_3d(False)
-        self.assertAlmostEqual(freq_matgrid, freq_geomobj, places=2)
-
     def test_symmetry(self):
         tran_nosym = compute_transmittance(False)
         tran_sym = compute_transmittance(True)
diff --git a/python/typemap_utils.cpp b/python/typemap_utils.cpp
index ab8c8a294..f18764b26 100644
--- a/python/typemap_utils.cpp
+++ b/python/typemap_utils.cpp
@@ -521,9 +521,15 @@ static int pymaterial_grid_to_material_grid(PyObject *po, material_data *md) {
   PyObject *po_dp = PyObject_GetAttrString(po, "weights");
   PyArrayObject *pao = (PyArrayObject *)po_dp;
   if (!PyArray_Check(pao)) { meep::abort("MaterialGrid weights failed to init."); }
-  if (!PyArray_ISCARRAY(pao)) { meep::abort("Numpy array weights must be C-style contiguous."); }
-  md->weights = new double[PyArray_SIZE(pao)];
-  memcpy(md->weights, (double *)PyArray_DATA(pao), PyArray_SIZE(pao) * sizeof(double));
+  if (!PyArray_ISCARRAY(pao)) {
+    meep::abort("Numpy array weights must be C-style contiguous.");
+  }
+  md->weights.clear();
+  for (size_t i=0;i<PyArray_SIZE(pao);i++){
+    md->weights.push_back(((double *)PyArray_DATA(pao))[i]);
+  }
+  //md->weights = new double[PyArray_SIZE(pao)];
+  //memcpy(md->weights, (double *)PyArray_DATA(pao), PyArray_SIZE(pao) * sizeof(double));
 
   // if needed, combine sus structs to main object
   PyObject *py_e_sus_m1 = PyObject_GetAttrString(po_medium1, "E_susceptibilities");
diff --git a/src/material_data.cpp b/src/material_data.cpp
index a24d87cee..7848aae83 100644
--- a/src/material_data.cpp
+++ b/src/material_data.cpp
@@ -66,10 +66,10 @@ void material_data::copy_from(const material_data &from) {
   }
 
   grid_size = from.grid_size;
-  if (from.weights) {
+  if (from.weights.size() != 0) {
     size_t N = from.grid_size.x * from.grid_size.y * from.grid_size.z;
-    weights = new double[N];
-    std::copy_n(from.weights, N, weights);
+    weights.clear();
+    weights.insert(weights.end(), from.weights.begin(), from.weights.end());
   }
 
   medium_1 = from.medium_1;
diff --git a/src/material_data.hpp b/src/material_data.hpp
index 4e1df20d5..ac6d57e24 100644
--- a/src/material_data.hpp
+++ b/src/material_data.hpp
@@ -25,6 +25,7 @@
 
 #include <meep.hpp>
 #include <ctlgeom.h>
+#include "support/dual.hpp"
 
 #include "meep.hpp"
 
@@ -122,6 +123,9 @@ struct material_data {
   // these fields used only if which_subclass==MATERIAL_USER
   user_material_func user_func;
   void *user_data;
+
+  // used for any variable material (USER, FILE, or GRID):
+  // indicates whether we should use the fallback subpixel averaging via quadrature
   bool do_averaging;
 
   // these fields used only if which_subclass==MATERIAL_FILE
@@ -130,7 +134,7 @@ struct material_data {
 
   // these fields used only if which_subclass==MATERIAL_GRID
   vector3 grid_size;
-  double *weights;
+  std::vector<duals::duald> weights;
   medium_struct medium_1;
   medium_struct medium_2;
   double beta;
@@ -184,7 +188,7 @@ material_type make_user_material(user_material_func user_func, void *user_data);
 material_type make_file_material(char *epsilon_input_file);
 material_type make_material_grid(bool do_averaging, double beta, double eta, double damping);
 void read_epsilon_file(const char *eps_input_file);
-void update_weights(material_type matgrid, double *weights);
+void update_weights(material_type matgrid, duals::duald *weights);
 
 }; // namespace meep_geom
 
diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 6b83d109b..660b15423 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -14,11 +14,15 @@
 %  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
 
+#include <iostream> 
 #include <algorithm>
 #include <vector>
 #include "meepgeom.hpp"
 #include "meep_internals.hpp"
 
+namespace dlit = duals::literals;
+using namespace duals::literals;
+
 namespace meep_geom {
 
 #define master_printf meep::master_printf
@@ -121,9 +125,6 @@ void material_free(material_type m) {
   // object so will assume that the caller keeps track of its lifetime.
   delete[] m->epsilon_data;
   m->epsilon_data = NULL;
-
-  delete[] m->weights;
-  m->weights = NULL;
   delete m;
 }
 
@@ -148,9 +149,9 @@ bool material_type_equal(const material_type m1, const material_type m2) {
 /* rotate A by a unitary (real) rotation matrix R:
       RAR = transpose(R) * A * R
 */
-void sym_matrix_rotate(symm_matrix *RAR, const symm_matrix *A_, const double R[3][3]) {
+void sym_matrix_rotate(symm_matrix *RAR, const symm_matrix *A_, const duals::duald R[3][3]) {
   int i, j;
-  double A[3][3], AR[3][3];
+  duals::duald A[3][3], AR[3][3];
   A[0][0] = A_->m00;
   A[1][1] = A_->m11;
   A[2][2] = A_->m22;
@@ -173,8 +174,8 @@ void sym_matrix_rotate(symm_matrix *RAR, const symm_matrix *A_, const double R[3
 
 /* Set Vinv = inverse of V, where both V and Vinv are real-symmetric matrices.*/
 void sym_matrix_invert(symm_matrix *Vinv, const symm_matrix *V) {
-  double m00 = V->m00, m11 = V->m11, m22 = V->m22;
-  double m01 = V->m01, m02 = V->m02, m12 = V->m12;
+  duals::duald m00 = V->m00, m11 = V->m11, m22 = V->m22;
+  duals::duald m01 = V->m01, m02 = V->m02, m12 = V->m12;
 
   if (m01 == 0.0 && m02 == 0.0 && m12 == 0.0) {
     /* optimize common case of a diagonal matrix: */
@@ -184,13 +185,13 @@ void sym_matrix_invert(symm_matrix *Vinv, const symm_matrix *V) {
     Vinv->m01 = Vinv->m02 = Vinv->m12 = 0.0;
   }
   else {
-    double detinv;
+    duals::duald detinv;
 
     /* compute the determinant: */
     detinv = m00 * m11 * m22 - m02 * m11 * m02 + 2.0 * m01 * m12 * m02 - m01 * m01 * m22 -
              m12 * m12 * m00;
 
-    if (detinv == 0.0) meep::abort("singular 3x3 matrix");
+    if (detinv.rpart() == 0.0) meep::abort("singular 3x3 matrix");
 
     detinv = 1.0 / detinv;
 
@@ -206,18 +207,19 @@ void sym_matrix_invert(symm_matrix *Vinv, const symm_matrix *V) {
 
 /* Returns whether or not V is positive-definite. */
 int sym_matrix_positive_definite(symm_matrix *V) {
-  double det2, det3;
-  double m00 = V->m00, m11 = V->m11, m22 = V->m22;
+  duals::duald det2, det3;
+  duals::duald m00 = V->m00, m11 = V->m11, m22 = V->m22;
 
 #if defined(WITH_HERMITIAN_EPSILON)
-  scalar_complex m01 = V->m01, m02 = V->m02, m12 = V->m12;
+  // FIXME
+  scalar_complex m01 = V->m01.rpart(), m02 = V->m02.rpart(), m12 = V->m12.rpart();
 
   det2 = m00 * m11 - CSCALAR_NORMSQR(m01);
   det3 = det2 * m22 - m11 * CSCALAR_NORMSQR(m02) - CSCALAR_NORMSQR(m12) * m00 +
          2.0 * ((m01.re * m12.re - m01.im * m12.im) * m02.re +
                 (m01.re * m12.im + m01.im * m12.re) * m02.im);
 #else  /* real matrix */
-  double m01 = V->m01, m02 = V->m02, m12 = V->m12;
+  duals::duald m01 = V->m01, m02 = V->m02, m12 = V->m12;
 
   det2 = m00 * m11 - m01 * m01;
   det3 = det2 * m22 - m02 * m11 * m02 + 2.0 * m01 * m12 * m02 - m12 * m12 * m00;
@@ -285,15 +287,14 @@ bool is_material_grid(material_type mt) {
 }
 bool is_material_grid(void *md) { return is_material_grid((material_type)md); }
 
+// return whether mt is spatially varying
 bool is_variable(material_type mt) {
-  return (mt->which_subclass == material_data::MATERIAL_USER) ||
-         (mt->which_subclass == material_data::MATERIAL_GRID);
+    return (mt && ((mt->which_subclass == material_data::MATERIAL_USER) ||
+                 (mt->which_subclass == material_data::MATERIAL_GRID) ||
+                 (mt->which_subclass == material_data::MATERIAL_FILE)));
 }
 bool is_variable(void *md) { return is_variable((material_type)md); }
 
-bool is_file(material_type md) { return (md->which_subclass == material_data::MATERIAL_FILE); }
-bool is_file(void *md) { return is_file((material_type)md); }
-
 bool is_medium(material_type md, medium_struct **m) {
   if (md->which_subclass == material_data::MEDIUM) {
     *m = &(md->medium);
@@ -304,24 +305,29 @@ bool is_medium(material_type md, medium_struct **m) {
 
 bool is_medium(void *md, medium_struct **m) { return is_medium((material_type)md, m); }
 
+// note: is_metal assumes that eval_material_pt has already been called for variable materials
 bool is_metal(meep::field_type ft, const material_type *material) {
   material_data *md = *material;
   if (ft == meep::E_stuff) switch (md->which_subclass) {
       case material_data::MEDIUM:
+      case material_data::MATERIAL_USER:
+      case material_data::MATERIAL_FILE:
       case material_data::MATERIAL_GRID:
         return (md->medium.epsilon_diag.x < 0 || md->medium.epsilon_diag.y < 0 ||
                 md->medium.epsilon_diag.z < 0);
       case material_data::PERFECT_METAL: return true;
-      default: meep::abort("unknown material type"); return false;
+      default: meep::abort("unknown material type in is_metal"); return false;
     }
   else
     switch (md->which_subclass) {
       case material_data::MEDIUM:
+      case material_data::MATERIAL_USER:
+      case material_data::MATERIAL_FILE:
       case material_data::MATERIAL_GRID:
         return (md->medium.mu_diag.x < 0 || md->medium.mu_diag.y < 0 || md->medium.mu_diag.z < 0);
       case material_data::PERFECT_METAL:
         return false; // is an electric conductor, but not a magnetic conductor
-      default: meep::abort("unknown material type"); return false;
+      default: meep::abort("unknown material type in is_metal"); return false;
     }
 }
 
@@ -337,37 +343,50 @@ bool has_offdiag(const medium_struct *material) {
 
 // computes the vector-Jacobian product of the gradient of the matgrid_val function v
 // with the Jacobian of the to_geom_box_coords function for geometric_object o
-vector3 to_geom_object_coords_VJP(vector3 v, const geometric_object *o) {
+cvector3 to_geom_object_coords_VJP(cvector3 v, const geometric_object *o) {
   if (!o) { meep::abort("must pass a geometric_object to to_geom_object_coords_VJP.\n"); }
 
   switch (o->which_subclass) {
     default: {
-      vector3 po = {0, 0, 0};
+      cvector3 po = cvector_zero();
       return po;
     }
     case geometric_object::SPHERE: {
       number radius = o->subclass.sphere_data->radius;
-      return vector3_scale(0.5 / radius, v);
+      return cvector3_scale(0.5 / radius, v);
     }
     /* case geometric_object::CYLINDER:
        NOT YET IMPLEMENTED */
     case geometric_object::BLOCK: {
+      /* In order to leverage the underlying libctl infrastructure
+      *and* the dual library that makes computing derivatives so easy,
+      me perform a bit of a trick: we use the *complex* libctl vector,
+      storing the real and dual parts of the dual library and *manually*
+      propagate the dual through existing libraries as needed.
+      */
+      vector3 v_real = cvector3_re(v); // real part
+      vector3 v_imag = cvector3_im(v); // "dual" part
+
       vector3 size = o->subclass.block_data->size;
-      if (size.x != 0.0) v.x /= size.x;
-      if (size.y != 0.0) v.y /= size.y;
-      if (size.z != 0.0) v.z /= size.z;
-      return matrix3x3_transpose_vector3_mult(o->subclass.block_data->projection_matrix, v);
+      if (size.x != 0.0) {v_real.x /= size.x; v_imag.x /= size.x;}
+      if (size.y != 0.0) {v_real.y /= size.y; v_imag.y /= size.y;}
+      if (size.z != 0.0) {v_real.z /= size.z; v_imag.z /= size.z;}
+      v_real =  matrix3x3_transpose_vector3_mult(o->subclass.block_data->projection_matrix, v_real);
+      v_imag =  matrix3x3_transpose_vector3_mult(o->subclass.block_data->projection_matrix, v_imag);
+      return make_cvector3(v_real,v_imag);
     }
       /* case geometric_object::PRISM:
          NOT YET IMPLEMENTED */
   }
 }
 
-meep::vec material_grid_grad(vector3 p, material_data *md, const geometric_object *o) {
-  if (!is_material_grid(md)) { meep::abort("Invalid material grid detected.\n"); }
+cvector3 material_grid_grad(vector3 p, material_data *md, const geometric_object *o) {
+  /* computes the actual spatial gradient at point `p` 
+  for the specified material grid `md`. */
+  
+  if (!is_material_grid(md)) {meep::abort("Invalid material grid detected.\n"); }
 
-  meep::vec gradient(zero_vec(dim));
-  double *data = md->weights;
+  cvector3 gradient = cvector_zero();
   int nx = md->grid_size.x;
   int ny = md->grid_size.y;
   int nz = md->grid_size.z;
@@ -397,25 +416,23 @@ meep::vec material_grid_grad(vector3 p, material_data *md, const geometric_objec
 
   /* define a macro to give us data(x,y,z) on the grid,
      in row-major order: */
-#define D(x, y, z) (data[(((x)*ny + (y)) * nz + (z)) * stride])
-
-  double du_dx =
-      (signflip_dx ? -1.0 : 1.0) *
-      (((-D(x1, y1, z1) + D(x2, y1, z1)) * (1.0 - dy) + (-D(x1, y2, z1) + D(x2, y2, z1)) * dy) *
-           (1.0 - dz) +
-       ((-D(x1, y1, z2) + D(x2, y1, z2)) * (1.0 - dy) + (-D(x1, y2, z2) + D(x2, y2, z2)) * dy) *
-           dz);
-  double du_dy = (signflip_dy ? -1.0 : 1.0) * ((-(D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) +
-                                                (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx)) *
-                                                   (1.0 - dz) +
-                                               (-(D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) +
-                                                (D(x1, y2, z2) * (1.0 - dx) + D(x2, y2, z2) * dx)) *
-                                                   dz);
-  double du_dz = (signflip_dz ? -1.0 : 1.0) *
-                 (-((D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) * (1.0 - dy) +
-                    (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx) * dy) +
-                  ((D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) * (1.0 - dy) +
-                   (D(x1, y2, z2) * (1.0 - dx) + D(x2, y2, z2) * dx) * dy));
+#define D(x, y, z) ((md->weights)[(((x)*ny + (y)) * nz + (z)) * stride])
+
+  duals::duald du_dx = (signflip_dx ? -1.0 : 1.0) *
+    (((-D(x1, y1, z1) + D(x2, y1, z1)) * (1.0 - dy) +
+      (-D(x1, y2, z1) + D(x2, y2, z1)) * dy) * (1.0 - dz) +
+     ((-D(x1, y1, z2) + D(x2, y1, z2)) * (1.0 - dy) +
+      (-D(x1, y2, z2) + D(x2, y2, z2)) * dy) * dz);
+  duals::duald du_dy = (signflip_dy ? -1.0 : 1.0) *
+    ((-(D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) +
+      (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx)) * (1.0 - dz) +
+     (-(D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) +
+      (D(x1, y2, z2) * (1.0 - dx) + D(x2, y2, z2) * dx)) * dz);
+  duals::duald du_dz = (signflip_dz ? -1.0 : 1.0) *
+    (-((D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) * (1.0 - dy) +
+       (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx) * dy) +
+     ((D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) * (1.0 - dy) +
+      (D(x1, y2, z2) * (1.0 - dx) + D(x2, y2, z2) * dx) * dy));
 
 #undef D
 
@@ -424,25 +441,23 @@ meep::vec material_grid_grad(vector3 p, material_data *md, const geometric_objec
   // respect to r2 where g(r2) is the to_geom_object_coords function (in libctl/utils/geom.c).
   // computing this quantity involves using the chain rule and thus the vector-Jacobian product
   // ∇u J where J is the Jacobian matrix of g.
-  vector3 grad_u;
-  grad_u.x = du_dx * nx;
-  grad_u.y = du_dy * ny;
-  grad_u.z = du_dz * nz;
+  vector3 g_real = make_vector3(nx*du_dx.rpart(),ny*du_dy.rpart(),nz*du_dz.rpart());
+  vector3 g_imag = make_vector3(nx*du_dx.dpart(),ny*du_dy.dpart(),nz*du_dz.dpart());
+  cvector3 grad_u = make_cvector3(g_real,g_imag);
+
   if (o != NULL) {
-    vector3 grad_u_J = to_geom_object_coords_VJP(grad_u, o);
-    gradient.set_direction(meep::X, grad_u_J.x);
-    gradient.set_direction(meep::Y, grad_u_J.y);
-    gradient.set_direction(meep::Z, grad_u_J.z);
-  }
-  else {
-    gradient.set_direction(meep::X,
-                           geometry_lattice.size.x == 0 ? 0 : grad_u.x / geometry_lattice.size.x);
-    gradient.set_direction(meep::Y,
-                           geometry_lattice.size.y == 0 ? 0 : grad_u.y / geometry_lattice.size.y);
-    gradient.set_direction(meep::Z,
-                           geometry_lattice.size.z == 0 ? 0 : grad_u.z / geometry_lattice.size.z);
+    gradient = to_geom_object_coords_VJP(grad_u, o);
+  } else {
+    g_real = make_vector3(
+      geometry_lattice.size.x == 0 ? 0 : nx*du_dx.rpart()/ geometry_lattice.size.x,
+      geometry_lattice.size.y == 0 ? 0 : ny*du_dy.rpart()/ geometry_lattice.size.y,
+      geometry_lattice.size.z == 0 ? 0 : nz*du_dz.rpart()/ geometry_lattice.size.z);
+    g_imag = make_vector3(
+      geometry_lattice.size.x == 0 ? 0 : nx*du_dx.dpart()/ geometry_lattice.size.x,
+      geometry_lattice.size.y == 0 ? 0 : ny*du_dy.dpart()/ geometry_lattice.size.y,
+      geometry_lattice.size.z == 0 ? 0 : nz*du_dz.dpart()/ geometry_lattice.size.z);
+    gradient = make_cvector3(g_real,g_imag);
   }
-
   return gradient;
 }
 
@@ -452,21 +467,29 @@ void map_lattice_coordinates(double &px, double &py, double &pz) {
   pz = geometry_lattice.size.z == 0 ? 0 : 0.5 + (pz - geometry_center.z) / geometry_lattice.size.z;
 }
 
-meep::vec matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md) {
+cvector3 matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md) {
+  /* loops through all the material grids at a current point
+  and computes the final *spatial* gradient (w.r.t. x,y,z)
+  after all appropriate transformations (e.g. due to 
+  overlapping grids). Calls the helper function, `material_grid_grad`,
+  which is what actually computes the spatial gradient.
+  */
+
+  // check for proper overlapping grids
   if (md->material_grid_kinds == material_data::U_MIN ||
       md->material_grid_kinds == material_data::U_PROD)
     meep::abort("%s:%i:matgrid_grad does not support overlapping grids with U_MIN or U_PROD\n",
                 __FILE__, __LINE__);
 
-  meep::vec gradient(zero_vec(dim));
+  cvector3 gradient = cvector_zero();
   int matgrid_val_count = 0;
 
   // iterate through object tree at current point
   if (tp) {
     do {
-      gradient +=
-          material_grid_grad(to_geom_box_coords(p, &tp->objects[oi]),
-                             (material_data *)tp->objects[oi].o->material, tp->objects[oi].o);
+      gradient = cvector_add(gradient,material_grid_grad(to_geom_box_coords(p, &tp->objects[oi]),
+                                     (material_data *)tp->objects[oi].o->material,
+                                     tp->objects[oi].o));
       if (md->material_grid_kinds == material_data::U_DEFAULT) break;
       ++matgrid_val_count;
       tp = geom_tree_search_next(p, tp, &oi);
@@ -480,29 +503,54 @@ meep::vec matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md) {
     ++matgrid_val_count;
   }
 
+  // compensate for overlapping grids
   if (md->material_grid_kinds == material_data::U_MEAN)
-    gradient = gradient * 1.0 / matgrid_val_count;
+    gradient = cvector3_scale(1.0/matgrid_val_count,gradient);
 
   return gradient;
 }
+duals::duald dual_linear_interpolate(double rx, double ry, double rz, std::vector<duals::duald> &data,
+                          int nx, int ny, int nz, int stride) {
+
+  int x1, y1, z1, x2, y2, z2;
+  double dx, dy, dz;
 
-double material_grid_val(vector3 p, material_data *md) {
+  meep::map_coordinates(rx, ry, rz, nx, ny, nz,
+                  x1, y1, z1, x2, y2, z2,
+                  dx, dy, dz);
+
+  /* define a macro to give us data(x,y,z) on the grid,
+     in row-major order (the order used by HDF5): */
+#define D(x, y, z) (data[(((x)*ny + (y)) * nz + (z)) * stride])
+
+  return (((D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) * (1.0 - dy) +
+           (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx) * dy) *
+              (1.0 - dz) +
+          ((D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) * (1.0 - dy) +
+           (D(x1, y2, z2) * (1.0 - dx) + D(x2, y2, z2) * dx) * dy) *
+              dz);
+
+#undef D
+}
+duals::duald material_grid_val(vector3 p, material_data *md) {
   // given the relative location, p, interpolate the material grid point.
 
-  if (!is_material_grid(md)) { meep::abort("Invalid material grid detected.\n"); }
-  return meep::linear_interpolate(p.x, p.y, p.z, md->weights, md->grid_size.x, md->grid_size.y,
-                                  md->grid_size.z, 1);
+  if (!is_material_grid(md)) { abort(); }
+  return dual_linear_interpolate(p.x, p.y, p.z, md->weights, md->grid_size.x,
+                                  md->grid_size.y, md->grid_size.z, 1);
+
 }
 
-static double tanh_projection(double u, double beta, double eta) {
+static duals::duald tanh_projection(duals::duald u, double beta, double eta) {
   if (beta == 0) return u;
   if (u == eta) return 0.5; // avoid NaN when beta is Inf
-  double tanh_beta_eta = tanh(beta * eta);
-  return (tanh_beta_eta + tanh(beta * (u - eta))) / (tanh_beta_eta + tanh(beta * (1 - eta)));
+  duals::duald tanh_beta_eta = tanh(beta*eta);
+  return (tanh_beta_eta + tanh(beta*(u-eta))) /
+    (tanh_beta_eta + tanh(beta*(1-eta)));
 }
 
-double matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md) {
-  double uprod = 1.0, umin = 1.0, usum = 0.0, udefault = 0.0, u;
+duals::duald matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md) {
+  duals::duald uprod = 1.0, umin = 1.0, usum = 0.0, udefault = 0.0, u;
   int matgrid_val_count = 0;
 
   // iterate through object tree at current point
@@ -569,7 +617,7 @@ static void interp_tensors(vector3 diag_in_1, vector3 offdiag_in_1, vector3 diag
 void epsilon_material_grid(material_data *md, double u) {
   // NOTE: assume p lies on normalized grid within (0,1)
 
-  if (!(md->weights)) meep::abort("material params were not initialized!");
+  if (md->weights.size()==0) meep::abort("material params were not initialized!");
 
   medium_struct *mm = &(md->medium);
   medium_struct *m1 = &(md->medium_1);
@@ -807,7 +855,7 @@ static void material_epsmu(meep::field_type ft, material_type material, symm_mat
         epsmu_inv->m01 = epsmu_inv->m02 = epsmu_inv->m12 = 0.0;
         break;
 
-      default: meep::abort("unknown material type");
+      default: meep::abort("unknown material type in epsmu");
     }
   else
     switch (md->which_subclass) {
@@ -835,7 +883,7 @@ static void material_epsmu(meep::field_type ft, material_type material, symm_mat
         epsmu_inv->m01 = epsmu_inv->m02 = epsmu_inv->m12 = 0.0;
         break;
 
-      default: meep::abort("unknown material type");
+      default: meep::abort("unknown material type in epsmu");
     }
 }
 
@@ -847,26 +895,31 @@ void geom_epsilon::get_material_pt(material_type &material, const meep::vec &r)
   boolean inobject;
   material =
       (material_type)material_of_unshifted_point_in_tree_inobject(p, restricted_tree, &inobject);
-  material_data *md = material;
+  eval_material_pt(material, p);
+}
 
-  switch (md->which_subclass) {
+// evaluate the material at the given point p if necessary — this is needed if
+// the material is variable (a grid, function, or file); otherwise it is a no-op.
+void geom_epsilon::eval_material_pt(material_type &material, vector3 p) {
+  switch (material->which_subclass) {
     // material grid: interpolate onto user specified material grid to get properties at r
-    case material_data::MATERIAL_GRID:
-      double u;
+    case material_data::MATERIAL_GRID: {
+      duals::duald u;
       int oi;
       geom_box_tree tp;
 
       tp = geom_tree_search(p, restricted_tree, &oi);
       // interpolate and project onto material grid
-      u = tanh_projection(matgrid_val(p, tp, oi, md) + this->u_p, md->beta, md->eta);
+      u = tanh_projection(matgrid_val(p, tp, oi, material), material->beta, material->eta);
       // interpolate material from material grid point
-      epsilon_material_grid(md, u);
+      epsilon_material_grid(material, u.rpart());
 
       return;
+    }
     // material read from file: interpolate to get properties at r
     case material_data::MATERIAL_FILE:
-      if (md->epsilon_data)
-        epsilon_file_material(md, p);
+      if (material->epsilon_data)
+        epsilon_file_material(material, p);
       else
         material = (material_type)default_material;
       return;
@@ -877,16 +930,16 @@ void geom_epsilon::get_material_pt(material_type &material, const meep::vec &r)
     // the user's function only needs to fill in whatever is
     // different from vacuum.
     case material_data::MATERIAL_USER:
-      md->medium = medium_struct();
-      md->user_func(p, md->user_data, &(md->medium));
-      md->medium.check_offdiag_im_zero_or_abort();
+      material->medium = medium_struct();
+      material->user_func(p, material->user_data, &(material->medium));
+      material->medium.check_offdiag_im_zero_or_abort();
       return;
 
     // position-independent material or metal: there is nothing to do
     case material_data::MEDIUM:
     case material_data::PERFECT_METAL: return;
 
-    default: meep::abort("unknown material type");
+    default: meep::abort("unknown material type in eval_material_pt");
   };
 }
 
@@ -907,20 +960,21 @@ double geom_epsilon::chi1p1(meep::field_type ft, const meep::vec &r) {
   material_epsmu(ft, material, &chi1p1, &chi1p1_inv);
   material_gc(material);
 
-  return (chi1p1.m00 + chi1p1.m11 + chi1p1.m22) / 3;
+  return (chi1p1.m00.rpart() + chi1p1.m11.rpart() + chi1p1.m22.rpart()) / 3;
 }
 
 /* Find frontmost object in v, along with the constant material behind it.
-   Returns false if material behind the object is not constant.
+   Returns false if more than two objects & materials intersect the pixel.
 
    Requires moderately horrifying logic to figure things out properly,
    stolen from MPB. */
 static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree, vector3 &pcenter,
                              const geometric_object **o_front, vector3 &shiftby_front,
-                             material_type &mat_front, material_type &mat_behind) {
+                             material_type &mat_front, material_type &mat_behind,
+                             vector3 &p_front, vector3 &p_behind) {
   vector3 p;
   const geometric_object *o1 = 0, *o2 = 0;
-  vector3 shiftby1 = {0, 0, 0}, shiftby2 = {0, 0, 0};
+  vector3 shiftby1 = {0, 0, 0}, shiftby2 = {0, 0, 0}, p1 = {0,0,0}, p2 = {0,0,0};
   geom_box pixel;
   material_type mat1 = vacuum, mat2 = vacuum;
   int id1 = -1, id2 = -1;
@@ -982,6 +1036,7 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
       shiftby1 = shiftby;
       id1 = id;
       mat1 = mat;
+      p1 = q;
     }
     else if (id2 == -1 ||
              ((id >= id1 && id >= id2) && (id1 == id2 || material_type_equal(mat1, mat2)))) {
@@ -989,10 +1044,12 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
       shiftby2 = shiftby;
       id2 = id;
       mat2 = mat;
+      p2 = q;
     }
     else if (!(id1 < id2 && (id1 == id || material_type_equal(mat1, mat))) &&
-             !(id2 < id1 && (id2 == id || material_type_equal(mat2, mat))))
+             !(id2 < id1 && (id2 == id || material_type_equal(mat2, mat)))){
       return false;
+      }
   }
 
   // CHECK(id1 > -1, "bug in object_of_point_in_tree?");
@@ -1000,130 +1057,126 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
     id2 = id1;
     o2 = o1;
     mat2 = mat1;
+    p2 = p1;
     shiftby2 = shiftby1;
   }
 
-  if ((o1 && is_variable(o1->material)) || (o2 && is_variable(o2->material)) ||
-      ((is_variable(default_material) || is_file(default_material)) &&
-       (!o1 || is_file(o1->material) || !o2 || is_file(o2->material))))
-    return false;
-
   if (id1 >= id2) {
     *o_front = o1;
     shiftby_front = shiftby1;
     mat_front = mat1;
-    if (id1 == id2)
+    p_front = p1;
+    if (id1 == id2) {
       mat_behind = mat1;
-    else
+      p_behind = p1;
+    }
+    else {
       mat_behind = mat2;
+      p_behind = p2;
+    }
   }
   if (id2 > id1) {
     *o_front = o2;
     shiftby_front = shiftby2;
     mat_front = mat2;
+    p_front = p2;
     mat_behind = mat1;
+    p_behind = p1;
   }
   return true;
 }
 
 void geom_epsilon::eff_chi1inv_row(meep::component c, double chi1inv_row[3], const meep::volume &v,
                                    double tol, int maxeval) {
+  /* for speed reasons, we need to "wrap" the 
+  actual eff_chi1inv_row_grad function below, since
+  the original eff_chi1inv_row is a virtual function
+  and doesn't play well with default arguments */
+  eff_chi1inv_row_grad(c,chi1inv_row,v,tol,maxeval,false);
+}
+void geom_epsilon::eff_chi1inv_row_grad(meep::component c, double chi1inv_row[3], const meep::volume &v,
+                                   double tol, int maxeval, bool needs_grad) {
   symm_matrix meps_inv;
   bool fallback;
   eff_chi1inv_matrix(c, &meps_inv, v, tol, maxeval, fallback);
 
   if (fallback) { fallback_chi1inv_row(c, chi1inv_row, v, tol, maxeval); }
   else {
-    switch (component_direction(c)) {
+    /* for gradient calculations, we need to return
+    the dual part of the calcuation, which corresponds 
+    to the first derivative w.r.t. u_i */
+    if (needs_grad) {
+      switch (component_direction(c)) {
       case meep::X:
       case meep::R:
-        chi1inv_row[0] = meps_inv.m00;
-        chi1inv_row[1] = meps_inv.m01;
-        chi1inv_row[2] = meps_inv.m02;
+          chi1inv_row[0] = meps_inv.m00.dpart();
+          chi1inv_row[1] = meps_inv.m01.dpart();
+          chi1inv_row[2] = meps_inv.m02.dpart();
         break;
       case meep::Y:
       case meep::P:
-        chi1inv_row[0] = meps_inv.m01;
-        chi1inv_row[1] = meps_inv.m11;
-        chi1inv_row[2] = meps_inv.m12;
+        chi1inv_row[0] = meps_inv.m01.dpart();
+        chi1inv_row[1] = meps_inv.m11.dpart();
+        chi1inv_row[2] = meps_inv.m12.dpart();
         break;
       case meep::Z:
-        chi1inv_row[0] = meps_inv.m02;
-        chi1inv_row[1] = meps_inv.m12;
-        chi1inv_row[2] = meps_inv.m22;
+        chi1inv_row[0] = meps_inv.m02.dpart();
+        chi1inv_row[1] = meps_inv.m12.dpart();
+        chi1inv_row[2] = meps_inv.m22.dpart();
         break;
       case meep::NO_DIRECTION: chi1inv_row[0] = chi1inv_row[1] = chi1inv_row[2] = 0; break;
+      }
+    } else { // no gradient needed; standard epsilon evaluations
+      switch (component_direction(c)) {
+      case meep::X:
+      case meep::R:
+          chi1inv_row[0] = meps_inv.m00.rpart();
+          chi1inv_row[1] = meps_inv.m01.rpart();
+          chi1inv_row[2] = meps_inv.m02.rpart();
+        break;
+      case meep::Y:
+      case meep::P:
+        chi1inv_row[0] = meps_inv.m01.rpart();
+        chi1inv_row[1] = meps_inv.m11.rpart();
+        chi1inv_row[2] = meps_inv.m12.rpart();
+        break;
+      case meep::Z:
+        chi1inv_row[0] = meps_inv.m02.rpart();
+        chi1inv_row[1] = meps_inv.m12.rpart();
+        chi1inv_row[2] = meps_inv.m22.rpart();
+        break;
+      case meep::NO_DIRECTION: chi1inv_row[0] = chi1inv_row[1] = chi1inv_row[2] = 0; break;
+      }
     }
   }
 }
 
-void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_matrix,
-                                      const meep::volume &v, double tol, int maxeval,
-                                      bool &fallback) {
-  const geometric_object *o;
-  material_type mat, mat_behind;
-  symm_matrix meps;
-  vector3 p, shiftby, normal;
-  fallback = false;
-
-  if (maxeval == 0) {
-  noavg:
-    get_material_pt(mat, v.center());
-  trivial:
-    material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix);
-    material_gc(mat);
-    return;
-  }
-
-  if (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind)) {
-    get_material_pt(mat, v.center());
-    if (mat &&
-        (mat->which_subclass == material_data::MATERIAL_USER ||
-         mat->which_subclass == material_data::MATERIAL_GRID) &&
-        mat->do_averaging) {
-      fallback = true;
-      return;
-    }
-    else { goto trivial; }
-  }
-
-  /* check for trivial case of only one object/material */
-  if (material_type_equal(mat, mat_behind)) goto trivial;
+void kottke_algorithm(meep::component c, symm_matrix *chi1inv_matrix, symm_matrix &meps, 
+  symm_matrix &eps2, cvector3 &cnormal, duals::duald fill){
+  /* Ref: " Perturbation theory for anisotropic dielectric interfaces, 
+    and application to subpixel smoothing of discretized numerical methods",
+    https://math.mit.edu/~stevenj/papers/KottkeFa08.pdf
+  */
 
-  // it doesn't make sense to average metals (electric or magnetic)
-  if (is_metal(meep::type(c), &mat) || is_metal(meep::type(c), &mat_behind)) goto noavg;
-
-  normal = unit_vector3(normal_to_fixed_object(vector3_minus(p, shiftby), *o));
-  if (normal.x == 0 && normal.y == 0 && normal.z == 0)
-    goto noavg; // couldn't get normal vector for this point, punt
-  geom_box pixel = gv2box(v);
-  pixel.low = vector3_minus(pixel.low, shiftby);
-  pixel.high = vector3_minus(pixel.high, shiftby);
-
-  double fill = box_overlap_with_object(pixel, *o, tol, maxeval);
-
-  material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix);
-  symm_matrix eps2, epsinv2;
   symm_matrix eps1, delta;
-  double Rot[3][3];
-  material_epsmu(meep::type(c), mat_behind, &eps2, &epsinv2);
+  duals::duald Rot[3][3];
   eps1 = meps;
 
-  Rot[0][0] = normal.x;
-  Rot[1][0] = normal.y;
-  Rot[2][0] = normal.z;
-  if (fabs(normal.x) > 1e-2 || fabs(normal.y) > 1e-2) {
-    Rot[0][2] = normal.y;
-    Rot[1][2] = -normal.x;
+  Rot[0][0] = cnormal.x.re + 1_e*cnormal.x.im;
+  Rot[1][0] = cnormal.y.re + 1_e*cnormal.y.im;
+  Rot[2][0] = cnormal.z.re + 1_e*cnormal.z.im;
+  if (fabs(cnormal.x.re) > 1e-2 || fabs(cnormal.y.re) > 1e-2) {
+    Rot[0][2] = cnormal.y.re + 1_e*cnormal.y.im;
+    Rot[1][2] = -(cnormal.x.re + 1_e*cnormal.x.im);
     Rot[2][2] = 0;
   }
   else { /* n is not parallel to z direction, use (x x n) instead */
     Rot[0][2] = 0;
-    Rot[1][2] = -normal.z;
-    Rot[2][2] = normal.y;
+    Rot[1][2] = -(cnormal.z.re + 1_e*cnormal.z.im);
+    Rot[2][2] = cnormal.y.re + 1_e*cnormal.y.im;
   }
   { /* normalize second column */
-    double s = Rot[0][2] * Rot[0][2] + Rot[1][2] * Rot[1][2] + Rot[2][2] * Rot[2][2];
+    duals::duald s = Rot[0][2] * Rot[0][2] + Rot[1][2] * Rot[1][2] + Rot[2][2] * Rot[2][2];
     s = 1.0 / sqrt(s);
     Rot[0][2] *= s;
     Rot[1][2] *= s;
@@ -1172,7 +1225,7 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
 
 #define SWAP(a, b)                                                                                 \
   {                                                                                                \
-    double xxx = a;                                                                                \
+    duals::duald xxx = a;                                                                          \
     a = b;                                                                                         \
     b = xxx;                                                                                       \
   }
@@ -1191,6 +1244,240 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
   sym_matrix_invert(chi1inv_matrix, &meps);
 }
 
+duals::duald get_material_grid_fill(meep::ndim dim, duals::duald d, double r, duals::duald u, double eta){
+  /* The fill fraction should describe the amount of u=1 material in the current pixel.
+  
+  Occasionally, the distance to the nearest interface is outside the current
+  sphere, which means we don't need to do any averaging (the fill is 0 or 1). Again,
+  we don't know if that means we are in void or solid, however, until we look
+  at u. To make things easy, we'll assume the cap is solid until the end, when we
+  can verify.
+  */
+  duals::duald rel_fill;
+  if (abs(d) >= abs(r)){
+    return -1.0; // garbage fill
+  } else {
+    if (dim == meep::D1)
+      return (r-d)/(2*r);
+    else if (dim == meep::D2 || dim == meep::Dcyl)
+      return (1/(r*r*meep::pi)) * (r*r*acos(d/r)-d*sqrt(r*r-d*d));
+    else if (dim == meep::D3)
+      return (((r-d)*(r-d))/(4*meep::pi*r*r*r))*(2*r+d);
+  }
+  
+  return rel_fill;
+}
+
+duals::duald normalize_dual(cvector3 &cv){
+  duals::duald d[3], norm;
+  // compute the norm, u_0'
+  d[0] = cv.x.re + 1_e*cv.x.im;
+  d[1] = cv.y.re + 1_e*cv.y.im;
+  d[2] = cv.z.re + 1_e*cv.z.im;
+
+  norm = sqrt(abs(d[0])*abs(d[0]) + abs(d[1])*abs(d[1]) + abs(d[2])*abs(d[2]));
+
+  // normalize the normal vector
+  d[0] = d[0] / norm;
+  d[1] = d[1] / norm;
+  d[2] = d[2] / norm;
+  cv.x.re = d[0].rpart(); cv.x.im = d[0].dpart();
+  cv.y.re = d[1].rpart(); cv.y.im = d[1].dpart();
+  cv.z.re = d[2].rpart(); cv.z.im = d[2].dpart();
+
+  return norm;
+}
+
+duals::duald get_distance(cvector3 &cv, duals::duald uval, double eta){
+  duals::duald d[3], norm;
+  d[0] = cv.x.re + 1_e*cv.x.im;
+  d[1] = cv.y.re + 1_e*cv.y.im;
+  d[2] = cv.z.re + 1_e*cv.z.im;
+
+  norm = sqrt(abs(d[0])*abs(d[0]) + abs(d[1])*abs(d[1]) + abs(d[2])*abs(d[2]));
+  return (eta - uval) / norm;
+}
+
+void dual_interpolate_mat(material_type mat,symm_matrix &eps, duals::duald u) {
+  duals::duald u_bar = tanh_projection(u, mat->beta, mat->eta); // projection
+  if (std::isnan(u_bar.dpart())) u_bar = u_bar.rpart(); // when beta=inf, we need to correct when gradient breaks down
+  eps.m00 = mat->medium_1.epsilon_diag.x + u_bar * (mat->medium_2.epsilon_diag.x-mat->medium_1.epsilon_diag.x);
+  eps.m11 = mat->medium_1.epsilon_diag.y + u_bar * (mat->medium_2.epsilon_diag.y-mat->medium_1.epsilon_diag.y);
+  eps.m22 = mat->medium_1.epsilon_diag.z + u_bar * (mat->medium_2.epsilon_diag.z-mat->medium_1.epsilon_diag.z);
+  eps.m01 = mat->medium_1.epsilon_offdiag.x.re + u_bar * (mat->medium_2.epsilon_offdiag.x.re-mat->medium_1.epsilon_offdiag.x.re);
+  eps.m02 = mat->medium_1.epsilon_offdiag.y.re + u_bar * (mat->medium_2.epsilon_offdiag.y.re-mat->medium_1.epsilon_offdiag.y.re);
+  eps.m12 = mat->medium_1.epsilon_offdiag.z.re + u_bar * (mat->medium_2.epsilon_offdiag.z.re-mat->medium_1.epsilon_offdiag.z.re);
+}
+
+void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_matrix,
+                                      const meep::volume &v, double tol, int maxeval,
+                                      bool &fallback) {
+  const geometric_object *o;
+  material_type mat, mat_behind;
+  vector3 p_mat, p_mat_behind, p, shiftby;
+  symm_matrix meps, eps2, epsinv2;
+  cvector3 normal;
+  duals::duald fill, uval;
+  int oi;
+  geom_box_tree tp;
+  fallback = false;
+
+  /* first let's check if our pixel has more than a single
+  interface (i.e. a corner) in which case we abandon any
+  smoothing efforts*/
+  if (!get_front_object(v, geometry_tree, p, &o, shiftby, mat, mat_behind, p_mat, p_mat_behind)) {
+  noavg:
+    get_material_pt(mat, v.center());
+  trivial:
+    material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix);
+    material_gc(mat);
+    return;
+  }
+
+  /* we may have entered this routine (e.g. because of a material grid
+  backprop routine) but don't want to actually do any averaging */
+  if (maxeval == 0){
+    get_material_pt(mat, v.center());
+    if (is_material_grid(mat)){
+      tp = geom_tree_search(vec_to_vector3(v.center()), restricted_tree, &oi);
+      uval = matgrid_val(vec_to_vector3(v.center()), tp, oi, mat);
+    mgavg:
+      dual_interpolate_mat(mat,meps,uval);
+      sym_matrix_invert(chi1inv_matrix, &meps);
+      //get_material_pt(mat, v.center());
+      //material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix);
+      material_gc(mat);
+      return;
+    }else{
+      goto trivial;
+    }
+  }
+
+  // For variable materials with do_averaging == true, switch over to slow fallback integration method.
+  // For material grids, however, we have to do some more logic first...
+  if ((is_variable(mat) && mat->do_averaging) || (is_variable(mat_behind) && mat_behind->do_averaging)) {
+    if ((!is_material_grid(mat)) && (!is_material_grid(mat_behind))){
+      fallback = true;
+      return;
+    }
+  }
+
+  /* ------------------------------------------- */
+  // One material
+  /* ------------------------------------------- */
+  /* check for case of only one object/material
+   in the case of the material grid, make sure we don't need
+   to do any interface averaging within.
+   */
+  if (material_type_equal(mat, mat_behind)) {
+    /* if we have a material grid, we need to calculate the fill fraction,
+    normal vector, etc. we also need to determine if we really need
+    to do any averaging, or if the current pixel is smooth enough
+    to just evaluate.
+    */
+    if (is_material_grid(mat)){
+      // if we have a damping term and β≠∞, we can't smooth
+      if ((mat->damping != 0) && (mat->eta != std::numeric_limits<double>::infinity())) goto noavg;
+
+      tp = geom_tree_search(p_mat, restricted_tree, &oi);
+      uval = matgrid_val(p_mat, tp, oi, mat);
+      
+      normal = matgrid_grad(p_mat, tp, oi, mat);
+      if (cvector3_re(normal).x == 0 && cvector3_re(normal).y == 0 && cvector3_re(normal).z == 0)
+        /* couldn't get normal vector for this point; no averaging is needed,
+        but we still need to interpolate onto our grid using duals */
+        goto mgavg;
+      duals::duald u_prime = normalize_dual(normal);
+      duals::duald d = (mat->eta - uval) / u_prime;
+      
+      double r = v.diameter()/2;
+      fill = get_material_grid_fill(v.dim,d,r,uval,mat->eta);
+      if (fill.rpart() < 0){
+        /* we are far enough away from an interface that
+        we don't need any averaging -- but we still need
+        to interpolate from the material grid */
+        goto mgavg;
+      } else{
+        /* we have a material grid interface within our pixel.
+        we therefore need to set the two materials used for
+        averaging to our corresponding solid and void materials.
+        */
+        symm_matrix eps_0, eps_plus, eps_minus;
+        dual_interpolate_mat(mat,eps_0,uval);
+        dual_interpolate_mat(mat,eps_plus,uval+r*u_prime);
+        dual_interpolate_mat(mat,eps_minus,uval-r*u_prime);
+        
+        if (d > 0) {
+          // Case 1: d > 0. Then let ε̃₊ = ε₊ and let ε̃₋ = [ε₀d + ε₋(Δx/2 – d)] / (Δx/2).
+          meps = eps_plus;
+          eps2 = (eps_0*d + eps_minus*(r-d)) / (r);
+        }else {
+          // Case 2: d < 0. Then let ε̃₋ = ε₋ and let ε̃₊ = [-ε₀d + ε₊(Δx/2 + d)] / (Δx/2).
+          eps2 = eps_minus;
+          meps = (-eps_0*d + eps_plus*(r+d)) / (r);
+        }
+      }
+    } else if(is_variable(mat)) {
+      // no averaging is needed (not a material grid)
+      eval_material_pt(mat, vec_to_vector3(v.center()));
+      goto trivial;
+    // materials are non variable and uniform -- no need to average
+    } else {
+      goto trivial;
+    }
+  /* ------------------------------------------- */
+  // Two different materials (perhaps a m.g. and a geom object)
+  /* ------------------------------------------- */
+  } else {
+    
+    vector3 normal_re = unit_vector3(normal_to_fixed_object(vector3_minus(p, shiftby), *o));
+    if (normal_re.x == 0 && normal_re.y == 0 && normal_re.z == 0)
+      goto noavg; // couldn't get normal vector for this point, punt
+    normal = make_cvector3(normal_re,make_vector3());
+    geom_box pixel = gv2box(v);
+    pixel.low = vector3_minus(pixel.low, shiftby);
+    pixel.high = vector3_minus(pixel.high, shiftby);
+    fill = duals::duald(box_overlap_with_object(pixel, *o, tol, maxeval),0);
+    /* Evaluate materials in case they are variable.  This allows us to do fast subpixel averaging
+      at the boundary of an object with a variable material, while remaining accurate enough if the
+      material is continuous over the pixel.  (We make a first-order error by averaging as if the material
+      were constant, but only in a boundary layer of thickness 1/resolution, so the net effect should
+      still be second-order.) */
+    if (is_variable(mat)){
+      eval_material_pt(mat, p_mat);
+      eval_material_pt(mat_behind, p_mat_behind);
+      if (material_type_equal(mat, mat_behind)) goto trivial;
+    }
+    
+    /* to properly determine the tensors of a material 
+      that has a material grid, we need to be careful.
+      Specifically, we need to include the effects of u_i
+      in the dual part of the material tensor */
+    if (is_material_grid(mat)){
+      tp = geom_tree_search(p_mat, restricted_tree, &oi);
+      duals::duald uval = matgrid_val(p_mat, tp, oi, mat);
+      dual_interpolate_mat(mat,meps,uval);
+      sym_matrix_invert(chi1inv_matrix, &meps);    
+    } else {
+      material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix);
+    }
+    if (is_material_grid(mat_behind)){
+      tp = geom_tree_search(p_mat_behind, restricted_tree, &oi);
+      duals::duald uval = matgrid_val(p_mat_behind, tp, oi, mat);
+      dual_interpolate_mat(mat_behind,eps2,uval);
+      sym_matrix_invert(&epsinv2, &eps2); // not really needed
+    } else {
+      material_epsmu(meep::type(c), mat_behind, &eps2, &epsinv2);
+    }
+  }
+
+  // it doesn't make sense to average metals (electric or magnetic)
+  if (is_metal(meep::type(c), &mat) || is_metal(meep::type(c), &mat_behind)) goto noavg;
+
+  kottke_algorithm(c,chi1inv_matrix,meps,eps2,normal,fill);
+  material_gc(mat);
+}
+
 static int eps_ever_negative = 0;
 static meep::field_type func_ft = meep::E_stuff;
 
@@ -1207,7 +1494,7 @@ struct matgrid_volavg {
 
 static void get_uproj_w(const matgrid_volavg *mgva, double x0, double &u_proj, double &w) {
   // use a linear approximation for the material grid weights around the Yee grid point
-  u_proj = tanh_projection(mgva->uval + mgva->ugrad_abs * x0, mgva->beta, mgva->eta);
+  //u_proj = tanh_projection(mgva->uval + mgva->ugrad_abs*x0, mgva->beta, mgva->eta);
   if (mgva->dim == meep::D1)
     w = 1 / (2 * mgva->rad);
   else if (mgva->dim == meep::D2 || mgva->dim == meep::Dcyl)
@@ -1314,18 +1601,7 @@ void geom_epsilon::fallback_chi1inv_row(meep::component c, double chi1inv_row[3]
   material =
       (material_type)material_of_unshifted_point_in_tree_inobject(p, restricted_tree, &inobject);
   material_data *md = material;
-  meep::vec gradient(zero_vec(v.dim));
-  double uval = 0;
-
-  if (md->which_subclass == material_data::MATERIAL_GRID) {
-    geom_box_tree tp;
-    int oi;
-    tp = geom_tree_search(p, restricted_tree, &oi);
-    gradient = matgrid_grad(p, tp, oi, md);
-    uval = matgrid_val(p, tp, oi, md) + this->u_p;
-  }
-  else { gradient = normal_vector(meep::type(c), v); }
-
+  meep::vec gradient = normal_vector(meep::type(c), v);
   get_material_pt(material, v.center());
   material_epsmu(meep::type(c), material, &chi1p1, &chi1p1_inv);
   material_gc(material);
@@ -1333,19 +1609,19 @@ void geom_epsilon::fallback_chi1inv_row(meep::component c, double chi1inv_row[3]
       chi1p1.m11 != chi1p1.m22 || chi1p1.m00 != chi1p1.m22 || meep::abs(gradient) < 1e-8) {
     int rownum = meep::component_direction(c) % 3;
     if (rownum == 0) {
-      chi1inv_row[0] = chi1p1_inv.m00;
-      chi1inv_row[1] = chi1p1_inv.m01;
-      chi1inv_row[2] = chi1p1_inv.m02;
+      chi1inv_row[0] = chi1p1_inv.m00.rpart();
+      chi1inv_row[1] = chi1p1_inv.m01.rpart();
+      chi1inv_row[2] = chi1p1_inv.m02.rpart();
     }
     else if (rownum == 1) {
-      chi1inv_row[0] = chi1p1_inv.m01;
-      chi1inv_row[1] = chi1p1_inv.m11;
-      chi1inv_row[2] = chi1p1_inv.m12;
+      chi1inv_row[0] = chi1p1_inv.m01.rpart();
+      chi1inv_row[1] = chi1p1_inv.m11.rpart();
+      chi1inv_row[2] = chi1p1_inv.m12.rpart();
     }
     else {
-      chi1inv_row[0] = chi1p1_inv.m02;
-      chi1inv_row[1] = chi1p1_inv.m12;
-      chi1inv_row[2] = chi1p1_inv.m22;
+      chi1inv_row[0] = chi1p1_inv.m02.rpart();
+      chi1inv_row[1] = chi1p1_inv.m12.rpart();
+      chi1inv_row[2] = chi1p1_inv.m22.rpart();
     }
     return;
   }
@@ -1353,79 +1629,47 @@ void geom_epsilon::fallback_chi1inv_row(meep::component c, double chi1inv_row[3]
   integer errflag;
   double meps, minveps;
 
-  if (md->which_subclass == material_data::MATERIAL_GRID) {
-    number xmin[1], xmax[1];
-    matgrid_volavg mgva;
-    mgva.dim = v.dim;
-    mgva.ugrad_abs = meep::abs(gradient);
-    mgva.uval = uval;
-    mgva.rad = v.diameter() / 2;
-    mgva.beta = md->beta;
-    mgva.eta = md->eta;
-    mgva.eps1 =
-        (md->medium_1.epsilon_diag.x + md->medium_1.epsilon_diag.y + md->medium_1.epsilon_diag.z) /
-        3;
-    mgva.eps2 =
-        (md->medium_2.epsilon_diag.x + md->medium_2.epsilon_diag.y + md->medium_2.epsilon_diag.z) /
-        3;
-    xmin[0] = -v.diameter() / 2;
-    xmax[0] = v.diameter() / 2;
-#ifdef CTL_HAS_COMPLEX_INTEGRATION
-    cnumber ret = cadaptive_integration(matgrid_ceps_func, xmin, xmax, 1, (void *)&mgva, 0, tol,
-                                        maxeval, &esterr, &errflag);
-    meps = ret.re;
-    minveps = ret.im;
-#else
-    meps = adaptive_integration(matgrid_eps_func, xmin, xmax, 1, (void *)&mgva, 0, tol, maxeval,
-                                &esterr, &errflag);
-    minveps = adaptive_integration(matgrid_inveps_func, xmin, xmax, 1, (void *)&mgva, 0, tol,
-                                   maxeval, &esterr, &errflag);
-#endif
+  integer n;
+  number xmin[3], xmax[3];
+  vector3 gvmin, gvmax;
+  gvmin = vec_to_vector3(v.get_min_corner());
+  gvmax = vec_to_vector3(v.get_max_corner());
+  xmin[0] = gvmin.x;
+  xmax[0] = gvmax.x;
+  if (dim == meep::Dcyl) {
+    xmin[1] = gvmin.z;
+    xmin[2] = gvmin.y;
+    xmax[1] = gvmax.z;
+    xmax[2] = gvmax.y;
   }
   else {
-    integer n;
-    number xmin[3], xmax[3];
-    vector3 gvmin, gvmax;
-    gvmin = vec_to_vector3(v.get_min_corner());
-    gvmax = vec_to_vector3(v.get_max_corner());
-    xmin[0] = gvmin.x;
-    xmax[0] = gvmax.x;
-    if (dim == meep::Dcyl) {
-      xmin[1] = gvmin.z;
-      xmin[2] = gvmin.y;
-      xmax[1] = gvmax.z;
-      xmax[2] = gvmax.y;
-    }
-    else {
-      xmin[1] = gvmin.y;
-      xmin[2] = gvmin.z;
-      xmax[1] = gvmax.y;
-      xmax[2] = gvmax.z;
-    }
-    if (xmin[2] == xmax[2])
-      n = xmin[1] == xmax[1] ? 1 : 2;
-    else
-      n = 3;
-    double vol = 1;
-    for (int i = 0; i < n; ++i)
-      vol *= xmax[i] - xmin[i];
-    if (dim == meep::Dcyl) vol *= (xmin[0] + xmax[0]) * 0.5;
-    eps_ever_negative = 0;
-    func_ft = meep::type(c);
+    xmin[1] = gvmin.y;
+    xmin[2] = gvmin.z;
+    xmax[1] = gvmax.y;
+    xmax[2] = gvmax.z;
+  }
+  if (xmin[2] == xmax[2])
+    n = xmin[1] == xmax[1] ? 1 : 2;
+  else
+    n = 3;
+  double vol = 1;
+  for (int i = 0; i < n; ++i)
+    vol *= xmax[i] - xmin[i];
+  if (dim == meep::Dcyl) vol *= (xmin[0] + xmax[0]) * 0.5;
+  eps_ever_negative = 0;
+  func_ft = meep::type(c);
 #ifdef CTL_HAS_COMPLEX_INTEGRATION
-    cnumber ret = cadaptive_integration(ceps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval,
-                                        &esterr, &errflag);
-    meps = ret.re / vol;
-    minveps = ret.im / vol;
+  cnumber ret = cadaptive_integration(ceps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval,
+                                      &esterr, &errflag);
+  meps = ret.re / vol;
+  minveps = ret.im / vol;
 #else
-    meps = adaptive_integration(eps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval, &esterr,
-                                &errflag) /
-           vol;
-    minveps = adaptive_integration(inveps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval,
-                                   &esterr, &errflag) /
-              vol;
+  meps = adaptive_integration(eps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval, &esterr,
+                              &errflag) / vol;
+  minveps = adaptive_integration(inveps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval, &esterr,
+                                  &errflag) / vol;
 #endif
-  }
+
   if (eps_ever_negative) // averaging negative eps causes instability
     minveps = 1.0 / (meps = eps(v.center()));
   {
@@ -1667,8 +1911,8 @@ void geom_epsilon::sigma_row(meep::component c, double sigrow[3], const meep::ve
 
     tp = geom_tree_search(p, restricted_tree, &oi);
     // interpolate and project onto material grid
-    u = tanh_projection(matgrid_val(p, tp, oi, mat) + this->u_p, mat->beta, mat->eta);
-    epsilon_material_grid(mat, u); // interpolate material from material grid point
+    u = tanh_projection(matgrid_val(p, tp, oi, mat), mat->beta, mat->eta).rpart();
+    epsilon_material_grid(mat, u);   // interpolate material from material grid point
     mat->medium.check_offdiag_im_zero_or_abort();
   }
 
@@ -1936,7 +2180,7 @@ void add_absorbing_layer(absorber_list alist, double thickness, int direction, i
 if needed */
 geom_epsilon *make_geom_epsilon(meep::structure *s, geometric_object_list *g, vector3 center,
                                 bool _ensure_periodicity, material_type _default_material,
-                                material_type_list extra_materials) {
+                                material_type_list extra_materials, bool _use_anisotropic_averaging) {
   // set global variables in libctlgeom based on data fields in s
   geom_initialize();
   geometry_center = center;
@@ -1987,6 +2231,7 @@ geom_epsilon *make_geom_epsilon(meep::structure *s, geometric_object_list *g, ve
   }
 
   geom_epsilon *geps = new geom_epsilon(*g, extra_materials, gv.pad().surroundings());
+  geps->use_anisotropic_averaging = _use_anisotropic_averaging;
   return geps;
 }
 
@@ -1997,8 +2242,10 @@ void set_materials_from_geometry(meep::structure *s, geometric_object_list g, ve
                                  bool _ensure_periodicity, material_type _default_material,
                                  absorber_list alist, material_type_list extra_materials) {
   meep_geom::geom_epsilon *geps = meep_geom::make_geom_epsilon(s, &g, center, _ensure_periodicity,
-                                                               _default_material, extra_materials);
-  set_materials_from_geom_epsilon(s, geps, use_anisotropic_averaging, tol, maxeval, alist);
+                                                               _default_material, extra_materials,
+                                                               use_anisotropic_averaging);
+  set_materials_from_geom_epsilon(s, geps, use_anisotropic_averaging, tol,
+                                  maxeval, alist);
   delete geps;
 }
 
@@ -2060,6 +2307,7 @@ material_type make_user_material(user_material_func user_func, void *user_data,
 material_type make_file_material(const char *eps_input_file) {
   material_data *md = new material_data();
   md->which_subclass = material_data::MATERIAL_FILE;
+  md->do_averaging = false;
 
   md->epsilon_dims[0] = md->epsilon_dims[1] = md->epsilon_dims[2] = 1;
   if (eps_input_file && eps_input_file[0]) { // file specified
@@ -2095,7 +2343,9 @@ material_type make_material_grid(bool do_averaging, double beta, double eta, dou
 
 void update_weights(material_type matgrid, double *weights) {
   size_t N = matgrid->grid_size.x * matgrid->grid_size.y * matgrid->grid_size.z;
-  memcpy(matgrid->weights, weights, N * sizeof(double));
+  //memcpy(matgrid->weights, weights, N * sizeof(double));
+  for (size_t i=0;i<N;i++)
+    matgrid->weights.push_back(weights[i]);
 }
 
 /******************************************************************************/
@@ -2603,30 +2853,27 @@ std::complex<double> cond_cmp(meep::component c, const meep::vec &r, double freq
   const medium_struct *mm = &(md->medium);
 
   // get the row we care about
-  switch (component_direction(c)) {
-    case meep::X:
-    case meep::R:
-      return std::complex<double>(1.0, mm->D_conductivity_diag.x / (2 * meep::pi * freq));
-    case meep::Y:
-    case meep::P:
-      return std::complex<double>(1.0, mm->D_conductivity_diag.y / (2 * meep::pi * freq));
-    case meep::Z:
-      return std::complex<double>(1.0, mm->D_conductivity_diag.z / (2 * meep::pi * freq));
-    case meep::NO_DIRECTION: meep::abort("Invalid adjoint field component");
-  }
-}
-
-std::complex<double>
-get_material_gradient(const meep::vec &r,              // current point
-                      const meep::component adjoint_c, // adjoint field component
-                      const meep::component forward_c, // forward field component
-                      std::complex<double> fields_f,   // forward field at current point
-                      double freq,                     // frequency
-                      geom_epsilon *geps,              // material
-                      meep::grid_volume &gv,           // simulation grid volume
-                      double du,                       // step size
-                      double *u,                       // matgrid
-                      int idx                          // matgrid index
+      switch (component_direction(c)) {
+      case meep::X:
+      case meep::R: return std::complex<double>(1.0, mm->D_conductivity_diag.x / (2*meep::pi*freq));
+      case meep::Y:
+      case meep::P: return std::complex<double>(1.0, mm->D_conductivity_diag.y / (2*meep::pi*freq));
+      case meep::Z: return std::complex<double>(1.0, mm->D_conductivity_diag.z / (2*meep::pi*freq));
+      case meep::NO_DIRECTION: meep::abort("Invalid adjoint field component");
+    }
+}
+
+std::complex<double> get_material_gradient(
+    const meep::vec &r,               // current point
+    const meep::component adjoint_c,  // adjoint field component
+    const meep::component forward_c,  // forward field component
+    std::complex<double> fields_f,    // forward field at current point
+    double freq,                      // frequency
+    geom_epsilon *geps,               // material
+    meep::grid_volume &gv,            // simulation grid volume
+    double du,                        // step size
+    std::vector<duals::duald> &u,     // matgrid
+    int idx                           // matgrid index
 ) {
   /*Compute the Aᵤx product from the -λᵀAᵤx calculation.
   The current adjoint (λ) field component (adjoint_c)
@@ -2668,26 +2915,26 @@ get_material_gradient(const meep::vec &r,              // current point
       v.set_direction_min(d, r.in_direction(d) - 0.5 * gv.inva * sd);
       v.set_direction_max(d, r.in_direction(d) + 0.5 * gv.inva * sd);
     }
-    double row_1[3], row_2[3], dA_du[3];
-    double orig = u[idx];
-    u[idx] -= du;
-    geps->eff_chi1inv_row(adjoint_c, row_1, v, geps->tol, geps->maxeval);
-    u[idx] += 2 * du;
-    geps->eff_chi1inv_row(adjoint_c, row_2, v, geps->tol, geps->maxeval);
-    u[idx] = orig;
-
-    for (int i = 0; i < 3; i++)
-      dA_du[i] = (row_1[i] - row_2[i]) / (2 * du);
-    return dA_du[dir_idx] * fields_f;
-  }
-  else {
-    double orig = u[idx];
+    double dA_du[3];
+    double maxevals = geps->use_anisotropic_averaging ? geps->maxeval : 0; // punt if no smoothing
+    /* we can calculate the gradient w.r.t. u_i using
+    a dual method. We just need to "flag" the index
+    within our weights array with a dual value
+    of "1", and the duals library will take care of the
+    rest. */
+    u[idx] += 1_e; // add dual component
+    geps->eff_chi1inv_row_grad(adjoint_c, dA_du, v, geps->tol, maxevals, true);
+    u[idx] -= 1_e; // remove dual component
+    
+    return -dA_du[dir_idx] * fields_f; // note the minus sign! (from Steven's adjoint notes)
+  
+  } else {
     std::complex<double> row_1[3], row_2[3], dA_du[3];
     u[idx] -= du;
-    eff_chi1inv_row_disp(adjoint_c, row_1, r, freq, geps);
-    u[idx] += 2 * du;
-    eff_chi1inv_row_disp(adjoint_c, row_2, r, freq, geps);
-    u[idx] = orig;
+    eff_chi1inv_row_disp(adjoint_c,row_1,r,freq,geps);
+    u[idx] += 2*du;
+    eff_chi1inv_row_disp(adjoint_c,row_2,r,freq,geps);
+    u[idx] -= du;
 
     for (int i = 0; i < 3; i++)
       dA_du[i] = (row_1[i] - row_2[i]) / (2 * du);
@@ -2702,14 +2949,16 @@ With the addition of subpixel smoothing, however, the vJp became
 much more complicated and it is easier to calculate the entire gradient
 using finite differences (at the cost of slightly less accurate gradients
 due to floating-point roundoff errors). */
-void add_interpolate_weights(double rx, double ry, double rz, double *data, int nx, int ny, int nz,
-                             int stride, double scaleby, double *udata, int ukind, double uval,
-                             meep::vec r, geom_epsilon *geps, meep::component adjoint_c,
-                             meep::component forward_c, std::complex<double> fwd,
-                             std::complex<double> adj, double freq, meep::grid_volume &gv,
-                             double du) {
+void add_interpolate_weights(double rx, double ry, double rz,
+                             double *data, int nx, int ny, int nz, int stride,
+                             double scaleby, std::vector<duals::duald> &udata, int ukind, duals::duald uval,
+                             meep::vec r, geom_epsilon *geps,
+                             meep::component adjoint_c, meep::component forward_c,
+                             std::complex<double> fwd, std::complex<double> adj,
+                             double freq, meep::grid_volume &gv, double du) {
   int x1, y1, z1, x2, y2, z2;
-  double dx, dy, dz, u;
+  double dx, dy, dz;
+  duals::duald u;
 
   meep::map_coordinates(rx, ry, rz, nx, ny, nz, x1, y1, z1, x2, y2, z2, dx, dy, dz);
   int x_list[2] = {x1, x2}, y_list[2] = {y1, y2}, z_list[2] = {z1, z2};
@@ -2731,7 +2980,7 @@ in row-major order (the order used by HDF5): */
            dz);
 
   if (ukind == material_data::U_MIN && u != uval) return; // TODO look into this
-  if (ukind == material_data::U_PROD) scaleby *= uval / u;
+  if (ukind == material_data::U_PROD) scaleby *= uval.rpart() / u.rpart();
 
   for (int xi = 0; xi < lx; xi++) {
     for (int yi = 0; yi < ly; yi++) {
@@ -2757,7 +3006,7 @@ void material_grids_addgradient_point(double *v, vector3 p, double scalegrad, ge
   geom_box_tree tp;
   int oi, ois;
   material_data *mg, *mg_sum;
-  double uval;
+  duals::duald uval;
   int kind;
   tp = geom_tree_search(p, geps->geometry_tree, &oi);
 
@@ -2804,13 +3053,13 @@ void material_grids_addgradient_point(double *v, vector3 p, double scalegrad, ge
     it's up to the user to only have one unique design grid in this volume.*/
     vector3 sz = mg->grid_size;
     double *vcur = v;
-    double *ucur = mg->weights;
     uval = tanh_projection(matgrid_val(p, tp, oi, mg), mg->beta, mg->eta);
     do {
       vector3 pb = to_geom_box_coords(p, &tp->objects[oi]);
-      add_interpolate_weights(pb.x, pb.y, pb.z, vcur, sz.x, sz.y, sz.z, 1, scalegrad, ucur, kind,
-                              uval, vector3_to_vec(p), geps, adjoint_c, forward_c, fwd, adj, freq,
-                              gv, tol);
+      add_interpolate_weights(pb.x, pb.y, pb.z, vcur, sz.x, sz.y, sz.z, 1,
+                              scalegrad, mg->weights, kind, uval,
+                              vector3_to_vec(p), geps, adjoint_c, forward_c,
+                              fwd, adj, freq, gv, tol);
       if (kind == material_data::U_DEFAULT) break;
       tp = geom_tree_search_next(p, tp, &oi);
     } while (tp && is_material_grid((material_data *)tp->objects[oi].o->material));
@@ -2820,10 +3069,11 @@ void material_grids_addgradient_point(double *v, vector3 p, double scalegrad, ge
     map_lattice_coordinates(p.x, p.y, p.z);
     vector3 sz = mg->grid_size;
     double *vcur = v;
-    double *ucur = mg->weights;
     uval = tanh_projection(material_grid_val(p, mg), mg->beta, mg->eta);
-    add_interpolate_weights(p.x, p.y, p.z, vcur, sz.x, sz.y, sz.z, 1, scalegrad, ucur, kind, uval,
-                            vector3_to_vec(p), geps, adjoint_c, forward_c, fwd, adj, freq, gv, tol);
+    add_interpolate_weights(p.x, p.y, p.z, vcur, sz.x, sz.y, sz.z, 1,
+                            scalegrad, mg->weights, kind, uval,
+                            vector3_to_vec(p), geps, adjoint_c, forward_c,
+                            fwd, adj, freq, gv, tol);
   }
 }
 
diff --git a/src/meepgeom.hpp b/src/meepgeom.hpp
index ef77c74ac..3b5014cb4 100644
--- a/src/meepgeom.hpp
+++ b/src/meepgeom.hpp
@@ -153,9 +153,50 @@ inline vector3 make_vector3(double x = 0.0, double y = 0.0, double z = 0.0) {
   return v;
 }
 
-typedef struct {
-  double m00, m01, m02, m11, m12, m22;
-} symm_matrix;
+inline cvector3 cvector3_scale(number s, cvector3 v) {
+  return make_cvector3(vector3_scale(s,cvector3_re(v)),vector3_scale(s,cvector3_im(v)));
+}
+
+inline cvector3 cvector_zero(){
+  return make_cvector3(make_vector3(),make_vector3());
+}
+
+inline cvector3 cvector_add(cvector3 cv1, cvector3 cv2){
+  return make_cvector3(vector3_plus(cvector3_re(cv1),cvector3_re(cv2)),vector3_plus(cvector3_im(cv1),cvector3_im(cv2)));
+}
+
+struct symm_matrix{
+  duals::duald m00, m01, m02, m11, m12, m22;
+  struct symm_matrix& operator+=(const symm_matrix& rhs) { 
+    m00 += rhs.m00; m11 += rhs.m11; m22 += rhs.m22;
+    m01 += rhs.m01; m02 += rhs.m02; m12 += rhs.m12;
+    return *this; 
+  }
+  struct symm_matrix& operator+=(const duals::duald& k) { 
+    m00 += k; m11 += k; m22 += k;
+    return *this; 
+   }
+  struct symm_matrix& operator+(const symm_matrix& rhs) { 
+    m00 += rhs.m00; m11 += rhs.m11; m22 += rhs.m22;
+    m01 += rhs.m01; m02 += rhs.m02; m12 += rhs.m12;
+    return *this; 
+   }
+  struct symm_matrix& operator*( const duals::duald& k) { 
+    m00 *= k; m11 *= k; m22 *= k;
+    m01 *= k; m02 *= k; m12 *= k;
+    return *this; 
+   }
+  struct symm_matrix& operator/( const duals::duald& k) { 
+    m00 /= k; m11 /= k; m22 /= k;
+    m01 /= k; m02 /= k; m12 /= k;
+    return *this; 
+   }
+  struct symm_matrix& operator-() { 
+    m00 = -m00; m11 = -m11; m22 = -m22;
+    m01 = -m01; m02 = -m02; m12 = -m12;
+    return *this; 
+   }
+} ;
 
 struct pol {
   meep_geom::susceptibility user_s;
@@ -173,6 +214,7 @@ class geom_epsilon : public meep::material_function {
 
 public:
   double u_p = 0;
+  bool use_anisotropic_averaging;
   geom_box_tree geometry_tree;
   geom_box_tree restricted_tree;
   geometric_object_list geometry;
@@ -206,6 +248,8 @@ class geom_epsilon : public meep::material_function {
   virtual double chi1p1(meep::field_type ft, const meep::vec &r);
   virtual void eff_chi1inv_row(meep::component c, double chi1inv_row[3], const meep::volume &v,
                                double tol, int maxeval);
+  void eff_chi1inv_row_grad(meep::component c, double chi1inv_row[3], const meep::volume &v,
+                               double tol, int maxeval, bool needs_grad);
 
   void eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_matrix, const meep::volume &v,
                           double tol, int maxeval, bool &fallback);
@@ -221,14 +265,17 @@ class geom_epsilon : public meep::material_function {
 
 private:
   material_type_list extra_materials;
+  void eval_material_pt(material_type &material, vector3 p);
   pol *current_pol;
 };
 
 void set_dimensions(int dims);
-geom_epsilon *make_geom_epsilon(meep::structure *s, geometric_object_list *g,
-                                vector3 center = make_vector3(), bool ensure_periodicity = false,
-                                material_type _default_material = vacuum,
-                                material_type_list extra_materials = material_type_list());
+geom_epsilon* make_geom_epsilon(meep::structure *s, geometric_object_list *g,
+                                 vector3 center = make_vector3(),
+                                 bool ensure_periodicity = false,
+                                 material_type _default_material = vacuum,
+                                 material_type_list extra_materials = material_type_list(),
+                                 bool use_anisotropic_averaging = false);
 //, geometric_object_list g, material_type_list extra_materials
 void set_materials_from_geometry(meep::structure *s, geometric_object_list g,
                                  vector3 center = make_vector3(),
@@ -266,8 +313,6 @@ bool is_material_grid(material_type mt);
 bool is_material_grid(void *md);
 bool is_variable(material_type mt);
 bool is_variable(void *md);
-bool is_file(material_type md);
-bool is_file(void *md);
 bool is_medium(material_type md, medium_struct **m);
 bool is_medium(void *md, medium_struct **m);
 bool is_metal(meep::field_type ft, const material_type *material);
@@ -284,10 +329,8 @@ void init_libctl(material_type default_mat, bool ensure_per, meep::grid_volume *
 // material grid functions
 /***************************************************************/
 void update_weights(material_type matgrid, double *weights);
-meep::vec matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md);
-meep::vec material_grid_grad(vector3 p, material_data *md, const geometric_object *o);
-double matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md);
-double material_grid_val(vector3 p, material_data *md);
+duals::duald matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md);
+duals::duald material_grid_val(vector3 p, material_data *md);
 geom_box_tree calculate_tree(const meep::volume &v, geometric_object_list g);
 void material_grids_addgradient(double *v, size_t ng, size_t nf,
                                 std::vector<meep::dft_fields *> fields_a,
diff --git a/src/support/Makefile.am b/src/support/Makefile.am
index f65ba7ddb..603b0c461 100644
--- a/src/support/Makefile.am
+++ b/src/support/Makefile.am
@@ -1,3 +1,3 @@
 noinst_LTLIBRARIES = libsupport.la
 
-libsupport_la_SOURCES = mt19937ar.c meep_mt.h
+libsupport_la_SOURCES = mt19937ar.c meep_mt.h dual.hpp

From 9793cf5c138042f66f58a586cfed1f2a0bd9aa65 Mon Sep 17 00:00:00 2001
From: "Steven G. Johnson" <stevenj@alum.mit.edu>
Date: Fri, 29 Jul 2022 17:04:21 -0400
Subject: [PATCH 24/29] reformat

---
 python/typemap_utils.cpp |  10 +-
 src/meepgeom.cpp         | 506 ++++++++++++++++++++-------------------
 src/meepgeom.hpp         | 106 ++++----
 3 files changed, 333 insertions(+), 289 deletions(-)

diff --git a/python/typemap_utils.cpp b/python/typemap_utils.cpp
index f18764b26..ee3e45c2f 100644
--- a/python/typemap_utils.cpp
+++ b/python/typemap_utils.cpp
@@ -521,15 +521,13 @@ static int pymaterial_grid_to_material_grid(PyObject *po, material_data *md) {
   PyObject *po_dp = PyObject_GetAttrString(po, "weights");
   PyArrayObject *pao = (PyArrayObject *)po_dp;
   if (!PyArray_Check(pao)) { meep::abort("MaterialGrid weights failed to init."); }
-  if (!PyArray_ISCARRAY(pao)) {
-    meep::abort("Numpy array weights must be C-style contiguous.");
-  }
+  if (!PyArray_ISCARRAY(pao)) { meep::abort("Numpy array weights must be C-style contiguous."); }
   md->weights.clear();
-  for (size_t i=0;i<PyArray_SIZE(pao);i++){
+  for (size_t i = 0; i < PyArray_SIZE(pao); i++) {
     md->weights.push_back(((double *)PyArray_DATA(pao))[i]);
   }
-  //md->weights = new double[PyArray_SIZE(pao)];
-  //memcpy(md->weights, (double *)PyArray_DATA(pao), PyArray_SIZE(pao) * sizeof(double));
+  // md->weights = new double[PyArray_SIZE(pao)];
+  // memcpy(md->weights, (double *)PyArray_DATA(pao), PyArray_SIZE(pao) * sizeof(double));
 
   // if needed, combine sus structs to main object
   PyObject *py_e_sus_m1 = PyObject_GetAttrString(po_medium1, "E_susceptibilities");
diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 660b15423..09513597b 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -14,7 +14,7 @@
 %  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
 
-#include <iostream> 
+#include <iostream>
 #include <algorithm>
 #include <vector>
 #include "meepgeom.hpp"
@@ -289,7 +289,7 @@ bool is_material_grid(void *md) { return is_material_grid((material_type)md); }
 
 // return whether mt is spatially varying
 bool is_variable(material_type mt) {
-    return (mt && ((mt->which_subclass == material_data::MATERIAL_USER) ||
+  return (mt && ((mt->which_subclass == material_data::MATERIAL_USER) ||
                  (mt->which_subclass == material_data::MATERIAL_GRID) ||
                  (mt->which_subclass == material_data::MATERIAL_FILE)));
 }
@@ -368,12 +368,21 @@ cvector3 to_geom_object_coords_VJP(cvector3 v, const geometric_object *o) {
       vector3 v_imag = cvector3_im(v); // "dual" part
 
       vector3 size = o->subclass.block_data->size;
-      if (size.x != 0.0) {v_real.x /= size.x; v_imag.x /= size.x;}
-      if (size.y != 0.0) {v_real.y /= size.y; v_imag.y /= size.y;}
-      if (size.z != 0.0) {v_real.z /= size.z; v_imag.z /= size.z;}
-      v_real =  matrix3x3_transpose_vector3_mult(o->subclass.block_data->projection_matrix, v_real);
-      v_imag =  matrix3x3_transpose_vector3_mult(o->subclass.block_data->projection_matrix, v_imag);
-      return make_cvector3(v_real,v_imag);
+      if (size.x != 0.0) {
+        v_real.x /= size.x;
+        v_imag.x /= size.x;
+      }
+      if (size.y != 0.0) {
+        v_real.y /= size.y;
+        v_imag.y /= size.y;
+      }
+      if (size.z != 0.0) {
+        v_real.z /= size.z;
+        v_imag.z /= size.z;
+      }
+      v_real = matrix3x3_transpose_vector3_mult(o->subclass.block_data->projection_matrix, v_real);
+      v_imag = matrix3x3_transpose_vector3_mult(o->subclass.block_data->projection_matrix, v_imag);
+      return make_cvector3(v_real, v_imag);
     }
       /* case geometric_object::PRISM:
          NOT YET IMPLEMENTED */
@@ -381,10 +390,10 @@ cvector3 to_geom_object_coords_VJP(cvector3 v, const geometric_object *o) {
 }
 
 cvector3 material_grid_grad(vector3 p, material_data *md, const geometric_object *o) {
-  /* computes the actual spatial gradient at point `p` 
+  /* computes the actual spatial gradient at point `p`
   for the specified material grid `md`. */
-  
-  if (!is_material_grid(md)) {meep::abort("Invalid material grid detected.\n"); }
+
+  if (!is_material_grid(md)) { meep::abort("Invalid material grid detected.\n"); }
 
   cvector3 gradient = cvector_zero();
   int nx = md->grid_size.x;
@@ -418,21 +427,24 @@ cvector3 material_grid_grad(vector3 p, material_data *md, const geometric_object
      in row-major order: */
 #define D(x, y, z) ((md->weights)[(((x)*ny + (y)) * nz + (z)) * stride])
 
-  duals::duald du_dx = (signflip_dx ? -1.0 : 1.0) *
-    (((-D(x1, y1, z1) + D(x2, y1, z1)) * (1.0 - dy) +
-      (-D(x1, y2, z1) + D(x2, y2, z1)) * dy) * (1.0 - dz) +
-     ((-D(x1, y1, z2) + D(x2, y1, z2)) * (1.0 - dy) +
-      (-D(x1, y2, z2) + D(x2, y2, z2)) * dy) * dz);
-  duals::duald du_dy = (signflip_dy ? -1.0 : 1.0) *
-    ((-(D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) +
-      (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx)) * (1.0 - dz) +
-     (-(D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) +
-      (D(x1, y2, z2) * (1.0 - dx) + D(x2, y2, z2) * dx)) * dz);
+  duals::duald du_dx =
+      (signflip_dx ? -1.0 : 1.0) *
+      (((-D(x1, y1, z1) + D(x2, y1, z1)) * (1.0 - dy) + (-D(x1, y2, z1) + D(x2, y2, z1)) * dy) *
+           (1.0 - dz) +
+       ((-D(x1, y1, z2) + D(x2, y1, z2)) * (1.0 - dy) + (-D(x1, y2, z2) + D(x2, y2, z2)) * dy) *
+           dz);
+  duals::duald du_dy =
+      (signflip_dy ? -1.0 : 1.0) * ((-(D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) +
+                                     (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx)) *
+                                        (1.0 - dz) +
+                                    (-(D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) +
+                                     (D(x1, y2, z2) * (1.0 - dx) + D(x2, y2, z2) * dx)) *
+                                        dz);
   duals::duald du_dz = (signflip_dz ? -1.0 : 1.0) *
-    (-((D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) * (1.0 - dy) +
-       (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx) * dy) +
-     ((D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) * (1.0 - dy) +
-      (D(x1, y2, z2) * (1.0 - dx) + D(x2, y2, z2) * dx) * dy));
+                       (-((D(x1, y1, z1) * (1.0 - dx) + D(x2, y1, z1) * dx) * (1.0 - dy) +
+                          (D(x1, y2, z1) * (1.0 - dx) + D(x2, y2, z1) * dx) * dy) +
+                        ((D(x1, y1, z2) * (1.0 - dx) + D(x2, y1, z2) * dx) * (1.0 - dy) +
+                         (D(x1, y2, z2) * (1.0 - dx) + D(x2, y2, z2) * dx) * dy));
 
 #undef D
 
@@ -441,22 +453,21 @@ cvector3 material_grid_grad(vector3 p, material_data *md, const geometric_object
   // respect to r2 where g(r2) is the to_geom_object_coords function (in libctl/utils/geom.c).
   // computing this quantity involves using the chain rule and thus the vector-Jacobian product
   // ∇u J where J is the Jacobian matrix of g.
-  vector3 g_real = make_vector3(nx*du_dx.rpart(),ny*du_dy.rpart(),nz*du_dz.rpart());
-  vector3 g_imag = make_vector3(nx*du_dx.dpart(),ny*du_dy.dpart(),nz*du_dz.dpart());
-  cvector3 grad_u = make_cvector3(g_real,g_imag);
+  vector3 g_real = make_vector3(nx * du_dx.rpart(), ny * du_dy.rpart(), nz * du_dz.rpart());
+  vector3 g_imag = make_vector3(nx * du_dx.dpart(), ny * du_dy.dpart(), nz * du_dz.dpart());
+  cvector3 grad_u = make_cvector3(g_real, g_imag);
 
-  if (o != NULL) {
-    gradient = to_geom_object_coords_VJP(grad_u, o);
-  } else {
+  if (o != NULL) { gradient = to_geom_object_coords_VJP(grad_u, o); }
+  else {
     g_real = make_vector3(
-      geometry_lattice.size.x == 0 ? 0 : nx*du_dx.rpart()/ geometry_lattice.size.x,
-      geometry_lattice.size.y == 0 ? 0 : ny*du_dy.rpart()/ geometry_lattice.size.y,
-      geometry_lattice.size.z == 0 ? 0 : nz*du_dz.rpart()/ geometry_lattice.size.z);
+        geometry_lattice.size.x == 0 ? 0 : nx * du_dx.rpart() / geometry_lattice.size.x,
+        geometry_lattice.size.y == 0 ? 0 : ny * du_dy.rpart() / geometry_lattice.size.y,
+        geometry_lattice.size.z == 0 ? 0 : nz * du_dz.rpart() / geometry_lattice.size.z);
     g_imag = make_vector3(
-      geometry_lattice.size.x == 0 ? 0 : nx*du_dx.dpart()/ geometry_lattice.size.x,
-      geometry_lattice.size.y == 0 ? 0 : ny*du_dy.dpart()/ geometry_lattice.size.y,
-      geometry_lattice.size.z == 0 ? 0 : nz*du_dz.dpart()/ geometry_lattice.size.z);
-    gradient = make_cvector3(g_real,g_imag);
+        geometry_lattice.size.x == 0 ? 0 : nx * du_dx.dpart() / geometry_lattice.size.x,
+        geometry_lattice.size.y == 0 ? 0 : ny * du_dy.dpart() / geometry_lattice.size.y,
+        geometry_lattice.size.z == 0 ? 0 : nz * du_dz.dpart() / geometry_lattice.size.z);
+    gradient = make_cvector3(g_real, g_imag);
   }
   return gradient;
 }
@@ -470,7 +481,7 @@ void map_lattice_coordinates(double &px, double &py, double &pz) {
 cvector3 matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md) {
   /* loops through all the material grids at a current point
   and computes the final *spatial* gradient (w.r.t. x,y,z)
-  after all appropriate transformations (e.g. due to 
+  after all appropriate transformations (e.g. due to
   overlapping grids). Calls the helper function, `material_grid_grad`,
   which is what actually computes the spatial gradient.
   */
@@ -487,9 +498,10 @@ cvector3 matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md) {
   // iterate through object tree at current point
   if (tp) {
     do {
-      gradient = cvector_add(gradient,material_grid_grad(to_geom_box_coords(p, &tp->objects[oi]),
-                                     (material_data *)tp->objects[oi].o->material,
-                                     tp->objects[oi].o));
+      gradient =
+          cvector_add(gradient, material_grid_grad(to_geom_box_coords(p, &tp->objects[oi]),
+                                                   (material_data *)tp->objects[oi].o->material,
+                                                   tp->objects[oi].o));
       if (md->material_grid_kinds == material_data::U_DEFAULT) break;
       ++matgrid_val_count;
       tp = geom_tree_search_next(p, tp, &oi);
@@ -505,19 +517,18 @@ cvector3 matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md) {
 
   // compensate for overlapping grids
   if (md->material_grid_kinds == material_data::U_MEAN)
-    gradient = cvector3_scale(1.0/matgrid_val_count,gradient);
+    gradient = cvector3_scale(1.0 / matgrid_val_count, gradient);
 
   return gradient;
 }
-duals::duald dual_linear_interpolate(double rx, double ry, double rz, std::vector<duals::duald> &data,
-                          int nx, int ny, int nz, int stride) {
+duals::duald dual_linear_interpolate(double rx, double ry, double rz,
+                                     std::vector<duals::duald> &data, int nx, int ny, int nz,
+                                     int stride) {
 
   int x1, y1, z1, x2, y2, z2;
   double dx, dy, dz;
 
-  meep::map_coordinates(rx, ry, rz, nx, ny, nz,
-                  x1, y1, z1, x2, y2, z2,
-                  dx, dy, dz);
+  meep::map_coordinates(rx, ry, rz, nx, ny, nz, x1, y1, z1, x2, y2, z2, dx, dy, dz);
 
   /* define a macro to give us data(x,y,z) on the grid,
      in row-major order (the order used by HDF5): */
@@ -536,17 +547,15 @@ duals::duald material_grid_val(vector3 p, material_data *md) {
   // given the relative location, p, interpolate the material grid point.
 
   if (!is_material_grid(md)) { abort(); }
-  return dual_linear_interpolate(p.x, p.y, p.z, md->weights, md->grid_size.x,
-                                  md->grid_size.y, md->grid_size.z, 1);
-
+  return dual_linear_interpolate(p.x, p.y, p.z, md->weights, md->grid_size.x, md->grid_size.y,
+                                 md->grid_size.z, 1);
 }
 
 static duals::duald tanh_projection(duals::duald u, double beta, double eta) {
   if (beta == 0) return u;
   if (u == eta) return 0.5; // avoid NaN when beta is Inf
-  duals::duald tanh_beta_eta = tanh(beta*eta);
-  return (tanh_beta_eta + tanh(beta*(u-eta))) /
-    (tanh_beta_eta + tanh(beta*(1-eta)));
+  duals::duald tanh_beta_eta = tanh(beta * eta);
+  return (tanh_beta_eta + tanh(beta * (u - eta))) / (tanh_beta_eta + tanh(beta * (1 - eta)));
 }
 
 duals::duald matgrid_val(vector3 p, geom_box_tree tp, int oi, material_data *md) {
@@ -617,7 +626,7 @@ static void interp_tensors(vector3 diag_in_1, vector3 offdiag_in_1, vector3 diag
 void epsilon_material_grid(material_data *md, double u) {
   // NOTE: assume p lies on normalized grid within (0,1)
 
-  if (md->weights.size()==0) meep::abort("material params were not initialized!");
+  if (md->weights.size() == 0) meep::abort("material params were not initialized!");
 
   medium_struct *mm = &(md->medium);
   medium_struct *m1 = &(md->medium_1);
@@ -970,11 +979,11 @@ double geom_epsilon::chi1p1(meep::field_type ft, const meep::vec &r) {
    stolen from MPB. */
 static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree, vector3 &pcenter,
                              const geometric_object **o_front, vector3 &shiftby_front,
-                             material_type &mat_front, material_type &mat_behind,
-                             vector3 &p_front, vector3 &p_behind) {
+                             material_type &mat_front, material_type &mat_behind, vector3 &p_front,
+                             vector3 &p_behind) {
   vector3 p;
   const geometric_object *o1 = 0, *o2 = 0;
-  vector3 shiftby1 = {0, 0, 0}, shiftby2 = {0, 0, 0}, p1 = {0,0,0}, p2 = {0,0,0};
+  vector3 shiftby1 = {0, 0, 0}, shiftby2 = {0, 0, 0}, p1 = {0, 0, 0}, p2 = {0, 0, 0};
   geom_box pixel;
   material_type mat1 = vacuum, mat2 = vacuum;
   int id1 = -1, id2 = -1;
@@ -1047,9 +1056,9 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
       p2 = q;
     }
     else if (!(id1 < id2 && (id1 == id || material_type_equal(mat1, mat))) &&
-             !(id2 < id1 && (id2 == id || material_type_equal(mat2, mat)))){
+             !(id2 < id1 && (id2 == id || material_type_equal(mat2, mat)))) {
       return false;
-      }
+    }
   }
 
   // CHECK(id1 > -1, "bug in object_of_point_in_tree?");
@@ -1088,14 +1097,15 @@ static bool get_front_object(const meep::volume &v, geom_box_tree geometry_tree,
 
 void geom_epsilon::eff_chi1inv_row(meep::component c, double chi1inv_row[3], const meep::volume &v,
                                    double tol, int maxeval) {
-  /* for speed reasons, we need to "wrap" the 
+  /* for speed reasons, we need to "wrap" the
   actual eff_chi1inv_row_grad function below, since
   the original eff_chi1inv_row is a virtual function
   and doesn't play well with default arguments */
-  eff_chi1inv_row_grad(c,chi1inv_row,v,tol,maxeval,false);
+  eff_chi1inv_row_grad(c, chi1inv_row, v, tol, maxeval, false);
 }
-void geom_epsilon::eff_chi1inv_row_grad(meep::component c, double chi1inv_row[3], const meep::volume &v,
-                                   double tol, int maxeval, bool needs_grad) {
+void geom_epsilon::eff_chi1inv_row_grad(meep::component c, double chi1inv_row[3],
+                                        const meep::volume &v, double tol, int maxeval,
+                                        bool needs_grad) {
   symm_matrix meps_inv;
   bool fallback;
   eff_chi1inv_matrix(c, &meps_inv, v, tol, maxeval, fallback);
@@ -1103,57 +1113,58 @@ void geom_epsilon::eff_chi1inv_row_grad(meep::component c, double chi1inv_row[3]
   if (fallback) { fallback_chi1inv_row(c, chi1inv_row, v, tol, maxeval); }
   else {
     /* for gradient calculations, we need to return
-    the dual part of the calcuation, which corresponds 
+    the dual part of the calcuation, which corresponds
     to the first derivative w.r.t. u_i */
     if (needs_grad) {
       switch (component_direction(c)) {
-      case meep::X:
-      case meep::R:
+        case meep::X:
+        case meep::R:
           chi1inv_row[0] = meps_inv.m00.dpart();
           chi1inv_row[1] = meps_inv.m01.dpart();
           chi1inv_row[2] = meps_inv.m02.dpart();
-        break;
-      case meep::Y:
-      case meep::P:
-        chi1inv_row[0] = meps_inv.m01.dpart();
-        chi1inv_row[1] = meps_inv.m11.dpart();
-        chi1inv_row[2] = meps_inv.m12.dpart();
-        break;
-      case meep::Z:
-        chi1inv_row[0] = meps_inv.m02.dpart();
-        chi1inv_row[1] = meps_inv.m12.dpart();
-        chi1inv_row[2] = meps_inv.m22.dpart();
-        break;
-      case meep::NO_DIRECTION: chi1inv_row[0] = chi1inv_row[1] = chi1inv_row[2] = 0; break;
+          break;
+        case meep::Y:
+        case meep::P:
+          chi1inv_row[0] = meps_inv.m01.dpart();
+          chi1inv_row[1] = meps_inv.m11.dpart();
+          chi1inv_row[2] = meps_inv.m12.dpart();
+          break;
+        case meep::Z:
+          chi1inv_row[0] = meps_inv.m02.dpart();
+          chi1inv_row[1] = meps_inv.m12.dpart();
+          chi1inv_row[2] = meps_inv.m22.dpart();
+          break;
+        case meep::NO_DIRECTION: chi1inv_row[0] = chi1inv_row[1] = chi1inv_row[2] = 0; break;
       }
-    } else { // no gradient needed; standard epsilon evaluations
+    }
+    else { // no gradient needed; standard epsilon evaluations
       switch (component_direction(c)) {
-      case meep::X:
-      case meep::R:
+        case meep::X:
+        case meep::R:
           chi1inv_row[0] = meps_inv.m00.rpart();
           chi1inv_row[1] = meps_inv.m01.rpart();
           chi1inv_row[2] = meps_inv.m02.rpart();
-        break;
-      case meep::Y:
-      case meep::P:
-        chi1inv_row[0] = meps_inv.m01.rpart();
-        chi1inv_row[1] = meps_inv.m11.rpart();
-        chi1inv_row[2] = meps_inv.m12.rpart();
-        break;
-      case meep::Z:
-        chi1inv_row[0] = meps_inv.m02.rpart();
-        chi1inv_row[1] = meps_inv.m12.rpart();
-        chi1inv_row[2] = meps_inv.m22.rpart();
-        break;
-      case meep::NO_DIRECTION: chi1inv_row[0] = chi1inv_row[1] = chi1inv_row[2] = 0; break;
+          break;
+        case meep::Y:
+        case meep::P:
+          chi1inv_row[0] = meps_inv.m01.rpart();
+          chi1inv_row[1] = meps_inv.m11.rpart();
+          chi1inv_row[2] = meps_inv.m12.rpart();
+          break;
+        case meep::Z:
+          chi1inv_row[0] = meps_inv.m02.rpart();
+          chi1inv_row[1] = meps_inv.m12.rpart();
+          chi1inv_row[2] = meps_inv.m22.rpart();
+          break;
+        case meep::NO_DIRECTION: chi1inv_row[0] = chi1inv_row[1] = chi1inv_row[2] = 0; break;
       }
     }
   }
 }
 
-void kottke_algorithm(meep::component c, symm_matrix *chi1inv_matrix, symm_matrix &meps, 
-  symm_matrix &eps2, cvector3 &cnormal, duals::duald fill){
-  /* Ref: " Perturbation theory for anisotropic dielectric interfaces, 
+void kottke_algorithm(meep::component c, symm_matrix *chi1inv_matrix, symm_matrix &meps,
+                      symm_matrix &eps2, cvector3 &cnormal, duals::duald fill) {
+  /* Ref: " Perturbation theory for anisotropic dielectric interfaces,
     and application to subpixel smoothing of discretized numerical methods",
     https://math.mit.edu/~stevenj/papers/KottkeFa08.pdf
   */
@@ -1162,18 +1173,18 @@ void kottke_algorithm(meep::component c, symm_matrix *chi1inv_matrix, symm_matri
   duals::duald Rot[3][3];
   eps1 = meps;
 
-  Rot[0][0] = cnormal.x.re + 1_e*cnormal.x.im;
-  Rot[1][0] = cnormal.y.re + 1_e*cnormal.y.im;
-  Rot[2][0] = cnormal.z.re + 1_e*cnormal.z.im;
+  Rot[0][0] = cnormal.x.re + 1_e * cnormal.x.im;
+  Rot[1][0] = cnormal.y.re + 1_e * cnormal.y.im;
+  Rot[2][0] = cnormal.z.re + 1_e * cnormal.z.im;
   if (fabs(cnormal.x.re) > 1e-2 || fabs(cnormal.y.re) > 1e-2) {
-    Rot[0][2] = cnormal.y.re + 1_e*cnormal.y.im;
-    Rot[1][2] = -(cnormal.x.re + 1_e*cnormal.x.im);
+    Rot[0][2] = cnormal.y.re + 1_e * cnormal.y.im;
+    Rot[1][2] = -(cnormal.x.re + 1_e * cnormal.x.im);
     Rot[2][2] = 0;
   }
   else { /* n is not parallel to z direction, use (x x n) instead */
     Rot[0][2] = 0;
-    Rot[1][2] = -(cnormal.z.re + 1_e*cnormal.z.im);
-    Rot[2][2] = cnormal.y.re + 1_e*cnormal.y.im;
+    Rot[1][2] = -(cnormal.z.re + 1_e * cnormal.z.im);
+    Rot[2][2] = cnormal.y.re + 1_e * cnormal.y.im;
   }
   { /* normalize second column */
     duals::duald s = Rot[0][2] * Rot[0][2] + Rot[1][2] * Rot[1][2] + Rot[2][2] * Rot[2][2];
@@ -1244,9 +1255,10 @@ void kottke_algorithm(meep::component c, symm_matrix *chi1inv_matrix, symm_matri
   sym_matrix_invert(chi1inv_matrix, &meps);
 }
 
-duals::duald get_material_grid_fill(meep::ndim dim, duals::duald d, double r, duals::duald u, double eta){
+duals::duald get_material_grid_fill(meep::ndim dim, duals::duald d, double r, duals::duald u,
+                                    double eta) {
   /* The fill fraction should describe the amount of u=1 material in the current pixel.
-  
+
   Occasionally, the distance to the nearest interface is outside the current
   sphere, which means we don't need to do any averaging (the fill is 0 or 1). Again,
   we don't know if that means we are in void or solid, however, until we look
@@ -1254,59 +1266,70 @@ duals::duald get_material_grid_fill(meep::ndim dim, duals::duald d, double r, du
   can verify.
   */
   duals::duald rel_fill;
-  if (abs(d) >= abs(r)){
+  if (abs(d) >= abs(r)) {
     return -1.0; // garbage fill
-  } else {
+  }
+  else {
     if (dim == meep::D1)
-      return (r-d)/(2*r);
+      return (r - d) / (2 * r);
     else if (dim == meep::D2 || dim == meep::Dcyl)
-      return (1/(r*r*meep::pi)) * (r*r*acos(d/r)-d*sqrt(r*r-d*d));
+      return (1 / (r * r * meep::pi)) * (r * r * acos(d / r) - d * sqrt(r * r - d * d));
     else if (dim == meep::D3)
-      return (((r-d)*(r-d))/(4*meep::pi*r*r*r))*(2*r+d);
+      return (((r - d) * (r - d)) / (4 * meep::pi * r * r * r)) * (2 * r + d);
   }
-  
+
   return rel_fill;
 }
 
-duals::duald normalize_dual(cvector3 &cv){
+duals::duald normalize_dual(cvector3 &cv) {
   duals::duald d[3], norm;
   // compute the norm, u_0'
-  d[0] = cv.x.re + 1_e*cv.x.im;
-  d[1] = cv.y.re + 1_e*cv.y.im;
-  d[2] = cv.z.re + 1_e*cv.z.im;
+  d[0] = cv.x.re + 1_e * cv.x.im;
+  d[1] = cv.y.re + 1_e * cv.y.im;
+  d[2] = cv.z.re + 1_e * cv.z.im;
 
-  norm = sqrt(abs(d[0])*abs(d[0]) + abs(d[1])*abs(d[1]) + abs(d[2])*abs(d[2]));
+  norm = sqrt(abs(d[0]) * abs(d[0]) + abs(d[1]) * abs(d[1]) + abs(d[2]) * abs(d[2]));
 
   // normalize the normal vector
   d[0] = d[0] / norm;
   d[1] = d[1] / norm;
   d[2] = d[2] / norm;
-  cv.x.re = d[0].rpart(); cv.x.im = d[0].dpart();
-  cv.y.re = d[1].rpart(); cv.y.im = d[1].dpart();
-  cv.z.re = d[2].rpart(); cv.z.im = d[2].dpart();
+  cv.x.re = d[0].rpart();
+  cv.x.im = d[0].dpart();
+  cv.y.re = d[1].rpart();
+  cv.y.im = d[1].dpart();
+  cv.z.re = d[2].rpart();
+  cv.z.im = d[2].dpart();
 
   return norm;
 }
 
-duals::duald get_distance(cvector3 &cv, duals::duald uval, double eta){
+duals::duald get_distance(cvector3 &cv, duals::duald uval, double eta) {
   duals::duald d[3], norm;
-  d[0] = cv.x.re + 1_e*cv.x.im;
-  d[1] = cv.y.re + 1_e*cv.y.im;
-  d[2] = cv.z.re + 1_e*cv.z.im;
+  d[0] = cv.x.re + 1_e * cv.x.im;
+  d[1] = cv.y.re + 1_e * cv.y.im;
+  d[2] = cv.z.re + 1_e * cv.z.im;
 
-  norm = sqrt(abs(d[0])*abs(d[0]) + abs(d[1])*abs(d[1]) + abs(d[2])*abs(d[2]));
+  norm = sqrt(abs(d[0]) * abs(d[0]) + abs(d[1]) * abs(d[1]) + abs(d[2]) * abs(d[2]));
   return (eta - uval) / norm;
 }
 
-void dual_interpolate_mat(material_type mat,symm_matrix &eps, duals::duald u) {
+void dual_interpolate_mat(material_type mat, symm_matrix &eps, duals::duald u) {
   duals::duald u_bar = tanh_projection(u, mat->beta, mat->eta); // projection
-  if (std::isnan(u_bar.dpart())) u_bar = u_bar.rpart(); // when beta=inf, we need to correct when gradient breaks down
-  eps.m00 = mat->medium_1.epsilon_diag.x + u_bar * (mat->medium_2.epsilon_diag.x-mat->medium_1.epsilon_diag.x);
-  eps.m11 = mat->medium_1.epsilon_diag.y + u_bar * (mat->medium_2.epsilon_diag.y-mat->medium_1.epsilon_diag.y);
-  eps.m22 = mat->medium_1.epsilon_diag.z + u_bar * (mat->medium_2.epsilon_diag.z-mat->medium_1.epsilon_diag.z);
-  eps.m01 = mat->medium_1.epsilon_offdiag.x.re + u_bar * (mat->medium_2.epsilon_offdiag.x.re-mat->medium_1.epsilon_offdiag.x.re);
-  eps.m02 = mat->medium_1.epsilon_offdiag.y.re + u_bar * (mat->medium_2.epsilon_offdiag.y.re-mat->medium_1.epsilon_offdiag.y.re);
-  eps.m12 = mat->medium_1.epsilon_offdiag.z.re + u_bar * (mat->medium_2.epsilon_offdiag.z.re-mat->medium_1.epsilon_offdiag.z.re);
+  if (std::isnan(u_bar.dpart()))
+    u_bar = u_bar.rpart(); // when beta=inf, we need to correct when gradient breaks down
+  eps.m00 = mat->medium_1.epsilon_diag.x +
+            u_bar * (mat->medium_2.epsilon_diag.x - mat->medium_1.epsilon_diag.x);
+  eps.m11 = mat->medium_1.epsilon_diag.y +
+            u_bar * (mat->medium_2.epsilon_diag.y - mat->medium_1.epsilon_diag.y);
+  eps.m22 = mat->medium_1.epsilon_diag.z +
+            u_bar * (mat->medium_2.epsilon_diag.z - mat->medium_1.epsilon_diag.z);
+  eps.m01 = mat->medium_1.epsilon_offdiag.x.re +
+            u_bar * (mat->medium_2.epsilon_offdiag.x.re - mat->medium_1.epsilon_offdiag.x.re);
+  eps.m02 = mat->medium_1.epsilon_offdiag.y.re +
+            u_bar * (mat->medium_2.epsilon_offdiag.y.re - mat->medium_1.epsilon_offdiag.y.re);
+  eps.m12 = mat->medium_1.epsilon_offdiag.z.re +
+            u_bar * (mat->medium_2.epsilon_offdiag.z.re - mat->medium_1.epsilon_offdiag.z.re);
 }
 
 void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_matrix,
@@ -1336,27 +1359,27 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
 
   /* we may have entered this routine (e.g. because of a material grid
   backprop routine) but don't want to actually do any averaging */
-  if (maxeval == 0){
+  if (maxeval == 0) {
     get_material_pt(mat, v.center());
-    if (is_material_grid(mat)){
+    if (is_material_grid(mat)) {
       tp = geom_tree_search(vec_to_vector3(v.center()), restricted_tree, &oi);
       uval = matgrid_val(vec_to_vector3(v.center()), tp, oi, mat);
     mgavg:
-      dual_interpolate_mat(mat,meps,uval);
+      dual_interpolate_mat(mat, meps, uval);
       sym_matrix_invert(chi1inv_matrix, &meps);
-      //get_material_pt(mat, v.center());
-      //material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix);
+      // get_material_pt(mat, v.center());
+      // material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix);
       material_gc(mat);
       return;
-    }else{
-      goto trivial;
     }
+    else { goto trivial; }
   }
 
-  // For variable materials with do_averaging == true, switch over to slow fallback integration method.
-  // For material grids, however, we have to do some more logic first...
-  if ((is_variable(mat) && mat->do_averaging) || (is_variable(mat_behind) && mat_behind->do_averaging)) {
-    if ((!is_material_grid(mat)) && (!is_material_grid(mat_behind))){
+  // For variable materials with do_averaging == true, switch over to slow fallback integration
+  // method. For material grids, however, we have to do some more logic first...
+  if ((is_variable(mat) && mat->do_averaging) ||
+      (is_variable(mat_behind) && mat_behind->do_averaging)) {
+    if ((!is_material_grid(mat)) && (!is_material_grid(mat_behind))) {
       fallback = true;
       return;
     }
@@ -1375,13 +1398,13 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
     to do any averaging, or if the current pixel is smooth enough
     to just evaluate.
     */
-    if (is_material_grid(mat)){
+    if (is_material_grid(mat)) {
       // if we have a damping term and β≠∞, we can't smooth
       if ((mat->damping != 0) && (mat->eta != std::numeric_limits<double>::infinity())) goto noavg;
 
       tp = geom_tree_search(p_mat, restricted_tree, &oi);
       uval = matgrid_val(p_mat, tp, oi, mat);
-      
+
       normal = matgrid_grad(p_mat, tp, oi, mat);
       if (cvector3_re(normal).x == 0 && cvector3_re(normal).y == 0 && cvector3_re(normal).z == 0)
         /* couldn't get normal vector for this point; no averaging is needed,
@@ -1389,92 +1412,93 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
         goto mgavg;
       duals::duald u_prime = normalize_dual(normal);
       duals::duald d = (mat->eta - uval) / u_prime;
-      
-      double r = v.diameter()/2;
-      fill = get_material_grid_fill(v.dim,d,r,uval,mat->eta);
-      if (fill.rpart() < 0){
+
+      double r = v.diameter() / 2;
+      fill = get_material_grid_fill(v.dim, d, r, uval, mat->eta);
+      if (fill.rpart() < 0) {
         /* we are far enough away from an interface that
         we don't need any averaging -- but we still need
         to interpolate from the material grid */
         goto mgavg;
-      } else{
+      }
+      else {
         /* we have a material grid interface within our pixel.
         we therefore need to set the two materials used for
         averaging to our corresponding solid and void materials.
         */
         symm_matrix eps_0, eps_plus, eps_minus;
-        dual_interpolate_mat(mat,eps_0,uval);
-        dual_interpolate_mat(mat,eps_plus,uval+r*u_prime);
-        dual_interpolate_mat(mat,eps_minus,uval-r*u_prime);
-        
+        dual_interpolate_mat(mat, eps_0, uval);
+        dual_interpolate_mat(mat, eps_plus, uval + r * u_prime);
+        dual_interpolate_mat(mat, eps_minus, uval - r * u_prime);
+
         if (d > 0) {
           // Case 1: d > 0. Then let ε̃₊ = ε₊ and let ε̃₋ = [ε₀d + ε₋(Δx/2 – d)] / (Δx/2).
           meps = eps_plus;
-          eps2 = (eps_0*d + eps_minus*(r-d)) / (r);
-        }else {
+          eps2 = (eps_0 * d + eps_minus * (r - d)) / (r);
+        }
+        else {
           // Case 2: d < 0. Then let ε̃₋ = ε₋ and let ε̃₊ = [-ε₀d + ε₊(Δx/2 + d)] / (Δx/2).
           eps2 = eps_minus;
-          meps = (-eps_0*d + eps_plus*(r+d)) / (r);
+          meps = (-eps_0 * d + eps_plus * (r + d)) / (r);
         }
       }
-    } else if(is_variable(mat)) {
+    }
+    else if (is_variable(mat)) {
       // no averaging is needed (not a material grid)
       eval_material_pt(mat, vec_to_vector3(v.center()));
       goto trivial;
-    // materials are non variable and uniform -- no need to average
-    } else {
-      goto trivial;
+      // materials are non variable and uniform -- no need to average
     }
-  /* ------------------------------------------- */
-  // Two different materials (perhaps a m.g. and a geom object)
-  /* ------------------------------------------- */
-  } else {
-    
+    else { goto trivial; }
+    /* ------------------------------------------- */
+    // Two different materials (perhaps a m.g. and a geom object)
+    /* ------------------------------------------- */
+  }
+  else {
+
     vector3 normal_re = unit_vector3(normal_to_fixed_object(vector3_minus(p, shiftby), *o));
     if (normal_re.x == 0 && normal_re.y == 0 && normal_re.z == 0)
       goto noavg; // couldn't get normal vector for this point, punt
-    normal = make_cvector3(normal_re,make_vector3());
+    normal = make_cvector3(normal_re, make_vector3());
     geom_box pixel = gv2box(v);
     pixel.low = vector3_minus(pixel.low, shiftby);
     pixel.high = vector3_minus(pixel.high, shiftby);
-    fill = duals::duald(box_overlap_with_object(pixel, *o, tol, maxeval),0);
+    fill = duals::duald(box_overlap_with_object(pixel, *o, tol, maxeval), 0);
     /* Evaluate materials in case they are variable.  This allows us to do fast subpixel averaging
       at the boundary of an object with a variable material, while remaining accurate enough if the
-      material is continuous over the pixel.  (We make a first-order error by averaging as if the material
-      were constant, but only in a boundary layer of thickness 1/resolution, so the net effect should
-      still be second-order.) */
-    if (is_variable(mat)){
+      material is continuous over the pixel.  (We make a first-order error by averaging as if the
+      material were constant, but only in a boundary layer of thickness 1/resolution, so the net
+      effect should still be second-order.) */
+    if (is_variable(mat)) {
       eval_material_pt(mat, p_mat);
       eval_material_pt(mat_behind, p_mat_behind);
       if (material_type_equal(mat, mat_behind)) goto trivial;
     }
-    
-    /* to properly determine the tensors of a material 
+
+    /* to properly determine the tensors of a material
       that has a material grid, we need to be careful.
       Specifically, we need to include the effects of u_i
       in the dual part of the material tensor */
-    if (is_material_grid(mat)){
+    if (is_material_grid(mat)) {
       tp = geom_tree_search(p_mat, restricted_tree, &oi);
       duals::duald uval = matgrid_val(p_mat, tp, oi, mat);
-      dual_interpolate_mat(mat,meps,uval);
-      sym_matrix_invert(chi1inv_matrix, &meps);    
-    } else {
-      material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix);
+      dual_interpolate_mat(mat, meps, uval);
+      sym_matrix_invert(chi1inv_matrix, &meps);
     }
-    if (is_material_grid(mat_behind)){
+    else { material_epsmu(meep::type(c), mat, &meps, chi1inv_matrix); }
+    if (is_material_grid(mat_behind)) {
       tp = geom_tree_search(p_mat_behind, restricted_tree, &oi);
       duals::duald uval = matgrid_val(p_mat_behind, tp, oi, mat);
-      dual_interpolate_mat(mat_behind,eps2,uval);
+      dual_interpolate_mat(mat_behind, eps2, uval);
       sym_matrix_invert(&epsinv2, &eps2); // not really needed
-    } else {
-      material_epsmu(meep::type(c), mat_behind, &eps2, &epsinv2);
     }
+    else { material_epsmu(meep::type(c), mat_behind, &eps2, &epsinv2); }
   }
 
   // it doesn't make sense to average metals (electric or magnetic)
   if (is_metal(meep::type(c), &mat) || is_metal(meep::type(c), &mat_behind)) goto noavg;
 
-  kottke_algorithm(c,chi1inv_matrix,meps,eps2,normal,fill);
+  kottke_algorithm(c, chi1inv_matrix, meps, eps2, normal, fill);
   material_gc(mat);
 }
 
@@ -1494,7 +1518,7 @@ struct matgrid_volavg {
 
 static void get_uproj_w(const matgrid_volavg *mgva, double x0, double &u_proj, double &w) {
   // use a linear approximation for the material grid weights around the Yee grid point
-  //u_proj = tanh_projection(mgva->uval + mgva->ugrad_abs*x0, mgva->beta, mgva->eta);
+  // u_proj = tanh_projection(mgva->uval + mgva->ugrad_abs*x0, mgva->beta, mgva->eta);
   if (mgva->dim == meep::D1)
     w = 1 / (2 * mgva->rad);
   else if (mgva->dim == meep::D2 || mgva->dim == meep::Dcyl)
@@ -1665,9 +1689,11 @@ void geom_epsilon::fallback_chi1inv_row(meep::component c, double chi1inv_row[3]
   minveps = ret.im / vol;
 #else
   meps = adaptive_integration(eps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval, &esterr,
-                              &errflag) / vol;
+                              &errflag) /
+         vol;
   minveps = adaptive_integration(inveps_func, xmin, xmax, n, (void *)this, 0, tol, maxeval, &esterr,
-                                  &errflag) / vol;
+                                 &errflag) /
+            vol;
 #endif
 
   if (eps_ever_negative) // averaging negative eps causes instability
@@ -1912,7 +1938,7 @@ void geom_epsilon::sigma_row(meep::component c, double sigrow[3], const meep::ve
     tp = geom_tree_search(p, restricted_tree, &oi);
     // interpolate and project onto material grid
     u = tanh_projection(matgrid_val(p, tp, oi, mat), mat->beta, mat->eta).rpart();
-    epsilon_material_grid(mat, u);   // interpolate material from material grid point
+    epsilon_material_grid(mat, u); // interpolate material from material grid point
     mat->medium.check_offdiag_im_zero_or_abort();
   }
 
@@ -2180,7 +2206,8 @@ void add_absorbing_layer(absorber_list alist, double thickness, int direction, i
 if needed */
 geom_epsilon *make_geom_epsilon(meep::structure *s, geometric_object_list *g, vector3 center,
                                 bool _ensure_periodicity, material_type _default_material,
-                                material_type_list extra_materials, bool _use_anisotropic_averaging) {
+                                material_type_list extra_materials,
+                                bool _use_anisotropic_averaging) {
   // set global variables in libctlgeom based on data fields in s
   geom_initialize();
   geometry_center = center;
@@ -2241,11 +2268,10 @@ void set_materials_from_geometry(meep::structure *s, geometric_object_list g, ve
                                  bool use_anisotropic_averaging, double tol, int maxeval,
                                  bool _ensure_periodicity, material_type _default_material,
                                  absorber_list alist, material_type_list extra_materials) {
-  meep_geom::geom_epsilon *geps = meep_geom::make_geom_epsilon(s, &g, center, _ensure_periodicity,
-                                                               _default_material, extra_materials,
-                                                               use_anisotropic_averaging);
-  set_materials_from_geom_epsilon(s, geps, use_anisotropic_averaging, tol,
-                                  maxeval, alist);
+  meep_geom::geom_epsilon *geps =
+      meep_geom::make_geom_epsilon(s, &g, center, _ensure_periodicity, _default_material,
+                                   extra_materials, use_anisotropic_averaging);
+  set_materials_from_geom_epsilon(s, geps, use_anisotropic_averaging, tol, maxeval, alist);
   delete geps;
 }
 
@@ -2343,8 +2369,8 @@ material_type make_material_grid(bool do_averaging, double beta, double eta, dou
 
 void update_weights(material_type matgrid, double *weights) {
   size_t N = matgrid->grid_size.x * matgrid->grid_size.y * matgrid->grid_size.z;
-  //memcpy(matgrid->weights, weights, N * sizeof(double));
-  for (size_t i=0;i<N;i++)
+  // memcpy(matgrid->weights, weights, N * sizeof(double));
+  for (size_t i = 0; i < N; i++)
     matgrid->weights.push_back(weights[i]);
 }
 
@@ -2853,27 +2879,30 @@ std::complex<double> cond_cmp(meep::component c, const meep::vec &r, double freq
   const medium_struct *mm = &(md->medium);
 
   // get the row we care about
-      switch (component_direction(c)) {
-      case meep::X:
-      case meep::R: return std::complex<double>(1.0, mm->D_conductivity_diag.x / (2*meep::pi*freq));
-      case meep::Y:
-      case meep::P: return std::complex<double>(1.0, mm->D_conductivity_diag.y / (2*meep::pi*freq));
-      case meep::Z: return std::complex<double>(1.0, mm->D_conductivity_diag.z / (2*meep::pi*freq));
-      case meep::NO_DIRECTION: meep::abort("Invalid adjoint field component");
-    }
-}
-
-std::complex<double> get_material_gradient(
-    const meep::vec &r,               // current point
-    const meep::component adjoint_c,  // adjoint field component
-    const meep::component forward_c,  // forward field component
-    std::complex<double> fields_f,    // forward field at current point
-    double freq,                      // frequency
-    geom_epsilon *geps,               // material
-    meep::grid_volume &gv,            // simulation grid volume
-    double du,                        // step size
-    std::vector<duals::duald> &u,     // matgrid
-    int idx                           // matgrid index
+  switch (component_direction(c)) {
+    case meep::X:
+    case meep::R:
+      return std::complex<double>(1.0, mm->D_conductivity_diag.x / (2 * meep::pi * freq));
+    case meep::Y:
+    case meep::P:
+      return std::complex<double>(1.0, mm->D_conductivity_diag.y / (2 * meep::pi * freq));
+    case meep::Z:
+      return std::complex<double>(1.0, mm->D_conductivity_diag.z / (2 * meep::pi * freq));
+    case meep::NO_DIRECTION: meep::abort("Invalid adjoint field component");
+  }
+}
+
+std::complex<double>
+get_material_gradient(const meep::vec &r,              // current point
+                      const meep::component adjoint_c, // adjoint field component
+                      const meep::component forward_c, // forward field component
+                      std::complex<double> fields_f,   // forward field at current point
+                      double freq,                     // frequency
+                      geom_epsilon *geps,              // material
+                      meep::grid_volume &gv,           // simulation grid volume
+                      double du,                       // step size
+                      std::vector<duals::duald> &u,    // matgrid
+                      int idx                          // matgrid index
 ) {
   /*Compute the Aᵤx product from the -λᵀAᵤx calculation.
   The current adjoint (λ) field component (adjoint_c)
@@ -2925,15 +2954,15 @@ std::complex<double> get_material_gradient(
     u[idx] += 1_e; // add dual component
     geps->eff_chi1inv_row_grad(adjoint_c, dA_du, v, geps->tol, maxevals, true);
     u[idx] -= 1_e; // remove dual component
-    
+
     return -dA_du[dir_idx] * fields_f; // note the minus sign! (from Steven's adjoint notes)
-  
-  } else {
+  }
+  else {
     std::complex<double> row_1[3], row_2[3], dA_du[3];
     u[idx] -= du;
-    eff_chi1inv_row_disp(adjoint_c,row_1,r,freq,geps);
-    u[idx] += 2*du;
-    eff_chi1inv_row_disp(adjoint_c,row_2,r,freq,geps);
+    eff_chi1inv_row_disp(adjoint_c, row_1, r, freq, geps);
+    u[idx] += 2 * du;
+    eff_chi1inv_row_disp(adjoint_c, row_2, r, freq, geps);
     u[idx] -= du;
 
     for (int i = 0; i < 3; i++)
@@ -2949,13 +2978,12 @@ With the addition of subpixel smoothing, however, the vJp became
 much more complicated and it is easier to calculate the entire gradient
 using finite differences (at the cost of slightly less accurate gradients
 due to floating-point roundoff errors). */
-void add_interpolate_weights(double rx, double ry, double rz,
-                             double *data, int nx, int ny, int nz, int stride,
-                             double scaleby, std::vector<duals::duald> &udata, int ukind, duals::duald uval,
-                             meep::vec r, geom_epsilon *geps,
+void add_interpolate_weights(double rx, double ry, double rz, double *data, int nx, int ny, int nz,
+                             int stride, double scaleby, std::vector<duals::duald> &udata,
+                             int ukind, duals::duald uval, meep::vec r, geom_epsilon *geps,
                              meep::component adjoint_c, meep::component forward_c,
-                             std::complex<double> fwd, std::complex<double> adj,
-                             double freq, meep::grid_volume &gv, double du) {
+                             std::complex<double> fwd, std::complex<double> adj, double freq,
+                             meep::grid_volume &gv, double du) {
   int x1, y1, z1, x2, y2, z2;
   double dx, dy, dz;
   duals::duald u;
@@ -3056,10 +3084,9 @@ void material_grids_addgradient_point(double *v, vector3 p, double scalegrad, ge
     uval = tanh_projection(matgrid_val(p, tp, oi, mg), mg->beta, mg->eta);
     do {
       vector3 pb = to_geom_box_coords(p, &tp->objects[oi]);
-      add_interpolate_weights(pb.x, pb.y, pb.z, vcur, sz.x, sz.y, sz.z, 1,
-                              scalegrad, mg->weights, kind, uval,
-                              vector3_to_vec(p), geps, adjoint_c, forward_c,
-                              fwd, adj, freq, gv, tol);
+      add_interpolate_weights(pb.x, pb.y, pb.z, vcur, sz.x, sz.y, sz.z, 1, scalegrad, mg->weights,
+                              kind, uval, vector3_to_vec(p), geps, adjoint_c, forward_c, fwd, adj,
+                              freq, gv, tol);
       if (kind == material_data::U_DEFAULT) break;
       tp = geom_tree_search_next(p, tp, &oi);
     } while (tp && is_material_grid((material_data *)tp->objects[oi].o->material));
@@ -3070,10 +3097,9 @@ void material_grids_addgradient_point(double *v, vector3 p, double scalegrad, ge
     vector3 sz = mg->grid_size;
     double *vcur = v;
     uval = tanh_projection(material_grid_val(p, mg), mg->beta, mg->eta);
-    add_interpolate_weights(p.x, p.y, p.z, vcur, sz.x, sz.y, sz.z, 1,
-                            scalegrad, mg->weights, kind, uval,
-                            vector3_to_vec(p), geps, adjoint_c, forward_c,
-                            fwd, adj, freq, gv, tol);
+    add_interpolate_weights(p.x, p.y, p.z, vcur, sz.x, sz.y, sz.z, 1, scalegrad, mg->weights, kind,
+                            uval, vector3_to_vec(p), geps, adjoint_c, forward_c, fwd, adj, freq, gv,
+                            tol);
   }
 }
 
diff --git a/src/meepgeom.hpp b/src/meepgeom.hpp
index 3b5014cb4..00bc8e50c 100644
--- a/src/meepgeom.hpp
+++ b/src/meepgeom.hpp
@@ -154,49 +154,70 @@ inline vector3 make_vector3(double x = 0.0, double y = 0.0, double z = 0.0) {
 }
 
 inline cvector3 cvector3_scale(number s, cvector3 v) {
-  return make_cvector3(vector3_scale(s,cvector3_re(v)),vector3_scale(s,cvector3_im(v)));
+  return make_cvector3(vector3_scale(s, cvector3_re(v)), vector3_scale(s, cvector3_im(v)));
 }
 
-inline cvector3 cvector_zero(){
-  return make_cvector3(make_vector3(),make_vector3());
-}
+inline cvector3 cvector_zero() { return make_cvector3(make_vector3(), make_vector3()); }
 
-inline cvector3 cvector_add(cvector3 cv1, cvector3 cv2){
-  return make_cvector3(vector3_plus(cvector3_re(cv1),cvector3_re(cv2)),vector3_plus(cvector3_im(cv1),cvector3_im(cv2)));
+inline cvector3 cvector_add(cvector3 cv1, cvector3 cv2) {
+  return make_cvector3(vector3_plus(cvector3_re(cv1), cvector3_re(cv2)),
+                       vector3_plus(cvector3_im(cv1), cvector3_im(cv2)));
 }
 
-struct symm_matrix{
+struct symm_matrix {
   duals::duald m00, m01, m02, m11, m12, m22;
-  struct symm_matrix& operator+=(const symm_matrix& rhs) { 
-    m00 += rhs.m00; m11 += rhs.m11; m22 += rhs.m22;
-    m01 += rhs.m01; m02 += rhs.m02; m12 += rhs.m12;
-    return *this; 
+  struct symm_matrix &operator+=(const symm_matrix &rhs) {
+    m00 += rhs.m00;
+    m11 += rhs.m11;
+    m22 += rhs.m22;
+    m01 += rhs.m01;
+    m02 += rhs.m02;
+    m12 += rhs.m12;
+    return *this;
+  }
+  struct symm_matrix &operator+=(const duals::duald &k) {
+    m00 += k;
+    m11 += k;
+    m22 += k;
+    return *this;
+  }
+  struct symm_matrix &operator+(const symm_matrix &rhs) {
+    m00 += rhs.m00;
+    m11 += rhs.m11;
+    m22 += rhs.m22;
+    m01 += rhs.m01;
+    m02 += rhs.m02;
+    m12 += rhs.m12;
+    return *this;
+  }
+  struct symm_matrix &operator*(const duals::duald &k) {
+    m00 *= k;
+    m11 *= k;
+    m22 *= k;
+    m01 *= k;
+    m02 *= k;
+    m12 *= k;
+    return *this;
+  }
+  struct symm_matrix &operator/(const duals::duald &k) {
+    m00 /= k;
+    m11 /= k;
+    m22 /= k;
+    m01 /= k;
+    m02 /= k;
+    m12 /= k;
+    return *this;
   }
-  struct symm_matrix& operator+=(const duals::duald& k) { 
-    m00 += k; m11 += k; m22 += k;
-    return *this; 
-   }
-  struct symm_matrix& operator+(const symm_matrix& rhs) { 
-    m00 += rhs.m00; m11 += rhs.m11; m22 += rhs.m22;
-    m01 += rhs.m01; m02 += rhs.m02; m12 += rhs.m12;
-    return *this; 
-   }
-  struct symm_matrix& operator*( const duals::duald& k) { 
-    m00 *= k; m11 *= k; m22 *= k;
-    m01 *= k; m02 *= k; m12 *= k;
-    return *this; 
-   }
-  struct symm_matrix& operator/( const duals::duald& k) { 
-    m00 /= k; m11 /= k; m22 /= k;
-    m01 /= k; m02 /= k; m12 /= k;
-    return *this; 
-   }
-  struct symm_matrix& operator-() { 
-    m00 = -m00; m11 = -m11; m22 = -m22;
-    m01 = -m01; m02 = -m02; m12 = -m12;
-    return *this; 
-   }
-} ;
+  struct symm_matrix &operator-() {
+    m00 = -m00;
+    m11 = -m11;
+    m22 = -m22;
+    m01 = -m01;
+    m02 = -m02;
+    m12 = -m12;
+    return *this;
+  }
+};
 
 struct pol {
   meep_geom::susceptibility user_s;
@@ -249,7 +270,7 @@ class geom_epsilon : public meep::material_function {
   virtual void eff_chi1inv_row(meep::component c, double chi1inv_row[3], const meep::volume &v,
                                double tol, int maxeval);
   void eff_chi1inv_row_grad(meep::component c, double chi1inv_row[3], const meep::volume &v,
-                               double tol, int maxeval, bool needs_grad);
+                            double tol, int maxeval, bool needs_grad);
 
   void eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_matrix, const meep::volume &v,
                           double tol, int maxeval, bool &fallback);
@@ -270,12 +291,11 @@ class geom_epsilon : public meep::material_function {
 };
 
 void set_dimensions(int dims);
-geom_epsilon* make_geom_epsilon(meep::structure *s, geometric_object_list *g,
-                                 vector3 center = make_vector3(),
-                                 bool ensure_periodicity = false,
-                                 material_type _default_material = vacuum,
-                                 material_type_list extra_materials = material_type_list(),
-                                 bool use_anisotropic_averaging = false);
+geom_epsilon *make_geom_epsilon(meep::structure *s, geometric_object_list *g,
+                                vector3 center = make_vector3(), bool ensure_periodicity = false,
+                                material_type _default_material = vacuum,
+                                material_type_list extra_materials = material_type_list(),
+                                bool use_anisotropic_averaging = false);
 //, geometric_object_list g, material_type_list extra_materials
 void set_materials_from_geometry(meep::structure *s, geometric_object_list g,
                                  vector3 center = make_vector3(),

From 83fd9ef41c071b57783ff58fb0a5d6eda7bceb29 Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Fri, 12 Aug 2022 10:51:10 -0400
Subject: [PATCH 25/29] add function to update m number

---
 python/adjoint/optimization_problem.py |  6 ++--
 python/simulation.py                   | 19 ++++++++++++
 src/meepgeom.cpp                       | 40 +++++++++++++++-----------
 3 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/python/adjoint/optimization_problem.py b/python/adjoint/optimization_problem.py
index 707eb8beb..a7899580e 100644
--- a/python/adjoint/optimization_problem.py
+++ b/python/adjoint/optimization_problem.py
@@ -226,7 +226,7 @@ def adjoint_run(self):
 
         # flip the m number
         if utils._check_if_cylindrical(self.sim):
-            self.sim.m = -self.sim.m
+            self.sim.change_m_number(-self.sim.m)
 
         # flip the k point
         if self.sim.k_point:
@@ -261,11 +261,11 @@ def adjoint_run(self):
 
         # reset the m number
         if utils._check_if_cylindrical(self.sim):
-            self.sim.m = -self.sim.m
+            self.sim.change_m_number(-self.sim.m)
 
         # reset the k point
         if self.sim.k_point:
-            self.sim.k_point *= -1
+            self.sim.change_k_point(-1 * self.sim.k_point)
 
         # update optimizer's state
         self.current_state = "ADJ"
diff --git a/python/simulation.py b/python/simulation.py
index a8afd9e3a..9662d4f5b 100644
--- a/python/simulation.py
+++ b/python/simulation.py
@@ -4278,6 +4278,25 @@ def change_k_point(self, k):
                     self.fields.use_bloch(
                         py_v3_to_vec(self.dimensions, self.k_point, self.is_cylindrical)
                     )
+    
+    def change_m_number(self, m):
+        """
+        Change the `m` number (the angular ϕ dependence).
+        """
+        self.m = m
+        
+        if self.fields:
+            needs_complex_fields = not (
+                not self.m or self.m == 0
+            )
+
+            if needs_complex_fields and self.fields.is_real:
+                self.fields = None
+                self._is_initialized = False
+                self.init_sim()
+            else:
+                if self.m is not None:
+                    self.fields.m = self.m
 
     def change_sources(self, new_sources):
         """
diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 6b83d109b..a37ab2442 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -2651,16 +2651,25 @@ get_material_gradient(const meep::vec &r,              // current point
   material_type md;
   geps->get_material_pt(md, r);
 
+  // get the tensor column index corresponding to the forward component
   int dir_idx = 0;
-  if (forward_c == meep::Dx || forward_c == meep::Dr)
-    dir_idx = 0;
-  else if (forward_c == meep::Dy || forward_c == meep::Dp)
-    dir_idx = 1;
-  else if (forward_c == meep::Dz)
-    dir_idx = 2;
-  else
-    meep::abort("Invalid adjoint field component");
+  switch (meep::component_direction(forward_c)) {
+      case meep::X:
+      case meep::R:
+        dir_idx = 0;
+        break;
+      case meep::Y:
+      case meep::P:
+        dir_idx = 1;
+        break;
+      case meep::Z:
+        dir_idx = 2;
+        break;
+      case meep::NO_DIRECTION:
+        meep::abort("Invalid forward component!\n");
+  }
 
+  // materials are non-dispersive
   if (md->trivial) {
     const double sd = 1.0; // FIXME: make user-changable?
     meep::volume v(r);
@@ -2668,18 +2677,17 @@ get_material_gradient(const meep::vec &r,              // current point
       v.set_direction_min(d, r.in_direction(d) - 0.5 * gv.inva * sd);
       v.set_direction_max(d, r.in_direction(d) + 0.5 * gv.inva * sd);
     }
-    double row_1[3], row_2[3], dA_du[3];
+    double row_1[3], row_2[3];
     double orig = u[idx];
     u[idx] -= du;
     geps->eff_chi1inv_row(adjoint_c, row_1, v, geps->tol, geps->maxeval);
     u[idx] += 2 * du;
     geps->eff_chi1inv_row(adjoint_c, row_2, v, geps->tol, geps->maxeval);
     u[idx] = orig;
+    return fields_f * (row_1[dir_idx] - row_2[dir_idx]) / (2 * du);
 
-    for (int i = 0; i < 3; i++)
-      dA_du[i] = (row_1[i] - row_2[i]) / (2 * du);
-    return dA_du[dir_idx] * fields_f;
   }
+  // materials have some dispersion
   else {
     double orig = u[idx];
     std::complex<double> row_1[3], row_2[3], dA_du[3];
@@ -2689,9 +2697,7 @@ get_material_gradient(const meep::vec &r,              // current point
     eff_chi1inv_row_disp(adjoint_c, row_2, r, freq, geps);
     u[idx] = orig;
 
-    for (int i = 0; i < 3; i++)
-      dA_du[i] = (row_1[i] - row_2[i]) / (2 * du);
-    return dA_du[dir_idx] * fields_f * cond_cmp(forward_c, r, freq, geps);
+    return fields_f * (row_1[dir_idx] - row_2[dir_idx]) / (2 * du) * cond_cmp(forward_c, r, freq, geps);
   }
 }
 
@@ -2720,8 +2726,8 @@ void add_interpolate_weights(double rx, double ry, double rz, double *data, int
 /* define a macro to give us data(x,y,z) on the grid,
 in row-major order (the order used by HDF5): */
 #define IDX(x, y, z) (((x)*ny + (y)) * nz + (z)) * stride
-#define D(x, y, z) (data[(((x)*ny + (y)) * nz + (z)) * stride])
-#define U(x, y, z) (udata[(((x)*ny + (y)) * nz + (z)) * stride])
+#define D(x, y, z) (data[IDX(x, y, z)])
+#define U(x, y, z) (udata[IDX(x, y, z)])
 
   u = (((U(x1, y1, z1) * (1.0 - dx) + U(x2, y1, z1) * dx) * (1.0 - dy) +
         (U(x1, y2, z1) * (1.0 - dx) + U(x2, y2, z1) * dx) * dy) *

From 2dff39ef02a81431d36e566725aa8e04f12327fc Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Fri, 12 Aug 2022 13:58:16 -0400
Subject: [PATCH 26/29] update change m number too

---
 python/simulation.py |  4 ++--
 src/fields.cpp       | 11 +++++++++++
 src/meep.hpp         |  2 ++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/python/simulation.py b/python/simulation.py
index 9662d4f5b..13aecfbe0 100644
--- a/python/simulation.py
+++ b/python/simulation.py
@@ -4284,7 +4284,7 @@ def change_m_number(self, m):
         Change the `m` number (the angular ϕ dependence).
         """
         self.m = m
-        
+
         if self.fields:
             needs_complex_fields = not (
                 not self.m or self.m == 0
@@ -4296,7 +4296,7 @@ def change_m_number(self, m):
                 self.init_sim()
             else:
                 if self.m is not None:
-                    self.fields.m = self.m
+                    self.fields.change_m_number(m)
 
     def change_sources(self, new_sources):
         """
diff --git a/src/fields.cpp b/src/fields.cpp
index b9f43f338..a8c568fd3 100644
--- a/src/fields.cpp
+++ b/src/fields.cpp
@@ -793,4 +793,15 @@ bool operator==(const comms_key &lhs, const comms_key &rhs) {
   return (lhs.ft == rhs.ft) && (lhs.phase == rhs.phase) && (lhs.pair == rhs.pair);
 }
 
+void fields::change_m_number(double new_m){
+  m = new_m;
+  for (int i = 0; i < num_chunks; i++) {
+    chunks[i]->change_m_number(new_m);
+  }
+}
+
+void fields_chunk::change_m_number(double new_m) {
+  m = new_m;
+}
+
 } // namespace meep
diff --git a/src/meep.hpp b/src/meep.hpp
index f5a721e81..9b82072a2 100644
--- a/src/meep.hpp
+++ b/src/meep.hpp
@@ -1546,6 +1546,7 @@ class fields_chunk {
   void remove_sources();
   void remove_susceptibilities(bool shared_chunks);
   void zero_fields();
+  void change_m_number(double new_m);
 
   // update_eh.cpp
   bool needs_W_prev(component c) const;
@@ -1751,6 +1752,7 @@ class fields {
   void remove_fluxes();
   void reset();
   void log(const char *prefix = "");
+  void change_m_number(double new_m);
 
   // time.cpp
   std::vector<double> time_spent_on(time_sink sink);

From 2230adbf1bc5a7d2cf6b11ce03549cbde1a9bd5f Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Fri, 12 Aug 2022 14:48:26 -0400
Subject: [PATCH 27/29] precommit cleanup

---
 python/simulation.py |  6 ++----
 src/fields.cpp       |  6 ++----
 src/meepgeom.cpp     | 23 ++++++++---------------
 3 files changed, 12 insertions(+), 23 deletions(-)

diff --git a/python/simulation.py b/python/simulation.py
index 13aecfbe0..6c3e15d63 100644
--- a/python/simulation.py
+++ b/python/simulation.py
@@ -4278,7 +4278,7 @@ def change_k_point(self, k):
                     self.fields.use_bloch(
                         py_v3_to_vec(self.dimensions, self.k_point, self.is_cylindrical)
                     )
-    
+
     def change_m_number(self, m):
         """
         Change the `m` number (the angular ϕ dependence).
@@ -4286,9 +4286,7 @@ def change_m_number(self, m):
         self.m = m
 
         if self.fields:
-            needs_complex_fields = not (
-                not self.m or self.m == 0
-            )
+            needs_complex_fields = not (not self.m or self.m == 0)
 
             if needs_complex_fields and self.fields.is_real:
                 self.fields = None
diff --git a/src/fields.cpp b/src/fields.cpp
index a8c568fd3..6d16eb748 100644
--- a/src/fields.cpp
+++ b/src/fields.cpp
@@ -793,15 +793,13 @@ bool operator==(const comms_key &lhs, const comms_key &rhs) {
   return (lhs.ft == rhs.ft) && (lhs.phase == rhs.phase) && (lhs.pair == rhs.pair);
 }
 
-void fields::change_m_number(double new_m){
+void fields::change_m_number(double new_m) {
   m = new_m;
   for (int i = 0; i < num_chunks; i++) {
     chunks[i]->change_m_number(new_m);
   }
 }
 
-void fields_chunk::change_m_number(double new_m) {
-  m = new_m;
-}
+void fields_chunk::change_m_number(double new_m) { m = new_m; }
 
 } // namespace meep
diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index a37ab2442..15fa04076 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -2654,19 +2654,12 @@ get_material_gradient(const meep::vec &r,              // current point
   // get the tensor column index corresponding to the forward component
   int dir_idx = 0;
   switch (meep::component_direction(forward_c)) {
-      case meep::X:
-      case meep::R:
-        dir_idx = 0;
-        break;
-      case meep::Y:
-      case meep::P:
-        dir_idx = 1;
-        break;
-      case meep::Z:
-        dir_idx = 2;
-        break;
-      case meep::NO_DIRECTION:
-        meep::abort("Invalid forward component!\n");
+    case meep::X:
+    case meep::R: dir_idx = 0; break;
+    case meep::Y:
+    case meep::P: dir_idx = 1; break;
+    case meep::Z: dir_idx = 2; break;
+    case meep::NO_DIRECTION: meep::abort("Invalid forward component!\n");
   }
 
   // materials are non-dispersive
@@ -2685,7 +2678,6 @@ get_material_gradient(const meep::vec &r,              // current point
     geps->eff_chi1inv_row(adjoint_c, row_2, v, geps->tol, geps->maxeval);
     u[idx] = orig;
     return fields_f * (row_1[dir_idx] - row_2[dir_idx]) / (2 * du);
-
   }
   // materials have some dispersion
   else {
@@ -2697,7 +2689,8 @@ get_material_gradient(const meep::vec &r,              // current point
     eff_chi1inv_row_disp(adjoint_c, row_2, r, freq, geps);
     u[idx] = orig;
 
-    return fields_f * (row_1[dir_idx] - row_2[dir_idx]) / (2 * du) * cond_cmp(forward_c, r, freq, geps);
+    return fields_f * (row_1[dir_idx] - row_2[dir_idx]) / (2 * du) *
+           cond_cmp(forward_c, r, freq, geps);
   }
 }
 

From 33ea63a3497808db06dd510d5dd93c8398faf095 Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Fri, 12 Aug 2022 14:53:37 -0400
Subject: [PATCH 28/29] precommit cleanup

---
 python/adjoint/filters.py          |    9 +-
 python/adjoint/utils.py            |    1 +
 python/tests/test_material_grid.py |  371 ++++----
 python/typemap_utils.cpp           |    6 +-
 src/meepgeom.cpp                   |   21 +-
 src/meepgeom.hpp                   |   93 +-
 src/support/dual.hpp               | 1392 ++++++++++++++--------------
 7 files changed, 982 insertions(+), 911 deletions(-)

diff --git a/python/adjoint/filters.py b/python/adjoint/filters.py
index c095ad7d0..e77b66855 100644
--- a/python/adjoint/filters.py
+++ b/python/adjoint/filters.py
@@ -879,18 +879,19 @@ def gray_indicator(x):
     """
     return npa.mean(4 * x.flatten() * (1 - x.flatten())) * 100
 
+
 def make_sdf(data):
-    '''
+    """
     Assume the input, data, is the desired output shape
     (i.e. 1d, 2d, or 3d) and that it's values are between
     0 and 1.
-    '''
+    """
     # create signed distance function
-    sd = skfmm.distance(data- 0.5 , dx = 1)
+    sd = skfmm.distance(data - 0.5, dx=1)
 
     # interpolate zero-levelset onto 0.5-levelset
     x = [np.min(sd.flatten()), 0, np.max(sd.flatten())]
     y = [0, 0.5, 1]
-    f = interpolate.interp1d(x, y, kind='linear')
+    f = interpolate.interp1d(x, y, kind="linear")
 
     return f(sd)
diff --git a/python/adjoint/utils.py b/python/adjoint/utils.py
index f01f642dc..9683348e1 100644
--- a/python/adjoint/utils.py
+++ b/python/adjoint/utils.py
@@ -22,6 +22,7 @@
 # default finite difference step size when calculating Aᵤ
 FD_DEFAULT = 1e-6
 
+
 class DesignRegion:
     def __init__(self, design_parameters, volume=None, size=None, center=mp.Vector3()):
         self.volume = volume or mp.Volume(center=center, size=size)
diff --git a/python/tests/test_material_grid.py b/python/tests/test_material_grid.py
index d64b38348..c40a973cf 100644
--- a/python/tests/test_material_grid.py
+++ b/python/tests/test_material_grid.py
@@ -1,13 +1,14 @@
-'''
+"""
 Checks that a material grid works with symmetries, and that the
 new subpixel smoothing feature is accurate.
 
 TODO:
     Create a 3D test that works well with the new smoothing
-'''
+"""
 
 import meep as mp
 from meep import mpb
+
 try:
     import meep.adjoint as mpa
 except:
@@ -19,188 +20,246 @@
 import unittest
 
 rad = 0.301943
-k_point = mp.Vector3(0.3892,0.1597,0)
+k_point = mp.Vector3(0.3892, 0.1597, 0)
 Si = mp.Medium(index=3.5)
-cell_size = mp.Vector3(1,1,0)
+cell_size = mp.Vector3(1, 1, 0)
+
 
 def compute_transmittance(matgrid_symmetry=False):
-        resolution = 25
-
-        cell_size = mp.Vector3(6,6,0)
-
-        boundary_layers = [mp.PML(thickness=1.0)]
-
-        matgrid_size = mp.Vector3(2,2,0)
-        matgrid_resolution = 2*resolution
-
-        Nx, Ny = int(matgrid_size.x*matgrid_resolution), int(matgrid_size.y*matgrid_resolution)
-
-        # ensure reproducible results
-        rng = np.random.RandomState(2069588)
-
-        w = rng.rand(Nx,Ny)
-        # for subpixel smoothing, the underlying design grid must be smoothly varying
-        w = mpa.tanh_projection(rng.rand(Nx,Ny),1e5,0.5)
-        w = mpa.conic_filter(w,0.25,matgrid_size.x,matgrid_size.y,matgrid_resolution)
-        weights = 0.5 * (w + np.fliplr(w)) if not matgrid_symmetry else w
-
-        matgrid = mp.MaterialGrid(mp.Vector3(Nx,Ny),
-                                  mp.air,
-                                  mp.Medium(index=3.5),
-                                  weights=weights,
-                                  beta=np.inf,
-                                  grid_type='U_MEAN')
-
-        geometry = [mp.Block(center=mp.Vector3(),
-                             size=mp.Vector3(mp.inf,1.0,mp.inf),
-                             material=mp.Medium(index=3.5)),
-                    mp.Block(center=mp.Vector3(),
-                             size=mp.Vector3(matgrid_size.x,matgrid_size.y,0),
-                             material=matgrid)]
-
-        if matgrid_symmetry:
-                geometry.append(mp.Block(center=mp.Vector3(),
-                                         size=mp.Vector3(matgrid_size.x,matgrid_size.y,0),
-                                         material=matgrid,
-                                         e2=mp.Vector3(y=-1)))
-
-        eig_parity = mp.ODD_Y + mp.EVEN_Z
-
-        fcen = 0.65
-        df = 0.2*fcen
-        sources = [mp.EigenModeSource(src=mp.GaussianSource(fcen,fwidth=df),
-                                      center=mp.Vector3(-2.0,0),
-                                      size=mp.Vector3(0,4.0),
-                                      eig_parity=eig_parity)]
-
-        sim = mp.Simulation(resolution=resolution,
-                            cell_size=cell_size,
-                            boundary_layers=boundary_layers,
-                            sources=sources,
-                            geometry=geometry)
-
-        mode_mon = sim.add_flux(fcen,
-                                0,
-                                1,
-                                mp.FluxRegion(center=mp.Vector3(2.0,0),
-                                              size=mp.Vector3(0,4.0)))
-
-        sim.run(until_after_sources=mp.stop_when_dft_decayed())
-
-        mode_coeff = sim.get_eigenmode_coefficients(mode_mon,[1],eig_parity).alpha[0,:,0][0]
-
-        tran = np.power(np.abs(mode_coeff),2)
-        print('tran:, {}, {}'.format("sym" if matgrid_symmetry else "nosym", tran))
-
-        return tran
-
-def compute_resonant_mode_2d(res,radius=rad,default_mat=False,cylinder=False,cylinder_matgrid=True,do_averaging=True):
-        fcen = 0.3
-        df = 0.2*fcen
-        sources = [mp.Source(mp.GaussianSource(fcen,fwidth=df),
-                             component=mp.Hz,
-                             center=mp.Vector3(-0.1057,0.2094,0))]
-
-        matgrid_size = mp.Vector3(1,1,0)
-        matgrid_resolution = 1200
-
-        # for a fixed resolution, compute the number of grid points
-        # necessary which are defined on the corners of the voxels
-        Nx, Ny = int(matgrid_size.x*matgrid_resolution), int(matgrid_size.y*matgrid_resolution)
-
-        x = np.linspace(-0.5*matgrid_size.x,0.5*matgrid_size.x,Nx)
-        y = np.linspace(-0.5*matgrid_size.y,0.5*matgrid_size.y,Ny)
-        xv, yv = np.meshgrid(x,y)
-        weights = mpa.make_sdf(np.sqrt(np.square(xv) + np.square(yv)) < radius)
-        if cylinder:
-            weights = 0.0*weights+1.0          
-        
-        matgrid = mp.MaterialGrid(mp.Vector3(Nx,Ny),
-                                  mp.air,
-                                  Si,
-                                  weights=weights,
-                                  beta=np.inf,
-                                  eta=0.5)
-        
-        # Use a cylinder object, not a block object
-        if cylinder:
-            # within the cylinder, use a (uniform) material grid
-            if cylinder_matgrid:
-                geometry = [mp.Cylinder(center=mp.Vector3(),
-                             radius=radius,
-                             material=matgrid)]
-            # within the cylinder, just use a normal medium
-            else:
-                geometry = [mp.Cylinder(center=mp.Vector3(),
-                             radius=radius,
-                             material=Si)]
-        # use a block object
+    resolution = 25
+
+    cell_size = mp.Vector3(6, 6, 0)
+
+    boundary_layers = [mp.PML(thickness=1.0)]
+
+    matgrid_size = mp.Vector3(2, 2, 0)
+    matgrid_resolution = 2 * resolution
+
+    Nx, Ny = int(matgrid_size.x * matgrid_resolution), int(
+        matgrid_size.y * matgrid_resolution
+    )
+
+    # ensure reproducible results
+    rng = np.random.RandomState(2069588)
+
+    w = rng.rand(Nx, Ny)
+    # for subpixel smoothing, the underlying design grid must be smoothly varying
+    w = mpa.tanh_projection(rng.rand(Nx, Ny), 1e5, 0.5)
+    w = mpa.conic_filter(w, 0.25, matgrid_size.x, matgrid_size.y, matgrid_resolution)
+    weights = 0.5 * (w + np.fliplr(w)) if not matgrid_symmetry else w
+
+    matgrid = mp.MaterialGrid(
+        mp.Vector3(Nx, Ny),
+        mp.air,
+        mp.Medium(index=3.5),
+        weights=weights,
+        beta=np.inf,
+        grid_type="U_MEAN",
+    )
+
+    geometry = [
+        mp.Block(
+            center=mp.Vector3(),
+            size=mp.Vector3(mp.inf, 1.0, mp.inf),
+            material=mp.Medium(index=3.5),
+        ),
+        mp.Block(
+            center=mp.Vector3(),
+            size=mp.Vector3(matgrid_size.x, matgrid_size.y, 0),
+            material=matgrid,
+        ),
+    ]
+
+    if matgrid_symmetry:
+        geometry.append(
+            mp.Block(
+                center=mp.Vector3(),
+                size=mp.Vector3(matgrid_size.x, matgrid_size.y, 0),
+                material=matgrid,
+                e2=mp.Vector3(y=-1),
+            )
+        )
+
+    eig_parity = mp.ODD_Y + mp.EVEN_Z
+
+    fcen = 0.65
+    df = 0.2 * fcen
+    sources = [
+        mp.EigenModeSource(
+            src=mp.GaussianSource(fcen, fwidth=df),
+            center=mp.Vector3(-2.0, 0),
+            size=mp.Vector3(0, 4.0),
+            eig_parity=eig_parity,
+        )
+    ]
+
+    sim = mp.Simulation(
+        resolution=resolution,
+        cell_size=cell_size,
+        boundary_layers=boundary_layers,
+        sources=sources,
+        geometry=geometry,
+    )
+
+    mode_mon = sim.add_flux(
+        fcen, 0, 1, mp.FluxRegion(center=mp.Vector3(2.0, 0), size=mp.Vector3(0, 4.0))
+    )
+
+    sim.run(until_after_sources=mp.stop_when_dft_decayed())
+
+    mode_coeff = sim.get_eigenmode_coefficients(mode_mon, [1], eig_parity).alpha[
+        0, :, 0
+    ][0]
+
+    tran = np.power(np.abs(mode_coeff), 2)
+    print("tran:, {}, {}".format("sym" if matgrid_symmetry else "nosym", tran))
+
+    return tran
+
+
+def compute_resonant_mode_2d(
+    res,
+    radius=rad,
+    default_mat=False,
+    cylinder=False,
+    cylinder_matgrid=True,
+    do_averaging=True,
+):
+    fcen = 0.3
+    df = 0.2 * fcen
+    sources = [
+        mp.Source(
+            mp.GaussianSource(fcen, fwidth=df),
+            component=mp.Hz,
+            center=mp.Vector3(-0.1057, 0.2094, 0),
+        )
+    ]
+
+    matgrid_size = mp.Vector3(1, 1, 0)
+    matgrid_resolution = 1200
+
+    # for a fixed resolution, compute the number of grid points
+    # necessary which are defined on the corners of the voxels
+    Nx, Ny = int(matgrid_size.x * matgrid_resolution), int(
+        matgrid_size.y * matgrid_resolution
+    )
+
+    x = np.linspace(-0.5 * matgrid_size.x, 0.5 * matgrid_size.x, Nx)
+    y = np.linspace(-0.5 * matgrid_size.y, 0.5 * matgrid_size.y, Ny)
+    xv, yv = np.meshgrid(x, y)
+    weights = mpa.make_sdf(np.sqrt(np.square(xv) + np.square(yv)) < radius)
+    if cylinder:
+        weights = 0.0 * weights + 1.0
+
+    matgrid = mp.MaterialGrid(
+        mp.Vector3(Nx, Ny), mp.air, Si, weights=weights, beta=np.inf, eta=0.5
+    )
+
+    # Use a cylinder object, not a block object
+    if cylinder:
+        # within the cylinder, use a (uniform) material grid
+        if cylinder_matgrid:
+            geometry = [
+                mp.Cylinder(center=mp.Vector3(), radius=radius, material=matgrid)
+            ]
+        # within the cylinder, just use a normal medium
         else:
-            geometry = [mp.Block(center=mp.Vector3(),
-                             size=mp.Vector3(matgrid_size.x,matgrid_size.y,0),
-                             material=matgrid)]
-
-        sim = mp.Simulation(resolution=res,
-                            eps_averaging=do_averaging,
-                            cell_size=cell_size,
-                            default_material=matgrid if default_mat else mp.Medium(),
-                            geometry=geometry if not default_mat else [],
-                            sources=sources,
-                            k_point=k_point)
-
-        h = mp.Harminv(mp.Hz, mp.Vector3(0.3718,-0.2076), fcen, df)
-        sim.run(mp.after_sources(h),
-                until_after_sources=300)
-
-        try:
-            for m in h.modes:
-                print("harminv:, {}, {}, {}".format(res,m.freq,m.Q))
-            freq = h.modes[0].freq
-        except:
-            raise RuntimeError("No resonant modes found.")
-
-        return freq
+            geometry = [mp.Cylinder(center=mp.Vector3(), radius=radius, material=Si)]
+    # use a block object
+    else:
+        geometry = [
+            mp.Block(
+                center=mp.Vector3(),
+                size=mp.Vector3(matgrid_size.x, matgrid_size.y, 0),
+                material=matgrid,
+            )
+        ]
+
+    sim = mp.Simulation(
+        resolution=res,
+        eps_averaging=do_averaging,
+        cell_size=cell_size,
+        default_material=matgrid if default_mat else mp.Medium(),
+        geometry=geometry if not default_mat else [],
+        sources=sources,
+        k_point=k_point,
+    )
+
+    h = mp.Harminv(mp.Hz, mp.Vector3(0.3718, -0.2076), fcen, df)
+    sim.run(mp.after_sources(h), until_after_sources=300)
+
+    try:
+        for m in h.modes:
+            print(f"harminv:, {res}, {m.freq}, {m.Q}")
+        freq = h.modes[0].freq
+    except:
+        raise RuntimeError("No resonant modes found.")
+
+    return freq
+
 
 def compute_resonant_mode_mpb(resolution=512):
     geometry = [mp.Cylinder(rad, material=Si)]
     geometry_lattice = mp.Lattice(cell_size)
 
-    ms = mpb.ModeSolver(num_bands=1,
-                        k_points=[mp.cartesian_to_reciprocal(k_point, geometry_lattice)],
-                        geometry=geometry,
-                        geometry_lattice=geometry_lattice,
-                        resolution=resolution)
-
+    ms = mpb.ModeSolver(
+        num_bands=1,
+        k_points=[mp.cartesian_to_reciprocal(k_point, geometry_lattice)],
+        geometry=geometry,
+        geometry_lattice=geometry_lattice,
+        resolution=resolution,
+    )
 
     ms.run_te()
     return ms.freqs[0]
 
+
 class TestMaterialGrid(unittest.TestCase):
     def test_subpixel_smoothing(self):
         res = 25
 
-        def subpixel_test_matrix(radius,do_averaging):
-            freq_cylinder = compute_resonant_mode_2d(res, radius, default_mat=False, cylinder=True, cylinder_matgrid=False, do_averaging=do_averaging)
-            freq_matgrid = compute_resonant_mode_2d(res, radius, default_mat=False, cylinder=False, do_averaging=do_averaging)
-            freq_matgrid_cylinder = compute_resonant_mode_2d(res, radius, default_mat=False, cylinder=True, cylinder_matgrid=True, do_averaging=do_averaging)
-            return [freq_cylinder,freq_matgrid,freq_matgrid_cylinder]
+        def subpixel_test_matrix(radius, do_averaging):
+            freq_cylinder = compute_resonant_mode_2d(
+                res,
+                radius,
+                default_mat=False,
+                cylinder=True,
+                cylinder_matgrid=False,
+                do_averaging=do_averaging,
+            )
+            freq_matgrid = compute_resonant_mode_2d(
+                res,
+                radius,
+                default_mat=False,
+                cylinder=False,
+                do_averaging=do_averaging,
+            )
+            freq_matgrid_cylinder = compute_resonant_mode_2d(
+                res,
+                radius,
+                default_mat=False,
+                cylinder=True,
+                cylinder_matgrid=True,
+                do_averaging=do_averaging,
+            )
+            return [freq_cylinder, freq_matgrid, freq_matgrid_cylinder]
 
         # when smoothing is off, all three tests should be identical to machine precision
-        no_smoothing = subpixel_test_matrix(rad,False)
+        no_smoothing = subpixel_test_matrix(rad, False)
         self.assertEqual(no_smoothing[0], no_smoothing[1])
         self.assertEqual(no_smoothing[1], no_smoothing[2])
 
         # when we slightly perturb the radius, the results should be the same as before.
-        no_smoothing_perturbed = subpixel_test_matrix(rad+0.01/res,False)
+        no_smoothing_perturbed = subpixel_test_matrix(rad + 0.01 / res, False)
         self.assertEqual(no_smoothing, no_smoothing_perturbed)
 
         # when smoothing is on, the simple material results should be different from the matgrid results
-        smoothing = subpixel_test_matrix(rad,True)
+        smoothing = subpixel_test_matrix(rad, True)
         self.assertNotEqual(smoothing[0], smoothing[1])
         self.assertAlmostEqual(smoothing[1], smoothing[2], 3)
 
         # when we slighty perturb the radius, the results should all be different from before.
-        smoothing_perturbed = subpixel_test_matrix(rad+0.01/res,True)
+        smoothing_perturbed = subpixel_test_matrix(rad + 0.01 / res, True)
         self.assertNotEqual(smoothing, smoothing_perturbed)
 
         # "exact" frequency computed using MPB
@@ -209,11 +268,11 @@ def subpixel_test_matrix(radius,do_averaging):
         res = [50, 100]
         freq_matgrid = []
         for r in res:
-            freq_matgrid.append(compute_resonant_mode_2d(r,cylinder=False))
+            freq_matgrid.append(compute_resonant_mode_2d(r, cylinder=False))
             # verify that the frequency of the resonant mode is
             # approximately equal to the reference value
             self.assertAlmostEqual(freq_ref, freq_matgrid[-1], 2)
-        print("results:    ",freq_ref,freq_matgrid)
+        print("results:    ", freq_ref, freq_matgrid)
 
         # verify that the relative error is decreasing with increasing resolution
         # and is better than linear convergence because of subpixel smoothing
diff --git a/python/typemap_utils.cpp b/python/typemap_utils.cpp
index eb79697e7..0ad65c6bf 100644
--- a/python/typemap_utils.cpp
+++ b/python/typemap_utils.cpp
@@ -527,11 +527,11 @@ static int pymaterial_grid_to_material_grid(PyObject *po, material_data *md) {
     md->weights.push_back(((double *)PyArray_DATA(pao))[i]);
   }
   md->weights.clear();
-  for (size_t i=0;i<PyArray_SIZE(pao);i++){
+  for (size_t i = 0; i < PyArray_SIZE(pao); i++) {
     md->weights.push_back(((double *)PyArray_DATA(pao))[i]);
   }
-  //md->weights = new double[PyArray_SIZE(pao)];
-  //memcpy(md->weights, (double *)PyArray_DATA(pao), PyArray_SIZE(pao) * sizeof(double));
+  // md->weights = new double[PyArray_SIZE(pao)];
+  // memcpy(md->weights, (double *)PyArray_DATA(pao), PyArray_SIZE(pao) * sizeof(double));
 
   // if needed, combine sus structs to main object
   PyObject *py_e_sus_m1 = PyObject_GetAttrString(po_medium1, "E_susceptibilities");
diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index 315a54153..55991283f 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -499,9 +499,10 @@ cvector3 matgrid_grad(vector3 p, geom_box_tree tp, int oi, material_data *md) {
   // iterate through object tree at current point
   if (tp) {
     do {
-      gradient = cvector_add(gradient,material_grid_grad(to_geom_box_coords(p, &tp->objects[oi]),
-                                     (material_data *)tp->objects[oi].o->material,
-                                     tp->objects[oi].o));
+      gradient =
+          cvector_add(gradient, material_grid_grad(to_geom_box_coords(p, &tp->objects[oi]),
+                                                   (material_data *)tp->objects[oi].o->material,
+                                                   tp->objects[oi].o));
       if (md->material_grid_kinds == material_data::U_DEFAULT) break;
       ++matgrid_val_count;
       tp = geom_tree_search_next(p, tp, &oi);
@@ -1173,18 +1174,18 @@ void kottke_algorithm(meep::component c, symm_matrix *chi1inv_matrix, symm_matri
   duals::duald Rot[3][3];
   eps1 = meps;
 
-  Rot[0][0] = cnormal.x.re + 1_e*cnormal.x.im;
-  Rot[1][0] = cnormal.y.re + 1_e*cnormal.y.im;
-  Rot[2][0] = cnormal.z.re + 1_e*cnormal.z.im;
+  Rot[0][0] = cnormal.x.re + 1_e * cnormal.x.im;
+  Rot[1][0] = cnormal.y.re + 1_e * cnormal.y.im;
+  Rot[2][0] = cnormal.z.re + 1_e * cnormal.z.im;
   if (fabs(cnormal.x.re) > 1e-2 || fabs(cnormal.y.re) > 1e-2) {
-    Rot[0][2] = cnormal.y.re + 1_e*cnormal.y.im;
-    Rot[1][2] = -(cnormal.x.re + 1_e*cnormal.x.im);
+    Rot[0][2] = cnormal.y.re + 1_e * cnormal.y.im;
+    Rot[1][2] = -(cnormal.x.re + 1_e * cnormal.x.im);
     Rot[2][2] = 0;
   }
   else { /* n is not parallel to z direction, use (x x n) instead */
     Rot[0][2] = 0;
-    Rot[1][2] = -(cnormal.z.re + 1_e*cnormal.z.im);
-    Rot[2][2] = cnormal.y.re + 1_e*cnormal.y.im;
+    Rot[1][2] = -(cnormal.z.re + 1_e * cnormal.z.im);
+    Rot[2][2] = cnormal.y.re + 1_e * cnormal.y.im;
   }
   { /* normalize second column */
     duals::duald s = Rot[0][2] * Rot[0][2] + Rot[1][2] * Rot[1][2] + Rot[2][2] * Rot[2][2];
diff --git a/src/meepgeom.hpp b/src/meepgeom.hpp
index 06bce5e5f..d5f0f6ef1 100644
--- a/src/meepgeom.hpp
+++ b/src/meepgeom.hpp
@@ -154,49 +154,70 @@ inline vector3 make_vector3(double x = 0.0, double y = 0.0, double z = 0.0) {
 }
 
 inline cvector3 cvector3_scale(number s, cvector3 v) {
-  return make_cvector3(vector3_scale(s,cvector3_re(v)),vector3_scale(s,cvector3_im(v)));
+  return make_cvector3(vector3_scale(s, cvector3_re(v)), vector3_scale(s, cvector3_im(v)));
 }
 
-inline cvector3 cvector_zero(){
-  return make_cvector3(make_vector3(),make_vector3());
-}
+inline cvector3 cvector_zero() { return make_cvector3(make_vector3(), make_vector3()); }
 
-inline cvector3 cvector_add(cvector3 cv1, cvector3 cv2){
-  return make_cvector3(vector3_plus(cvector3_re(cv1),cvector3_re(cv2)),vector3_plus(cvector3_im(cv1),cvector3_im(cv2)));
+inline cvector3 cvector_add(cvector3 cv1, cvector3 cv2) {
+  return make_cvector3(vector3_plus(cvector3_re(cv1), cvector3_re(cv2)),
+                       vector3_plus(cvector3_im(cv1), cvector3_im(cv2)));
 }
 
-struct symm_matrix{
+struct symm_matrix {
   duals::duald m00, m01, m02, m11, m12, m22;
-  struct symm_matrix& operator+=(const symm_matrix& rhs) { 
-    m00 += rhs.m00; m11 += rhs.m11; m22 += rhs.m22;
-    m01 += rhs.m01; m02 += rhs.m02; m12 += rhs.m12;
-    return *this; 
+  struct symm_matrix &operator+=(const symm_matrix &rhs) {
+    m00 += rhs.m00;
+    m11 += rhs.m11;
+    m22 += rhs.m22;
+    m01 += rhs.m01;
+    m02 += rhs.m02;
+    m12 += rhs.m12;
+    return *this;
+  }
+  struct symm_matrix &operator+=(const duals::duald &k) {
+    m00 += k;
+    m11 += k;
+    m22 += k;
+    return *this;
+  }
+  struct symm_matrix &operator+(const symm_matrix &rhs) {
+    m00 += rhs.m00;
+    m11 += rhs.m11;
+    m22 += rhs.m22;
+    m01 += rhs.m01;
+    m02 += rhs.m02;
+    m12 += rhs.m12;
+    return *this;
+  }
+  struct symm_matrix &operator*(const duals::duald &k) {
+    m00 *= k;
+    m11 *= k;
+    m22 *= k;
+    m01 *= k;
+    m02 *= k;
+    m12 *= k;
+    return *this;
   }
-  struct symm_matrix& operator+=(const duals::duald& k) { 
-    m00 += k; m11 += k; m22 += k;
-    return *this; 
-   }
-  struct symm_matrix& operator+(const symm_matrix& rhs) { 
-    m00 += rhs.m00; m11 += rhs.m11; m22 += rhs.m22;
-    m01 += rhs.m01; m02 += rhs.m02; m12 += rhs.m12;
-    return *this; 
-   }
-  struct symm_matrix& operator*( const duals::duald& k) { 
-    m00 *= k; m11 *= k; m22 *= k;
-    m01 *= k; m02 *= k; m12 *= k;
-    return *this; 
-   }
-  struct symm_matrix& operator/( const duals::duald& k) { 
-    m00 /= k; m11 /= k; m22 /= k;
-    m01 /= k; m02 /= k; m12 /= k;
-    return *this; 
-   }
-  struct symm_matrix& operator-() { 
-    m00 = -m00; m11 = -m11; m22 = -m22;
-    m01 = -m01; m02 = -m02; m12 = -m12;
-    return *this; 
-   }
-} ;
+  struct symm_matrix &operator/(const duals::duald &k) {
+    m00 /= k;
+    m11 /= k;
+    m22 /= k;
+    m01 /= k;
+    m02 /= k;
+    m12 /= k;
+    return *this;
+  }
+  struct symm_matrix &operator-() {
+    m00 = -m00;
+    m11 = -m11;
+    m22 = -m22;
+    m01 = -m01;
+    m02 = -m02;
+    m12 = -m12;
+    return *this;
+  }
+};
 
 struct pol {
   meep_geom::susceptibility user_s;
diff --git a/src/support/dual.hpp b/src/support/dual.hpp
index ba17dbe94..8c18d8714 100644
--- a/src/support/dual.hpp
+++ b/src/support/dual.hpp
@@ -27,7 +27,7 @@
 
 #if !defined(CPPDUALS_IGNORE_COMPILER_VERSION) && !defined(_WIN32)
 #if __cplusplus < 201103L
-  #error CPPDUALS needs at least a C++11 compliant compiler
+#error CPPDUALS needs at least a C++11 compliant compiler
 #endif
 #endif
 
@@ -64,12 +64,11 @@ file \ref duals/dual_eigen should be included.
 using namespace duals::literals;
 
 template <class T> T   f(T x) { return pow(x,pow(x,x)); }
-template <class T> T  df(T x) { return pow(x,-1. + x + pow(x,x)) * (1. + x*log(x) + x*pow(log(x),2.)); }
-template <class T> T ddf(T x) { return (pow(x,pow(x,x)) * pow(pow(x,x - 1.) + pow(x,x)*log(x)*(log(x) + 1.), 2.) +
-                                        pow(x,pow(x,x)) * (pow(x,x - 1.) * log(x) +
-                                                           pow(x,x - 1.) * (log(x) + 1.) +
-                                                           pow(x,x - 1.) * ((x - 1.)/x + log(x)) +
-                                                           pow(x,x) * log(x) * pow(log(x) + 1., 2.) )); }
+template <class T> T  df(T x) { return pow(x,-1. + x + pow(x,x)) * (1. + x*log(x) +
+x*pow(log(x),2.)); } template <class T> T ddf(T x) { return (pow(x,pow(x,x)) * pow(pow(x,x - 1.) +
+pow(x,x)*log(x)*(log(x) + 1.), 2.) + pow(x,pow(x,x)) * (pow(x,x - 1.) * log(x) + pow(x,x - 1.) *
+(log(x) + 1.) + pow(x,x - 1.) * ((x - 1.)/x + log(x)) + pow(x,x) * log(x) * pow(log(x) + 1., 2.) ));
+}
 
 int main()
 {
@@ -275,13 +274,15 @@ commons](https://commons.wikimedia.org/wiki/File:Greek_Epsilon_archaic.svg)
 
 Some casual reading material:
 
-- [ON QUATERNIONS, William Rowan Hamilton, Proceedings of the Royal Irish Academy, 3 (1847), pp. 1–16.](https://www.maths.tcd.ie/pub/HistMath/People/Hamilton/Quatern2/)
-- [Basic Space-Time Transformations Expressed by Means of Two-Component Number Systems](https://doi.org/10.12693%2Faphyspola.86.291)
+- [ON QUATERNIONS, William Rowan Hamilton, Proceedings of the Royal Irish Academy, 3 (1847), pp.
+1–16.](https://www.maths.tcd.ie/pub/HistMath/People/Hamilton/Quatern2/)
+- [Basic Space-Time Transformations Expressed by Means of Two-Component Number
+Systems](https://doi.org/10.12693%2Faphyspola.86.291)
 
  */
 
 #ifndef PARSED_BY_DOXYGEN
-template<class T> class dual;
+template <class T> class dual;
 #endif
 
 /// Check if T is dual<>, match non-duals.
@@ -305,8 +306,7 @@ template <class T> struct is_complex<std::complex<T> > : std::true_type {};
 #endif
 
 /// Dual_traits helper class.
-template <class T> struct dual_traits
-{
+template <class T> struct dual_traits {
   /// Depth of T - for T=scalar this is 0. for dual_traits<double> it
   /// is 1.
   enum { depth = 0 }; // -Wenum-compare
@@ -318,8 +318,7 @@ template <class T> struct dual_traits
 #ifndef PARSED_BY_DOXYGEN
 
 /// dual_traits for dual<> types
-template <class T> struct dual_traits<dual<T>>
-{
+template <class T> struct dual_traits<dual<T> > {
   /// Depth to which this dual<> type is nested.  One (1) is a
   /// first-level dual, whereas non-duals have a depth of 0.
   enum { depth = dual_traits<T>::depth + 1 };
@@ -328,8 +327,7 @@ template <class T> struct dual_traits<dual<T>>
   typedef typename dual_traits<T>::real_type real_type;
 };
 
-template <class T> struct dual_traits<std::complex<dual<T>>>
-{
+template <class T> struct dual_traits<std::complex<dual<T> > > {
   /// complex<dual<T>> have the same 'depth' as their dual.
   enum { depth = dual_traits<T>::depth };
 
@@ -339,12 +337,10 @@ template <class T> struct dual_traits<std::complex<dual<T>>>
 
 namespace detail {
 
-template<class T>
-struct Void { typedef void type; };
-template<class T, class U = void>
-struct has_member_type : std::false_type {};
-template<class T>
-struct has_member_type<T, typename Void<typename T::type>::type > : std::true_type {
+template <class T> struct Void { typedef void type; };
+template <class T, class U = void> struct has_member_type : std::false_type {};
+template <class T>
+struct has_member_type<T, typename Void<typename T::type>::type> : std::true_type {
   struct wrap {
     typedef typename T::type type;
     typedef typename T::type ReturnType;
@@ -354,57 +350,52 @@ struct has_member_type<T, typename Void<typename T::type>::type > : std::true_ty
 } // namespace detail
 
 /// Promote two types - default according to common_type
-template <class T, class U, class V = void> struct promote : std::common_type<T,U> {};
+template <class T, class U, class V = void> struct promote : std::common_type<T, U> {};
 
 /// Can types A and B be promoted to a common type?
-template<class A, class B> using can_promote = detail::has_member_type<promote<A,B>>;
+template <class A, class B> using can_promote = detail::has_member_type<promote<A, B> >;
 
 template <class T, class U>
-struct promote<dual<T>, dual<U>,
-               typename std::enable_if<(can_promote<T,U>::value
-                                        && (int)dual_traits<T>::depth == (int)dual_traits<U>::depth)>::type>
-{
-  typedef dual<typename promote<U,T>::type> type;
+struct promote<
+    dual<T>, dual<U>,
+    typename std::enable_if<(can_promote<T, U>::value &&
+                             (int)dual_traits<T>::depth == (int)dual_traits<U>::depth)>::type> {
+  typedef dual<typename promote<U, T>::type> type;
 };
 template <class T, class U>
 struct promote<dual<T>, U,
-               typename std::enable_if<(can_promote<T,U>::value
-                                        && (int)dual_traits<T>::depth >= (int)dual_traits<U>::depth
-                                        && !is_complex<U>::value)>::type>
-{
-  typedef dual<typename promote<U,T>::type> type;
+               typename std::enable_if<(can_promote<T, U>::value &&
+                                        (int)dual_traits<T>::depth >= (int)dual_traits<U>::depth &&
+                                        !is_complex<U>::value)>::type> {
+  typedef dual<typename promote<U, T>::type> type;
 };
 template <class T, class U>
 struct promote<U, dual<T>,
-               typename std::enable_if<(can_promote<T,U>::value
-                                        && (int)dual_traits<T>::depth >= (int)dual_traits<U>::depth
-                                        && !is_complex<U>::value)>::type>
-{
-  typedef dual<typename promote<U,T>::type> type;
+               typename std::enable_if<(can_promote<T, U>::value &&
+                                        (int)dual_traits<T>::depth >= (int)dual_traits<U>::depth &&
+                                        !is_complex<U>::value)>::type> {
+  typedef dual<typename promote<U, T>::type> type;
 };
 // /////////////////////////////////////////////////
 template <class T, class U>
 struct promote<std::complex<T>, std::complex<U>,
-               typename std::enable_if<(can_promote<T,U>::value
-                                        && (is_dual<T>::value || is_dual<U>::value))>::type>
-{
-  typedef std::complex<typename promote<T,U>::type> type;
+               typename std::enable_if<(can_promote<T, U>::value &&
+                                        (is_dual<T>::value || is_dual<U>::value))>::type> {
+  typedef std::complex<typename promote<T, U>::type> type;
 };
 template <class T, class U>
-struct promote<std::complex<T>, U,
-               typename std::enable_if<(can_promote<T,U>::value
-                                        && (is_dual<T>::value || is_dual<U>::value)
-                                        && !is_complex<U>::value)>::type>
-{
-  typedef std::complex<typename promote<T,U>::type> type;
+struct promote<
+    std::complex<T>, U,
+    typename std::enable_if<(can_promote<T, U>::value && (is_dual<T>::value || is_dual<U>::value) &&
+                             !is_complex<U>::value)>::type> {
+  typedef std::complex<typename promote<T, U>::type> type;
 };
 template <class T, class U>
-struct promote<U, std::complex<T>,
-               typename std::enable_if<(can_promote<T,U>::value
-                                        && (is_dual<T>::value || is_dual<U>::value)
-                                        && !is_complex<U>::value)>::type>
-{
-  typedef std::complex<typename promote<T,U>::type> type;
+struct promote<
+    U, std::complex<T>,
+    typename std::enable_if<(can_promote<T, U>::value && (is_dual<T>::value || is_dual<U>::value) &&
+                             !is_complex<U>::value)>::type> {
+  typedef std::complex<typename promote<T, U>::type> type;
 };
 // /////////////////////////////////////////////////
 
@@ -432,29 +423,26 @@ namespace std {
 #ifdef CPPDUALS_ENABLE_STD_IS_ARITHMETIC
 
 /// Duals are as arithmetic as their value_type is arithmetic.
-template <class T>
-struct is_arithmetic<duals::dual<T>> : is_arithmetic<T> {};
+template <class T> struct is_arithmetic<duals::dual<T> > : is_arithmetic<T> {};
 
 #endif // CPPDUALS_ENABLE_IS_ARITHMETIC
 
 /// Duals are compound types.
-template <class T>
-struct is_compound<duals::dual<T>> : true_type {};
+template <class T> struct is_compound<duals::dual<T> > : true_type {};
 
 // Modification of std::numeric_limits<> per
 // C++03 17.4.3.1/1, and C++11 18.3.2.3/1.
-template <class T>
-struct numeric_limits<duals::dual<T>> : numeric_limits<T> {
+template <class T> struct numeric_limits<duals::dual<T> > : numeric_limits<T> {
   static constexpr bool is_specialized = true;
-  static constexpr duals::dual<T> min NOMACRO ()  { return numeric_limits<T>::min NOMACRO (); }
-  static constexpr duals::dual<T> lowest()        { return numeric_limits<T>::lowest(); }
-  static constexpr duals::dual<T> max NOMACRO ()  { return numeric_limits<T>::max NOMACRO (); }
-  static constexpr duals::dual<T> epsilon()       { return numeric_limits<T>::epsilon(); }
-  static constexpr duals::dual<T> round_error()   { return numeric_limits<T>::round_error(); }
-  static constexpr duals::dual<T> infinity()      { return numeric_limits<T>::infinity(); }
-  static constexpr duals::dual<T> quiet_NaN()     { return numeric_limits<T>::quiet_NaN(); }
+  static constexpr duals::dual<T> min NOMACRO() { return numeric_limits<T>::min NOMACRO(); }
+  static constexpr duals::dual<T> lowest() { return numeric_limits<T>::lowest(); }
+  static constexpr duals::dual<T> max NOMACRO() { return numeric_limits<T>::max NOMACRO(); }
+  static constexpr duals::dual<T> epsilon() { return numeric_limits<T>::epsilon(); }
+  static constexpr duals::dual<T> round_error() { return numeric_limits<T>::round_error(); }
+  static constexpr duals::dual<T> infinity() { return numeric_limits<T>::infinity(); }
+  static constexpr duals::dual<T> quiet_NaN() { return numeric_limits<T>::quiet_NaN(); }
   static constexpr duals::dual<T> signaling_NaN() { return numeric_limits<T>::signaling_NaN(); }
-  static constexpr duals::dual<T> denorm_min()    { return numeric_limits<T>::denorm_min(); }
+  static constexpr duals::dual<T> denorm_min() { return numeric_limits<T>::denorm_min(); }
 };
 
 #ifdef _LIBCPP_BEGIN_NAMESPACE_STD
@@ -468,33 +456,35 @@ namespace duals {
 #ifndef PARSED_BY_DOXYGEN
 
 // T and X are wrapped in a dual<>
-#define CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X)                              \
-  typename std::enable_if<(int)duals::dual_traits<X>::depth ==          \
-                          (int)duals::dual_traits<T>::depth, int>::type = 0,   \
-    typename std::enable_if<can_promote<T,X>::value,int>::type = 0
+#define CPPDUALS_ONLY_SAME_DEPTH_AS_T(T, X)                                                        \
+  typename std::enable_if<(int)duals::dual_traits<X>::depth == (int)duals::dual_traits<T>::depth,  \
+                          int>::type = 0,                                                          \
+                          typename std::enable_if<can_promote<T, X>::value, int>::type = 0
 
 // Both T and U are wrapped in a dual<>
-#define CPPDUALS_ENABLE_SAME_DEPTH_AND_COMMON_T(T,U)                    \
-  typename std::enable_if<(int)duals::dual_traits<T>::depth ==          \
-                          (int)duals::dual_traits<U>::depth, int>::type = 0, \
-    typename std::enable_if<can_promote<T,U>::value,int>::type = 0,     \
-    typename common_t = dual<typename duals::promote<T,U>::type>
+#define CPPDUALS_ENABLE_SAME_DEPTH_AND_COMMON_T(T, U)                                              \
+  typename std::enable_if<(int)duals::dual_traits<T>::depth == (int)duals::dual_traits<U>::depth,  \
+                          int>::type = 0,                                                          \
+                          typename std::enable_if<can_promote<T, U>::value, int>::type = 0,        \
+                          typename common_t = dual<typename duals::promote<T, U>::type>
 
 // T is wrapped in a dual<>, U may or may not be.
-#define CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U)                     \
-  typename std::enable_if<((int)duals::dual_traits<T>::depth >=         \
-                           (int)duals::dual_traits<U>::depth), int>::type = 0, \
-    typename std::enable_if<can_promote<dual<T>,U>::value,int>::type = 0, \
-    typename common_t = typename duals::promote<dual<T>, U>::type
+#define CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T, U)                                               \
+  typename std::enable_if<((int)duals::dual_traits<T>::depth >=                                    \
+                           (int)duals::dual_traits<U>::depth),                                     \
+                          int>::type = 0,                                                          \
+                          typename std::enable_if<can_promote<dual<T>, U>::value, int>::type = 0,  \
+                          typename common_t = typename duals::promote<dual<T>, U>::type
 
 // T is wrapped in complex<dual<>>
-#define CPPDUALS_ENABLE_LEQ_DEPTH_AND_CX_COMMON_T(T,U)                  \
-  typename std::enable_if<((int)duals::dual_traits<T>::depth >=         \
-                           (int)duals::dual_traits<U>::depth), int>::type = 0, \
-  typename std::enable_if<can_promote<std::complex<dual<T>>,U>::value,int>::type = 0, \
-    typename common_t = typename duals::promote<std::complex<dual<T> >,U>::type
+#define CPPDUALS_ENABLE_LEQ_DEPTH_AND_CX_COMMON_T(T, U)                                            \
+  typename std::enable_if<                                                                         \
+      ((int)duals::dual_traits<T>::depth >= (int)duals::dual_traits<U>::depth),                    \
+      int>::type = 0,                                                                              \
+      typename std::enable_if<can_promote<std::complex<dual<T> >, U>::value, int>::type = 0,       \
+      typename common_t = typename duals::promote<std::complex<dual<T> >, U>::type
 
-#define CPPDUALS_ENABLE_IF(...) typename std::enable_if< (__VA_ARGS__) , int>::type = 0
+#define CPPDUALS_ENABLE_IF(...) typename std::enable_if<(__VA_ARGS__), int>::type = 0
 
 #endif
 
@@ -504,9 +494,7 @@ namespace duals {
 
 /// Abstract dual number class.  Can nest with other dual numbers and
 /// complex numbers.
-template<class T>
-class dual
-{
+template <class T> class dual {
 public:
   /// Class type of rpart() and dpart().  This type can be nested
   /// dual<> or std::complex<>.
@@ -519,25 +507,22 @@ class dual
   value_type _dual;
 
 public:
-
   /// Construct dual from optional real and dual parts.
-  constexpr
-  dual(const value_type re = value_type(), const value_type du = value_type())
-    : _real(re), _dual(du) {}
+  constexpr dual(const value_type re = value_type(), const value_type du = value_type())
+      : _real(re), _dual(du) {}
 
   /// Copy construct from a dual of equal depth.
-  template<class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X),
-           CPPDUALS_ENABLE_IF(!is_complex<X>::value)>
-  dual(const dual<X> & x)
-    : _real(x.rpart()), _dual(x.dpart()) {}
+  template <class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T, X), CPPDUALS_ENABLE_IF(!is_complex<X>::value)>
+  dual(const dual<X> &x) : _real(x.rpart()), _dual(x.dpart()) {}
 
   /// Cast to a complex<dual<>> with real part equal to *this.
-  operator std::complex<dual<T>>() { return std::complex<dual<T>>(*this); }
+  operator std::complex<dual<T> >() { return std::complex<dual<T> >(*this); }
 
   /// Explicit cast to an arithmetic type retains the rpart()
-  template <class X,
-            CPPDUALS_ENABLE_IF(std::is_arithmetic<X>::value && !is_dual<X>::value)>
-  explicit operator X() const { return X(_real); }
+  template <class X, CPPDUALS_ENABLE_IF(std::is_arithmetic<X>::value && !is_dual<X>::value)>
+  explicit operator X() const {
+    return X(_real);
+  }
 
   /// Get the real part.
   T rpart() const { return _real; }
@@ -558,50 +543,75 @@ class dual
   dual<T> operator+() const { return *this; }
 
   /// Assignment of `value_type` assigns the real part and zeros the dual part.
-  dual<T> & operator= (const T & x) { _real =  x; _dual = value_type(); return *this; }
+  dual<T> &operator=(const T &x) {
+    _real = x;
+    _dual = value_type();
+    return *this;
+  }
 
   /// Add a relatively-scalar to this dual.
-  dual<T> & operator+=(const T & x) { _real += x; return *this; }
+  dual<T> &operator+=(const T &x) {
+    _real += x;
+    return *this;
+  }
 
   /// Subtract a relatively-scalar from this dual.
-  dual<T> & operator-=(const T & x) { _real -= x; return *this; }
+  dual<T> &operator-=(const T &x) {
+    _real -= x;
+    return *this;
+  }
 
   /// Multiply a relatively-scalar with this dual.
-  dual<T> & operator*=(const T & x) { _real *= x; _dual *= x; return *this; }
+  dual<T> &operator*=(const T &x) {
+    _real *= x;
+    _dual *= x;
+    return *this;
+  }
 
   /// Divide this dual by relatively-scalar.
-  dual<T> & operator/=(const T & x) { _real /= x; _dual /= x; return *this; }
+  dual<T> &operator/=(const T &x) {
+    _real /= x;
+    _dual /= x;
+    return *this;
+  }
 
-  //dua & operator=(const dual & x)  { _real =  x.rpart(); _dual =  x.dpart(); }
-  template<class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X)>
-  dual<T> & operator= (const dual<X> & x) { _real =  x.rpart(); _dual =  x.dpart(); return *this; }
+  // dua & operator=(const dual & x)  { _real =  x.rpart(); _dual =  x.dpart(); }
+  template <class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T, X)> dual<T> &operator=(const dual<X> &x) {
+    _real = x.rpart();
+    _dual = x.dpart();
+    return *this;
+  }
 
   /// Add a dual of the same depth to this dual.
-  template<class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X)>
-  dual<T> & operator+=(const dual<X> & x) { _real += x.rpart(); _dual += x.dpart(); return *this; }
+  template <class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T, X)> dual<T> &operator+=(const dual<X> &x) {
+    _real += x.rpart();
+    _dual += x.dpart();
+    return *this;
+  }
 
   /// Subtract a dual of the same depth from this dual.
-  template<class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X)>
-  dual<T> & operator-=(const dual<X> & x) { _real -= x.rpart(); _dual -= x.dpart(); return *this; }
+  template <class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T, X)> dual<T> &operator-=(const dual<X> &x) {
+    _real -= x.rpart();
+    _dual -= x.dpart();
+    return *this;
+  }
 
   /// Multiply this dual with a dual of same depth.
-  template<class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X)>
-  dual<T> & operator*=(const dual<X> & x) {
+  template <class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T, X)> dual<T> &operator*=(const dual<X> &x) {
     _dual = _real * x.dpart() + _dual * x.rpart();
     _real = _real * x.rpart();
     return *this;
   }
 
   /// Divide this dual by another dual of the same or lower depth.
-  template<class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X)>
-  dual<T> & operator/=(const dual<X> & x) {
+  template <class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T, X)> dual<T> &operator/=(const dual<X> &x) {
     _dual = (_dual * x.rpart() - _real * x.dpart()) / (x.rpart() * x.rpart());
     _real = _real / x.rpart();
     return *this;
   }
 
-  //The following comparison operators are not strictly well-defined,
-  //they are implemented as comparison of the real parts.
+  // The following comparison operators are not strictly well-defined,
+  // they are implemented as comparison of the real parts.
 #if 0
   /// Compare this dual with another dual, comparing real parts for equality.
   template<class X, CPPDUALS_ONLY_SAME_DEPTH_AS_T(T,X)>
@@ -630,68 +640,69 @@ class dual
 };
 
 /// Get the dual's real part.
-template <class T> T rpart(const dual<T> & x) { return x.rpart(); }
+template <class T> T rpart(const dual<T> &x) { return x.rpart(); }
 
 /// Get the dual's dual part.
-template <class T> T dpart(const dual<T> & x) { return x.dpart(); }
+template <class T> T dpart(const dual<T> &x) { return x.dpart(); }
 
 /// R-part of complex<dual<>> is non-dual complex<> (not to be confused with real())
-template <class T> std::complex<T> rpart(const std::complex<dual<T>> & x)
-{ return std::complex<T>(x.real().rpart(), x.imag().rpart()); }
+template <class T> std::complex<T> rpart(const std::complex<dual<T> > &x) {
+  return std::complex<T>(x.real().rpart(), x.imag().rpart());
+}
 
 /// Dual part of complex<dual<>> is complex<>
-template <class T> std::complex<T> dpart(const std::complex<dual<T>> & x)
-{ return std::complex<T>(x.real().dpart(), x.imag().dpart()); }
+template <class T> std::complex<T> dpart(const std::complex<dual<T> > &x) {
+  return std::complex<T>(x.real().dpart(), x.imag().dpart());
+}
 
 /// Get a non-dual's real part.
 template <class T,
-          CPPDUALS_ENABLE_IF((std::is_arithmetic<T>::value &&
-                              !std::is_compound<T>::value) || is_complex<T>::value)>
-T rpart(const T & x) { return x; }
+          CPPDUALS_ENABLE_IF((std::is_arithmetic<T>::value && !std::is_compound<T>::value) ||
+                             is_complex<T>::value)>
+T rpart(const T &x) {
+  return x;
+}
 
 /// Get a non-dual's dual part := zero.
 template <class T,
-          CPPDUALS_ENABLE_IF((std::is_arithmetic<T>::value &&
-                              !std::is_compound<T>::value) || is_complex<T>::value)>
-T dpart(const T & ) { return T(0); }
+          CPPDUALS_ENABLE_IF((std::is_arithmetic<T>::value && !std::is_compound<T>::value) ||
+                             is_complex<T>::value)>
+T dpart(const T &) {
+  return T(0);
+}
 
 #ifndef PARSED_BY_DOXYGEN
 
 /// Dual +-*/ ops with another entity
-#define CPPDUALS_BINARY_OP(op)                                          \
-  template<class T, class U, CPPDUALS_ENABLE_SAME_DEPTH_AND_COMMON_T(T,U)> \
-  common_t                                                              \
-  operator op(const dual<T> & z, const dual<U> & w) {                   \
-    common_t x(z);                                                      \
-    return x op##= w;                                                   \
-  }                                                                     \
-  template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U), \
-    CPPDUALS_ENABLE_IF(!std::is_same<U,std::complex<dual<T>>>::value)>  \
-  common_t                                                              \
-  operator op(const dual<T> & z, const U & w) {                         \
-    common_t x(z);                                                      \
-    return x op##= w;                                                   \
-  }                                                                     \
-  template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U), \
-    CPPDUALS_ENABLE_IF(!std::is_same<U,std::complex<dual<T>>>::value)>  \
-  common_t                                                              \
-  operator op(const U & z, const dual<T> & w) {                         \
-    common_t x(z);                                                      \
-    return x op##= w;                                                   \
-  }                                                                     \
-  template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_CX_COMMON_T(T,U), \
-    CPPDUALS_ENABLE_IF(!std::is_same<U,std::complex<dual<T>>>::value)>  \
-  common_t                                                              \
-  operator op(const std::complex<dual<T>> & z, const U & w) {           \
-    common_t x(z);                                                      \
-    return x op##= w;                                                   \
-  }                                                                     \
-  template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_CX_COMMON_T(T,U), \
-    CPPDUALS_ENABLE_IF(!std::is_same<U,std::complex<dual<T>>>::value)>  \
-  common_t                                                              \
-  operator op(const U & z, const std::complex<dual<T>> & w) {           \
-    common_t x(z);                                                      \
-    return x op##= w;                                                   \
+#define CPPDUALS_BINARY_OP(op)                                                                     \
+  template <class T, class U, CPPDUALS_ENABLE_SAME_DEPTH_AND_COMMON_T(T, U)>                       \
+  common_t operator op(const dual<T> &z, const dual<U> &w) {                                       \
+    common_t x(z);                                                                                 \
+    return x op## = w;                                                                             \
+  }                                                                                                \
+  template <class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T, U),                        \
+            CPPDUALS_ENABLE_IF(!std::is_same<U, std::complex<dual<T> > >::value)>                  \
+  common_t operator op(const dual<T> &z, const U &w) {                                             \
+    common_t x(z);                                                                                 \
+    return x op## = w;                                                                             \
+  }                                                                                                \
+  template <class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T, U),                        \
+            CPPDUALS_ENABLE_IF(!std::is_same<U, std::complex<dual<T> > >::value)>                  \
+  common_t operator op(const U &z, const dual<T> &w) {                                             \
+    common_t x(z);                                                                                 \
+    return x op## = w;                                                                             \
+  }                                                                                                \
+  template <class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_CX_COMMON_T(T, U),                     \
+            CPPDUALS_ENABLE_IF(!std::is_same<U, std::complex<dual<T> > >::value)>                  \
+  common_t operator op(const std::complex<dual<T> > &z, const U &w) {                              \
+    common_t x(z);                                                                                 \
+    return x op## = w;                                                                             \
+  }                                                                                                \
+  template <class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_CX_COMMON_T(T, U),                     \
+            CPPDUALS_ENABLE_IF(!std::is_same<U, std::complex<dual<T> > >::value)>                  \
+  common_t operator op(const U &z, const std::complex<dual<T> > &w) {                              \
+    common_t x(z);                                                                                 \
+    return x op## = w;                                                                             \
   }
 
 CPPDUALS_BINARY_OP(+)
@@ -700,23 +711,20 @@ CPPDUALS_BINARY_OP(*)
 CPPDUALS_BINARY_OP(/)
 
 /// Dual compared to a non-complex lower rank thing
-#define CPPDUALS_LHS_COMPARISON(op)                                     \
-  template<class T, class U, CPPDUALS_ENABLE_SAME_DEPTH_AND_COMMON_T(T,U)> \
-  bool                                                                  \
-  operator op(const dual<T> & a, const dual<U> & b) {                   \
-    return a.rpart() op b.rpart();                                      \
-  }                                                                     \
-  template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U), \
-           CPPDUALS_ENABLE_IF(!is_complex<U>::value)>                   \
-  bool                                                                  \
-  operator op(const U & a, const dual<T> & b) {                         \
-    return a op b.rpart();                                              \
-  }                                                                     \
-  template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U), \
-           CPPDUALS_ENABLE_IF(!is_complex<U>::value)>                   \
-  bool                                                                  \
-  operator op(const dual<T> & a, const U & b) {                         \
-    return a.rpart() op b;                                              \
+#define CPPDUALS_LHS_COMPARISON(op)                                                                \
+  template <class T, class U, CPPDUALS_ENABLE_SAME_DEPTH_AND_COMMON_T(T, U)>                       \
+  bool operator op(const dual<T> &a, const dual<U> &b) {                                           \
+    return a.rpart() op b.rpart();                                                                 \
+  }                                                                                                \
+  template <class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T, U),                        \
+            CPPDUALS_ENABLE_IF(!is_complex<U>::value)>                                             \
+  bool operator op(const U &a, const dual<T> &b) {                                                 \
+    return a op b.rpart();                                                                         \
+  }                                                                                                \
+  template <class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T, U),                        \
+            CPPDUALS_ENABLE_IF(!is_complex<U>::value)>                                             \
+  bool operator op(const dual<T> &a, const U &b) {                                                 \
+    return a.rpart() op b;                                                                         \
   }
 
 CPPDUALS_LHS_COMPARISON(<)
@@ -739,222 +747,205 @@ namespace std {
 
 #ifndef PARSED_BY_DOXYGEN
 
-#define make_math(T)                                                    \
-  inline T (frexp)(const duals::dual<T> & arg, int* exp ) { return (frexp)(arg.rpart(), exp); } \
-  inline duals::dual<T> (ldexp)(const duals::dual<T> & arg, int exp ) { return arg * std::pow((T)2,exp); } \
-  inline T (trunc)(const duals::dual<T> & d) { return (trunc)(d.rpart()); } \
-  inline T (floor)(const duals::dual<T> & d) { return (floor)(d.rpart()); } \
-  inline T (ceil)(const duals::dual<T> & d)  { return (ceil)(d.rpart()); } \
-  inline T (round)(const duals::dual<T> & d) { return (round)(d.rpart()); } \
-  inline int (fpclassify)(const duals::dual<T> & d) { return (fpclassify)(d.rpart()); } \
-  inline bool (isfinite)(const duals::dual<T> & d) { return (isfinite)(d.rpart()); } \
-  inline bool (isnormal)(const duals::dual<T> & d) { return (isnormal)(d.rpart()); } \
-  inline bool (isinf)(const duals::dual<T> & d) { return (isinf)(d.rpart()); } \
-  inline bool (isnan)(const duals::dual<T> & d) { return (isnan)(d.rpart()); } \
-
-make_math(float)
-make_math(double)
-make_math(long double)
+#define make_math(T)                                                                               \
+  inline T(frexp)(const duals::dual<T> &arg, int *exp) { return (frexp)(arg.rpart(), exp); }       \
+  inline duals::dual<T>(ldexp)(const duals::dual<T> &arg, int exp) {                               \
+    return arg * std::pow((T)2, exp);                                                              \
+  }                                                                                                \
+  inline T(trunc)(const duals::dual<T> &d) { return (trunc)(d.rpart()); }                          \
+  inline T(floor)(const duals::dual<T> &d) { return (floor)(d.rpart()); }                          \
+  inline T(ceil)(const duals::dual<T> &d) { return (ceil)(d.rpart()); }                            \
+  inline T(round)(const duals::dual<T> &d) { return (round)(d.rpart()); }                          \
+  inline int(fpclassify)(const duals::dual<T> &d) { return (fpclassify)(d.rpart()); }              \
+  inline bool(isfinite)(const duals::dual<T> &d) { return (isfinite)(d.rpart()); }                 \
+  inline bool(isnormal)(const duals::dual<T> &d) { return (isnormal)(d.rpart()); }                 \
+  inline bool(isinf)(const duals::dual<T> &d) { return (isinf)(d.rpart()); }                       \
+  inline bool(isnan)(const duals::dual<T> &d) { return (isnan)(d.rpart()); }
+
+make_math(float) make_math(double) make_math(long double)
 
 #undef make_math
 
 #endif // PARSED_BY_DOXYGEN
 
 #ifdef _LIBCPP_BEGIN_NAMESPACE_STD
-_LIBCPP_END_NAMESPACE_STD
+    _LIBCPP_END_NAMESPACE_STD
 #else
 } // namespace std
 #endif
-namespace duals {
+    namespace duals {
 
-namespace randos {
+  namespace randos {
 
-// Random real value between a and b.
-template<class T,
-         typename dist = std::uniform_real_distribution<T>,
-         CPPDUALS_ENABLE_IF(!is_complex<T>::value && !is_dual<T>::value)>
-T random(T a = T(0), T b = T(1)) {
-  static std::default_random_engine generator;
-  dist distribution(a, b);
-  return distribution(generator);
-}
+  // Random real value between a and b.
+  template <class T, typename dist = std::uniform_real_distribution<T>,
+            CPPDUALS_ENABLE_IF(!is_complex<T>::value && !is_dual<T>::value)>
+  T random(T a = T(0), T b = T(1)) {
+    static std::default_random_engine generator;
+    dist distribution(a, b);
+    return distribution(generator);
+  }
 
-template<class T,
-         typename dist = std::uniform_real_distribution<T>,
-         CPPDUALS_ENABLE_IF(!is_complex<T>::value && !is_dual<T>::value)>
-T random2(T a = T(0), T b = T(1)) { return random<T,dist>(a,b); }
-
-// Helper class for testing - also random value in dual part.
-template <class DT,
-          CPPDUALS_ENABLE_IF(is_dual<DT>::value)>
-DT random2(DT a = DT(0,0), DT b = DT(1,1)) {
-  using randos::random;
-  return DT(a.rpart() + random2<typename DT::value_type>() * (b.rpart() - a.rpart()),
-            a.dpart() + random2<typename DT::value_type>() * (b.dpart() - a.dpart()));
-}
+  template <class T, typename dist = std::uniform_real_distribution<T>,
+            CPPDUALS_ENABLE_IF(!is_complex<T>::value && !is_dual<T>::value)>
+  T random2(T a = T(0), T b = T(1)) {
+    return random<T, dist>(a, b);
+  }
 
-// Helper class for testing - also random value in dual part of the complex.
-template<class CT,
-         CPPDUALS_ENABLE_IF(is_complex<CT>::value)>
-CT random2(CT a = CT(0,0), CT b = CT(1,1)) {
-  return CT(a.real() + random2<typename CT::value_type>() * (b.real() - a.real()),
-            a.imag() + random2<typename CT::value_type>() * (b.imag() - a.imag()));
-}
+  // Helper class for testing - also random value in dual part.
+  template <class DT, CPPDUALS_ENABLE_IF(is_dual<DT>::value)>
+  DT random2(DT a = DT(0, 0), DT b = DT(1, 1)) {
+    using randos::random;
+    return DT(a.rpart() + random2<typename DT::value_type>() * (b.rpart() - a.rpart()),
+              a.dpart() + random2<typename DT::value_type>() * (b.dpart() - a.dpart()));
+  }
 
-} // randos
-
-/// Random real and dual parts, used by Eigen's Random(), by default
-// the returned value has zero dual part and is in the range [0+0_e,
-// 1+0_e].
-template <class DT,
-          typename dist = std::uniform_real_distribution<typename DT::value_type>,
-          CPPDUALS_ENABLE_IF(is_dual<DT>::value)>
-DT random(DT a = DT(0,0), DT b = DT(1,0)) {
-  using randos::random;
-  return DT(random<typename DT::value_type,dist>(a.rpart(),b.rpart()),
-            random<typename DT::value_type,dist>(a.dpart(),b.dpart()));
-}
+  // Helper class for testing - also random value in dual part of the complex.
+  template <class CT, CPPDUALS_ENABLE_IF(is_complex<CT>::value)>
+  CT random2(CT a = CT(0, 0), CT b = CT(1, 1)) {
+    return CT(a.real() + random2<typename CT::value_type>() * (b.real() - a.real()),
+              a.imag() + random2<typename CT::value_type>() * (b.imag() - a.imag()));
+  }
 
-/// Complex Conjugate of a dual is just the dual.
-template<class T> dual<T> conj(const dual<T> & x) { return x; }
-
-/// Conjugate a thing that's not dual and not complex -- it has no
-/// complex value so just return it.  This is different from
-/// std::conj() which promotes the T to a std::complex<T>.
-template<class T, CPPDUALS_ENABLE_IF(!is_dual<T>::value &&
-                                     !is_complex<T>::value &&
-                                     std::is_arithmetic<T>::value)>
-T conj(const T & x) { return x; }
-
-/// Dual Conjugate, such that x*dconj(x) = rpart(x)^2.  Different
-/// function name from complex conjugate conj().
-template<class T> dual<T> dconj(const dual<T> & x) {
-  return dual<T>(x.rpart(), - x.dpart());
-}
+  } // namespace randos
+
+  /// Random real and dual parts, used by Eigen's Random(), by default
+  // the returned value has zero dual part and is in the range [0+0_e,
+  // 1+0_e].
+  template <class DT, typename dist = std::uniform_real_distribution<typename DT::value_type>,
+            CPPDUALS_ENABLE_IF(is_dual<DT>::value)>
+  DT random(DT a = DT(0, 0), DT b = DT(1, 0)) {
+    using randos::random;
+    return DT(random<typename DT::value_type, dist>(a.rpart(), b.rpart()),
+              random<typename DT::value_type, dist>(a.dpart(), b.dpart()));
+  }
 
-/// Dual Conjugate of a complex, such that x*dconj(x) = rpart(x)^2.
-/// Different function name from complex conjugate conj().
-template<class T> std::complex<T> dconj(const std::complex<T> & x) {
-  return std::complex<T>(dconj(x.real()), dconj(x.imag()));
-}
+  /// Complex Conjugate of a dual is just the dual.
+  template <class T> dual<T> conj(const dual<T> &x) { return x; }
 
-/// Conjugate a thing that's not dual and not complex.
-template<class T, CPPDUALS_ENABLE_IF(!is_dual<T>::value &&
-                                     !is_complex<T>::value &&
-                                     std::is_arithmetic<T>::value)>
-T dconj(const T & x) { return x; }
-
-/// Exponential e^x
-template<class T> dual<T> exp(const dual<T> & x) {
-  using std::exp;
-  T v = exp(x.rpart());
-  return dual<T>(v,
-                 v * x.dpart());
-}
+  /// Conjugate a thing that's not dual and not complex -- it has no
+  /// complex value so just return it.  This is different from
+  /// std::conj() which promotes the T to a std::complex<T>.
+  template <class T, CPPDUALS_ENABLE_IF(!is_dual<T>::value && !is_complex<T>::value &&
+                                        std::is_arithmetic<T>::value)>
+  T conj(const T &x) {
+    return x;
+  }
 
-/// Natural log ln(x)
-template<class T> dual<T> log(const dual<T> & x) {
-  using std::log;
-  T v = log(x.rpart());
-  if (x.dpart() == T(0))
-    return v;
-  else
-    return dual<T>(v, x.dpart() / x.rpart());
-}
+  /// Dual Conjugate, such that x*dconj(x) = rpart(x)^2.  Different
+  /// function name from complex conjugate conj().
+  template <class T> dual<T> dconj(const dual<T> &x) { return dual<T>(x.rpart(), -x.dpart()); }
 
-template<class T> dual<T> log10(const dual<T> & x) {
-  using std::log;
-  return log(x) / log(static_cast<T>(10));
-}
+  /// Dual Conjugate of a complex, such that x*dconj(x) = rpart(x)^2.
+  /// Different function name from complex conjugate conj().
+  template <class T> std::complex<T> dconj(const std::complex<T> &x) {
+    return std::complex<T>(dconj(x.real()), dconj(x.imag()));
+  }
 
-template<class T> dual<T> log2(const dual<T> & x) {
-  using std::log;
-  return log(x) / log(static_cast<T>(2));
-}
+  /// Conjugate a thing that's not dual and not complex.
+  template <class T, CPPDUALS_ENABLE_IF(!is_dual<T>::value && !is_complex<T>::value &&
+                                        std::is_arithmetic<T>::value)>
+  T dconj(const T &x) {
+    return x;
+  }
 
-template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U)>
-common_t
-pow(const dual<T> & x, const U & y) {
-  using std::pow;
-  typedef typename common_t::value_type V;
+  /// Exponential e^x
+  template <class T> dual<T> exp(const dual<T> &x) {
+    using std::exp;
+    T v = exp(x.rpart());
+    return dual<T>(v, v * x.dpart());
+  }
 
-  return common_t(pow(x.rpart(), y),
-                  x.dpart() == V(0) ? V(0)
-                  : x.dpart() * y * pow(x.rpart(), y - U(1)));
-}
+  /// Natural log ln(x)
+  template <class T> dual<T> log(const dual<T> &x) {
+    using std::log;
+    T v = log(x.rpart());
+    if (x.dpart() == T(0))
+      return v;
+    else
+      return dual<T>(v, x.dpart() / x.rpart());
+  }
 
-template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U)>
-common_t
-pow(const U & x, const dual<T> & y) {
-  return pow(common_t(x), y);
-}
+  template <class T> dual<T> log10(const dual<T> &x) {
+    using std::log;
+    return log(x) / log(static_cast<T>(10));
+  }
+
+  template <class T> dual<T> log2(const dual<T> &x) {
+    using std::log;
+    return log(x) / log(static_cast<T>(2));
+  }
+
+  template <class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T, U)>
+  common_t pow(const dual<T> &x, const U &y) {
+    using std::pow;
+    typedef typename common_t::value_type V;
 
-template<class T, class U, CPPDUALS_ENABLE_SAME_DEPTH_AND_COMMON_T(T,U)>
-common_t
-pow(const dual<T> & f, const dual<U> & g) {
-  using std::pow;
-  using std::log;
+    return common_t(pow(x.rpart(), y),
+                    x.dpart() == V(0) ? V(0) : x.dpart() * y * pow(x.rpart(), y - U(1)));
+  }
+
+  template <class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T, U)>
+  common_t pow(const U &x, const dual<T> &y) {
+    return pow(common_t(x), y);
+  }
+
+  template <class T, class U, CPPDUALS_ENABLE_SAME_DEPTH_AND_COMMON_T(T, U)>
+  common_t pow(const dual<T> &f, const dual<U> &g) {
+    using std::log;
+    using std::pow;
 #if 1
-  using std::floor;
-  typedef typename common_t::value_type V;
-  common_t result;
-
- if (f.rpart() == T(0) && g.rpart() >= U(1)) {
-    if (g.rpart() > U(1)) {
-      result = common_t(0);
-    } else {
-      result = f;
-    }
+    using std::floor;
+    typedef typename common_t::value_type V;
+    common_t result;
 
-  } else {
-    if (f.rpart() < T(0) && g.rpart() == floor(g.rpart())) {
-      V const tmp = g.rpart() * pow(f.rpart(), g.rpart() - U(1.0));
-      result = common_t(pow(f.rpart(), g.rpart()),
-                        f.dpart() == T(0) ? T(0) : f.dpart() * tmp);
-      if (g.dpart() != U(0.0)) {
-        // Return a NaN when g.dpart() != 0.
-        result.dpart(std::numeric_limits<T>::quiet_NaN());
+    if (f.rpart() == T(0) && g.rpart() >= U(1)) {
+      if (g.rpart() > U(1)) { result = common_t(0); }
+      else { result = f; }
+    }
+    else {
+      if (f.rpart() < T(0) && g.rpart() == floor(g.rpart())) {
+        V const tmp = g.rpart() * pow(f.rpart(), g.rpart() - U(1.0));
+        result = common_t(pow(f.rpart(), g.rpart()), f.dpart() == T(0) ? T(0) : f.dpart() * tmp);
+        if (g.dpart() != U(0.0)) {
+          // Return a NaN when g.dpart() != 0.
+          result.dpart(std::numeric_limits<T>::quiet_NaN());
+        }
+      }
+      else {
+        // Handle the remaining cases. For cases 4,5,6,9 we allow the log()
+        // function to generate -HUGE_VAL or NaN, since those cases result in a
+        // nonfinite derivative.
+        V const tmp1 = pow(f.rpart(), g.rpart());
+        V const tmp2 = g.rpart() * pow(f.rpart(), g.rpart() - T(1.0));
+        V const tmp3 = tmp1 * log(f.rpart());
+        result = common_t(tmp1, f.dpart() == T(0) && g.dpart() == U(0)
+                                    ? V(0)
+                                    : tmp2 * f.dpart() + tmp3 * g.dpart());
       }
-    } else {
-      // Handle the remaining cases. For cases 4,5,6,9 we allow the log()
-      // function to generate -HUGE_VAL or NaN, since those cases result in a
-      // nonfinite derivative.
-      V const tmp1 = pow(f.rpart(), g.rpart());
-      V const tmp2 = g.rpart() * pow(f.rpart(), g.rpart() - T(1.0));
-      V const tmp3 = tmp1 * log(f.rpart());
-      result = common_t(tmp1,
-                        f.dpart() == T(0) && g.dpart() == U(0) ? V(0)
-                        : tmp2 * f.dpart() + tmp3 * g.dpart());
     }
-  }
 
-  return result;
+    return result;
 #else
   T v = pow(f.rpart(), g.rpart());
-  return common_t(v,
-                  pow(f.rpart(), g.rpart() - T(1)) *
-                  (g.rpart() * f.dpart()
-                   + f.rpart() * log(f.rpart()) * g.dpart()));
+  return common_t(v, pow(f.rpart(), g.rpart() - T(1)) *
+                         (g.rpart() * f.dpart() + f.rpart() * log(f.rpart()) * g.dpart()));
 #endif
-}
+  }
 
-namespace utils {
-template <typename T> int sgn(T val) {
-  return (T(0) < val) - (val < T(0));
-}
-}
+  namespace utils {
+  template <typename T> int sgn(T val) { return (T(0) < val) - (val < T(0)); }
+  } // namespace utils
 
-template<class T> dual<T> abs(const dual<T> & x) {
-  using std::abs;
-  return dual<T>(abs(x.rpart()),
-                 x.dpart() * utils::sgn(x.rpart()));
-}
+  template <class T> dual<T> abs(const dual<T> &x) {
+    using std::abs;
+    return dual<T>(abs(x.rpart()), x.dpart() * utils::sgn(x.rpart()));
+  }
 
-template<class T> dual<T> fabs(const dual<T> & x) {
-  using std::fabs;
-  return dual<T>(fabs(x.rpart()),
-                 x.dpart() * utils::sgn(x.rpart()));
-}
+  template <class T> dual<T> fabs(const dual<T> &x) {
+    using std::fabs;
+    return dual<T>(fabs(x.rpart()), x.dpart() * utils::sgn(x.rpart()));
+  }
 
 #if 0
 // TODO?
@@ -965,291 +956,293 @@ template<class T> dual<T> abs2(const dual<T> & x) {
 }
 #endif
 
-template<class T> duals::dual<T> copysign(const duals::dual<T> & x, const duals::dual<T> & y) {
-  using std::copysign;
-  T r = copysign(x.rpart(), y.rpart());
-  return duals::dual<T>(r, r == x.rpart() ? x.dpart() : -x.dpart());
-}
+  template <class T> duals::dual<T> copysign(const duals::dual<T> &x, const duals::dual<T> &y) {
+    using std::copysign;
+    T r = copysign(x.rpart(), y.rpart());
+    return duals::dual<T>(r, r == x.rpart() ? x.dpart() : -x.dpart());
+  }
 
-template<class T> duals::dual<T> hypot(const duals::dual<T> & x, const duals::dual<T> & y) {
-  return sqrt(x*x + y*y);
-}
+  template <class T> duals::dual<T> hypot(const duals::dual<T> &x, const duals::dual<T> &y) {
+    return sqrt(x * x + y * y);
+  }
 
-template<class T> duals::dual<T> scalbn(const duals::dual<T> & arg, int ex) {
-  return arg * std::pow((T)2, ex);
-}
+  template <class T> duals::dual<T> scalbn(const duals::dual<T> &arg, int ex) {
+    return arg * std::pow((T)2, ex);
+  }
 
-template<class T> duals::dual<T> (fmax)(const duals::dual<T> & x, const duals::dual<T> & y) {
-  return x.rpart() > y.rpart() ? x : y;
-}
+  template <class T> duals::dual<T>(fmax)(const duals::dual<T> &x, const duals::dual<T> &y) {
+    return x.rpart() > y.rpart() ? x : y;
+  }
 
-template<class T> duals::dual<T> (fmin)(const duals::dual<T> & x, const duals::dual<T> & y) {
-  return x.rpart() <= y.rpart() ? x : y;
-}
+  template <class T> duals::dual<T>(fmin)(const duals::dual<T> &x, const duals::dual<T> &y) {
+    return x.rpart() <= y.rpart() ? x : y;
+  }
 
-template<class T> duals::dual<T> logb(const duals::dual<T> & x) {
-  return duals::log2(x);
-}
+  template <class T> duals::dual<T> logb(const duals::dual<T> &x) { return duals::log2(x); }
 
-template<class T> int (fpclassify)(const duals::dual<T> & d) { using std::fpclassify; return (fpclassify)(d.rpart()); }
-template<class T> bool (isfinite)(const duals::dual<T> & d) { using std::isfinite; return (isfinite)(d.rpart()); }
-template<class T> bool (isnormal)(const duals::dual<T> & d) { using std::isnormal; return (isnormal)(d.rpart()); }
-template<class T> bool (isinf)(const duals::dual<T> & d) { using std::isinf; return (isinf)(d.rpart()); }
-template<class T> bool (isnan)(const duals::dual<T> & d) { using std::isnan; return (isnan)(d.rpart()); }
-template<class T> bool (signbit)(const duals::dual<T> & d) { using std::signbit; return (signbit)(d.rpart()); }
-
-template<class T> dual<T> sqrt(const dual<T> & x) {
-  using std::sqrt;
-  T v = sqrt(x.rpart());
-  if (x.dpart() == T(0))
-    return v;
-  else
-    return dual<T>(v, x.dpart() / (T(2) * v) );
-}
+  template <class T> int(fpclassify)(const duals::dual<T> &d) {
+    using std::fpclassify;
+    return (fpclassify)(d.rpart());
+  }
+  template <class T> bool(isfinite)(const duals::dual<T> &d) {
+    using std::isfinite;
+    return (isfinite)(d.rpart());
+  }
+  template <class T> bool(isnormal)(const duals::dual<T> &d) {
+    using std::isnormal;
+    return (isnormal)(d.rpart());
+  }
+  template <class T> bool(isinf)(const duals::dual<T> &d) {
+    using std::isinf;
+    return (isinf)(d.rpart());
+  }
+  template <class T> bool(isnan)(const duals::dual<T> &d) {
+    using std::isnan;
+    return (isnan)(d.rpart());
+  }
+  template <class T> bool(signbit)(const duals::dual<T> &d) {
+    using std::signbit;
+    return (signbit)(d.rpart());
+  }
 
-template<class T> dual<T> cbrt(const dual<T> & x) {
-  using std::cbrt;
-  T v = cbrt(x.rpart());
-  if (x.dpart() == T(0))
-    return v;
-  else
-    return dual<T>(v, x.dpart() / (T(3) * v * v) );
-}
+  template <class T> dual<T> sqrt(const dual<T> &x) {
+    using std::sqrt;
+    T v = sqrt(x.rpart());
+    if (x.dpart() == T(0))
+      return v;
+    else
+      return dual<T>(v, x.dpart() / (T(2) * v));
+  }
 
-template<class T> dual<T> sin(const dual<T> & x) {
-  using std::sin;
-  using std::cos;
-  return dual<T>(sin(x.rpart()),
-                 x.dpart() * cos(x.rpart()));
-}
+  template <class T> dual<T> cbrt(const dual<T> &x) {
+    using std::cbrt;
+    T v = cbrt(x.rpart());
+    if (x.dpart() == T(0))
+      return v;
+    else
+      return dual<T>(v, x.dpart() / (T(3) * v * v));
+  }
 
-template<class T> dual<T> cos(const dual<T> & x) {
-  using std::cos;
-  using std::sin;
-  return dual<T>(cos(x.rpart()),
-                 -sin(x.rpart()) * x.dpart());
-}
+  template <class T> dual<T> sin(const dual<T> &x) {
+    using std::cos;
+    using std::sin;
+    return dual<T>(sin(x.rpart()), x.dpart() * cos(x.rpart()));
+  }
 
-template<class T> dual<T> tan(const dual<T> & x) {
-  using std::tan;
-  T v = tan(x.rpart());
-  return dual<T>(v, x.dpart() * (v*v + 1));
-}
+  template <class T> dual<T> cos(const dual<T> &x) {
+    using std::cos;
+    using std::sin;
+    return dual<T>(cos(x.rpart()), -sin(x.rpart()) * x.dpart());
+  }
 
-template<class T> dual<T> asin(const dual<T> & x) {
-  using std::asin;
-  using std::sqrt;
-  T v = asin(x.rpart());
-  if (x.dpart() == T(0))
-    return v;
-  else
-    return dual<T>(v, x.dpart() / sqrt(1 - x.rpart()*x.rpart()));
-}
+  template <class T> dual<T> tan(const dual<T> &x) {
+    using std::tan;
+    T v = tan(x.rpart());
+    return dual<T>(v, x.dpart() * (v * v + 1));
+  }
 
-template<class T> dual<T> acos(const dual<T> & x) {
-  using std::acos;
-  using std::sqrt;
-  T v = acos(x.rpart());
-  if (x.dpart() == T(0))
-    return v;
-  else
-    return dual<T>(v, -x.dpart() / sqrt(1 - x.rpart()*x.rpart()));
-}
+  template <class T> dual<T> asin(const dual<T> &x) {
+    using std::asin;
+    using std::sqrt;
+    T v = asin(x.rpart());
+    if (x.dpart() == T(0))
+      return v;
+    else
+      return dual<T>(v, x.dpart() / sqrt(1 - x.rpart() * x.rpart()));
+  }
 
-template<class T> dual<T> atan(const dual<T> & x) {
-  using std::atan;
-  T v = atan(x.rpart());
-  if (x.dpart() == T(0))
-    return v;
-  else
-    return dual<T>(v, x.dpart() / (1 + x.rpart() * x.rpart()));
-}
+  template <class T> dual<T> acos(const dual<T> &x) {
+    using std::acos;
+    using std::sqrt;
+    T v = acos(x.rpart());
+    if (x.dpart() == T(0))
+      return v;
+    else
+      return dual<T>(v, -x.dpart() / sqrt(1 - x.rpart() * x.rpart()));
+  }
 
-template<class T> dual<T> atan2(const dual<T> & y, const dual<T> & x) {
-  using std::atan2;
-  T v = atan2(y.rpart(), x.rpart());
-  return dual<T>(v,
-                 (x.rpart() * y.dpart() - y.rpart() * x.dpart()) /
-                 (y.rpart() * y.rpart() + x.rpart() * x.rpart()));
-}
+  template <class T> dual<T> atan(const dual<T> &x) {
+    using std::atan;
+    T v = atan(x.rpart());
+    if (x.dpart() == T(0))
+      return v;
+    else
+      return dual<T>(v, x.dpart() / (1 + x.rpart() * x.rpart()));
+  }
 
-// TODO
-template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U)>
-common_t
-atan2(const dual<T> & y, const U & x) {
-  using std::atan2;
-  T v = atan2(y.rpart(), x);
-  return dual<T>(v,
-                 (x * y.dpart()) /
-                 (y.rpart() * y.rpart() + x * x));
-}
+  template <class T> dual<T> atan2(const dual<T> &y, const dual<T> &x) {
+    using std::atan2;
+    T v = atan2(y.rpart(), x.rpart());
+    return dual<T>(v, (x.rpart() * y.dpart() - y.rpart() * x.dpart()) /
+                          (y.rpart() * y.rpart() + x.rpart() * x.rpart()));
+  }
 
-// TODO
-template<class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T,U)>
-common_t
-atan2(const U & y, const dual<T> & x) {
-  using std::atan2;
-  T v = atan2(y, x.rpart());
-  return dual<T>(v,
-                 (-y * x.dpart()) /
-                 (y * y + x.rpart() * x.rpart()));
-}
+  // TODO
+  template <class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T, U)>
+  common_t atan2(const dual<T> &y, const U &x) {
+    using std::atan2;
+    T v = atan2(y.rpart(), x);
+    return dual<T>(v, (x * y.dpart()) / (y.rpart() * y.rpart() + x * x));
+  }
 
-// TODO
-template<class T> dual<T> sinh(const dual<T> & x);
-template<class T> dual<T> cosh(const dual<T> & x);
-template<class T> dual<T> tanh(const dual<T> & x);
-template<class T> dual<T> asinh(const dual<T> & x);
-template<class T> dual<T> acosh(const dual<T> & x);
-template<class T> dual<T> atanh(const dual<T> & x);
-template<class T> dual<T> log1p(const dual<T> & x);
-template<class T> dual<T> expm1(const dual<T> & x);
-
-////////////////////////////////////////////////////////////////
-// added for meep forward-diff operations
-////////////////////////////////////////////////////////////////
-
-
-/// The tanh function.  Make sure to `#include <math.h>` before
-/// `#include <duals/dual>` to use this function.
-template<class T> dual<T> tanh(const dual<T> & x) {
-  using std::tanh;
-  T sech = 1.0 / std::cosh(x.rpart());
-  return dual<T>(tanh(x.rpart()), x.dpart() * sech * sech);
-}
+  // TODO
+  template <class T, class U, CPPDUALS_ENABLE_LEQ_DEPTH_AND_COMMON_T(T, U)>
+  common_t atan2(const U &y, const dual<T> &x) {
+    using std::atan2;
+    T v = atan2(y, x.rpart());
+    return dual<T>(v, (-y * x.dpart()) / (y * y + x.rpart() * x.rpart()));
+  }
 
-////////////////////////////////////////////////////////////////
-//
-////////////////////////////////////////////////////////////////
-
-/// The error function.  Make sure to `#include <math.h>` before
-/// `#include <duals/dual>` to use this function.
-template<class T> dual<T> erf(const dual<T> & x) {
-  using std::erf;
-  using std::sqrt;
-  using std::pow;
-  using std::exp;
-  return dual<T>(erf(x.rpart()),
-                 x.dpart() * T(2)/sqrt(T(MY_PI)) * exp(-pow(x.rpart(),T(2))));
-}
+  // TODO
+  template <class T> dual<T> sinh(const dual<T> &x);
+  template <class T> dual<T> cosh(const dual<T> &x);
+  template <class T> dual<T> tanh(const dual<T> &x);
+  template <class T> dual<T> asinh(const dual<T> &x);
+  template <class T> dual<T> acosh(const dual<T> &x);
+  template <class T> dual<T> atanh(const dual<T> &x);
+  template <class T> dual<T> log1p(const dual<T> &x);
+  template <class T> dual<T> expm1(const dual<T> &x);
+
+  ////////////////////////////////////////////////////////////////
+  // added for meep forward-diff operations
+  ////////////////////////////////////////////////////////////////
+
+  /// The tanh function.  Make sure to `#include <math.h>` before
+  /// `#include <duals/dual>` to use this function.
+  template <class T> dual<T> tanh(const dual<T> &x) {
+    using std::tanh;
+    T sech = 1.0 / std::cosh(x.rpart());
+    return dual<T>(tanh(x.rpart()), x.dpart() * sech * sech);
+  }
 
-/// Error function complement (1 - erf()).
-template<class T> dual<T> erfc(const dual<T> & x) {
-  using std::erfc;
-  using std::sqrt;
-  using std::pow;
-  using std::exp;
-  return dual<T>(erfc(x.rpart()),
-                 x.dpart() * -T(2)/sqrt(T(MY_PI)) * exp(-pow(x.rpart(),T(2))));
-}
+  ////////////////////////////////////////////////////////////////
+  //
+  ////////////////////////////////////////////////////////////////
+
+  /// The error function.  Make sure to `#include <math.h>` before
+  /// `#include <duals/dual>` to use this function.
+  template <class T> dual<T> erf(const dual<T> &x) {
+    using std::erf;
+    using std::exp;
+    using std::pow;
+    using std::sqrt;
+    return dual<T>(erf(x.rpart()), x.dpart() * T(2) / sqrt(T(MY_PI)) * exp(-pow(x.rpart(), T(2))));
+  }
 
-/// Gamma function.  Approximation of the dual part.
-// TODO specialize for integers
-template<class T> dual<T> tgamma(const dual<T> & x) {
-  using std::tgamma;
-  T v = tgamma(x.rpart());
-  if (x.dpart() == T(0))
-    return v;
-  else {
-    int errno_saved = errno;
-    T h(T(1) / (1ull << (std::numeric_limits<T>::digits / 3)));
-    T w((tgamma(x.rpart()+h) - tgamma(x.rpart()-h))/(2*h));
-    errno = errno_saved;
-    return dual<T>(v, x.dpart() * w);
+  /// Error function complement (1 - erf()).
+  template <class T> dual<T> erfc(const dual<T> &x) {
+    using std::erfc;
+    using std::exp;
+    using std::pow;
+    using std::sqrt;
+    return dual<T>(erfc(x.rpart()),
+                   x.dpart() * -T(2) / sqrt(T(MY_PI)) * exp(-pow(x.rpart(), T(2))));
+  }
+
+  /// Gamma function.  Approximation of the dual part.
+  // TODO specialize for integers
+  template <class T> dual<T> tgamma(const dual<T> &x) {
+    using std::tgamma;
+    T v = tgamma(x.rpart());
+    if (x.dpart() == T(0))
+      return v;
+    else {
+      int errno_saved = errno;
+      T h(T(1) / (1ull << (std::numeric_limits<T>::digits / 3)));
+      T w((tgamma(x.rpart() + h) - tgamma(x.rpart() - h)) / (2 * h));
+      errno = errno_saved;
+      return dual<T>(v, x.dpart() * w);
+    }
   }
-}
 
-/// Log of absolute value of gamma function.  Approximation of the dual part.
-template<class T> dual<T> lgamma(const dual<T> & x) {
-  using std::lgamma;
-  T v = lgamma(x.rpart());
-  if (x.dpart() == T(0))
-    return v;
-  else {
+  /// Log of absolute value of gamma function.  Approximation of the dual part.
+  template <class T> dual<T> lgamma(const dual<T> &x) {
+    using std::lgamma;
+    T v = lgamma(x.rpart());
+    if (x.dpart() == T(0))
+      return v;
+    else {
 #if CPPDUALS_HAVE_SIGNGAM
-    int signgam_saved = signgam;
+      int signgam_saved = signgam;
 #endif
-    int errno_saved = errno;
-    T h(T(1) / (1ull << (std::numeric_limits<T>::digits / 3)));
-    T w((lgamma(x.rpart()+h) - lgamma(x.rpart()-h))/(2*h));
+      int errno_saved = errno;
+      T h(T(1) / (1ull << (std::numeric_limits<T>::digits / 3)));
+      T w((lgamma(x.rpart() + h) - lgamma(x.rpart() - h)) / (2 * h));
 #if CPPDUALS_HAVE_SIGNGAM
-    signgam = signgam_saved;
+      signgam = signgam_saved;
 #endif
-    errno = errno_saved;
-    return dual<T>(v, x.dpart() * w);
+      errno = errno_saved;
+      return dual<T>(v, x.dpart() * w);
+    }
   }
-}
 
-/// Putto operator
-template<class T, class _CharT, class _Traits>
-std::basic_ostream<_CharT, _Traits> &
-operator<<(std::basic_ostream<_CharT, _Traits> & os, const dual<T> & x)
-{
-  std::basic_ostringstream<_CharT, _Traits> s;
-  s.flags(os.flags());
-  s.imbue(os.getloc());
-  s.precision(os.precision());
-  s << '(' << x.rpart()
-    << (x.dpart() < 0 ? "" : "+")
-    << x.dpart()
-    << "_e" << (std::is_same<typename std::decay<T>::type,float>::value ? "f" :
-                std::is_same<typename std::decay<T>::type,double>::value ? "" :
-                std::is_same<typename std::decay<T>::type,long double>::value ? "l" : "")
-    << ")";
-  return os << s.str();
-}
+  /// Putto operator
+  template <class T, class _CharT, class _Traits>
+  std::basic_ostream<_CharT, _Traits> &operator<<(std::basic_ostream<_CharT, _Traits> &os,
+                                                  const dual<T> &x) {
+    std::basic_ostringstream<_CharT, _Traits> s;
+    s.flags(os.flags());
+    s.imbue(os.getloc());
+    s.precision(os.precision());
+    s << '(' << x.rpart() << (x.dpart() < 0 ? "" : "+") << x.dpart() << "_e"
+      << (std::is_same<typename std::decay<T>::type, float>::value         ? "f"
+          : std::is_same<typename std::decay<T>::type, double>::value      ? ""
+          : std::is_same<typename std::decay<T>::type, long double>::value ? "l"
+                                                                           : "")
+      << ")";
+    return os << s.str();
+  }
 
-/// Stream reader
-template<class T, class CharT, class Traits>
-std::basic_istream<CharT, Traits> &
-operator>>(std::basic_istream<CharT, Traits> & is, dual<T> & x)
-{
-  if (is.good()) {
-    ws(is);
-    if (is.peek() == CharT('(')) {
-      is.get();
-      T r;
-      is >> r;
-      if (!is.fail()) {
-        CharT c = is.peek();
-        if (c != CharT('_')) {
-          ws(is);
-          c = is.peek();
-        }
-        if (c == CharT('+') || c == CharT('-') || c == CharT('_')) {
-          if (c == CharT('+'))
-            is.get();
-          T d;
-          if (c == CharT('_')) {
-            d = r;
-            r = 0;
-          }
-          else
-            is >> d;
-          if (!is.fail()) {
+  /// Stream reader
+  template <class T, class CharT, class Traits>
+  std::basic_istream<CharT, Traits> &operator>>(std::basic_istream<CharT, Traits> &is, dual<T> &x) {
+    if (is.good()) {
+      ws(is);
+      if (is.peek() == CharT('(')) {
+        is.get();
+        T r;
+        is >> r;
+        if (!is.fail()) {
+          CharT c = is.peek();
+          if (c != CharT('_')) {
             ws(is);
             c = is.peek();
+          }
+          if (c == CharT('+') || c == CharT('-') || c == CharT('_')) {
+            if (c == CharT('+')) is.get();
+            T d;
             if (c == CharT('_')) {
-              is.get();
+              d = r;
+              r = 0;
+            }
+            else
+              is >> d;
+            if (!is.fail()) {
+              ws(is);
               c = is.peek();
-              if (c == CharT('e')) {
+              if (c == CharT('_')) {
                 is.get();
                 c = is.peek();
-                if ((c == 'f' && !std::is_same<typename std::decay<T>::type,float>::value) ||
-                    (c == 'l' && !std::is_same<typename std::decay<T>::type,long double>::value))
-                  is.setstate(std::ios_base::failbit);
-                else {
-                  if (c == 'f' || c == 'l')
-                    is.get();
-                  ws(is);
+                if (c == CharT('e')) {
+                  is.get();
                   c = is.peek();
-                  if (c == ')') {
-                    is.get();
-                    x = dual<T>(r, d);
-                  }
-                  else
+                  if ((c == 'f' && !std::is_same<typename std::decay<T>::type, float>::value) ||
+                      (c == 'l' && !std::is_same<typename std::decay<T>::type, long double>::value))
                     is.setstate(std::ios_base::failbit);
+                  else {
+                    if (c == 'f' || c == 'l') is.get();
+                    ws(is);
+                    c = is.peek();
+                    if (c == ')') {
+                      is.get();
+                      x = dual<T>(r, d);
+                    }
+                    else
+                      is.setstate(std::ios_base::failbit);
+                  }
                 }
+                else
+                  is.setstate(std::ios_base::failbit);
               }
               else
                 is.setstate(std::ios_base::failbit);
@@ -1257,74 +1250,58 @@ operator>>(std::basic_istream<CharT, Traits> & is, dual<T> & x)
             else
               is.setstate(std::ios_base::failbit);
           }
+          else if (c == CharT(')')) {
+            is.get();
+            x = dual<T>(r, T(0));
+          }
           else
             is.setstate(std::ios_base::failbit);
         }
-        else if (c == CharT(')')) {
-          is.get();
+        else
+          is.setstate(std::ios_base::failbit);
+      }
+      else {
+        T r;
+        is >> r;
+        if (!is.fail())
           x = dual<T>(r, T(0));
-        }
         else
           is.setstate(std::ios_base::failbit);
       }
-      else
-        is.setstate(std::ios_base::failbit);
-    }
-    else {
-      T r;
-      is >> r;
-      if (!is.fail())
-        x = dual<T>(r, T(0));
-      else
-        is.setstate(std::ios_base::failbit);
     }
+    else
+      is.setstate(std::ios_base::failbit);
+    return is;
   }
-  else
-    is.setstate(std::ios_base::failbit);
-  return is;
-}
 
 #if __cpp_user_defined_literals >= 200809
-/// Dual number literals in namespace duals::literals
-inline namespace literals
-{
-using duals::dual;
-constexpr dual<float> operator""_ef(long double du)
-{
-  return { 0.0f, static_cast<float>(du) };
-}
-constexpr dual<float> operator""_ef(unsigned long long du)
-{
-  return { 0.0f, static_cast<float>(du) };
-}
-constexpr dual<double> operator""_e(long double du)
-{
-  return { 0.0, static_cast<double>(du) };
-}
-constexpr dual<double> operator""_e(unsigned long long du)
-{
-  return { 0.0, static_cast<double>(du) };
-}
-constexpr dual<long double> operator""_el(long double du)
-{
-  return { 0.0l, du };
-}
-constexpr dual<long double> operator""_el(unsigned long long du)
-{
-  return { 0.0l, static_cast<long double>(du) };
-}
-}
+  /// Dual number literals in namespace duals::literals
+  inline namespace literals {
+  using duals::dual;
+  constexpr dual<float> operator"" _ef(long double du) { return {0.0f, static_cast<float>(du)}; }
+  constexpr dual<float> operator"" _ef(unsigned long long du) {
+    return {0.0f, static_cast<float>(du)};
+  }
+  constexpr dual<double> operator"" _e(long double du) { return {0.0, static_cast<double>(du)}; }
+  constexpr dual<double> operator"" _e(unsigned long long du) {
+    return {0.0, static_cast<double>(du)};
+  }
+  constexpr dual<long double> operator"" _el(long double du) { return {0.0l, du}; }
+  constexpr dual<long double> operator"" _el(unsigned long long du) {
+    return {0.0l, static_cast<long double>(du)};
+  }
+  } // namespace literals
 #endif
 
-typedef dual<float> dualf;
-typedef dual<double> duald;
-typedef dual<long double> dualld;
-typedef dual<dualf> hyperdualf;
-typedef dual<duald> hyperduald;
-typedef dual<dualld> hyperdualld;
-typedef std::complex<dualf> cdualf;
-typedef std::complex<duald> cduald;
-typedef std::complex<dualld> cdualld;
+  typedef dual<float> dualf;
+  typedef dual<double> duald;
+  typedef dual<long double> dualld;
+  typedef dual<dualf> hyperdualf;
+  typedef dual<duald> hyperduald;
+  typedef dual<dualld> hyperdualld;
+  typedef std::complex<dualf> cdualf;
+  typedef std::complex<duald> cduald;
+  typedef std::complex<dualld> cdualld;
 
 } // namespace duals
 
@@ -1346,28 +1323,35 @@ typedef std::complex<dualld> cdualld;
 /// literals below. (this implementation is a bit hacky - glad for
 /// cleanups).
 template <typename T, typename Char>
-struct fmt::formatter<duals::dual<T>,Char> : public fmt::formatter<T,Char>
-{
-  typedef fmt::formatter<T,Char> base;
+struct fmt::formatter<duals::dual<T>, Char> : public fmt::formatter<T, Char> {
+  typedef fmt::formatter<T, Char> base;
   enum style { expr, star, pair } style_ = expr;
   fmt::detail::dynamic_format_specs<Char> specs_;
-  FMT_CONSTEXPR auto parse(format_parse_context & ctx) -> decltype(ctx.begin()) {
+  FMT_CONSTEXPR auto parse(format_parse_context &ctx) -> decltype(ctx.begin()) {
     using handler_type = fmt::detail::dynamic_specs_handler<format_parse_context>;
     auto type = fmt::detail::type_constant<T, Char>::value;
     fmt::detail::specs_checker<handler_type> handler(handler_type(specs_, ctx), type);
     auto it = ctx.begin();
-    if (it != ctx.end())
-      switch (*it) {
-      case '$': style_ = style::expr; ctx.advance_to(++it); break;
-      case '*': style_ = style::star; ctx.advance_to(++it); break;
-      case ',': style_ = style::pair; ctx.advance_to(++it); break;
-      default: break;
-    }
+    if (it != ctx.end()) switch (*it) {
+        case '$':
+          style_ = style::expr;
+          ctx.advance_to(++it);
+          break;
+        case '*':
+          style_ = style::star;
+          ctx.advance_to(++it);
+          break;
+        case ',':
+          style_ = style::pair;
+          ctx.advance_to(++it);
+          break;
+        default: break;
+      }
     parse_format_specs(ctx.begin(), ctx.end(), handler);
     return base::parse(ctx);
   }
   template <typename FormatCtx>
-  auto format(const duals::dual<T> & x, FormatCtx & ctx) -> decltype(ctx.out()) {
+  auto format(const duals::dual<T> &x, FormatCtx &ctx) -> decltype(ctx.out()) {
     format_to(ctx.out(), "(");
     if (style_ == style::pair) {
       base::format(x.rpart(), ctx);
@@ -1375,18 +1359,16 @@ struct fmt::formatter<duals::dual<T>,Char> : public fmt::formatter<T,Char>
       base::format(x.dpart(), ctx);
       return format_to(ctx.out(), ")");
     }
-    if (x.rpart() || !x.dpart())
-      base::format(x.rpart(), ctx);
+    if (x.rpart() || !x.dpart()) base::format(x.rpart(), ctx);
     if (x.dpart()) {
-      if (x.rpart() && x.dpart() >= 0 && specs_.sign != sign::plus)
-        format_to(ctx.out(), "+");
+      if (x.rpart() && x.dpart() >= 0 && specs_.sign != sign::plus) format_to(ctx.out(), "+");
       base::format(x.dpart(), ctx);
       if (style_ == style::star)
         format_to(ctx.out(), "*e");
       else
         format_to(ctx.out(), "_e");
-      if (std::is_same<typename std::decay<T>::type,float>::value)       format_to(ctx.out(), "f");
-      if (std::is_same<typename std::decay<T>::type,long double>::value) format_to(ctx.out(), "l");
+      if (std::is_same<typename std::decay<T>::type, float>::value) format_to(ctx.out(), "f");
+      if (std::is_same<typename std::decay<T>::type, long double>::value) format_to(ctx.out(), "l");
     }
     return format_to(ctx.out(), ")");
   }
@@ -1419,30 +1401,38 @@ struct fmt::formatter<duals::dual<T>,Char> : public fmt::formatter<T,Char>
 /// bit hacky - glad for cleanups).
 ///
 template <typename T, typename Char>
-struct fmt::formatter<std::complex<T>,Char> : public fmt::formatter<T,Char>
-{
-  typedef fmt::formatter<T,Char> base;
+struct fmt::formatter<std::complex<T>, Char> : public fmt::formatter<T, Char> {
+  typedef fmt::formatter<T, Char> base;
   enum style { expr, star, pair } style_ = expr;
   fmt::detail::dynamic_format_specs<Char> specs_;
-  FMT_CONSTEXPR auto parse(format_parse_context & ctx) -> decltype(ctx.begin()) {
+  FMT_CONSTEXPR auto parse(format_parse_context &ctx) -> decltype(ctx.begin()) {
     using handler_type = fmt::detail::dynamic_specs_handler<format_parse_context>;
     auto type = fmt::detail::type_constant<T, Char>::value;
     fmt::detail::specs_checker<handler_type> handler(handler_type(specs_, ctx), type);
     auto it = ctx.begin();
     if (it != ctx.end()) {
       switch (*it) {
-      case '$': style_ = style::expr; ctx.advance_to(++it); break;
-      case '*': style_ = style::star; ctx.advance_to(++it); break;
-      case ',': style_ = style::pair; ctx.advance_to(++it); break;
-      default: break;
+        case '$':
+          style_ = style::expr;
+          ctx.advance_to(++it);
+          break;
+        case '*':
+          style_ = style::star;
+          ctx.advance_to(++it);
+          break;
+        case ',':
+          style_ = style::pair;
+          ctx.advance_to(++it);
+          break;
+        default: break;
       }
     }
     parse_format_specs(ctx.begin(), ctx.end(), handler);
-    //todo: fixup alignment
+    // todo: fixup alignment
     return base::parse(ctx);
   }
   template <typename FormatCtx>
-  auto format(const std::complex<T> & x, FormatCtx & ctx) -> decltype(ctx.out()) {
+  auto format(const std::complex<T> &x, FormatCtx &ctx) -> decltype(ctx.out()) {
     format_to(ctx.out(), "(");
     if (style_ == style::pair) {
       base::format(x.real(), ctx);
@@ -1450,18 +1440,16 @@ struct fmt::formatter<std::complex<T>,Char> : public fmt::formatter<T,Char>
       base::format(x.imag(), ctx);
       return format_to(ctx.out(), ")");
     }
-    if (x.real() || !x.imag())
-      base::format(x.real(), ctx);
+    if (x.real() || !x.imag()) base::format(x.real(), ctx);
     if (x.imag()) {
-      if (x.real() && x.imag() >= 0 && specs_.sign != sign::plus)
-        format_to(ctx.out(), "+");
+      if (x.real() && x.imag() >= 0 && specs_.sign != sign::plus) format_to(ctx.out(), "+");
       base::format(x.imag(), ctx);
       if (style_ == style::star)
         format_to(ctx.out(), "*i");
       else
         format_to(ctx.out(), "i");
-      if (std::is_same<typename std::decay<T>::type,float>::value)       format_to(ctx.out(), "f");
-      if (std::is_same<typename std::decay<T>::type,long double>::value) format_to(ctx.out(), "l");
+      if (std::is_same<typename std::decay<T>::type, float>::value) format_to(ctx.out(), "f");
+      if (std::is_same<typename std::decay<T>::type, long double>::value) format_to(ctx.out(), "l");
     }
     return format_to(ctx.out(), ")");
   }

From d3f5ef99e3625805c692fbf42ad7cb9f2a6e861a Mon Sep 17 00:00:00 2001
From: smartalecH <alec.m.hammond@gmail.com>
Date: Thu, 18 Aug 2022 09:48:14 -0400
Subject: [PATCH 29/29] add threshold check for normal vector to mitigate
 floating point sensitivity

---
 src/meepgeom.cpp | 72 +++++++++++-------------------------------------
 1 file changed, 16 insertions(+), 56 deletions(-)

diff --git a/src/meepgeom.cpp b/src/meepgeom.cpp
index c7870842c..c254a9b96 100644
--- a/src/meepgeom.cpp
+++ b/src/meepgeom.cpp
@@ -1333,6 +1333,11 @@ void dual_interpolate_mat(material_type mat, symm_matrix &eps, duals::duald u) {
             u_bar * (mat->medium_2.epsilon_offdiag.z.re - mat->medium_1.epsilon_offdiag.z.re);
 }
 
+template<typename T>
+static bool AreEqual(T f1, T f2) { 
+  return (std::fabs(f1 - f2) <= 2*std::numeric_limits<T>::epsilon() * std::fmax(std::fabs(f1), std::fabs(f2)));
+}
+
 void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_matrix,
                                       const meep::volume &v, double tol, int maxeval,
                                       bool &fallback) {
@@ -1406,15 +1411,22 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
       tp = geom_tree_search(p_mat, restricted_tree, &oi);
       uval = matgrid_val(p_mat, tp, oi, mat);
       normal = matgrid_grad(p_mat, tp, oi, mat);
-      if (cvector3_re(normal).x == 0 && cvector3_re(normal).y == 0 && cvector3_re(normal).z == 0)
+      double TOL = 4*std::numeric_limits<double>::epsilon();
+      
+      if (AreEqual(cvector3_re(normal).x,0.0) && 
+          AreEqual(cvector3_re(normal).y,0.0) && 
+          AreEqual(cvector3_re(normal).z,0.0)
+        )
         /* couldn't get normal vector for this point; no averaging is needed,
         but we still need to interpolate onto our grid using duals */
         goto mgavg;
       duals::duald u_prime = normalize_dual(normal);
-      duals::duald d = (mat->eta - uval) / u_prime;
+      duals::duald d;
+      /* we need to be careful when mat->eta = uval and u_prime is very small...*/
+      d = (mat->eta - uval) / u_prime;
       double r = v.diameter() / 2;
       fill = get_material_grid_fill(v.dim, d, r, uval, mat->eta);
-      if (fill.rpart() < 0) {
+      if (fill < 0) {
         /* we are far enough away from an interface that
         we don't need any averaging -- but we still need
         to interpolate from the material grid */
@@ -1454,7 +1466,6 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
     /* ------------------------------------------- */
   }
   else {
-
     vector3 normal_re = unit_vector3(normal_to_fixed_object(vector3_minus(p, shiftby), *o));
     if (normal_re.x == 0 && normal_re.y == 0 && normal_re.z == 0)
       goto noavg; // couldn't get normal vector for this point, punt
@@ -1504,57 +1515,6 @@ void geom_epsilon::eff_chi1inv_matrix(meep::component c, symm_matrix *chi1inv_ma
 static int eps_ever_negative = 0;
 static meep::field_type func_ft = meep::E_stuff;
 
-struct matgrid_volavg {
-  meep::ndim dim;   // dimensionality of voxel
-  double rad;       // (spherical) voxel radius
-  double uval;      // bilinearly-interpolated weight at voxel center
-  double ugrad_abs; // magnitude of gradient of uval
-  double beta;      // thresholding bias
-  double eta;       // thresholding erosion/dilation
-  double eps1;      // trace of epsilon tensor from medium 1
-  double eps2;      // trace of epsilon tensor from medium 2
-};
-
-static void get_uproj_w(const matgrid_volavg *mgva, double x0, double &u_proj, double &w) {
-  // use a linear approximation for the material grid weights around the Yee grid point
-  // u_proj = tanh_projection(mgva->uval + mgva->ugrad_abs*x0, mgva->beta, mgva->eta);
-  if (mgva->dim == meep::D1)
-    w = 1 / (2 * mgva->rad);
-  else if (mgva->dim == meep::D2 || mgva->dim == meep::Dcyl)
-    w = 2 * sqrt(mgva->rad * mgva->rad - x0 * x0) / (meep::pi * mgva->rad * mgva->rad);
-  else if (mgva->dim == meep::D3)
-    w = meep::pi * (mgva->rad * mgva->rad - x0 * x0) /
-        (4 / 3 * meep::pi * mgva->rad * mgva->rad * mgva->rad);
-}
-
-#ifdef CTL_HAS_COMPLEX_INTEGRATION
-static cnumber matgrid_ceps_func(int n, number *x, void *mgva_) {
-  (void)n; // unused
-  double u_proj = 0, w = 0;
-  matgrid_volavg *mgva = (matgrid_volavg *)mgva_;
-  get_uproj_w(mgva, x[0], u_proj, w);
-  cnumber ret;
-  ret.re = (1 - u_proj) * mgva->eps1 + u_proj * mgva->eps2;
-  ret.im = (1 - u_proj) / mgva->eps1 + u_proj / mgva->eps2;
-  return ret * w;
-}
-#else
-static number matgrid_eps_func(int n, number *x, void *mgva_) {
-  (void)n; // unused
-  double u_proj = 0, w = 0;
-  matgrid_volavg *mgva = (matgrid_volavg *)mgva_;
-  get_uproj_w(mgva, x[0], u_proj, w);
-  return w * ((1 - u_proj) * mgva->eps1 + u_proj * mgva->eps2);
-}
-static number matgrid_inveps_func(int n, number *x, void *mgva_) {
-  (void)n; // unused
-  double u_proj = 0, w = 0;
-  matgrid_volavg *mgva = (matgrid_volavg *)mgva_;
-  get_uproj_w(mgva, x[0], u_proj, w);
-  return w * ((1 - u_proj) / mgva->eps1 + u_proj / mgva->eps2);
-}
-#endif
-
 #ifdef CTL_HAS_COMPLEX_INTEGRATION
 static cnumber ceps_func(int n, number *x, void *geomeps_) {
   geom_epsilon *geomeps = (geom_epsilon *)geomeps_;
@@ -2924,6 +2884,7 @@ get_material_gradient(const meep::vec &r,              // current point
 
   // locate the proper material
   material_type md;
+  if (!gv.contains(r)) meep::abort("Location (%f, %f, %f) is outside of grid volume\n",r.x(),r.y(),r.z());
   geps->get_material_pt(md, r);
 
   // get the tensor column index corresponding to the forward component
@@ -2955,7 +2916,6 @@ get_material_gradient(const meep::vec &r,              // current point
     u[idx] += 1_e; // add dual component
     geps->eff_chi1inv_row_grad(adjoint_c, dA_du, v, geps->tol, maxevals, true);
     u[idx] -= 1_e; // remove dual component
-
     return -dA_du[dir_idx] * fields_f; // note the minus sign! (from Steven's adjoint notes)
   }
   // materials have some dispersion