From c63dbfdeb3f4849fd1ac7b2f5a3cd4d171933c8d Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Tue, 10 May 2022 17:51:27 -0400
Subject: [PATCH 01/58] Move Huffman tree functions to separate header and source file.

---
 CMakeLists.txt      |   1 +
 include/huffman.hpp |  21 +++
 src/compressors.cpp | 352 +-----------------------------------------
 src/huffman.cpp     | 362 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 385 insertions(+), 351 deletions(-)
 create mode 100644 include/huffman.hpp
 create mode 100644 src/huffman.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e8067bda25..21d3e50e21 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -201,6 +201,7 @@ set(
   MGARD_LIBRARY_CPP
         src/compress.cpp
         src/compress_internal.cpp
+  src/huffman.cpp
   src/compressors.cpp
   src/format.cpp
 )
diff --git a/include/huffman.hpp b/include/huffman.hpp
new file mode 100644
index 0000000000..67bd1bf2fd
--- /dev/null
+++ b/include/huffman.hpp
@@ -0,0 +1,21 @@
+#ifndef HUFFMAN_HPP
+#define HUFFMAN_HPP
+//!\file
+//!\brief Huffman trees for quantized multilevel coefficients.
+
+namespace mgard {
+
+void huffman_encoding(long int *quantized_data, const std::size_t n,
+                      unsigned char **out_data_hit, size_t *out_data_hit_size,
+                      unsigned char **out_data_miss, size_t *out_data_miss_size,
+                      unsigned char **out_tree, size_t *out_tree_size);
+
+void huffman_decoding(long int *quantized_data,
+                      const std::size_t quantized_data_size,
+                      unsigned char *out_data_hit, size_t out_data_hit_size,
+                      unsigned char *out_data_miss, size_t out_data_miss_size,
+                      unsigned char *out_tree, size_t out_tree_size);
+
+} // namespace mgard
+
+#endif
diff --git a/src/compressors.cpp b/src/compressors.cpp
index 915912f7f9..34c3401e14 100644
--- a/src/compressors.cpp
+++ b/src/compressors.cpp
@@ -7,13 +7,13 @@
 #include <algorithm>
 #include <bitset>
 #include <numeric>
-#include <queue>
 #include <stdexcept>
 #include <vector>
 
 #include <zlib.h>
 
 #include "format.hpp"
+#include "huffman.hpp"
 
 #ifdef MGARD_TIMING
 #include <chrono>
@@ -26,251 +26,6 @@
 
 namespace mgard {
 
-const int nql = 32768 * 4;
-
-struct htree_node {
-  int q;
-  size_t cnt;
-  unsigned int code;
-  size_t len;
-  htree_node *left;
-  htree_node *right;
-};
-
-struct huffman_codec {
-  int q;
-  unsigned int code;
-  size_t len;
-};
-
-bool myfunction(htree_node i, htree_node j) { return (i.cnt < j.cnt); }
-
-htree_node *new_htree_node(int q, size_t cnt) {
-  htree_node *new_node = new htree_node;
-  new_node->q = q;
-  new_node->cnt = cnt;
-  new_node->code = 0;
-  new_node->len = 0;
-  new_node->left = 0;
-  new_node->right = 0;
-
-  return new_node;
-}
-
-struct LessThanByCnt {
-  bool operator()(const htree_node *lhs, const htree_node *rhs) const {
-    return lhs->cnt > rhs->cnt;
-  }
-};
-
-template <class T>
-using my_priority_queue =
-    std::priority_queue<T *, std::vector<T *>, LessThanByCnt>;
-
-void build_codec(htree_node *root, unsigned int code, size_t len,
-                 huffman_codec *codec) {
-
-  root->len = len;
-  root->code = code;
-
-  if (!root->left && !root->right) {
-    codec[root->q].q = root->q;
-    codec[root->q].code = code;
-    codec[root->q].len = len;
-  }
-
-  if (root->left) {
-    build_codec(root->left, code << 1, len + 1, codec);
-  }
-
-  if (root->right) {
-    build_codec(root->right, code << 1 | 0x1, len + 1, codec);
-  }
-}
-
-my_priority_queue<htree_node> *build_tree(size_t *cnt) {
-  my_priority_queue<htree_node> *phtree;
-  phtree = new my_priority_queue<htree_node>;
-#if 1
-  for (int i = 0; i < nql; i++) {
-    if (cnt[i] != 0) {
-      htree_node *new_node = new_htree_node(i, cnt[i]);
-      phtree->push(new_node);
-    }
-  }
-
-  while (phtree->size() > 1) {
-    htree_node *top_node1 = phtree->top();
-    phtree->pop();
-    htree_node *top_node2 = phtree->top();
-    phtree->pop();
-
-    htree_node *new_node = new_htree_node(-1, top_node1->cnt + top_node2->cnt);
-    new_node->left = top_node1;
-    new_node->right = top_node2;
-    phtree->push(new_node);
-  }
-#endif
-  return phtree;
-}
-
-void free_htree_node(htree_node *node) {
-  if (node->left) {
-    free_htree_node(node->left);
-    node->left = 0;
-  }
-
-  if (node->right) {
-    free_htree_node(node->right);
-    node->right = 0;
-  }
-
-  delete node;
-}
-
-void free_tree(my_priority_queue<htree_node> *phtree) {
-  if (phtree) {
-    free_htree_node(phtree->top());
-
-    phtree->pop();
-
-    delete phtree;
-  }
-}
-
-// Note this function will change the quantized data.
-size_t *build_ft(long int *quantized_data, const std::size_t n,
-                 size_t &num_outliers) {
-  size_t *cnt = (size_t *)malloc(nql * sizeof(size_t));
-  std::memset(cnt, 0, nql * sizeof(size_t));
-
-  for (std::size_t i = 0; i < n; i++) {
-    // Convert quantization level to positive so that counting freq can be
-    // easily done. Level 0 is reserved a out-of-range flag.
-    quantized_data[i] = quantized_data[i] + nql / 2;
-    if (quantized_data[i] > 0 && quantized_data[i] < nql) {
-      cnt[quantized_data[i]]++;
-    } else {
-      cnt[0]++;
-    }
-  }
-
-  num_outliers = cnt[0];
-
-  return cnt;
-}
-
-huffman_codec *build_huffman_codec(long int *quantized_data, size_t **ft,
-                                   const std::size_t n, size_t &num_outliers) {
-  size_t *cnt;
-
-  cnt = build_ft(quantized_data, n, num_outliers);
-  *ft = cnt;
-
-  my_priority_queue<htree_node> *phtree = build_tree(cnt);
-
-  huffman_codec *codec = (huffman_codec *)malloc(sizeof(huffman_codec) * nql);
-  std::memset(codec, 0, sizeof(huffman_codec) * nql);
-
-  build_codec(phtree->top(), 0, 0, codec);
-
-  free_tree(phtree);
-  phtree = 0;
-
-  return codec;
-}
-
-void huffman_decoding(long int *quantized_data,
-                      const std::size_t quantized_data_size,
-                      unsigned char *out_data_hit, size_t out_data_hit_size,
-                      unsigned char *out_data_miss, size_t out_data_miss_size,
-                      unsigned char *out_tree, size_t out_tree_size) {
-  size_t *cft = (size_t *)out_tree;
-  int nonZeros = out_tree_size / (2 * sizeof(size_t));
-  size_t *ft = (size_t *)malloc(nql * sizeof(size_t));
-
-  std::memset(ft, 0, nql * sizeof(size_t));
-
-  for (int j = 0; j < nonZeros; j++) {
-    ft[cft[2 * j]] = cft[2 * j + 1];
-  }
-
-  my_priority_queue<htree_node> *phtree = build_tree(ft);
-
-  unsigned int *buf = (unsigned int *)out_data_hit;
-
-  // The out_data_miss may not be aligned. Therefore, the code
-  // here makes a new buffer.
-  int *miss_buf = (int *)malloc(out_data_miss_size);
-  if (out_data_miss_size) {
-    std::memcpy(miss_buf, out_data_miss, out_data_miss_size);
-  }
-
-  int *miss_bufp = miss_buf;
-
-  size_t start_bit = 0;
-  unsigned int mask = 0x80000000;
-
-  long int *q = quantized_data;
-  size_t i = 0;
-  size_t num_missed = 0;
-  while (q < (quantized_data + (quantized_data_size / sizeof(*q)))) {
-    htree_node *root = phtree->top();
-    assert(root);
-
-    size_t len = 0;
-    int offset = 0;
-    while (root->left) {
-      int flag = *(buf + start_bit / 32 + offset) & mask;
-      if (!flag) {
-        root = root->left;
-      } else {
-        root = root->right;
-      }
-
-      len++;
-
-      mask >>= 1;
-      if (!mask) {
-        mask = 0x80000000;
-        offset = 1;
-      } else {
-        //        offset = 0;
-      }
-    }
-
-    if (root->q != 0) {
-      *q = root->q - nql / 2;
-
-    } else {
-      *q = *miss_buf - nql / 2;
-
-      miss_buf++;
-      num_missed++;
-    }
-
-    q++;
-    i++;
-
-    start_bit += len;
-  }
-
-  assert(start_bit == out_data_hit_size);
-  assert(sizeof(int) * num_missed == out_data_miss_size);
-
-  // Avoid unused argument warning. If NDEBUG is defined, then the assert
-  // becomes empty and out_data_hit_size is unused. Tell the compiler that
-  // is OK and expected.
-  (void)out_data_hit_size;
-
-  free(miss_bufp);
-  miss_bufp = 0;
-  free_tree(phtree);
-  phtree = 0;
-  free(ft);
-  ft = 0;
-}
-
 void decompress_memory_huffman(unsigned char *const src,
                                const std::size_t srcLen, long int *const dst,
                                const std::size_t dstLen) {
@@ -313,111 +68,6 @@ void decompress_memory_huffman(unsigned char *const src,
   free(huffman_encoding_p);
 }
 
-void huffman_encoding(long int *quantized_data, const std::size_t n,
-                      unsigned char **out_data_hit, size_t *out_data_hit_size,
-                      unsigned char **out_data_miss, size_t *out_data_miss_size,
-                      unsigned char **out_tree, size_t *out_tree_size) {
-  size_t num_miss = 0;
-  size_t *ft = 0;
-
-  huffman_codec *codec = build_huffman_codec(quantized_data, &ft, n, num_miss);
-
-  assert(n >= num_miss);
-
-  /* For those miss points, we still need to maintain a flag (q = 0),
-   * and therefore we need to allocate space for n numbers.
-   */
-  unsigned char *p_hit = (unsigned char *)malloc(n * sizeof(int));
-  std::memset(p_hit, 0, n * sizeof(int));
-
-  int *p_miss = 0;
-  if (num_miss > 0) {
-    p_miss = (int *)malloc(num_miss * sizeof(int));
-    std::memset(p_miss, 0, num_miss * sizeof(int));
-  }
-
-  *out_data_hit = p_hit;
-  *out_data_miss = (unsigned char *)p_miss;
-  *out_data_hit_size = 0;
-  *out_data_miss_size = 0;
-
-  size_t start_bit = 0;
-  unsigned int *cur = (unsigned int *)p_hit;
-  size_t cnt_missed = 0;
-  for (std::size_t i = 0; i < n; i++) {
-    int q = quantized_data[i];
-    unsigned int code;
-    size_t len;
-
-    if (q > 0 && q < nql) {
-      // for those that are within the range
-      code = codec[q].code;
-      len = codec[q].len;
-    } else {
-      // for those that are out of the range, q is set to 0
-      code = codec[0].code;
-      len = codec[0].len;
-
-      *p_miss = q;
-      p_miss++;
-      cnt_missed++;
-    }
-
-    // Note that if len == 0, then that means that either the data is all the
-    // same number or (more likely) all data are outside the quantization
-    // range. Either way, the code contains no information and is therefore 0
-    // bits.
-
-    if (32 - start_bit % 32 < len) {
-      // current unsigned int cannot hold the code
-      // copy 32 - start_bit % 32 bits to the current int
-      // and copy  the rest len - (32 - start_bit % 32) to the next int
-      size_t rshift = len - (32 - start_bit % 32);
-      size_t lshift = 32 - rshift;
-      *(cur + start_bit / 32) = (*(cur + start_bit / 32)) | (code >> rshift);
-      *(cur + start_bit / 32 + 1) =
-          (*(cur + start_bit / 32 + 1)) | (code << lshift);
-      start_bit += len;
-    } else if (len > 0) {
-      code = code << (32 - start_bit % 32 - len);
-      *(cur + start_bit / 32) = (*(cur + start_bit / 32)) | code;
-      start_bit += len;
-    } else {
-      // Sequence is empty (everything must be the same). Do nothing.
-    }
-  }
-
-  // Note: hit size is in bits, while miss size is in bytes.
-  *out_data_hit_size = start_bit;
-  *out_data_miss_size = num_miss * sizeof(int);
-
-  // write frequency table to buffer
-  int nonZeros = 0;
-  for (int i = 0; i < nql; i++) {
-    if (ft[i] > 0) {
-      nonZeros++;
-    }
-  }
-
-  size_t *cft = (size_t *)malloc(2 * nonZeros * sizeof(size_t));
-  int off = 0;
-  for (int i = 0; i < nql; i++) {
-    if (ft[i] > 0) {
-      cft[2 * off] = i;
-      cft[2 * off + 1] = ft[i];
-      off++;
-    }
-  }
-
-  *out_tree = (unsigned char *)cft;
-  *out_tree_size = 2 * nonZeros * sizeof(size_t);
-  free(ft);
-  ft = 0;
-
-  free(codec);
-  codec = 0;
-}
-
 MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
                                                     const std::size_t srcLen) {
   unsigned char *out_data_hit = 0;
diff --git a/src/huffman.cpp b/src/huffman.cpp
new file mode 100644
index 0000000000..6fc1dbe1d3
--- /dev/null
+++ b/src/huffman.cpp
@@ -0,0 +1,362 @@
+#include <cassert>
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>
+
+#include <queue>
+
+#include "huffman.hpp"
+
+namespace mgard {
+
+const int nql = 32768 * 4;
+
+struct htree_node {
+  int q;
+  size_t cnt;
+  unsigned int code;
+  size_t len;
+  htree_node *left;
+  htree_node *right;
+};
+
+struct huffman_codec {
+  int q;
+  unsigned int code;
+  size_t len;
+};
+
+bool myfunction(htree_node i, htree_node j) { return (i.cnt < j.cnt); }
+
+htree_node *new_htree_node(int q, size_t cnt) {
+  htree_node *new_node = new htree_node;
+  new_node->q = q;
+  new_node->cnt = cnt;
+  new_node->code = 0;
+  new_node->len = 0;
+  new_node->left = 0;
+  new_node->right = 0;
+
+  return new_node;
+}
+
+struct LessThanByCnt {
+  bool operator()(const htree_node *lhs, const htree_node *rhs) const {
+    return lhs->cnt > rhs->cnt;
+  }
+};
+
+template <class T>
+using my_priority_queue =
+    std::priority_queue<T *, std::vector<T *>, LessThanByCnt>;
+
+void build_codec(htree_node *root, unsigned int code, size_t len,
+                 huffman_codec *codec) {
+
+  root->len = len;
+  root->code = code;
+
+  if (!root->left && !root->right) {
+    codec[root->q].q = root->q;
+    codec[root->q].code = code;
+    codec[root->q].len = len;
+  }
+
+  if (root->left) {
+    build_codec(root->left, code << 1, len + 1, codec);
+  }
+
+  if (root->right) {
+    build_codec(root->right, code << 1 | 0x1, len + 1, codec);
+  }
+}
+
+my_priority_queue<htree_node> *build_tree(size_t *cnt) {
+  my_priority_queue<htree_node> *phtree;
+  phtree = new my_priority_queue<htree_node>;
+#if 1
+  for (int i = 0; i < nql; i++) {
+    if (cnt[i] != 0) {
+      htree_node *new_node = new_htree_node(i, cnt[i]);
+      phtree->push(new_node);
+    }
+  }
+
+  while (phtree->size() > 1) {
+    htree_node *top_node1 = phtree->top();
+    phtree->pop();
+    htree_node *top_node2 = phtree->top();
+    phtree->pop();
+
+    htree_node *new_node = new_htree_node(-1, top_node1->cnt + top_node2->cnt);
+    new_node->left = top_node1;
+    new_node->right = top_node2;
+    phtree->push(new_node);
+  }
+#endif
+  return phtree;
+}
+
+void free_htree_node(htree_node *node) {
+  if (node->left) {
+    free_htree_node(node->left);
+    node->left = 0;
+  }
+
+  if (node->right) {
+    free_htree_node(node->right);
+    node->right = 0;
+  }
+
+  delete node;
+}
+
+void free_tree(my_priority_queue<htree_node> *phtree) {
+  if (phtree) {
+    free_htree_node(phtree->top());
+
+    phtree->pop();
+
+    delete phtree;
+  }
+}
+
+// Note this function will change the quantized data.
+size_t *build_ft(long int *quantized_data, const std::size_t n,
+                 size_t &num_outliers) {
+  size_t *cnt = (size_t *)malloc(nql * sizeof(size_t));
+  std::memset(cnt, 0, nql * sizeof(size_t));
+
+  for (std::size_t i = 0; i < n; i++) {
+    // Convert quantization level to positive so that counting freq can be
+    // easily done. Level 0 is reserved a out-of-range flag.
+    quantized_data[i] = quantized_data[i] + nql / 2;
+    if (quantized_data[i] > 0 && quantized_data[i] < nql) {
+      cnt[quantized_data[i]]++;
+    } else {
+      cnt[0]++;
+    }
+  }
+
+  num_outliers = cnt[0];
+
+  return cnt;
+}
+
+huffman_codec *build_huffman_codec(long int *quantized_data, size_t **ft,
+                                   const std::size_t n, size_t &num_outliers) {
+  size_t *cnt;
+
+  cnt = build_ft(quantized_data, n, num_outliers);
+  *ft = cnt;
+
+  my_priority_queue<htree_node> *phtree = build_tree(cnt);
+
+  huffman_codec *codec = (huffman_codec *)malloc(sizeof(huffman_codec) * nql);
+  std::memset(codec, 0, sizeof(huffman_codec) * nql);
+
+  build_codec(phtree->top(), 0, 0, codec);
+
+  free_tree(phtree);
+  phtree = 0;
+
+  return codec;
+}
+
+void huffman_encoding(long int *quantized_data, const std::size_t n,
+                      unsigned char **out_data_hit, size_t *out_data_hit_size,
+                      unsigned char **out_data_miss, size_t *out_data_miss_size,
+                      unsigned char **out_tree, size_t *out_tree_size) {
+  size_t num_miss = 0;
+  size_t *ft = 0;
+
+  huffman_codec *codec = build_huffman_codec(quantized_data, &ft, n, num_miss);
+
+  assert(n >= num_miss);
+
+  /* For those miss points, we still need to maintain a flag (q = 0),
+   * and therefore we need to allocate space for n numbers.
+   */
+  unsigned char *p_hit = (unsigned char *)malloc(n * sizeof(int));
+  std::memset(p_hit, 0, n * sizeof(int));
+
+  int *p_miss = 0;
+  if (num_miss > 0) {
+    p_miss = (int *)malloc(num_miss * sizeof(int));
+    std::memset(p_miss, 0, num_miss * sizeof(int));
+  }
+
+  *out_data_hit = p_hit;
+  *out_data_miss = (unsigned char *)p_miss;
+  *out_data_hit_size = 0;
+  *out_data_miss_size = 0;
+
+  size_t start_bit = 0;
+  unsigned int *cur = (unsigned int *)p_hit;
+  size_t cnt_missed = 0;
+  for (std::size_t i = 0; i < n; i++) {
+    int q = quantized_data[i];
+    unsigned int code;
+    size_t len;
+
+    if (q > 0 && q < nql) {
+      // for those that are within the range
+      code = codec[q].code;
+      len = codec[q].len;
+    } else {
+      // for those that are out of the range, q is set to 0
+      code = codec[0].code;
+      len = codec[0].len;
+
+      *p_miss = q;
+      p_miss++;
+      cnt_missed++;
+    }
+
+    // Note that if len == 0, then that means that either the data is all the
+    // same number or (more likely) all data are outside the quantization
+    // range. Either way, the code contains no information and is therefore 0
+    // bits.
+
+    if (32 - start_bit % 32 < len) {
+      // current unsigned int cannot hold the code
+      // copy 32 - start_bit % 32 bits to the current int
+      // and copy  the rest len - (32 - start_bit % 32) to the next int
+      size_t rshift = len - (32 - start_bit % 32);
+      size_t lshift = 32 - rshift;
+      *(cur + start_bit / 32) = (*(cur + start_bit / 32)) | (code >> rshift);
+      *(cur + start_bit / 32 + 1) =
+          (*(cur + start_bit / 32 + 1)) | (code << lshift);
+      start_bit += len;
+    } else if (len > 0) {
+      code = code << (32 - start_bit % 32 - len);
+      *(cur + start_bit / 32) = (*(cur + start_bit / 32)) | code;
+      start_bit += len;
+    } else {
+      // Sequence is empty (everything must be the same). Do nothing.
+    }
+  }
+
+  // Note: hit size is in bits, while miss size is in bytes.
+  *out_data_hit_size = start_bit;
+  *out_data_miss_size = num_miss * sizeof(int);
+
+  // write frequency table to buffer
+  int nonZeros = 0;
+  for (int i = 0; i < nql; i++) {
+    if (ft[i] > 0) {
+      nonZeros++;
+    }
+  }
+
+  size_t *cft = (size_t *)malloc(2 * nonZeros * sizeof(size_t));
+  int off = 0;
+  for (int i = 0; i < nql; i++) {
+    if (ft[i] > 0) {
+      cft[2 * off] = i;
+      cft[2 * off + 1] = ft[i];
+      off++;
+    }
+  }
+
+  *out_tree = (unsigned char *)cft;
+  *out_tree_size = 2 * nonZeros * sizeof(size_t);
+  free(ft);
+  ft = 0;
+
+  free(codec);
+  codec = 0;
+}
+
+void huffman_decoding(long int *quantized_data,
+                      const std::size_t quantized_data_size,
+                      unsigned char *out_data_hit, size_t out_data_hit_size,
+                      unsigned char *out_data_miss, size_t out_data_miss_size,
+                      unsigned char *out_tree, size_t out_tree_size) {
+  size_t *cft = (size_t *)out_tree;
+  int nonZeros = out_tree_size / (2 * sizeof(size_t));
+  size_t *ft = (size_t *)malloc(nql * sizeof(size_t));
+
+  std::memset(ft, 0, nql * sizeof(size_t));
+
+  for (int j = 0; j < nonZeros; j++) {
+    ft[cft[2 * j]] = cft[2 * j + 1];
+  }
+
+  my_priority_queue<htree_node> *phtree = build_tree(ft);
+
+  unsigned int *buf = (unsigned int *)out_data_hit;
+
+  // The out_data_miss may not be aligned. Therefore, the code
+  // here makes a new buffer.
+  int *miss_buf = (int *)malloc(out_data_miss_size);
+  if (out_data_miss_size) {
+    std::memcpy(miss_buf, out_data_miss, out_data_miss_size);
+  }
+
+  int *miss_bufp = miss_buf;
+
+  size_t start_bit = 0;
+  unsigned int mask = 0x80000000;
+
+  long int *q = quantized_data;
+  size_t i = 0;
+  size_t num_missed = 0;
+  while (q < (quantized_data + (quantized_data_size / sizeof(*q)))) {
+    htree_node *root = phtree->top();
+    assert(root);
+
+    size_t len = 0;
+    int offset = 0;
+    while (root->left) {
+      int flag = *(buf + start_bit / 32 + offset) & mask;
+      if (!flag) {
+        root = root->left;
+      } else {
+        root = root->right;
+      }
+
+      len++;
+
+      mask >>= 1;
+      if (!mask) {
+        mask = 0x80000000;
+        offset = 1;
+      } else {
+        //        offset = 0;
+      }
+    }
+
+    if (root->q != 0) {
+      *q = root->q - nql / 2;
+
+    } else {
+      *q = *miss_buf - nql / 2;
+
+      miss_buf++;
+      num_missed++;
+    }
+
+    q++;
+    i++;
+
+    start_bit += len;
+  }
+
+  assert(start_bit == out_data_hit_size);
+  assert(sizeof(int) * num_missed == out_data_miss_size);
+
+  // Avoid unused argument warning. If NDEBUG is defined, then the assert
+  // becomes empty and out_data_hit_size is unused. Tell the compiler that
+  // is OK and expected.
+  (void)out_data_hit_size;
+
+  free(miss_bufp);
+  miss_bufp = 0;
+  free_tree(phtree);
+  phtree = 0;
+  free(ft);
+  ft = 0;
+}
+
+} // namespace mgard

From f43b69f369e5cb9fc911669f1e83fac6f6fbc793 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Tue, 10 May 2022 17:55:25 -0400
Subject: [PATCH 02/58] Delete unused comparison function.

---
 src/huffman.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/huffman.cpp b/src/huffman.cpp
index 6fc1dbe1d3..55788050ba 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -26,8 +26,6 @@ struct huffman_codec {
   size_t len;
 };
 
-bool myfunction(htree_node i, htree_node j) { return (i.cnt < j.cnt); }
-
 htree_node *new_htree_node(int q, size_t cnt) {
   htree_node *new_node = new htree_node;
   new_node->q = q;

From 258bec855bfd5d1a00a7019cca277884545f5679 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Tue, 10 May 2022 17:56:39 -0400
Subject: [PATCH 03/58] Replace `size_t` with `std::size_t`.

---
 src/huffman.cpp | 77 ++++++++++++++++++++++++++-----------------------
 1 file changed, 41 insertions(+), 36 deletions(-)

diff --git a/src/huffman.cpp b/src/huffman.cpp
index 55788050ba..5fb40c2f23 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -13,9 +13,9 @@ const int nql = 32768 * 4;
 
 struct htree_node {
   int q;
-  size_t cnt;
+  std::size_t cnt;
   unsigned int code;
-  size_t len;
+  std::size_t len;
   htree_node *left;
   htree_node *right;
 };
@@ -23,10 +23,10 @@ struct htree_node {
 struct huffman_codec {
   int q;
   unsigned int code;
-  size_t len;
+  std::size_t len;
 };
 
-htree_node *new_htree_node(int q, size_t cnt) {
+htree_node *new_htree_node(int q, std::size_t cnt) {
   htree_node *new_node = new htree_node;
   new_node->q = q;
   new_node->cnt = cnt;
@@ -48,7 +48,7 @@ template <class T>
 using my_priority_queue =
     std::priority_queue<T *, std::vector<T *>, LessThanByCnt>;
 
-void build_codec(htree_node *root, unsigned int code, size_t len,
+void build_codec(htree_node *root, unsigned int code, std::size_t len,
                  huffman_codec *codec) {
 
   root->len = len;
@@ -69,7 +69,7 @@ void build_codec(htree_node *root, unsigned int code, size_t len,
   }
 }
 
-my_priority_queue<htree_node> *build_tree(size_t *cnt) {
+my_priority_queue<htree_node> *build_tree(std::size_t *cnt) {
   my_priority_queue<htree_node> *phtree;
   phtree = new my_priority_queue<htree_node>;
 #if 1
@@ -120,10 +120,10 @@ void free_tree(my_priority_queue<htree_node> *phtree) {
 }
 
 // Note this function will change the quantized data.
-size_t *build_ft(long int *quantized_data, const std::size_t n,
-                 size_t &num_outliers) {
-  size_t *cnt = (size_t *)malloc(nql * sizeof(size_t));
-  std::memset(cnt, 0, nql * sizeof(size_t));
+std::size_t *build_ft(long int *quantized_data, const std::size_t n,
+                      std::size_t &num_outliers) {
+  std::size_t *cnt = (std::size_t *)malloc(nql * sizeof(std::size_t));
+  std::memset(cnt, 0, nql * sizeof(std::size_t));
 
   for (std::size_t i = 0; i < n; i++) {
     // Convert quantization level to positive so that counting freq can be
@@ -141,9 +141,10 @@ size_t *build_ft(long int *quantized_data, const std::size_t n,
   return cnt;
 }
 
-huffman_codec *build_huffman_codec(long int *quantized_data, size_t **ft,
-                                   const std::size_t n, size_t &num_outliers) {
-  size_t *cnt;
+huffman_codec *build_huffman_codec(long int *quantized_data, std::size_t **ft,
+                                   const std::size_t n,
+                                   std::size_t &num_outliers) {
+  std::size_t *cnt;
 
   cnt = build_ft(quantized_data, n, num_outliers);
   *ft = cnt;
@@ -162,11 +163,13 @@ huffman_codec *build_huffman_codec(long int *quantized_data, size_t **ft,
 }
 
 void huffman_encoding(long int *quantized_data, const std::size_t n,
-                      unsigned char **out_data_hit, size_t *out_data_hit_size,
-                      unsigned char **out_data_miss, size_t *out_data_miss_size,
-                      unsigned char **out_tree, size_t *out_tree_size) {
-  size_t num_miss = 0;
-  size_t *ft = 0;
+                      unsigned char **out_data_hit,
+                      std::size_t *out_data_hit_size,
+                      unsigned char **out_data_miss,
+                      std::size_t *out_data_miss_size, unsigned char **out_tree,
+                      std::size_t *out_tree_size) {
+  std::size_t num_miss = 0;
+  std::size_t *ft = 0;
 
   huffman_codec *codec = build_huffman_codec(quantized_data, &ft, n, num_miss);
 
@@ -189,13 +192,13 @@ void huffman_encoding(long int *quantized_data, const std::size_t n,
   *out_data_hit_size = 0;
   *out_data_miss_size = 0;
 
-  size_t start_bit = 0;
+  std::size_t start_bit = 0;
   unsigned int *cur = (unsigned int *)p_hit;
-  size_t cnt_missed = 0;
+  std::size_t cnt_missed = 0;
   for (std::size_t i = 0; i < n; i++) {
     int q = quantized_data[i];
     unsigned int code;
-    size_t len;
+    std::size_t len;
 
     if (q > 0 && q < nql) {
       // for those that are within the range
@@ -220,8 +223,8 @@ void huffman_encoding(long int *quantized_data, const std::size_t n,
       // current unsigned int cannot hold the code
       // copy 32 - start_bit % 32 bits to the current int
       // and copy  the rest len - (32 - start_bit % 32) to the next int
-      size_t rshift = len - (32 - start_bit % 32);
-      size_t lshift = 32 - rshift;
+      std::size_t rshift = len - (32 - start_bit % 32);
+      std::size_t lshift = 32 - rshift;
       *(cur + start_bit / 32) = (*(cur + start_bit / 32)) | (code >> rshift);
       *(cur + start_bit / 32 + 1) =
           (*(cur + start_bit / 32 + 1)) | (code << lshift);
@@ -247,7 +250,7 @@ void huffman_encoding(long int *quantized_data, const std::size_t n,
     }
   }
 
-  size_t *cft = (size_t *)malloc(2 * nonZeros * sizeof(size_t));
+  std::size_t *cft = (std::size_t *)malloc(2 * nonZeros * sizeof(std::size_t));
   int off = 0;
   for (int i = 0; i < nql; i++) {
     if (ft[i] > 0) {
@@ -258,7 +261,7 @@ void huffman_encoding(long int *quantized_data, const std::size_t n,
   }
 
   *out_tree = (unsigned char *)cft;
-  *out_tree_size = 2 * nonZeros * sizeof(size_t);
+  *out_tree_size = 2 * nonZeros * sizeof(std::size_t);
   free(ft);
   ft = 0;
 
@@ -268,14 +271,16 @@ void huffman_encoding(long int *quantized_data, const std::size_t n,
 
 void huffman_decoding(long int *quantized_data,
                       const std::size_t quantized_data_size,
-                      unsigned char *out_data_hit, size_t out_data_hit_size,
-                      unsigned char *out_data_miss, size_t out_data_miss_size,
-                      unsigned char *out_tree, size_t out_tree_size) {
-  size_t *cft = (size_t *)out_tree;
-  int nonZeros = out_tree_size / (2 * sizeof(size_t));
-  size_t *ft = (size_t *)malloc(nql * sizeof(size_t));
+                      unsigned char *out_data_hit,
+                      std::size_t out_data_hit_size,
+                      unsigned char *out_data_miss,
+                      std::size_t out_data_miss_size, unsigned char *out_tree,
+                      std::size_t out_tree_size) {
+  std::size_t *cft = (std::size_t *)out_tree;
+  int nonZeros = out_tree_size / (2 * sizeof(std::size_t));
+  std::size_t *ft = (std::size_t *)malloc(nql * sizeof(std::size_t));
 
-  std::memset(ft, 0, nql * sizeof(size_t));
+  std::memset(ft, 0, nql * sizeof(std::size_t));
 
   for (int j = 0; j < nonZeros; j++) {
     ft[cft[2 * j]] = cft[2 * j + 1];
@@ -294,17 +299,17 @@ void huffman_decoding(long int *quantized_data,
 
   int *miss_bufp = miss_buf;
 
-  size_t start_bit = 0;
+  std::size_t start_bit = 0;
   unsigned int mask = 0x80000000;
 
   long int *q = quantized_data;
-  size_t i = 0;
-  size_t num_missed = 0;
+  std::size_t i = 0;
+  std::size_t num_missed = 0;
   while (q < (quantized_data + (quantized_data_size / sizeof(*q)))) {
     htree_node *root = phtree->top();
     assert(root);
 
-    size_t len = 0;
+    std::size_t len = 0;
     int offset = 0;
     while (root->left) {
       int flag = *(buf + start_bit / 32 + offset) & mask;

From bf1d545bfa7955297f2fc1c4c8a788464f9e7e1f Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 11 May 2022 10:35:04 -0400
Subject: [PATCH 04/58] Replace `malloc` calls with `new` expressions.

---
 src/huffman.cpp | 49 +++++++++++++++++++++++++------------------------
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/src/huffman.cpp b/src/huffman.cpp
index 5fb40c2f23..d523c4f797 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -122,8 +122,9 @@ void free_tree(my_priority_queue<htree_node> *phtree) {
 // Note this function will change the quantized data.
 std::size_t *build_ft(long int *quantized_data, const std::size_t n,
                       std::size_t &num_outliers) {
-  std::size_t *cnt = (std::size_t *)malloc(nql * sizeof(std::size_t));
-  std::memset(cnt, 0, nql * sizeof(std::size_t));
+  // The elements of the array are value-initialized (which, because they have
+  // scalar type, is zero-initialized).
+  std::size_t *const cnt = new std::size_t[nql]();
 
   for (std::size_t i = 0; i < n; i++) {
     // Convert quantization level to positive so that counting freq can be
@@ -151,8 +152,10 @@ huffman_codec *build_huffman_codec(long int *quantized_data, std::size_t **ft,
 
   my_priority_queue<htree_node> *phtree = build_tree(cnt);
 
-  huffman_codec *codec = (huffman_codec *)malloc(sizeof(huffman_codec) * nql);
-  std::memset(codec, 0, sizeof(huffman_codec) * nql);
+  // Each element of the array is value-initialized. Since `huffman_codec` has
+  // an implicitly-defined default constructor, value-initialization is zero-
+  // initialization. I am, of course, not sure about this.
+  huffman_codec *const codec = new huffman_codec[nql]();
 
   build_codec(phtree->top(), 0, 0, codec);
 
@@ -171,29 +174,30 @@ void huffman_encoding(long int *quantized_data, const std::size_t n,
   std::size_t num_miss = 0;
   std::size_t *ft = 0;
 
-  huffman_codec *codec = build_huffman_codec(quantized_data, &ft, n, num_miss);
+  huffman_codec *const codec =
+      build_huffman_codec(quantized_data, &ft, n, num_miss);
 
   assert(n >= num_miss);
 
   /* For those miss points, we still need to maintain a flag (q = 0),
    * and therefore we need to allocate space for n numbers.
    */
-  unsigned char *p_hit = (unsigned char *)malloc(n * sizeof(int));
-  std::memset(p_hit, 0, n * sizeof(int));
+  // The elements of the array are value-initialized (here, zero-initialized).
+  unsigned int *const p_hit = new unsigned int[n]();
 
   int *p_miss = 0;
   if (num_miss > 0) {
-    p_miss = (int *)malloc(num_miss * sizeof(int));
-    std::memset(p_miss, 0, num_miss * sizeof(int));
+    // The elements of the array are value-initialized (here, zero-initialized).
+    p_miss = new int[num_miss]();
   }
 
-  *out_data_hit = p_hit;
+  *out_data_hit = reinterpret_cast<unsigned char *>(p_hit);
   *out_data_miss = (unsigned char *)p_miss;
   *out_data_hit_size = 0;
   *out_data_miss_size = 0;
 
   std::size_t start_bit = 0;
-  unsigned int *cur = (unsigned int *)p_hit;
+  unsigned int *cur = p_hit;
   std::size_t cnt_missed = 0;
   for (std::size_t i = 0; i < n; i++) {
     int q = quantized_data[i];
@@ -250,7 +254,7 @@ void huffman_encoding(long int *quantized_data, const std::size_t n,
     }
   }
 
-  std::size_t *cft = (std::size_t *)malloc(2 * nonZeros * sizeof(std::size_t));
+  std::size_t *const cft = new std::size_t[2 * nonZeros];
   int off = 0;
   for (int i = 0; i < nql; i++) {
     if (ft[i] > 0) {
@@ -262,11 +266,10 @@ void huffman_encoding(long int *quantized_data, const std::size_t n,
 
   *out_tree = (unsigned char *)cft;
   *out_tree_size = 2 * nonZeros * sizeof(std::size_t);
-  free(ft);
+  delete[] ft;
   ft = 0;
 
-  free(codec);
-  codec = 0;
+  delete[] codec;
 }
 
 void huffman_decoding(long int *quantized_data,
@@ -278,9 +281,8 @@ void huffman_decoding(long int *quantized_data,
                       std::size_t out_tree_size) {
   std::size_t *cft = (std::size_t *)out_tree;
   int nonZeros = out_tree_size / (2 * sizeof(std::size_t));
-  std::size_t *ft = (std::size_t *)malloc(nql * sizeof(std::size_t));
-
-  std::memset(ft, 0, nql * sizeof(std::size_t));
+  // The elements of the array are value-initialized (here, zero-initialized).
+  std::size_t *const ft = new std::size_t[nql]();
 
   for (int j = 0; j < nonZeros; j++) {
     ft[cft[2 * j]] = cft[2 * j + 1];
@@ -292,12 +294,13 @@ void huffman_decoding(long int *quantized_data,
 
   // The out_data_miss may not be aligned. Therefore, the code
   // here makes a new buffer.
-  int *miss_buf = (int *)malloc(out_data_miss_size);
+  assert(not(out_data_miss_size % sizeof(int)));
+  int *miss_buf = new int[out_data_miss_size / sizeof(int)];
   if (out_data_miss_size) {
     std::memcpy(miss_buf, out_data_miss, out_data_miss_size);
   }
 
-  int *miss_bufp = miss_buf;
+  int *const miss_bufp = miss_buf;
 
   std::size_t start_bit = 0;
   unsigned int mask = 0x80000000;
@@ -354,12 +357,10 @@ void huffman_decoding(long int *quantized_data,
   // is OK and expected.
   (void)out_data_hit_size;
 
-  free(miss_bufp);
-  miss_bufp = 0;
+  delete[] miss_bufp;
   free_tree(phtree);
   phtree = 0;
-  free(ft);
-  ft = 0;
+  delete[] ft;
 }
 
 } // namespace mgard

From 444541d5ba14cb2c3a8c8fc513f3214f58bc8c38 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 11 May 2022 10:49:48 -0400
Subject: [PATCH 05/58] Replace `new_htree_node` with a constructor.

---
 src/huffman.cpp | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/src/huffman.cpp b/src/huffman.cpp
index d523c4f797..69f0f73ef0 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -12,6 +12,10 @@ namespace mgard {
 const int nql = 32768 * 4;
 
 struct htree_node {
+  //! Constructor.
+  htree_node(const int q, const std::size_t cnt)
+      : q(q), cnt(cnt), code(0), len(0), left(nullptr), right(nullptr) {}
+
   int q;
   std::size_t cnt;
   unsigned int code;
@@ -26,18 +30,6 @@ struct huffman_codec {
   std::size_t len;
 };
 
-htree_node *new_htree_node(int q, std::size_t cnt) {
-  htree_node *new_node = new htree_node;
-  new_node->q = q;
-  new_node->cnt = cnt;
-  new_node->code = 0;
-  new_node->len = 0;
-  new_node->left = 0;
-  new_node->right = 0;
-
-  return new_node;
-}
-
 struct LessThanByCnt {
   bool operator()(const htree_node *lhs, const htree_node *rhs) const {
     return lhs->cnt > rhs->cnt;
@@ -75,7 +67,7 @@ my_priority_queue<htree_node> *build_tree(std::size_t *cnt) {
 #if 1
   for (int i = 0; i < nql; i++) {
     if (cnt[i] != 0) {
-      htree_node *new_node = new_htree_node(i, cnt[i]);
+      htree_node *const new_node = new htree_node(i, cnt[i]);
       phtree->push(new_node);
     }
   }
@@ -86,7 +78,8 @@ my_priority_queue<htree_node> *build_tree(std::size_t *cnt) {
     htree_node *top_node2 = phtree->top();
     phtree->pop();
 
-    htree_node *new_node = new_htree_node(-1, top_node1->cnt + top_node2->cnt);
+    htree_node *const new_node =
+        new htree_node(-1, top_node1->cnt + top_node2->cnt);
     new_node->left = top_node1;
     new_node->right = top_node2;
     phtree->push(new_node);

From 362a5d519d14d0cfb99ba4f46cf9a3550c6c0a8e Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 11 May 2022 11:15:09 -0400
Subject: [PATCH 06/58] Add `const` to Huffman tree variable types.

---
 include/huffman.hpp | 12 ++++----
 src/huffman.cpp     | 72 ++++++++++++++++++++++-----------------------
 2 files changed, 41 insertions(+), 43 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index 67bd1bf2fd..9f66780e0d 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -5,16 +5,16 @@
 
 namespace mgard {
 
-void huffman_encoding(long int *quantized_data, const std::size_t n,
+void huffman_encoding(long int *const quantized_data, const std::size_t n,
                       unsigned char **out_data_hit, size_t *out_data_hit_size,
                       unsigned char **out_data_miss, size_t *out_data_miss_size,
                       unsigned char **out_tree, size_t *out_tree_size);
 
-void huffman_decoding(long int *quantized_data,
-                      const std::size_t quantized_data_size,
-                      unsigned char *out_data_hit, size_t out_data_hit_size,
-                      unsigned char *out_data_miss, size_t out_data_miss_size,
-                      unsigned char *out_tree, size_t out_tree_size);
+void huffman_decoding(
+    long int *const quantized_data, const std::size_t quantized_data_size,
+    unsigned char const *const out_data_hit, const size_t out_data_hit_size,
+    unsigned char const *const out_data_miss, const size_t out_data_miss_size,
+    unsigned char const *const out_tree, const size_t out_tree_size);
 
 } // namespace mgard
 
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 69f0f73ef0..85d3b97a8d 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -31,7 +31,8 @@ struct huffman_codec {
 };
 
 struct LessThanByCnt {
-  bool operator()(const htree_node *lhs, const htree_node *rhs) const {
+  bool operator()(htree_node const *const lhs,
+                  htree_node const *const rhs) const {
     return lhs->cnt > rhs->cnt;
   }
 };
@@ -40,8 +41,8 @@ template <class T>
 using my_priority_queue =
     std::priority_queue<T *, std::vector<T *>, LessThanByCnt>;
 
-void build_codec(htree_node *root, unsigned int code, std::size_t len,
-                 huffman_codec *codec) {
+void build_codec(htree_node *const root, const unsigned int code,
+                 const std::size_t len, huffman_codec *const codec) {
 
   root->len = len;
   root->code = code;
@@ -61,9 +62,9 @@ void build_codec(htree_node *root, unsigned int code, std::size_t len,
   }
 }
 
-my_priority_queue<htree_node> *build_tree(std::size_t *cnt) {
-  my_priority_queue<htree_node> *phtree;
-  phtree = new my_priority_queue<htree_node>;
+my_priority_queue<htree_node> *build_tree(std::size_t const *const cnt) {
+  my_priority_queue<htree_node> *const phtree =
+      new my_priority_queue<htree_node>;
 #if 1
   for (int i = 0; i < nql; i++) {
     if (cnt[i] != 0) {
@@ -73,9 +74,9 @@ my_priority_queue<htree_node> *build_tree(std::size_t *cnt) {
   }
 
   while (phtree->size() > 1) {
-    htree_node *top_node1 = phtree->top();
+    htree_node *const top_node1 = phtree->top();
     phtree->pop();
-    htree_node *top_node2 = phtree->top();
+    htree_node *const top_node2 = phtree->top();
     phtree->pop();
 
     htree_node *const new_node =
@@ -88,7 +89,7 @@ my_priority_queue<htree_node> *build_tree(std::size_t *cnt) {
   return phtree;
 }
 
-void free_htree_node(htree_node *node) {
+void free_htree_node(htree_node *const node) {
   if (node->left) {
     free_htree_node(node->left);
     node->left = 0;
@@ -102,7 +103,7 @@ void free_htree_node(htree_node *node) {
   delete node;
 }
 
-void free_tree(my_priority_queue<htree_node> *phtree) {
+void free_tree(my_priority_queue<htree_node> *const phtree) {
   if (phtree) {
     free_htree_node(phtree->top());
 
@@ -113,7 +114,7 @@ void free_tree(my_priority_queue<htree_node> *phtree) {
 }
 
 // Note this function will change the quantized data.
-std::size_t *build_ft(long int *quantized_data, const std::size_t n,
+std::size_t *build_ft(long int *const quantized_data, const std::size_t n,
                       std::size_t &num_outliers) {
   // The elements of the array are value-initialized (which, because they have
   // scalar type, is zero-initialized).
@@ -135,15 +136,13 @@ std::size_t *build_ft(long int *quantized_data, const std::size_t n,
   return cnt;
 }
 
-huffman_codec *build_huffman_codec(long int *quantized_data, std::size_t **ft,
-                                   const std::size_t n,
+huffman_codec *build_huffman_codec(long int *const quantized_data,
+                                   std::size_t **ft, const std::size_t n,
                                    std::size_t &num_outliers) {
-  std::size_t *cnt;
-
-  cnt = build_ft(quantized_data, n, num_outliers);
+  std::size_t *const cnt = build_ft(quantized_data, n, num_outliers);
   *ft = cnt;
 
-  my_priority_queue<htree_node> *phtree = build_tree(cnt);
+  my_priority_queue<htree_node> *const phtree = build_tree(cnt);
 
   // Each element of the array is value-initialized. Since `huffman_codec` has
   // an implicitly-defined default constructor, value-initialization is zero-
@@ -153,12 +152,11 @@ huffman_codec *build_huffman_codec(long int *quantized_data, std::size_t **ft,
   build_codec(phtree->top(), 0, 0, codec);
 
   free_tree(phtree);
-  phtree = 0;
 
   return codec;
 }
 
-void huffman_encoding(long int *quantized_data, const std::size_t n,
+void huffman_encoding(long int *const quantized_data, const std::size_t n,
                       unsigned char **out_data_hit,
                       std::size_t *out_data_hit_size,
                       unsigned char **out_data_miss,
@@ -193,7 +191,7 @@ void huffman_encoding(long int *quantized_data, const std::size_t n,
   unsigned int *cur = p_hit;
   std::size_t cnt_missed = 0;
   for (std::size_t i = 0; i < n; i++) {
-    int q = quantized_data[i];
+    const int q = quantized_data[i];
     unsigned int code;
     std::size_t len;
 
@@ -265,15 +263,16 @@ void huffman_encoding(long int *quantized_data, const std::size_t n,
   delete[] codec;
 }
 
-void huffman_decoding(long int *quantized_data,
+void huffman_decoding(long int *const quantized_data,
                       const std::size_t quantized_data_size,
-                      unsigned char *out_data_hit,
-                      std::size_t out_data_hit_size,
-                      unsigned char *out_data_miss,
-                      std::size_t out_data_miss_size, unsigned char *out_tree,
-                      std::size_t out_tree_size) {
-  std::size_t *cft = (std::size_t *)out_tree;
-  int nonZeros = out_tree_size / (2 * sizeof(std::size_t));
+                      unsigned char const *const out_data_hit,
+                      const std::size_t out_data_hit_size,
+                      unsigned char const *const out_data_miss,
+                      const std::size_t out_data_miss_size,
+                      unsigned char const *const out_tree,
+                      const std::size_t out_tree_size) {
+  std::size_t const *const cft = (std::size_t const *)out_tree;
+  const int nonZeros = out_tree_size / (2 * sizeof(std::size_t));
   // The elements of the array are value-initialized (here, zero-initialized).
   std::size_t *const ft = new std::size_t[nql]();
 
@@ -281,19 +280,19 @@ void huffman_decoding(long int *quantized_data,
     ft[cft[2 * j]] = cft[2 * j + 1];
   }
 
-  my_priority_queue<htree_node> *phtree = build_tree(ft);
+  my_priority_queue<htree_node> *const phtree = build_tree(ft);
 
-  unsigned int *buf = (unsigned int *)out_data_hit;
+  unsigned int const *const buf = (unsigned int const *)out_data_hit;
 
   // The out_data_miss may not be aligned. Therefore, the code
   // here makes a new buffer.
   assert(not(out_data_miss_size % sizeof(int)));
-  int *miss_buf = new int[out_data_miss_size / sizeof(int)];
+  int *const miss_buf = new int[out_data_miss_size / sizeof(int)];
   if (out_data_miss_size) {
     std::memcpy(miss_buf, out_data_miss, out_data_miss_size);
   }
 
-  int *const miss_bufp = miss_buf;
+  int const *miss_bufp = miss_buf;
 
   std::size_t start_bit = 0;
   unsigned int mask = 0x80000000;
@@ -302,7 +301,7 @@ void huffman_decoding(long int *quantized_data,
   std::size_t i = 0;
   std::size_t num_missed = 0;
   while (q < (quantized_data + (quantized_data_size / sizeof(*q)))) {
-    htree_node *root = phtree->top();
+    htree_node const *root = phtree->top();
     assert(root);
 
     std::size_t len = 0;
@@ -330,9 +329,9 @@ void huffman_decoding(long int *quantized_data,
       *q = root->q - nql / 2;
 
     } else {
-      *q = *miss_buf - nql / 2;
+      *q = *miss_bufp - nql / 2;
 
-      miss_buf++;
+      miss_bufp++;
       num_missed++;
     }
 
@@ -350,9 +349,8 @@ void huffman_decoding(long int *quantized_data,
   // is OK and expected.
   (void)out_data_hit_size;
 
-  delete[] miss_bufp;
+  delete[] miss_buf;
   free_tree(phtree);
-  phtree = 0;
   delete[] ft;
 }
 

From a8d3119671addff6b445c2153381d8826cfc6531 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 11 May 2022 11:18:03 -0400
Subject: [PATCH 07/58] Use `nullptr` instead of `0` for pointer values.

---
 src/huffman.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/huffman.cpp b/src/huffman.cpp
index 85d3b97a8d..981918d376 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -92,12 +92,12 @@ my_priority_queue<htree_node> *build_tree(std::size_t const *const cnt) {
 void free_htree_node(htree_node *const node) {
   if (node->left) {
     free_htree_node(node->left);
-    node->left = 0;
+    node->left = nullptr;
   }
 
   if (node->right) {
     free_htree_node(node->right);
-    node->right = 0;
+    node->right = nullptr;
   }
 
   delete node;
@@ -163,7 +163,7 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
                       std::size_t *out_data_miss_size, unsigned char **out_tree,
                       std::size_t *out_tree_size) {
   std::size_t num_miss = 0;
-  std::size_t *ft = 0;
+  std::size_t *ft = nullptr;
 
   huffman_codec *const codec =
       build_huffman_codec(quantized_data, &ft, n, num_miss);
@@ -176,7 +176,7 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
   // The elements of the array are value-initialized (here, zero-initialized).
   unsigned int *const p_hit = new unsigned int[n]();
 
-  int *p_miss = 0;
+  int *p_miss = nullptr;
   if (num_miss > 0) {
     // The elements of the array are value-initialized (here, zero-initialized).
     p_miss = new int[num_miss]();
@@ -258,7 +258,7 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
   *out_tree = (unsigned char *)cft;
   *out_tree_size = 2 * nonZeros * sizeof(std::size_t);
   delete[] ft;
-  ft = 0;
+  ft = nullptr;
 
   delete[] codec;
 }

From 4125d931df455e4c4d2d477430532daa5d374287 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 11 May 2022 11:58:25 -0400
Subject: [PATCH 08/58] Pass `huffman_encoding` parameters by reference.

---
 include/huffman.hpp |  6 +++---
 src/compressors.cpp |  5 ++---
 src/huffman.cpp     | 32 ++++++++++++++++----------------
 3 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index 9f66780e0d..5705c17996 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -6,9 +6,9 @@
 namespace mgard {
 
 void huffman_encoding(long int *const quantized_data, const std::size_t n,
-                      unsigned char **out_data_hit, size_t *out_data_hit_size,
-                      unsigned char **out_data_miss, size_t *out_data_miss_size,
-                      unsigned char **out_tree, size_t *out_tree_size);
+                      unsigned char *&out_data_hit, size_t &out_data_hit_size,
+                      unsigned char *&out_data_miss, size_t &out_data_miss_size,
+                      unsigned char *&out_tree, size_t &out_tree_size);
 
 void huffman_decoding(
     long int *const quantized_data, const std::size_t quantized_data_size,
diff --git a/src/compressors.cpp b/src/compressors.cpp
index 34c3401e14..ec5c2323a1 100644
--- a/src/compressors.cpp
+++ b/src/compressors.cpp
@@ -79,9 +79,8 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
 #ifdef MGARD_TIMING
   auto huff_time1 = std::chrono::high_resolution_clock::now();
 #endif
-  huffman_encoding(src, srcLen, &out_data_hit, &out_data_hit_size,
-                   &out_data_miss, &out_data_miss_size, &out_tree,
-                   &out_tree_size);
+  huffman_encoding(src, srcLen, out_data_hit, out_data_hit_size, out_data_miss,
+                   out_data_miss_size, out_tree, out_tree_size);
 #ifdef MGARD_TIMING
   auto huff_time2 = std::chrono::high_resolution_clock::now();
   auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 981918d376..6cbd4fd941 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -137,10 +137,10 @@ std::size_t *build_ft(long int *const quantized_data, const std::size_t n,
 }
 
 huffman_codec *build_huffman_codec(long int *const quantized_data,
-                                   std::size_t **ft, const std::size_t n,
+                                   std::size_t *&ft, const std::size_t n,
                                    std::size_t &num_outliers) {
   std::size_t *const cnt = build_ft(quantized_data, n, num_outliers);
-  *ft = cnt;
+  ft = cnt;
 
   my_priority_queue<htree_node> *const phtree = build_tree(cnt);
 
@@ -157,16 +157,16 @@ huffman_codec *build_huffman_codec(long int *const quantized_data,
 }
 
 void huffman_encoding(long int *const quantized_data, const std::size_t n,
-                      unsigned char **out_data_hit,
-                      std::size_t *out_data_hit_size,
-                      unsigned char **out_data_miss,
-                      std::size_t *out_data_miss_size, unsigned char **out_tree,
-                      std::size_t *out_tree_size) {
+                      unsigned char *&out_data_hit,
+                      std::size_t &out_data_hit_size,
+                      unsigned char *&out_data_miss,
+                      std::size_t &out_data_miss_size, unsigned char *&out_tree,
+                      std::size_t &out_tree_size) {
   std::size_t num_miss = 0;
   std::size_t *ft = nullptr;
 
   huffman_codec *const codec =
-      build_huffman_codec(quantized_data, &ft, n, num_miss);
+      build_huffman_codec(quantized_data, ft, n, num_miss);
 
   assert(n >= num_miss);
 
@@ -182,10 +182,10 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
     p_miss = new int[num_miss]();
   }
 
-  *out_data_hit = reinterpret_cast<unsigned char *>(p_hit);
-  *out_data_miss = (unsigned char *)p_miss;
-  *out_data_hit_size = 0;
-  *out_data_miss_size = 0;
+  out_data_hit = reinterpret_cast<unsigned char *>(p_hit);
+  out_data_miss = (unsigned char *)p_miss;
+  out_data_hit_size = 0;
+  out_data_miss_size = 0;
 
   std::size_t start_bit = 0;
   unsigned int *cur = p_hit;
@@ -234,8 +234,8 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
   }
 
   // Note: hit size is in bits, while miss size is in bytes.
-  *out_data_hit_size = start_bit;
-  *out_data_miss_size = num_miss * sizeof(int);
+  out_data_hit_size = start_bit;
+  out_data_miss_size = num_miss * sizeof(int);
 
   // write frequency table to buffer
   int nonZeros = 0;
@@ -255,8 +255,8 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
     }
   }
 
-  *out_tree = (unsigned char *)cft;
-  *out_tree_size = 2 * nonZeros * sizeof(std::size_t);
+  out_tree = (unsigned char *)cft;
+  out_tree_size = 2 * nonZeros * sizeof(std::size_t);
   delete[] ft;
   ft = nullptr;
 

From 846e8c73847707daabb63d0d222b27fafe22e4be Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 11 May 2022 12:31:57 -0400
Subject: [PATCH 09/58] Use `std::vector` for Huffman codec array.

---
 src/huffman.cpp | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/src/huffman.cpp b/src/huffman.cpp
index 6cbd4fd941..8c1a7e082a 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -4,6 +4,7 @@
 #include <cstring>
 
 #include <queue>
+#include <vector>
 
 #include "huffman.hpp"
 
@@ -41,9 +42,8 @@ template <class T>
 using my_priority_queue =
     std::priority_queue<T *, std::vector<T *>, LessThanByCnt>;
 
-void build_codec(htree_node *const root, const unsigned int code,
-                 const std::size_t len, huffman_codec *const codec) {
-
+void initialize_codec(std::vector<huffman_codec> &codec, htree_node *const root,
+                      const unsigned int code, const std::size_t len) {
   root->len = len;
   root->code = code;
 
@@ -54,11 +54,11 @@ void build_codec(htree_node *const root, const unsigned int code,
   }
 
   if (root->left) {
-    build_codec(root->left, code << 1, len + 1, codec);
+    initialize_codec(codec, root->left, code << 1, len + 1);
   }
 
   if (root->right) {
-    build_codec(root->right, code << 1 | 0x1, len + 1, codec);
+    initialize_codec(codec, root->right, code << 1 | 0x1, len + 1);
   }
 }
 
@@ -136,20 +136,20 @@ std::size_t *build_ft(long int *const quantized_data, const std::size_t n,
   return cnt;
 }
 
-huffman_codec *build_huffman_codec(long int *const quantized_data,
-                                   std::size_t *&ft, const std::size_t n,
-                                   std::size_t &num_outliers) {
+std::vector<huffman_codec> build_huffman_codec(long int *const quantized_data,
+                                               std::size_t *&ft,
+                                               const std::size_t n,
+                                               std::size_t &num_outliers) {
   std::size_t *const cnt = build_ft(quantized_data, n, num_outliers);
   ft = cnt;
 
   my_priority_queue<htree_node> *const phtree = build_tree(cnt);
 
-  // Each element of the array is value-initialized. Since `huffman_codec` has
+  // Each element of the vector is value-initialized. Since `huffman_codec` has
   // an implicitly-defined default constructor, value-initialization is zero-
-  // initialization. I am, of course, not sure about this.
-  huffman_codec *const codec = new huffman_codec[nql]();
-
-  build_codec(phtree->top(), 0, 0, codec);
+  // initialization.
+  std::vector<huffman_codec> codec(nql);
+  initialize_codec(codec, phtree->top(), 0, 0);
 
   free_tree(phtree);
 
@@ -165,7 +165,7 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
   std::size_t num_miss = 0;
   std::size_t *ft = nullptr;
 
-  huffman_codec *const codec =
+  const std::vector<huffman_codec> codec =
       build_huffman_codec(quantized_data, ft, n, num_miss);
 
   assert(n >= num_miss);
@@ -259,8 +259,6 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
   out_tree_size = 2 * nonZeros * sizeof(std::size_t);
   delete[] ft;
   ft = nullptr;
-
-  delete[] codec;
 }
 
 void huffman_decoding(long int *const quantized_data,

From 9ff8b96b6125f69fc29ccac7d6068239793a084c Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 11 May 2022 14:26:30 -0400
Subject: [PATCH 10/58] Gather codecs and frequency table into struct.

---
 src/huffman.cpp | 97 +++++++++++++++++++++++++------------------------
 1 file changed, 49 insertions(+), 48 deletions(-)

diff --git a/src/huffman.cpp b/src/huffman.cpp
index 8c1a7e082a..44f82bc341 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -3,6 +3,11 @@
 #include <cstdlib>
 #include <cstring>
 
+#ifndef NDEBUG
+#include <algorithm>
+#endif
+
+#include <array>
 #include <queue>
 #include <vector>
 
@@ -31,6 +36,13 @@ struct huffman_codec {
   std::size_t len;
 };
 
+template <std::size_t NQL> struct HuffmanCodec {
+  // The arrays are value-initialized, which leads to each of their elements
+  // being value-initialized (ultimately zero-initialized).
+  std::array<huffman_codec, NQL> codec{};
+  std::array<std::size_t, NQL> frequency_table{};
+};
+
 struct LessThanByCnt {
   bool operator()(htree_node const *const lhs,
                   htree_node const *const rhs) const {
@@ -42,15 +54,16 @@ template <class T>
 using my_priority_queue =
     std::priority_queue<T *, std::vector<T *>, LessThanByCnt>;
 
-void initialize_codec(std::vector<huffman_codec> &codec, htree_node *const root,
+template <std::size_t NQL>
+void initialize_codec(HuffmanCodec<NQL> &codec, htree_node *const root,
                       const unsigned int code, const std::size_t len) {
   root->len = len;
   root->code = code;
 
   if (!root->left && !root->right) {
-    codec[root->q].q = root->q;
-    codec[root->q].code = code;
-    codec[root->q].len = len;
+    codec.codec[root->q].q = root->q;
+    codec.codec[root->q].code = code;
+    codec.codec[root->q].len = len;
   }
 
   if (root->left) {
@@ -113,42 +126,35 @@ void free_tree(my_priority_queue<htree_node> *const phtree) {
   }
 }
 
-// Note this function will change the quantized data.
-std::size_t *build_ft(long int *const quantized_data, const std::size_t n,
-                      std::size_t &num_outliers) {
-  // The elements of the array are value-initialized (which, because they have
-  // scalar type, is zero-initialized).
-  std::size_t *const cnt = new std::size_t[nql]();
+// Note: this function will change the quantized data.
+template <std::size_t NQL>
+void initialize_frequency_table(HuffmanCodec<NQL> &codec,
+                                long int *const quantized_data,
+                                const std::size_t n) {
+  assert(*std::max_element(codec.frequency_table.begin(),
+                           codec.frequency_table.end()) == 0);
 
   for (std::size_t i = 0; i < n; i++) {
     // Convert quantization level to positive so that counting freq can be
     // easily done. Level 0 is reserved a out-of-range flag.
-    quantized_data[i] = quantized_data[i] + nql / 2;
-    if (quantized_data[i] > 0 && quantized_data[i] < nql) {
-      cnt[quantized_data[i]]++;
-    } else {
-      cnt[0]++;
-    }
+    quantized_data[i] = quantized_data[i] + NQL / 2;
+    ++codec.frequency_table[quantized_data[i] > 0 &&
+                                    quantized_data[i] <
+                                        static_cast<long int>(NQL)
+                                ? quantized_data[i]
+                                : 0];
   }
-
-  num_outliers = cnt[0];
-
-  return cnt;
 }
 
-std::vector<huffman_codec> build_huffman_codec(long int *const quantized_data,
-                                               std::size_t *&ft,
-                                               const std::size_t n,
-                                               std::size_t &num_outliers) {
-  std::size_t *const cnt = build_ft(quantized_data, n, num_outliers);
-  ft = cnt;
+template <std::size_t N>
+HuffmanCodec<N> build_huffman_codec(long int *const quantized_data,
+                                    const std::size_t n) {
+  HuffmanCodec<N> codec;
+  initialize_frequency_table(codec, quantized_data, n);
 
-  my_priority_queue<htree_node> *const phtree = build_tree(cnt);
+  my_priority_queue<htree_node> *const phtree =
+      build_tree(codec.frequency_table.data());
 
-  // Each element of the vector is value-initialized. Since `huffman_codec` has
-  // an implicitly-defined default constructor, value-initialization is zero-
-  // initialization.
-  std::vector<huffman_codec> codec(nql);
   initialize_codec(codec, phtree->top(), 0, 0);
 
   free_tree(phtree);
@@ -162,11 +168,8 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
                       unsigned char *&out_data_miss,
                       std::size_t &out_data_miss_size, unsigned char *&out_tree,
                       std::size_t &out_tree_size) {
-  std::size_t num_miss = 0;
-  std::size_t *ft = nullptr;
-
-  const std::vector<huffman_codec> codec =
-      build_huffman_codec(quantized_data, ft, n, num_miss);
+  const HuffmanCodec<nql> codec = build_huffman_codec<nql>(quantized_data, n);
+  const std::size_t num_miss = codec.frequency_table[0];
 
   assert(n >= num_miss);
 
@@ -197,12 +200,12 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
 
     if (q > 0 && q < nql) {
       // for those that are within the range
-      code = codec[q].code;
-      len = codec[q].len;
+      code = codec.codec[q].code;
+      len = codec.codec[q].len;
     } else {
       // for those that are out of the range, q is set to 0
-      code = codec[0].code;
-      len = codec[0].len;
+      code = codec.codec[0].code;
+      len = codec.codec[0].len;
 
       *p_miss = q;
       p_miss++;
@@ -218,8 +221,8 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
       // current unsigned int cannot hold the code
       // copy 32 - start_bit % 32 bits to the current int
       // and copy  the rest len - (32 - start_bit % 32) to the next int
-      std::size_t rshift = len - (32 - start_bit % 32);
-      std::size_t lshift = 32 - rshift;
+      const std::size_t rshift = len - (32 - start_bit % 32);
+      const std::size_t lshift = 32 - rshift;
       *(cur + start_bit / 32) = (*(cur + start_bit / 32)) | (code >> rshift);
       *(cur + start_bit / 32 + 1) =
           (*(cur + start_bit / 32 + 1)) | (code << lshift);
@@ -240,7 +243,7 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
   // write frequency table to buffer
   int nonZeros = 0;
   for (int i = 0; i < nql; i++) {
-    if (ft[i] > 0) {
+    if (codec.frequency_table[i] > 0) {
       nonZeros++;
     }
   }
@@ -248,17 +251,15 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
   std::size_t *const cft = new std::size_t[2 * nonZeros];
   int off = 0;
   for (int i = 0; i < nql; i++) {
-    if (ft[i] > 0) {
+    if (codec.frequency_table[i] > 0) {
       cft[2 * off] = i;
-      cft[2 * off + 1] = ft[i];
+      cft[2 * off + 1] = codec.frequency_table[i];
       off++;
     }
   }
 
   out_tree = (unsigned char *)cft;
   out_tree_size = 2 * nonZeros * sizeof(std::size_t);
-  delete[] ft;
-  ft = nullptr;
 }
 
 void huffman_decoding(long int *const quantized_data,
@@ -279,6 +280,7 @@ void huffman_decoding(long int *const quantized_data,
   }
 
   my_priority_queue<htree_node> *const phtree = build_tree(ft);
+  delete[] ft;
 
   unsigned int const *const buf = (unsigned int const *)out_data_hit;
 
@@ -349,7 +351,6 @@ void huffman_decoding(long int *const quantized_data,
 
   delete[] miss_buf;
   free_tree(phtree);
-  delete[] ft;
 }
 
 } // namespace mgard

From 6a249be7e191ac23340ce2d217f52cdf26a17a1b Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Mon, 16 May 2022 12:01:20 -0400
Subject: [PATCH 11/58] Add `Bits` to allow iteration over bits of array.

---
 CMakeLists.txt               |  1 +
 include/utilities.hpp        | 83 ++++++++++++++++++++++++++++++++++++
 src/utilities.cpp            | 54 +++++++++++++++++++++++
 tests/src/test_utilities.cpp | 58 +++++++++++++++++++++++++
 4 files changed, 196 insertions(+)
 create mode 100644 src/utilities.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 21d3e50e21..9a2902e6db 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -201,6 +201,7 @@ set(
   MGARD_LIBRARY_CPP
         src/compress.cpp
         src/compress_internal.cpp
+  src/utilities.cpp
   src/huffman.cpp
   src/compressors.cpp
   src/format.cpp
diff --git a/include/utilities.hpp b/include/utilities.hpp
index 626bc6f235..05b625a950 100644
--- a/include/utilities.hpp
+++ b/include/utilities.hpp
@@ -449,6 +449,89 @@ template <typename T> struct MemoryBuffer {
   std::size_t size;
 };
 
+//! Range allowing iteration over the bits in an array.
+//!
+//! Iterating over this object yields each byte's bits from most to least
+//! significant.
+class Bits {
+public:
+  //! Constructor.
+  //!
+  //!\param begin Pointer to the beginning of the array to be iterated over.
+  //!\param end Pointer to the end of the array to be iterated over.
+  Bits(unsigned char const *const begin, unsigned char const *const end);
+
+  //! Equality comparison.
+  bool operator==(const Bits &other) const;
+
+  //! Inequality comparison.
+  bool operator!=(const Bits &other) const;
+
+  // Forward declaration.
+  class iterator;
+
+  //! Return an iterator to the beginning of the bit range.
+  iterator begin() const;
+
+  //! Return an iterator to the end of the bit range.
+  iterator end() const;
+
+private:
+  //! Pointer to the beginning of the array to be iterated over.
+  unsigned char const *begin_;
+
+  //! Pointer to the beginning of the array to be iterated over.
+  unsigned char const *end_;
+};
+
+//! Iterator over a bit range.
+class Bits::iterator {
+public:
+  //! Category of the iterator.
+  using iterator_category = std::forward_iterator_tag;
+  //! Type iterated over.
+  using value_type = bool;
+  //! Type for distance between iterators.
+  using difference_type = std::ptrdiff_t;
+  //! Pointer to `value_type`.
+  using pointer = value_type *;
+  //! Type returned by the dereference operator.
+  using reference = value_type;
+
+  //! Constructor.
+  //!
+  //!\param bits Associated bit range.
+  //!\param p Position in the array being iterated over.
+  //!\param offset Offset within the current byte.
+  iterator(const Bits &bits, unsigned char const *const p,
+           const unsigned char offset);
+
+  //! Equality comparison.
+  bool operator==(const iterator &other) const;
+
+  //! Inequality comparison.
+  bool operator!=(const iterator &other) const;
+
+  //! Preincrement.
+  iterator &operator++();
+
+  //! Postincrement.
+  iterator operator++(int);
+
+  //! Dereference.
+  reference operator*() const;
+
+private:
+  //! Associated bit range.
+  const Bits &iterable;
+
+  //! Position in the array being iterated over.
+  unsigned char const *p;
+
+  //! Offset within the current byte.
+  unsigned char offset;
+};
+
 } // namespace mgard
 
 #include "utilities.tpp"
diff --git a/src/utilities.cpp b/src/utilities.cpp
new file mode 100644
index 0000000000..4c3aec863a
--- /dev/null
+++ b/src/utilities.cpp
@@ -0,0 +1,54 @@
+#include "utilities.hpp"
+
+#include <climits>
+
+#include <stdexcept>
+
+namespace mgard {
+
+Bits::Bits(unsigned char const *const begin, unsigned char const *const end)
+    : begin_(begin), end_(end) {}
+
+bool Bits::operator==(const Bits &other) const {
+  return begin_ == other.begin_ and end_ == other.end_;
+}
+
+bool Bits::operator!=(const Bits &other) const { return !operator==(other); }
+
+Bits::iterator Bits::begin() const { return {*this, begin_, 0}; }
+
+Bits::iterator Bits::end() const { return {*this, end_, 0}; }
+
+Bits::iterator::iterator(const Bits &iterable, unsigned char const *const p,
+                         const unsigned char offset)
+    : iterable(iterable), p(p), offset(offset) {}
+
+bool Bits::iterator::operator==(const Bits::iterator &other) const {
+  return offset == other.offset and p == other.p and iterable == other.iterable;
+}
+
+bool Bits::iterator::operator!=(const Bits::iterator &other) const {
+  return !operator==(other);
+}
+
+Bits::iterator &Bits::iterator::operator++() {
+  ++offset;
+  if (offset == CHAR_BIT) {
+    ++p;
+    offset = 0;
+  }
+  return *this;
+}
+
+Bits::iterator Bits::iterator::operator++(int) {
+  const iterator tmp = *this;
+  operator++();
+  return tmp;
+}
+
+Bits::iterator::reference Bits::iterator::operator*() const {
+  // Operator precedence: dereference, then left shift, then bitwise AND.
+  return *p << offset & 0x80;
+}
+
+} // namespace mgard
diff --git a/tests/src/test_utilities.cpp b/tests/src/test_utilities.cpp
index 3d665e331e..31813102fd 100644
--- a/tests/src/test_utilities.cpp
+++ b/tests/src/test_utilities.cpp
@@ -171,3 +171,61 @@ TEST_CASE("CartesianProduct predecessors and successors", "[utilities]") {
 
   REQUIRE(tracker);
 }
+
+namespace {
+
+void test_bit_equality(const mgard::Bits &bits,
+                       const std::vector<bool> &expected) {
+  TrialTracker tracker;
+  std::vector<bool>::const_iterator p = expected.begin();
+  for (const bool b : bits) {
+    tracker += b == *p++;
+  }
+  REQUIRE(tracker);
+}
+
+} // namespace
+
+TEST_CASE("Bits iteration", "[utilities]") {
+  SECTION("zero end offsets") {
+    {
+      unsigned char const a[1]{0x3d};
+      const mgard::Bits bits(a, a + 1);
+      const std::vector<bool> expected{// `3`.
+                                       false, false, true, true,
+                                       // `d`.
+                                       true, true, false, true};
+      test_bit_equality(bits, expected);
+    }
+    {
+      unsigned char const a[2]{0xe6, 0x0a};
+      const mgard::Bits bits(a, a + 2);
+      const std::vector<bool> expected{// `e`.
+                                       true, true, true, false,
+                                       // `6`.
+                                       false, true, true, false,
+                                       // `0`.
+                                       false, false, false, false,
+                                       // `a`.
+                                       true, false, true, false};
+      test_bit_equality(bits, expected);
+    }
+    {
+      unsigned char const a[3]{0x12, 0x0c, 0xff};
+      const mgard::Bits bits(a, a + 3);
+      const std::vector<bool> expected{// `1`.
+                                       false, false, false, true,
+                                       // `2`.
+                                       false, false, true, false,
+                                       // `0`.
+                                       false, false, false, false,
+                                       // `c`.
+                                       true, true, false, false,
+                                       // `f`.
+                                       true, true, true, true,
+                                       // `f`.
+                                       true, true, true, true};
+      test_bit_equality(bits, expected);
+    }
+  }
+}

From 24f305264dec2bf474e73018f68a99a564aeb630 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 18 May 2022 16:42:04 -0400
Subject: [PATCH 12/58] Allow nonzero end bit offsets in `Bits`.

---
 include/utilities.hpp        | 13 +++++++++++++
 src/utilities.cpp            | 16 +++++++++++++---
 tests/src/test_utilities.cpp | 24 ++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/include/utilities.hpp b/include/utilities.hpp
index 05b625a950..9f514ef472 100644
--- a/include/utilities.hpp
+++ b/include/utilities.hpp
@@ -461,6 +461,16 @@ class Bits {
   //!\param end Pointer to the end of the array to be iterated over.
   Bits(unsigned char const *const begin, unsigned char const *const end);
 
+  //! Constructor.
+  //!
+  //!\overload
+  //!
+  //!\param begin Pointer to the beginning of the array to be iterated over.
+  //!\param end Pointer to the end of the array to be iterated over.
+  //!\param offset_end Offset for end iterator.
+  Bits(unsigned char const *const begin, unsigned char const *const end,
+       const unsigned char offset_end);
+
   //! Equality comparison.
   bool operator==(const Bits &other) const;
 
@@ -482,6 +492,9 @@ class Bits {
 
   //! Pointer to the beginning of the array to be iterated over.
   unsigned char const *end_;
+
+  //! Offset for end iterator.
+  unsigned char offset_end;
 };
 
 //! Iterator over a bit range.
diff --git a/src/utilities.cpp b/src/utilities.cpp
index 4c3aec863a..1c5afeb3d0 100644
--- a/src/utilities.cpp
+++ b/src/utilities.cpp
@@ -6,18 +6,28 @@
 
 namespace mgard {
 
+Bits::Bits(unsigned char const *const begin, unsigned char const *const end,
+           const unsigned char offset_end)
+    : begin_(begin), end_(end), offset_end(offset_end) {
+  if (offset_end >= CHAR_BIT) {
+    throw std::invalid_argument(
+        "offset must be smaller than number of bits in byte");
+  }
+}
+
 Bits::Bits(unsigned char const *const begin, unsigned char const *const end)
-    : begin_(begin), end_(end) {}
+    : Bits(begin, end, 0) {}
 
 bool Bits::operator==(const Bits &other) const {
-  return begin_ == other.begin_ and end_ == other.end_;
+  return begin_ == other.begin_ and end_ == other.end_ and
+         offset_end == other.offset_end;
 }
 
 bool Bits::operator!=(const Bits &other) const { return !operator==(other); }
 
 Bits::iterator Bits::begin() const { return {*this, begin_, 0}; }
 
-Bits::iterator Bits::end() const { return {*this, end_, 0}; }
+Bits::iterator Bits::end() const { return {*this, end_, offset_end}; }
 
 Bits::iterator::iterator(const Bits &iterable, unsigned char const *const p,
                          const unsigned char offset)
diff --git a/tests/src/test_utilities.cpp b/tests/src/test_utilities.cpp
index 31813102fd..1e53eec72e 100644
--- a/tests/src/test_utilities.cpp
+++ b/tests/src/test_utilities.cpp
@@ -228,4 +228,28 @@ TEST_CASE("Bits iteration", "[utilities]") {
       test_bit_equality(bits, expected);
     }
   }
+  SECTION("nonzero end offsets") {
+    {
+      unsigned char const a[1]{0xff};
+      const mgard::Bits bits(a, a, 7);
+      const std::vector<bool> expected(7, true);
+      test_bit_equality(bits, expected);
+    }
+    {
+      unsigned char const a[2]{0xa9, 0x33};
+      const mgard::Bits bits(a, a + 1, 2);
+      const std::vector<bool> expected{true,  false, true, false, true,
+                                       false, false, true, false, false};
+      test_bit_equality(bits, expected);
+    }
+    {
+      unsigned char const a[3]{0x1e, 0x0f, 0x77};
+      const mgard::Bits bits(a, a + 2, 6);
+      const std::vector<bool> expected{false, false, false, true,  true,  true,
+                                       true,  false, false, false, false, false,
+                                       true,  true,  true,  true,  false, true,
+                                       true,  true,  false, true};
+      test_bit_equality(bits, expected);
+    }
+  }
 }

From 8888b565a2e01d0314376d4dc10569e129487bd9 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Fri, 27 May 2022 20:44:09 -0400
Subject: [PATCH 13/58] Add Huffman encoding regression tests.

---
 include/huffman.hpp        |  14 +++++
 src/compressors.cpp        |   2 +-
 src/huffman.cpp            |  73 ++++++++++++++++------
 tests/CMakeLists.txt       |   1 +
 tests/src/test_huffman.cpp | 122 +++++++++++++++++++++++++++++++++++++
 5 files changed, 193 insertions(+), 19 deletions(-)
 create mode 100644 tests/src/test_huffman.cpp

diff --git a/include/huffman.hpp b/include/huffman.hpp
index 5705c17996..0ee06103d2 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -3,8 +3,22 @@
 //!\file
 //!\brief Huffman trees for quantized multilevel coefficients.
 
+#include <cstddef>
+
 namespace mgard {
 
+//! Encode quantized coefficients using a Huffman code.
+//!
+//!\param[in, out] quantized_data Input buffer (quantized coefficients). This
+//! buffer will be changed by the encoding process.
+//\param[in] n Number of symbols (`long int` quantized coefficients) in the
+//! input buffer.
+//!\param[out] out_data_hit Pointer to compressed buffer.
+//!\param[out] Size *in bits* of compressed buffer.
+//!\param[out] Pointer to 'missed' buffer (input symbols not assigned codes).
+//!\param[out] Size *in bytes* of 'missed' buffer.
+//!\param[out] Frequency table for input buffer.
+//!\param[out] Size *in bytes* of the frequency table.
 void huffman_encoding(long int *const quantized_data, const std::size_t n,
                       unsigned char *&out_data_hit, size_t &out_data_hit_size,
                       unsigned char *&out_data_miss, size_t &out_data_miss_size,
diff --git a/src/compressors.cpp b/src/compressors.cpp
index ec5c2323a1..05852b1ef3 100644
--- a/src/compressors.cpp
+++ b/src/compressors.cpp
@@ -5,7 +5,6 @@
 #include <cstring>
 
 #include <algorithm>
-#include <bitset>
 #include <numeric>
 #include <stdexcept>
 #include <vector>
@@ -14,6 +13,7 @@
 
 #include "format.hpp"
 #include "huffman.hpp"
+#include "utilities.hpp"
 
 #ifdef MGARD_TIMING
 #include <chrono>
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 44f82bc341..3f0ccd18e2 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -3,9 +3,7 @@
 #include <cstdlib>
 #include <cstring>
 
-#ifndef NDEBUG
 #include <algorithm>
-#endif
 
 #include <array>
 #include <queue>
@@ -17,33 +15,64 @@ namespace mgard {
 
 const int nql = 32768 * 4;
 
+//! Node in the Huffman code creation tree.
 struct htree_node {
   //! Constructor.
+  //!
+  //!\param q (Transformed) symbol.
+  //!\param cnt Number of occurrences of the (transformed) symbol in the source.
   htree_node(const int q, const std::size_t cnt)
       : q(q), cnt(cnt), code(0), len(0), left(nullptr), right(nullptr) {}
 
+  //! (Transformed) symbol.
   int q;
+
+  //! Number of occurrences of the (transformed) symbol in the source.
   std::size_t cnt;
+
+  //! Codeword associated to the (transformed) symbol.
   unsigned int code;
+
+  //! Length in bits of the codeword.
   std::size_t len;
+
+  //! Left child in the code creation tree.
   htree_node *left;
+
+  //! Right child in the code creation tree.
   htree_node *right;
 };
 
+//! Input symbol–Huffman code pair.
 struct huffman_codec {
+  //! (Transformed) symbol.
   int q;
+
+  //! Codeword associated to the (transformed) symbol.
   unsigned int code;
+
+  //! Length in bits of the codeword.
   std::size_t len;
 };
 
+//! Frequency table and symbol–code mappings for encoding source.
 template <std::size_t NQL> struct HuffmanCodec {
   // The arrays are value-initialized, which leads to each of their elements
   // being value-initialized (ultimately zero-initialized).
+
+  //! Input symbol–Huffman code pairs.
   std::array<huffman_codec, NQL> codec{};
+
+  //! Frequency table for encoding source.
   std::array<std::size_t, NQL> frequency_table{};
 };
 
+//! Function object for comparing Huffman code creation nodes.
 struct LessThanByCnt {
+  //! Return whether the first node has a larger count than the second.
+  //!
+  //!\param lhs First node.
+  //!\param rhs Second node.
   bool operator()(htree_node const *const lhs,
                   htree_node const *const rhs) const {
     return lhs->cnt > rhs->cnt;
@@ -54,16 +83,16 @@ template <class T>
 using my_priority_queue =
     std::priority_queue<T *, std::vector<T *>, LessThanByCnt>;
 
-template <std::size_t NQL>
-void initialize_codec(HuffmanCodec<NQL> &codec, htree_node *const root,
+void initialize_codec(HuffmanCodec<nql> &codec, htree_node *const root,
                       const unsigned int code, const std::size_t len) {
-  root->len = len;
+  std::array<huffman_codec, nql> &codewords = codec.codec;
+
   root->code = code;
+  root->len = len;
 
   if (!root->left && !root->right) {
-    codec.codec[root->q].q = root->q;
-    codec.codec[root->q].code = code;
-    codec.codec[root->q].len = len;
+    const std::size_t index = root->q;
+    codewords.at(index) = {root->q, code, len};
   }
 
   if (root->left) {
@@ -78,7 +107,6 @@ void initialize_codec(HuffmanCodec<NQL> &codec, htree_node *const root,
 my_priority_queue<htree_node> *build_tree(std::size_t const *const cnt) {
   my_priority_queue<htree_node> *const phtree =
       new my_priority_queue<htree_node>;
-#if 1
   for (int i = 0; i < nql; i++) {
     if (cnt[i] != 0) {
       htree_node *const new_node = new htree_node(i, cnt[i]);
@@ -98,7 +126,6 @@ my_priority_queue<htree_node> *build_tree(std::size_t const *const cnt) {
     new_node->right = top_node2;
     phtree->push(new_node);
   }
-#endif
   return phtree;
 }
 
@@ -126,9 +153,15 @@ void free_tree(my_priority_queue<htree_node> *const phtree) {
   }
 }
 
-// Note: this function will change the quantized data.
-template <std::size_t NQL>
-void initialize_frequency_table(HuffmanCodec<NQL> &codec,
+//! Populate the frequency table of a `HuffmanCodec`.
+//!
+//!\note This function will change the quantized data.
+//!
+//!\param[in, out] quantized_data Input buffer (quantized coefficients). This
+//! buffer will be changed by the codec-building process.
+//!\param[in] n Number of symbols (`long int` quantized coefficients) in the
+//! input buffer.
+void initialize_frequency_table(HuffmanCodec<nql> &codec,
                                 long int *const quantized_data,
                                 const std::size_t n) {
   assert(*std::max_element(codec.frequency_table.begin(),
@@ -137,15 +170,21 @@ void initialize_frequency_table(HuffmanCodec<NQL> &codec,
   for (std::size_t i = 0; i < n; i++) {
     // Convert quantization level to positive so that counting freq can be
     // easily done. Level 0 is reserved a out-of-range flag.
-    quantized_data[i] = quantized_data[i] + NQL / 2;
+    quantized_data[i] = quantized_data[i] + nql / 2;
     ++codec.frequency_table[quantized_data[i] > 0 &&
                                     quantized_data[i] <
-                                        static_cast<long int>(NQL)
+                                        static_cast<long int>(nql)
                                 ? quantized_data[i]
                                 : 0];
   }
 }
 
+//! Build a Huffman codec for an input buffer.
+//!
+//!\param[in, out] quantized_data Input buffer (quantized coefficients). This
+//! buffer will be changed by the codec-building process.
+//!\param[in] n Number of symbols (`long int` quantized coefficients) in the
+//! input buffer.
 template <std::size_t N>
 HuffmanCodec<N> build_huffman_codec(long int *const quantized_data,
                                     const std::size_t n) {
@@ -186,13 +225,12 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
   }
 
   out_data_hit = reinterpret_cast<unsigned char *>(p_hit);
-  out_data_miss = (unsigned char *)p_miss;
+  out_data_miss = reinterpret_cast<unsigned char *>(p_miss);
   out_data_hit_size = 0;
   out_data_miss_size = 0;
 
   std::size_t start_bit = 0;
   unsigned int *cur = p_hit;
-  std::size_t cnt_missed = 0;
   for (std::size_t i = 0; i < n; i++) {
     const int q = quantized_data[i];
     unsigned int code;
@@ -209,7 +247,6 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
 
       *p_miss = q;
       p_miss++;
-      cnt_missed++;
     }
 
     // Note that if len == 0, then that means that either the data is all the
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 1e67174fac..427b2e4546 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -20,6 +20,7 @@ set(
 	"src/test_quantize.cpp"
 	"src/test_compressors.cpp"
 	"src/test_CompressedDataset.cpp"
+	"src/test_huffman.cpp"
 )
 
 if(MGARD_ENABLE_UNSTRUCTURED AND MOAB_FOUND)
diff --git a/tests/src/test_huffman.cpp b/tests/src/test_huffman.cpp
new file mode 100644
index 0000000000..dcae0e9d03
--- /dev/null
+++ b/tests/src/test_huffman.cpp
@@ -0,0 +1,122 @@
+#include "catch2/catch_test_macros.hpp"
+
+#include <climits>
+
+#include <algorithm>
+#include <random>
+
+#include "testing_utilities.hpp"
+
+#include "huffman.hpp"
+
+namespace {
+
+void test_encoding_regression(long int *const quantized, const std::size_t N) {
+  long int *const quantized_new = new long int[N];
+  std::copy(quantized, quantized + N, quantized_new);
+
+  unsigned char *hit;
+  unsigned char *missed;
+  unsigned char *frequencies;
+  std::size_t bits_hit;
+  std::size_t bytes_missed;
+  std::size_t bytes_frequencies;
+  mgard::huffman_encoding(quantized, N, hit, bits_hit, missed, bytes_missed,
+                          frequencies, bytes_frequencies);
+
+  unsigned char *hit_new;
+  unsigned char *missed_new;
+  unsigned char *frequencies_new;
+  std::size_t bits_hit_new;
+  std::size_t bytes_missed_new;
+  std::size_t bytes_frequencies_new;
+  mgard::huffman_encoding(quantized_new, N, hit_new, bits_hit_new, missed_new,
+                          bytes_missed_new, frequencies_new,
+                          bytes_frequencies_new);
+
+  REQUIRE(bits_hit_new == bits_hit);
+  const std::size_t bytes_hit = (bits_hit + CHAR_BIT - 1) / CHAR_BIT;
+  REQUIRE(std::equal(hit, hit + bytes_hit, hit_new));
+
+  REQUIRE(bytes_missed_new == bytes_missed);
+  REQUIRE(std::equal(missed, missed + bytes_missed, missed_new));
+
+  REQUIRE(bytes_frequencies_new == bytes_frequencies);
+  REQUIRE(std::equal(frequencies, frequencies + bytes_frequencies,
+                     frequencies_new));
+
+  delete[] quantized_new;
+}
+
+void test_encoding_regression_constant(const std::size_t N, const long int q) {
+  long int *const quantized = new long int[N];
+  std::fill(quantized, quantized + N, q);
+  test_encoding_regression(quantized, N);
+  delete[] quantized;
+}
+
+//! Function object to generate periodic data.
+struct PeriodicGenerator {
+  //! Constructor.
+  //!
+  //!\param period Generator period.
+  //!\param value Starting value.
+  PeriodicGenerator(const std::size_t period, const long int value)
+      : period(period), value(value), ncalls(0) {}
+
+  //! Generator period.
+  std::size_t period;
+
+  //! Starting value.
+  long int value;
+
+  //! Number of times `operator()` has been called.
+  std::size_t ncalls;
+
+  long int operator()() {
+    return value + static_cast<long int>(ncalls++ % period);
+  }
+};
+
+void test_encoding_regression_periodic(const std::size_t N, const long int q,
+                                       const std::size_t period) {
+  long int *const quantized = new long int[N];
+  std::generate(quantized, quantized + N, PeriodicGenerator(period, q));
+  test_encoding_regression(quantized, N);
+  delete[] quantized;
+}
+
+void test_encoding_regression_random(const std::size_t N, const long int a,
+                                     const long int b,
+                                     std::default_random_engine &gen) {
+  std::uniform_int_distribution<long int> dis(a, b);
+  long int *const quantized = new long int[N];
+  std::generate(quantized, quantized + N, [&] { return dis(gen); });
+  test_encoding_regression(quantized, N);
+  delete[] quantized;
+}
+
+} // namespace
+
+TEST_CASE("encoding regression", "[huffman]") {
+  SECTION("constant data") {
+    test_encoding_regression_constant(10, 0);
+    test_encoding_regression_constant(100, 732);
+    test_encoding_regression_constant(1000, -10);
+  }
+
+  SECTION("periodic data") {
+    test_encoding_regression_periodic(10, -3, 3);
+    test_encoding_regression_periodic(100, 0, 10);
+    test_encoding_regression_periodic(1000, 51, 17);
+  }
+
+  SECTION("random data") {
+    std::default_random_engine gen(131051);
+    test_encoding_regression_random(10, 0, 1, gen);
+    test_encoding_regression_random(100, -15, -5, gen);
+    test_encoding_regression_random(1000, std::numeric_limits<int>::min(),
+                                    std::numeric_limits<int>::max(), gen);
+    test_encoding_regression_random(10000, -100, 100, gen);
+  }
+}

From d51312735cdc451a688728b752e98cf20f4cf6db Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Tue, 31 May 2022 13:07:31 -0400
Subject: [PATCH 14/58] Reimplement Huffman encoding with `HuffmanCode`.

---
 include/huffman.hpp        | 145 +++++++++++++++++++++++++++++--
 include/huffman.tpp        | 124 ++++++++++++++++++++++++++
 src/huffman.cpp            | 173 ++++++++++++++++++++++++++++++++++++-
 tests/src/test_huffman.cpp |   6 +-
 4 files changed, 438 insertions(+), 10 deletions(-)
 create mode 100644 include/huffman.tpp

diff --git a/include/huffman.hpp b/include/huffman.hpp
index 0ee06103d2..ed50c8b0c7 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -5,31 +5,164 @@
 
 #include <cstddef>
 
+#include <memory>
+#include <vector>
+
 namespace mgard {
 
 //! Encode quantized coefficients using a Huffman code.
 //!
 //!\param[in, out] quantized_data Input buffer (quantized coefficients). This
 //! buffer will be changed by the encoding process.
-//\param[in] n Number of symbols (`long int` quantized coefficients) in the
+//!\param[in] n Number of symbols (`long int` quantized coefficients) in the
 //! input buffer.
 //!\param[out] out_data_hit Pointer to compressed buffer.
-//!\param[out] Size *in bits* of compressed buffer.
-//!\param[out] Pointer to 'missed' buffer (input symbols not assigned codes).
-//!\param[out] Size *in bytes* of 'missed' buffer.
-//!\param[out] Frequency table for input buffer.
-//!\param[out] Size *in bytes* of the frequency table.
+//!\param[out] out_data_hit_size Size *in bits* of compressed buffer.
+//!\param[out] out_data_miss Pointer to 'missed' buffer (input symbols not
+//! assigned codes).
+//!\param[out] out_data_miss_size Size *in bytes* of 'missed'
+//! buffer.
+//!\param[out] out_tree Frequency table for input buffer.
+//!\param[out] out_tree_size Size *in bytes* of the frequency table.
 void huffman_encoding(long int *const quantized_data, const std::size_t n,
                       unsigned char *&out_data_hit, size_t &out_data_hit_size,
                       unsigned char *&out_data_miss, size_t &out_data_miss_size,
                       unsigned char *&out_tree, size_t &out_tree_size);
 
+//! Encode quantized coefficients using a Huffman code.
+//!
+//!\param[in, out] quantized_data Input buffer (quantized coefficients). This
+//! buffer will be changed by the encoding process.
+//!\param[in] n Number of symbols (`long int` quantized coefficients) in the
+//! input buffer.
+//!\param[out] out_data_hit Pointer to compressed buffer.
+//!\param[out] out_data_hit_size Size *in bits* of compressed buffer.
+//!\param[out] out_data_miss Pointer to 'missed' buffer (input symbols not
+//! assigned codes).
+//!\param[out] out_data_miss_size Size *in bytes* of 'missed'
+//! buffer.
+//!\param[out] out_tree Frequency table for input buffer.
+//!\param[out] out_tree_size Size *in bytes* of the frequency table.
+void huffman_encoding_rewritten(
+    long int const *const quantized_data, const std::size_t n,
+    unsigned char *&out_data_hit, std::size_t &out_data_hit_size,
+    unsigned char *&out_data_miss, std::size_t &out_data_miss_size,
+    unsigned char *&out_tree, std::size_t &out_tree_size);
+
+//! Decode a stream encoded using a Huffman code.
+//!
+//!\param[out] quantized_data Output buffer (quantized coefficients).
+//!\param[in] quantized_data_size Size *in bytes* of output buffer.
+//!\param[in] out_data_hit Compressed buffer.
+//!\param[in] out_data_hit_size Size *in bits* of compressed buffer.
+//!\param[in] out_data_miss 'Missed' buffer (input symbols not assigned codes).
+//!\param[in] out_data_miss_size Size *in bytes* of 'missed' buffer.
+//!\param[in] out_tree Frequency table for input buffer.
+//!\param[in] out_tree_size Size *in bytes* of the frequency table.
 void huffman_decoding(
     long int *const quantized_data, const std::size_t quantized_data_size,
     unsigned char const *const out_data_hit, const size_t out_data_hit_size,
     unsigned char const *const out_data_miss, const size_t out_data_miss_size,
     unsigned char const *const out_tree, const size_t out_tree_size);
 
+//! Codeword (in progress) associated to a node in a Huffman code creation tree.
+struct HuffmanCodeword {
+  //! Bytes containing the bits of the codeword.
+  std::vector<unsigned char> bytes = {};
+
+  //! Length in bits of the codeword.
+  std::size_t length = 0;
+
+  //! Append a bit to the codeword.
+  void push_back(const bool bit);
+
+  //! Generate the codeword associated to the left child in the tree.
+  HuffmanCodeword left() const;
+
+  //! Generate the codeword associated to the right child in the tree.
+  HuffmanCodeword right() const;
+};
+
+//! Node in a Huffman code creation tree.
+struct CodeCreationTreeNode {
+  //! Constructor.
+  //!
+  //! Create a leaf node.
+  //!
+  //!\param codeword Associated codeword.
+  //!\param count Frequency of the associated symbol.
+  CodeCreationTreeNode(HuffmanCodeword *const codeword,
+                       const std::size_t count);
+
+  //! Constructor.
+  //!
+  //! Create an inner (parent) node.
+  //!
+  //!\param left Left child of the node to be created.
+  //!\param right Right child of the node to be created.
+  CodeCreationTreeNode(const std::shared_ptr<CodeCreationTreeNode> &left,
+                       const std::shared_ptr<CodeCreationTreeNode> &right);
+
+  //! Associated codeword (if this node is a leaf).
+  HuffmanCodeword *codeword = nullptr;
+
+  //! Sum of frequencies of symbols associated to leaves descending from this
+  //! node.
+  std::size_t count;
+
+  //! Left child of this node.
+  std::shared_ptr<CodeCreationTreeNode> left;
+
+  //! Right child of this node.
+  std::shared_ptr<CodeCreationTreeNode> right;
+};
+
+//! Huffman code generated from/for an input stream.
+template <typename Symbol> class HuffmanCode {
+public:
+  //! Constructor.
+  //!
+  //!\param ncodewords Number of symbols that will be assigned codewords.
+  //!\param begin Beginning of input stream.
+  //!\param end End of input stream.
+  HuffmanCode(const std::size_t ncodewords, Symbol const *const begin,
+              Symbol const *const end);
+
+  //! Number of symbols that will be assigned codewords.
+  std::size_t ncodewords;
+
+  //! Frequencies of the symbols in the input stream.
+  std::vector<std::size_t> frequencies;
+
+  //! Codewords associated to the symbols.
+  std::vector<HuffmanCodeword> codewords;
+
+  //! Report the number of out-of-range symbols encountered in the stream.
+  std::size_t nmissed() const;
+
+  //! Check whether a symbol is eligible for a codeword.
+  bool out_of_range(const Symbol symbol) const;
+
+  //! Determine the codeword index for a symbol.
+  std::size_t index(const Symbol symbol) const;
+
+private:
+  //! Smallest symbol (inclusive) to receive a codeword.
+  Symbol min_symbol;
+
+  //! Largest symbol (inclusive) to receive a codeword.
+  Symbol max_symbol;
+
+  // TODO: Check that frequency count ties aren't going to hurt us here. Stable
+  // sorting algorithm in `priority_queue`?
+
+  //! Set codewords for given node and descendants.
+  void
+  recursively_set_codewords(const std::shared_ptr<CodeCreationTreeNode> &node,
+                            const HuffmanCodeword codeword);
+};
+
 } // namespace mgard
 
+#include "huffman.tpp"
 #endif
diff --git a/include/huffman.tpp b/include/huffman.tpp
new file mode 100644
index 0000000000..da11ac5d97
--- /dev/null
+++ b/include/huffman.tpp
@@ -0,0 +1,124 @@
+#include "utilities.hpp"
+
+#include <cassert>
+
+#include <limits>
+#include <queue>
+#include <stdexcept>
+#include <type_traits>
+
+namespace mgard {
+
+//! This is used in the instantiation of `std::priority_queue`.
+template <typename T> struct HeldCountGreater {
+  bool operator()(const T &a, const T &b) const { return a->count > b->count; }
+};
+
+template <typename Symbol>
+HuffmanCode<Symbol>::HuffmanCode(const std::size_t ncodewords,
+                                 Symbol const *const begin,
+                                 Symbol const *const end)
+    : ncodewords(ncodewords), frequencies(ncodewords), codewords(ncodewords) {
+  static_assert(std::is_integral<Symbol>::value and
+                    std::is_signed<Symbol>::value,
+                "symbol type must be signed and integral");
+  // Haven't carefully checked what the minimum acceptable value is.
+  if (not ncodewords) {
+    throw std::invalid_argument("`ncodewords` must be positive.");
+  }
+  {
+    const Symbol SYMBOL_MAX = std::numeric_limits<Symbol>::max();
+    const Symbol SYMBOL_MIN = std::numeric_limits<Symbol>::min();
+
+    const std::size_t max_symbol_ = (ncodewords + 1) / 2 - 1;
+    const std::size_t opp_min_symbol_ = ncodewords / 2;
+
+    // TODO: There is surely a better way of doing this. Lots of potential
+    // issues with directly comparing `opp_min_symbol_` and `-SYMBOL_MIN`.
+    // `-SYMBOL_MIN` can't necessarily be represented as a `Symbol`, for
+    // example. Trying to avoid overflows.
+    std::size_t a = opp_min_symbol_;
+    Symbol b = SYMBOL_MIN;
+    while (a) {
+      a /= 2;
+      b /= 2;
+    }
+    if (not b) {
+      // Only a "risk" because we haven't actually established that
+      // `opp_min_symbol_` is greater in magnitude than `SYMBOL_MIN`.
+      throw std::overflow_error(
+          "risk that minimum symbol cannot be represented in symbol type");
+    } else if (opp_min_symbol_ > SYMBOL_MAX) {
+      throw std::overflow_error(
+          "opposite of minimum symbol canont be represented in symbol type");
+    } else {
+      min_symbol = -static_cast<Symbol>(opp_min_symbol_);
+    }
+
+    // `opp_min_symbol_` is either equal to or one greater than `max_symbol_`,
+    // and we checked above that `opp_min_symbol_ <= SYMBOL_MAX`. So, we know
+    // that `max_symbol_ <= SYMBOL_MAX` here.
+    max_symbol = max_symbol_;
+  }
+  for (const Symbol symbol :
+       RangeSlice<Symbol const *const>{.begin_ = begin, .end_ = end}) {
+    ++frequencies.at(index(symbol));
+  }
+
+  using T = std::shared_ptr<CodeCreationTreeNode>;
+  std::priority_queue<T, std::vector<T>, HeldCountGreater<T>> queue;
+
+  // We can't quite use a `ZippedRange` here, I think, because
+  // `ZippedRange::iterator` doesn't expose the underlying iterators and
+  // we want a pointer to the codeword.
+  typename std::vector<std::size_t>::const_iterator p = frequencies.cbegin();
+  HuffmanCodeword *q = codewords.data();
+  for (std::size_t i = 0; i < ncodewords; ++i) {
+    const std::size_t count = *p;
+    if (count) {
+      queue.push(std::make_shared<CodeCreationTreeNode>(q, count));
+    }
+    ++p;
+    ++q;
+  }
+  while (queue.size() > 1) {
+    const std::shared_ptr<CodeCreationTreeNode> a = queue.top();
+    queue.pop();
+    const std::shared_ptr<CodeCreationTreeNode> b = queue.top();
+    queue.pop();
+
+    queue.push(std::make_shared<CodeCreationTreeNode>(a, b));
+  }
+
+  recursively_set_codewords(queue.top(), {});
+}
+
+template <typename Symbol> std::size_t HuffmanCode<Symbol>::nmissed() const {
+  return frequencies.at(0);
+}
+
+template <typename Symbol>
+bool HuffmanCode<Symbol>::out_of_range(const Symbol symbol) const {
+  return symbol < min_symbol or symbol > max_symbol;
+}
+
+template <typename Symbol>
+std::size_t HuffmanCode<Symbol>::index(const Symbol symbol) const {
+  return out_of_range(symbol) ? 0 : 1 + symbol - min_symbol;
+}
+
+template <typename Symbol>
+void HuffmanCode<Symbol>::recursively_set_codewords(
+    const std::shared_ptr<CodeCreationTreeNode> &node,
+    const HuffmanCodeword codeword) {
+  const bool children = node->left;
+  assert(children == static_cast<bool>(node->right));
+  if (children) {
+    recursively_set_codewords(node->left, codeword.left());
+    recursively_set_codewords(node->right, codeword.right());
+  } else {
+    *node->codeword = codeword;
+  }
+}
+
+} // namespace mgard
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 3f0ccd18e2..af45c78da8 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -1,4 +1,5 @@
 #include <cassert>
+#include <climits>
 #include <cstddef>
 #include <cstdlib>
 #include <cstring>
@@ -6,15 +7,78 @@
 #include <algorithm>
 
 #include <array>
+#include <numeric>
 #include <queue>
 #include <vector>
 
 #include "huffman.hpp"
 
+#include "utilities.hpp"
+
 namespace mgard {
 
 const int nql = 32768 * 4;
 
+struct HuffmanEncodedStream {
+  //! Constructor.
+  //!
+  //!\param nbits Length in bits of the compressed stream.
+  //!\param ncompressed Length in bits of the compressed stream.
+  //!\param nmissed Length in bytes of the missed array.
+  //!\param ntable Length in bytes of the frequency table.
+  HuffmanEncodedStream(const std::size_t nbits, const std::size_t ncompressed,
+                       const std::size_t nmissed, const std::size_t ntable);
+
+  //! Length in bits of the compressed stream.
+  std::size_t nbits;
+
+  //! Compressed stream.
+  MemoryBuffer<unsigned char> hit;
+
+  //! Missed array.
+  MemoryBuffer<unsigned char> missed;
+
+  //! Frequency table.
+  MemoryBuffer<unsigned char> frequencies;
+};
+
+HuffmanEncodedStream::HuffmanEncodedStream(const std::size_t nbits,
+                                           const std::size_t ncompressed,
+                                           const std::size_t nmissed,
+                                           const std::size_t nfrequencies)
+    : nbits(nbits), hit(ncompressed), missed(nmissed),
+      frequencies(nfrequencies) {}
+
+void HuffmanCodeword::push_back(const bool bit) {
+  const unsigned char offset = length % CHAR_BIT;
+  if (not offset) {
+    bytes.push_back(0);
+  }
+  bytes.back() |= static_cast<unsigned char>(bit) << (CHAR_BIT - 1 - offset);
+  ++length;
+}
+
+HuffmanCodeword HuffmanCodeword::left() const {
+  HuffmanCodeword tmp = *this;
+  tmp.push_back(false);
+  return tmp;
+}
+
+HuffmanCodeword HuffmanCodeword::right() const {
+  HuffmanCodeword tmp = *this;
+  tmp.push_back(true);
+  return tmp;
+}
+
+CodeCreationTreeNode::CodeCreationTreeNode(HuffmanCodeword *const codeword,
+                                           const std::size_t count)
+    : codeword(codeword), count(count) {}
+
+CodeCreationTreeNode::CodeCreationTreeNode(
+    const std::shared_ptr<CodeCreationTreeNode> &left,
+    const std::shared_ptr<CodeCreationTreeNode> &right)
+    : count(left->count + right->count), left(left), right(right) {}
+
 //! Node in the Huffman code creation tree.
 struct htree_node {
   //! Constructor.
@@ -165,7 +229,7 @@ void initialize_frequency_table(HuffmanCodec<nql> &codec,
                                 long int *const quantized_data,
                                 const std::size_t n) {
   assert(*std::max_element(codec.frequency_table.begin(),
-                           code.frequency_table.end()) == 0);
+                           codec.frequency_table.end()) == 0);
 
   for (std::size_t i = 0; i < n; i++) {
     // Convert quantization level to positive so that counting freq can be
@@ -299,6 +363,113 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
   out_tree_size = 2 * nonZeros * sizeof(std::size_t);
 }
 
+void huffman_encoding_rewritten(
+    long int const *const quantized_data, const std::size_t n,
+    unsigned char *&out_data_hit, std::size_t &out_data_hit_size,
+    unsigned char *&out_data_miss, std::size_t &out_data_miss_size,
+    unsigned char *&out_tree, std::size_t &out_tree_size) {
+  const std::size_t ncodewords = nql - 1;
+  const HuffmanCode<long int> code(ncodewords, quantized_data,
+                                   quantized_data + n);
+
+  std::vector<std::size_t> lengths;
+  for (const HuffmanCodeword &codeword : code.codewords) {
+    lengths.push_back(codeword.length);
+  }
+  const std::size_t nbits =
+      std::inner_product(code.frequencies.begin(), code.frequencies.end(),
+                         lengths.begin(), static_cast<std::size_t>(0));
+  const std::size_t nbytes =
+      sizeof(unsigned int) * ((nbits + CHAR_BIT * sizeof(unsigned int) - 1) /
+                              (CHAR_BIT * sizeof(unsigned int)));
+  if (nbytes % sizeof(unsigned int)) {
+    throw std::runtime_error(
+        "`nbytes` not bumped up to nearest multiple of `unsigned int` size");
+  }
+
+  const std::size_t nnz = ncodewords - std::count(code.frequencies.begin(),
+                                                  code.frequencies.end(), 0);
+
+  HuffmanEncodedStream out(nbits, nbytes, code.nmissed() * sizeof(int),
+                           2 * nnz * sizeof(std::size_t));
+
+  // Write frequency table.
+  {
+    std::size_t *p =
+        reinterpret_cast<std::size_t *>(out.frequencies.data.get());
+    const std::vector<std::size_t> &frequencies = code.frequencies;
+    for (std::size_t i = 0; i < ncodewords; ++i) {
+      const std::size_t frequency = frequencies.at(i);
+      if (frequency) {
+        *p++ = i;
+        *p++ = frequency;
+      }
+    }
+  }
+
+  unsigned char *const buffer = out.hit.data.get();
+  {
+    unsigned char *const p = out.hit.data.get();
+    std::fill(p, p + out.hit.size, 0);
+  }
+  unsigned char *hit = buffer;
+
+  int *missed = reinterpret_cast<int *>(out.missed.data.get());
+
+  unsigned char offset = 0;
+  for (const long int q : PseudoArray(quantized_data, n)) {
+    if (code.out_of_range(q)) {
+      // Remember that `missed` is an `int` rather than a `long int`.
+      *missed++ = q + nql / 2;
+    }
+
+    const HuffmanCodeword codeword = code.codewords.at(code.index(q));
+    std::size_t NREMAINING = codeword.length;
+    for (unsigned char byte : codeword.bytes) {
+      // Number of bits of `byte` left to write.
+      unsigned char nremaining =
+          std::min(static_cast<std::size_t>(CHAR_BIT), NREMAINING);
+      // Premature, but this will hold when we're done with `byte`.
+      NREMAINING -= nremaining;
+
+      while (nremaining) {
+        *hit |= byte >> offset;
+        // Number of bits of `byte` just written (not cumulative).
+        const unsigned char nwritten = std::min(
+            nremaining, static_cast<unsigned char>(
+                            static_cast<unsigned char>(CHAR_BIT) - offset));
+        offset += nwritten;
+        hit += offset / CHAR_BIT;
+        offset %= CHAR_BIT;
+        nremaining -= nwritten;
+        byte <<= nwritten;
+      }
+    }
+  }
+
+  {
+    const unsigned int one{1};
+    const bool little_endian = *reinterpret_cast<unsigned char const *>(&one);
+    if (little_endian) {
+      for (std::size_t i = 0; i < nbytes; i += sizeof(unsigned int)) {
+        unsigned char *a = buffer + i;
+        unsigned char *b = a + sizeof(unsigned int) - 1;
+        for (std::size_t j = 0; j < sizeof(unsigned int) / 2; ++j) {
+          std::swap(*a++, *b--);
+        }
+      }
+    }
+  }
+
+  out_data_hit_size = out.nbits;
+  out_data_miss_size = out.missed.size;
+  out_tree_size = out.frequencies.size;
+
+  out_data_hit = out.hit.data.release();
+  out_data_miss = out.missed.data.release();
+  out_tree = out.frequencies.data.release();
+}
+
 void huffman_decoding(long int *const quantized_data,
                       const std::size_t quantized_data_size,
                       unsigned char const *const out_data_hit,
diff --git a/tests/src/test_huffman.cpp b/tests/src/test_huffman.cpp
index dcae0e9d03..4efecf5474 100644
--- a/tests/src/test_huffman.cpp
+++ b/tests/src/test_huffman.cpp
@@ -30,9 +30,9 @@ void test_encoding_regression(long int *const quantized, const std::size_t N) {
   std::size_t bits_hit_new;
   std::size_t bytes_missed_new;
   std::size_t bytes_frequencies_new;
-  mgard::huffman_encoding(quantized_new, N, hit_new, bits_hit_new, missed_new,
-                          bytes_missed_new, frequencies_new,
-                          bytes_frequencies_new);
+  mgard::huffman_encoding_rewritten(quantized_new, N, hit_new, bits_hit_new,
+                                    missed_new, bytes_missed_new,
+                                    frequencies_new, bytes_frequencies_new);
 
   REQUIRE(bits_hit_new == bits_hit);
   const std::size_t bytes_hit = (bits_hit + CHAR_BIT - 1) / CHAR_BIT;

From 44cdc1271f2fa4645da3d8ec25b9403989ea77a1 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Tue, 31 May 2022 15:53:33 -0400
Subject: [PATCH 15/58] Return struct from rewritten Huffman encoder.

---
 include/huffman.hpp        | 45 +++++++++++++++++++++++++-------------
 src/huffman.cpp            | 41 ++++------------------------------
 tests/src/test_huffman.cpp | 25 ++++++++-------------
 3 files changed, 43 insertions(+), 68 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index ed50c8b0c7..564272d7f0 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -8,8 +8,34 @@
 #include <memory>
 #include <vector>
 
+#include "utilities.hpp"
+
 namespace mgard {
 
+//! A stream compressed using a Huffman code.
+struct HuffmanEncodedStream {
+  //! Constructor.
+  //!
+  //!\param nbits Length in bits of the compressed stream.
+  //!\param ncompressed Length in bytes of the compressed stream.
+  //!\param nmissed Length in bytes of the missed array.
+  //!\param ntable Length in bytes of the frequency table.
+  HuffmanEncodedStream(const std::size_t nbits, const std::size_t ncompressed,
+                       const std::size_t nmissed, const std::size_t ntable);
+
+  //! Length in bits of the compressed stream.
+  std::size_t nbits;
+
+  //! Compressed stream.
+  MemoryBuffer<unsigned char> hit;
+
+  //! Missed array.
+  MemoryBuffer<unsigned char> missed;
+
+  //! Frequency table.
+  MemoryBuffer<unsigned char> frequencies;
+};
+
 //! Encode quantized coefficients using a Huffman code.
 //!
 //!\param[in, out] quantized_data Input buffer (quantized coefficients). This
@@ -31,23 +57,12 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
 
 //! Encode quantized coefficients using a Huffman code.
 //!
-//!\param[in, out] quantized_data Input buffer (quantized coefficients). This
-//! buffer will be changed by the encoding process.
+//!\param[in] quantized_data Input buffer (quantized coefficients).
 //!\param[in] n Number of symbols (`long int` quantized coefficients) in the
 //! input buffer.
-//!\param[out] out_data_hit Pointer to compressed buffer.
-//!\param[out] out_data_hit_size Size *in bits* of compressed buffer.
-//!\param[out] out_data_miss Pointer to 'missed' buffer (input symbols not
-//! assigned codes).
-//!\param[out] out_data_miss_size Size *in bytes* of 'missed'
-//! buffer.
-//!\param[out] out_tree Frequency table for input buffer.
-//!\param[out] out_tree_size Size *in bytes* of the frequency table.
-void huffman_encoding_rewritten(
-    long int const *const quantized_data, const std::size_t n,
-    unsigned char *&out_data_hit, std::size_t &out_data_hit_size,
-    unsigned char *&out_data_miss, std::size_t &out_data_miss_size,
-    unsigned char *&out_tree, std::size_t &out_tree_size);
+HuffmanEncodedStream
+huffman_encoding_rewritten(long int const *const quantized_data,
+                           const std::size_t n);
 
 //! Decode a stream encoded using a Huffman code.
 //!
diff --git a/src/huffman.cpp b/src/huffman.cpp
index af45c78da8..1983a863d5 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -13,35 +13,10 @@
 
 #include "huffman.hpp"
 
-#include "utilities.hpp"
-
 namespace mgard {
 
 const int nql = 32768 * 4;
 
-struct HuffmanEncodedStream {
-  //! Constructor.
-  //!
-  //!\param nbits Length in bits of the compressed stream.
-  //!\param ncompressed Length in bits of the compressed stream.
-  //!\param nmissed Length in bytes of the missed array.
-  //!\param ntable Length in bytes of the frequency table.
-  HuffmanEncodedStream(const std::size_t nbits, const std::size_t ncompressed,
-                       const std::size_t nmissed, const std::size_t ntable);
-
-  //! Length in bits of the compressed stream.
-  std::size_t nbits;
-
-  //! Compressed stream.
-  MemoryBuffer<unsigned char> hit;
-
-  //! Missed array.
-  MemoryBuffer<unsigned char> missed;
-
-  //! Frequency table.
-  MemoryBuffer<unsigned char> frequencies;
-};
-
 HuffmanEncodedStream::HuffmanEncodedStream(const std::size_t nbits,
                                            const std::size_t ncompressed,
                                            const std::size_t nmissed,
@@ -363,11 +338,9 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
   out_tree_size = 2 * nonZeros * sizeof(std::size_t);
 }
 
-void huffman_encoding_rewritten(
-    long int const *const quantized_data, const std::size_t n,
-    unsigned char *&out_data_hit, std::size_t &out_data_hit_size,
-    unsigned char *&out_data_miss, std::size_t &out_data_miss_size,
-    unsigned char *&out_tree, std::size_t &out_tree_size) {
+HuffmanEncodedStream
+huffman_encoding_rewritten(long int const *const quantized_data,
+                           const std::size_t n) {
   const std::size_t ncodewords = nql - 1;
   const HuffmanCode<long int> code(ncodewords, quantized_data,
                                    quantized_data + n);
@@ -461,13 +434,7 @@ void huffman_encoding_rewritten(
     }
   }
 
-  out_data_hit_size = out.nbits;
-  out_data_miss_size = out.missed.size;
-  out_tree_size = out.frequencies.size;
-
-  out_data_hit = out.hit.data.release();
-  out_data_miss = out.missed.data.release();
-  out_tree = out.frequencies.data.release();
+  return out;
 }
 
 void huffman_decoding(long int *const quantized_data,
diff --git a/tests/src/test_huffman.cpp b/tests/src/test_huffman.cpp
index 4efecf5474..55fa93c095 100644
--- a/tests/src/test_huffman.cpp
+++ b/tests/src/test_huffman.cpp
@@ -24,26 +24,19 @@ void test_encoding_regression(long int *const quantized, const std::size_t N) {
   mgard::huffman_encoding(quantized, N, hit, bits_hit, missed, bytes_missed,
                           frequencies, bytes_frequencies);
 
-  unsigned char *hit_new;
-  unsigned char *missed_new;
-  unsigned char *frequencies_new;
-  std::size_t bits_hit_new;
-  std::size_t bytes_missed_new;
-  std::size_t bytes_frequencies_new;
-  mgard::huffman_encoding_rewritten(quantized_new, N, hit_new, bits_hit_new,
-                                    missed_new, bytes_missed_new,
-                                    frequencies_new, bytes_frequencies_new);
-
-  REQUIRE(bits_hit_new == bits_hit);
+  const mgard::HuffmanEncodedStream out_new =
+      mgard::huffman_encoding_rewritten(quantized_new, N);
+
+  REQUIRE(out_new.nbits == bits_hit);
   const std::size_t bytes_hit = (bits_hit + CHAR_BIT - 1) / CHAR_BIT;
-  REQUIRE(std::equal(hit, hit + bytes_hit, hit_new));
+  REQUIRE(std::equal(hit, hit + bytes_hit, out_new.hit.data.get()));
 
-  REQUIRE(bytes_missed_new == bytes_missed);
-  REQUIRE(std::equal(missed, missed + bytes_missed, missed_new));
+  REQUIRE(out_new.missed.size == bytes_missed);
+  REQUIRE(std::equal(missed, missed + bytes_missed, out_new.missed.data.get()));
 
-  REQUIRE(bytes_frequencies_new == bytes_frequencies);
+  REQUIRE(out_new.frequencies.size == bytes_frequencies);
   REQUIRE(std::equal(frequencies, frequencies + bytes_frequencies,
-                     frequencies_new));
+                     out_new.frequencies.data.get()));
 
   delete[] quantized_new;
 }

From 38a4d96aae0f45db94307f2e5104d4ee462f6056 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 1 Jun 2022 11:10:37 -0400
Subject: [PATCH 16/58] Return struct from original Huffman encoder.

---
 include/huffman.hpp        | 14 ++------------
 src/compressors.cpp        | 22 +++++++++++-----------
 src/huffman.cpp            | 34 ++++++++++++++++++++++------------
 tests/src/test_huffman.cpp | 29 +++++++++++++----------------
 4 files changed, 48 insertions(+), 51 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index 564272d7f0..58a47d02b5 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -42,18 +42,8 @@ struct HuffmanEncodedStream {
 //! buffer will be changed by the encoding process.
 //!\param[in] n Number of symbols (`long int` quantized coefficients) in the
 //! input buffer.
-//!\param[out] out_data_hit Pointer to compressed buffer.
-//!\param[out] out_data_hit_size Size *in bits* of compressed buffer.
-//!\param[out] out_data_miss Pointer to 'missed' buffer (input symbols not
-//! assigned codes).
-//!\param[out] out_data_miss_size Size *in bytes* of 'missed'
-//! buffer.
-//!\param[out] out_tree Frequency table for input buffer.
-//!\param[out] out_tree_size Size *in bytes* of the frequency table.
-void huffman_encoding(long int *const quantized_data, const std::size_t n,
-                      unsigned char *&out_data_hit, size_t &out_data_hit_size,
-                      unsigned char *&out_data_miss, size_t &out_data_miss_size,
-                      unsigned char *&out_tree, size_t &out_tree_size);
+HuffmanEncodedStream huffman_encoding(long int *const quantized_data,
+                                      const std::size_t n);
 
 //! Encode quantized coefficients using a Huffman code.
 //!
diff --git a/src/compressors.cpp b/src/compressors.cpp
index 05852b1ef3..2fc2c0147f 100644
--- a/src/compressors.cpp
+++ b/src/compressors.cpp
@@ -70,17 +70,17 @@ void decompress_memory_huffman(unsigned char *const src,
 
 MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
                                                     const std::size_t srcLen) {
-  unsigned char *out_data_hit = 0;
-  size_t out_data_hit_size;
-  unsigned char *out_data_miss = 0;
-  size_t out_data_miss_size;
-  unsigned char *out_tree = 0;
-  size_t out_tree_size;
 #ifdef MGARD_TIMING
   auto huff_time1 = std::chrono::high_resolution_clock::now();
 #endif
-  huffman_encoding(src, srcLen, out_data_hit, out_data_hit_size, out_data_miss,
-                   out_data_miss_size, out_tree, out_tree_size);
+  HuffmanEncodedStream encoded = huffman_encoding(src, srcLen);
+  const std::size_t out_data_hit_size = encoded.nbits;
+  const std::size_t out_data_miss_size = encoded.missed.size;
+  const std::size_t out_tree_size = encoded.frequencies.size;
+  unsigned char const *const out_data_hit = encoded.hit.data.release();
+  unsigned char const *const out_data_miss = encoded.missed.data.release();
+  unsigned char const *const out_tree = encoded.frequencies.data.release();
+
 #ifdef MGARD_TIMING
   auto huff_time2 = std::chrono::high_resolution_clock::now();
   auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
@@ -106,9 +106,9 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
     bufp += out_data_miss_size;
   }
 
-  free(out_tree);
-  free(out_data_hit);
-  free(out_data_miss);
+  delete[] out_data_hit;
+  delete[] out_data_miss;
+  delete[] out_tree;
 
 #ifndef MGARD_ZSTD
 #ifdef MGARD_TIMING
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 1983a863d5..3c5fbb5a77 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -240,12 +240,8 @@ HuffmanCodec<N> build_huffman_codec(long int *const quantized_data,
   return codec;
 }
 
-void huffman_encoding(long int *const quantized_data, const std::size_t n,
-                      unsigned char *&out_data_hit,
-                      std::size_t &out_data_hit_size,
-                      unsigned char *&out_data_miss,
-                      std::size_t &out_data_miss_size, unsigned char *&out_tree,
-                      std::size_t &out_tree_size) {
+HuffmanEncodedStream huffman_encoding(long int *const quantized_data,
+                                      const std::size_t n) {
   const HuffmanCodec<nql> codec = build_huffman_codec<nql>(quantized_data, n);
   const std::size_t num_miss = codec.frequency_table[0];
 
@@ -263,10 +259,12 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
     p_miss = new int[num_miss]();
   }
 
-  out_data_hit = reinterpret_cast<unsigned char *>(p_hit);
-  out_data_miss = reinterpret_cast<unsigned char *>(p_miss);
-  out_data_hit_size = 0;
-  out_data_miss_size = 0;
+  unsigned char const *const out_data_hit =
+      reinterpret_cast<unsigned char *>(p_hit);
+  unsigned char const *const out_data_miss =
+      reinterpret_cast<unsigned char *>(p_miss);
+  std::size_t out_data_hit_size = 0;
+  std::size_t out_data_miss_size = 0;
 
   std::size_t start_bit = 0;
   unsigned int *cur = p_hit;
@@ -334,8 +332,20 @@ void huffman_encoding(long int *const quantized_data, const std::size_t n,
     }
   }
 
-  out_tree = (unsigned char *)cft;
-  out_tree_size = 2 * nonZeros * sizeof(std::size_t);
+  unsigned char const *const out_tree = (unsigned char *)cft;
+  const std::size_t out_tree_size = 2 * nonZeros * sizeof(std::size_t);
+
+  const std::size_t nbytes =
+      sizeof(unsigned int) *
+      ((out_data_hit_size + CHAR_BIT * sizeof(unsigned int) - 1) /
+       (CHAR_BIT * sizeof(unsigned int)));
+  HuffmanEncodedStream out(out_data_hit_size, nbytes, out_data_miss_size,
+                           out_tree_size);
+  std::copy(out_data_hit, out_data_hit + nbytes, out.hit.data.get());
+  std::copy(out_data_miss, out_data_miss + out_data_miss_size,
+            out.missed.data.get());
+  std::copy(out_tree, out_tree + out_tree_size, out.frequencies.data.get());
+  return out;
 }
 
 HuffmanEncodedStream
diff --git a/tests/src/test_huffman.cpp b/tests/src/test_huffman.cpp
index 55fa93c095..2b31eb4c9e 100644
--- a/tests/src/test_huffman.cpp
+++ b/tests/src/test_huffman.cpp
@@ -15,27 +15,24 @@ void test_encoding_regression(long int *const quantized, const std::size_t N) {
   long int *const quantized_new = new long int[N];
   std::copy(quantized, quantized + N, quantized_new);
 
-  unsigned char *hit;
-  unsigned char *missed;
-  unsigned char *frequencies;
-  std::size_t bits_hit;
-  std::size_t bytes_missed;
-  std::size_t bytes_frequencies;
-  mgard::huffman_encoding(quantized, N, hit, bits_hit, missed, bytes_missed,
-                          frequencies, bytes_frequencies);
-
+  const mgard::HuffmanEncodedStream out = mgard::huffman_encoding(quantized, N);
   const mgard::HuffmanEncodedStream out_new =
       mgard::huffman_encoding_rewritten(quantized_new, N);
 
-  REQUIRE(out_new.nbits == bits_hit);
-  const std::size_t bytes_hit = (bits_hit + CHAR_BIT - 1) / CHAR_BIT;
-  REQUIRE(std::equal(hit, hit + bytes_hit, out_new.hit.data.get()));
+  unsigned char const *const hit = out.hit.data.get();
+  REQUIRE(out_new.nbits == out.nbits);
+  const std::size_t nbytes = (out.nbits + CHAR_BIT - 1) / CHAR_BIT;
+  REQUIRE(std::equal(hit, hit + nbytes, out_new.hit.data.get()));
 
-  REQUIRE(out_new.missed.size == bytes_missed);
-  REQUIRE(std::equal(missed, missed + bytes_missed, out_new.missed.data.get()));
+  unsigned char const *const missed = out.missed.data.get();
+  const std::size_t nmissed = out.missed.size;
+  REQUIRE(out_new.missed.size == nmissed);
+  REQUIRE(std::equal(missed, missed + nmissed, out_new.missed.data.get()));
 
-  REQUIRE(out_new.frequencies.size == bytes_frequencies);
-  REQUIRE(std::equal(frequencies, frequencies + bytes_frequencies,
+  unsigned char const *const frequencies = out.frequencies.data.get();
+  const std::size_t nfrequencies = out.frequencies.size;
+  REQUIRE(out_new.frequencies.size == nfrequencies);
+  REQUIRE(std::equal(frequencies, frequencies + nfrequencies,
                      out_new.frequencies.data.get()));
 
   delete[] quantized_new;

From 83f31e0a2b1a7eba9a2596a72171906ea8ecfe37 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 1 Jun 2022 11:51:00 -0400
Subject: [PATCH 17/58] Avoid buffer copies in `huffman_encoding`.

---
 src/huffman.cpp | 101 ++++++++++++++++++------------------------------
 1 file changed, 38 insertions(+), 63 deletions(-)

diff --git a/src/huffman.cpp b/src/huffman.cpp
index 3c5fbb5a77..937c5d9b6a 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -247,27 +247,40 @@ HuffmanEncodedStream huffman_encoding(long int *const quantized_data,
 
   assert(n >= num_miss);
 
-  /* For those miss points, we still need to maintain a flag (q = 0),
-   * and therefore we need to allocate space for n numbers.
-   */
-  // The elements of the array are value-initialized (here, zero-initialized).
-  unsigned int *const p_hit = new unsigned int[n]();
-
-  int *p_miss = nullptr;
-  if (num_miss > 0) {
-    // The elements of the array are value-initialized (here, zero-initialized).
-    p_miss = new int[num_miss]();
+  std::size_t nnz = 0;
+  std::size_t nbits = 0;
+  for (std::size_t i = 0; i < nql; ++i) {
+    const huffman_codec &codec_ = codec.codec.at(i);
+    const std::size_t frequency = codec.frequency_table.at(i);
+    nbits += frequency * codec_.len;
+    nnz += frequency ? 1 : 0;
   }
 
-  unsigned char const *const out_data_hit =
-      reinterpret_cast<unsigned char *>(p_hit);
-  unsigned char const *const out_data_miss =
-      reinterpret_cast<unsigned char *>(p_miss);
-  std::size_t out_data_hit_size = 0;
-  std::size_t out_data_miss_size = 0;
+  const std::size_t nbytes =
+      sizeof(unsigned int) * ((nbits + CHAR_BIT * sizeof(unsigned int) - 1) /
+                              (CHAR_BIT * sizeof(unsigned int)));
+  HuffmanEncodedStream out(nbits, nbytes, num_miss * sizeof(int),
+                           2 * nnz * sizeof(std::size_t));
+
+  unsigned int *const hit =
+      reinterpret_cast<unsigned int *>(out.hit.data.get());
+  std::fill(hit, hit + nbytes / sizeof(unsigned int), 0u);
+
+  int *missed = reinterpret_cast<int *>(out.missed.data.get());
+
+  // write frequency table to buffer
+  std::size_t *const cft =
+      reinterpret_cast<std::size_t *>(out.frequencies.data.get());
+  std::size_t off = 0;
+  for (std::size_t i = 0; i < nql; ++i) {
+    if (codec.frequency_table[i] > 0) {
+      cft[2 * off] = i;
+      cft[2 * off + 1] = codec.frequency_table[i];
+      off++;
+    }
+  }
 
   std::size_t start_bit = 0;
-  unsigned int *cur = p_hit;
   for (std::size_t i = 0; i < n; i++) {
     const int q = quantized_data[i];
     unsigned int code;
@@ -282,8 +295,7 @@ HuffmanEncodedStream huffman_encoding(long int *const quantized_data,
       code = codec.codec[0].code;
       len = codec.codec[0].len;
 
-      *p_miss = q;
-      p_miss++;
+      *missed++ = q;
     }
 
     // Note that if len == 0, then that means that either the data is all the
@@ -297,54 +309,17 @@ HuffmanEncodedStream huffman_encoding(long int *const quantized_data,
       // and copy  the rest len - (32 - start_bit % 32) to the next int
       const std::size_t rshift = len - (32 - start_bit % 32);
       const std::size_t lshift = 32 - rshift;
-      *(cur + start_bit / 32) = (*(cur + start_bit / 32)) | (code >> rshift);
-      *(cur + start_bit / 32 + 1) =
-          (*(cur + start_bit / 32 + 1)) | (code << lshift);
-      start_bit += len;
-    } else if (len > 0) {
+      *(hit + start_bit / 32) = (*(hit + start_bit / 32)) | (code >> rshift);
+      *(hit + start_bit / 32 + 1) =
+          (*(hit + start_bit / 32 + 1)) | (code << lshift);
+    } else if (len) {
       code = code << (32 - start_bit % 32 - len);
-      *(cur + start_bit / 32) = (*(cur + start_bit / 32)) | code;
-      start_bit += len;
-    } else {
-      // Sequence is empty (everything must be the same). Do nothing.
-    }
-  }
-
-  // Note: hit size is in bits, while miss size is in bytes.
-  out_data_hit_size = start_bit;
-  out_data_miss_size = num_miss * sizeof(int);
-
-  // write frequency table to buffer
-  int nonZeros = 0;
-  for (int i = 0; i < nql; i++) {
-    if (codec.frequency_table[i] > 0) {
-      nonZeros++;
-    }
-  }
-
-  std::size_t *const cft = new std::size_t[2 * nonZeros];
-  int off = 0;
-  for (int i = 0; i < nql; i++) {
-    if (codec.frequency_table[i] > 0) {
-      cft[2 * off] = i;
-      cft[2 * off + 1] = codec.frequency_table[i];
-      off++;
+      *(hit + start_bit / 32) = (*(hit + start_bit / 32)) | code;
     }
+    // No effect if `len == 0`.
+    start_bit += len;
   }
 
-  unsigned char const *const out_tree = (unsigned char *)cft;
-  const std::size_t out_tree_size = 2 * nonZeros * sizeof(std::size_t);
-
-  const std::size_t nbytes =
-      sizeof(unsigned int) *
-      ((out_data_hit_size + CHAR_BIT * sizeof(unsigned int) - 1) /
-       (CHAR_BIT * sizeof(unsigned int)));
-  HuffmanEncodedStream out(out_data_hit_size, nbytes, out_data_miss_size,
-                           out_tree_size);
-  std::copy(out_data_hit, out_data_hit + nbytes, out.hit.data.get());
-  std::copy(out_data_miss, out_data_miss + out_data_miss_size,
-            out.missed.data.get());
-  std::copy(out_tree, out_tree + out_tree_size, out.frequencies.data.get());
   return out;
 }
 

From c52d44e60b875b7db344b80017b8dbbe5d62ae82 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Thu, 2 Jun 2022 16:18:50 -0400
Subject: [PATCH 18/58] Separately copy hit buffer, trailing zero bytes.

---
 src/compressors.cpp | 70 ++++++++++++++++++++++++---------------------
 1 file changed, 38 insertions(+), 32 deletions(-)

diff --git a/src/compressors.cpp b/src/compressors.cpp
index 2fc2c0147f..bf5bd7c57f 100644
--- a/src/compressors.cpp
+++ b/src/compressors.cpp
@@ -46,8 +46,8 @@ void decompress_memory_huffman(unsigned char *const src,
 
   out_data_miss_size = *(size_t *)buf;
   buf += sizeof(size_t);
-  size_t total_huffman_size =
-      out_tree_size + out_data_hit_size / 8 + 4 + out_data_miss_size;
+  size_t total_huffman_size = out_tree_size + out_data_hit_size / CHAR_BIT +
+                              sizeof(unsigned int) + out_data_miss_size;
   unsigned char *huffman_encoding_p =
       (unsigned char *)malloc(total_huffman_size);
 #ifndef MGARD_ZSTD
@@ -59,8 +59,8 @@ void decompress_memory_huffman(unsigned char *const src,
 #endif
   out_tree = huffman_encoding_p;
   out_data_hit = huffman_encoding_p + out_tree_size;
-  out_data_miss =
-      huffman_encoding_p + out_tree_size + out_data_hit_size / 8 + 4;
+  out_data_miss = huffman_encoding_p + out_tree_size +
+                  out_data_hit_size / CHAR_BIT + sizeof(unsigned int);
 
   huffman_decoding(dst, dstLen, out_data_hit, out_data_hit_size, out_data_miss,
                    out_data_miss_size, out_tree, out_tree_size);
@@ -74,12 +74,8 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
   auto huff_time1 = std::chrono::high_resolution_clock::now();
 #endif
   HuffmanEncodedStream encoded = huffman_encoding(src, srcLen);
-  const std::size_t out_data_hit_size = encoded.nbits;
-  const std::size_t out_data_miss_size = encoded.missed.size;
-  const std::size_t out_tree_size = encoded.frequencies.size;
-  unsigned char const *const out_data_hit = encoded.hit.data.release();
-  unsigned char const *const out_data_miss = encoded.missed.data.release();
-  unsigned char const *const out_tree = encoded.frequencies.data.release();
+
+  assert(not(encoded.hit.size % sizeof(unsigned int)));
 
 #ifdef MGARD_TIMING
   auto huff_time2 = std::chrono::high_resolution_clock::now();
@@ -88,34 +84,44 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
   std::cout << "Huffman tree time = " << (double)duration.count() / 1000000
             << "\n";
 #endif
-  const size_t total_size =
-      out_data_hit_size / 8 + 4 + out_data_miss_size + out_tree_size;
-  unsigned char *payload = (unsigned char *)malloc(total_size);
+  static_assert(CHAR_BIT == 8, "code written assuming `CHAR_BIT == 8`");
+  static_assert(sizeof(unsigned int) == 4,
+                "code written assuming `sizeof(unsigned int) == 4`");
+  const std::size_t offset = encoded.nbits % (CHAR_BIT * sizeof(unsigned int));
+  // Number of hit buffer padding bytes.
+  const std::size_t nhpb = offset ? offset / CHAR_BIT : sizeof(unsigned int);
+
+  assert(encoded.hit.size + nhpb ==
+         encoded.nbits / CHAR_BIT + sizeof(unsigned int));
+
+  const size_t npayload =
+      encoded.hit.size + nhpb + encoded.missed.size + encoded.frequencies.size;
+  unsigned char *const payload = new unsigned char[npayload];
   unsigned char *bufp = payload;
 
-  if (out_tree_size) {
-    std::memcpy(bufp, out_tree, out_tree_size);
-    bufp += out_tree_size;
-  }
+  std::memcpy(bufp, encoded.frequencies.data.get(), encoded.frequencies.size);
+  bufp += encoded.frequencies.size;
 
-  std::memcpy(bufp, out_data_hit, out_data_hit_size / 8 + 4);
-  bufp += out_data_hit_size / 8 + 4;
+  std::memcpy(bufp, encoded.hit.data.get(), encoded.hit.size);
+  bufp += encoded.hit.size;
 
-  if (out_data_miss_size) {
-    std::memcpy(bufp, out_data_miss, out_data_miss_size);
-    bufp += out_data_miss_size;
+  {
+    const unsigned char zero{0};
+    for (std::size_t i = 0; i < nhpb; ++i) {
+      std::memcpy(bufp, &zero, 1);
+      bufp += 1;
+    }
   }
 
-  delete[] out_data_hit;
-  delete[] out_data_miss;
-  delete[] out_tree;
+  std::memcpy(bufp, encoded.missed.data.get(), encoded.missed.size);
+  bufp += encoded.missed.size;
 
 #ifndef MGARD_ZSTD
 #ifdef MGARD_TIMING
   auto z_time1 = std::chrono::high_resolution_clock::now();
 #endif
   const MemoryBuffer<unsigned char> out_data =
-      compress_memory_z(payload, total_size);
+      compress_memory_z(payload, npayload);
 #ifdef MGARD_TIMING
   auto z_time2 = std::chrono::high_resolution_clock::now();
   auto z_duration =
@@ -128,7 +134,7 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
   auto zstd_time1 = std::chrono::high_resolution_clock::now();
 #endif
   const MemoryBuffer<unsigned char> out_data =
-      compress_memory_zstd(payload, total_size);
+      compress_memory_zstd(payload, npayload);
 #ifdef MGARD_TIMING
   auto zstd_time2 = std::chrono::high_resolution_clock::now();
   auto zstd_duration = std::chrono::duration_cast<std::chrono::microseconds>(
@@ -137,20 +143,20 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
             << (double)zstd_duration.count() / 1000000 << "\n";
 #endif
 #endif
-  free(payload);
-  payload = 0;
+  delete[] payload;
+  bufp = nullptr;
 
   const std::size_t bufferLen = 3 * sizeof(size_t) + out_data.size;
   unsigned char *const buffer = new unsigned char[bufferLen];
 
   bufp = buffer;
-  *(size_t *)bufp = out_tree_size;
+  *(size_t *)bufp = encoded.frequencies.size;
   bufp += sizeof(size_t);
 
-  *(size_t *)bufp = out_data_hit_size;
+  *(size_t *)bufp = encoded.nbits;
   bufp += sizeof(size_t);
 
-  *(size_t *)bufp = out_data_miss_size;
+  *(size_t *)bufp = encoded.missed.size;
   bufp += sizeof(size_t);
 
   {

From 7a757a628e15f9b9494ce696db3888ff027a23bf Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Thu, 2 Jun 2022 13:19:22 -0400
Subject: [PATCH 19/58] Add Huffman compression regression tests.

---
 tests/include/testing_utilities.hpp | 21 +++++++++
 tests/src/test_compressors.cpp      | 67 +++++++++++++++++++++++++++++
 tests/src/test_huffman.cpp          | 23 ----------
 tests/src/testing_utilities.cpp     |  8 ++++
 4 files changed, 96 insertions(+), 23 deletions(-)

diff --git a/tests/include/testing_utilities.hpp b/tests/include/testing_utilities.hpp
index 318d521d6d..4b8343d783 100644
--- a/tests/include/testing_utilities.hpp
+++ b/tests/include/testing_utilities.hpp
@@ -61,5 +61,26 @@ mgard::TensorMeshHierarchy<M, Real>
 make_flat_hierarchy(const mgard::TensorMeshHierarchy<N, Real> &hierarchy,
                     const std::array<std::size_t, M> shape);
 
+//! Function object to generate periodic data.
+struct PeriodicGenerator {
+  //! Constructor.
+  //!
+  //!\param value Starting value.
+  //!\param period Generator period.
+  PeriodicGenerator(const std::size_t period, const long int value);
+
+  //! Generator period.
+  std::size_t period;
+
+  //! Starting value.
+  long int value;
+
+  //! Number of times `operator()` has been called.
+  std::size_t ncalls;
+
+  //! Generate next value in periodic sequence.
+  long int operator()();
+};
+
 #include "testing_utilities.tpp"
 #endif
diff --git a/tests/src/test_compressors.cpp b/tests/src/test_compressors.cpp
index 8ab071fb6f..24795a0c4e 100644
--- a/tests/src/test_compressors.cpp
+++ b/tests/src/test_compressors.cpp
@@ -9,6 +9,8 @@
 #include "compressors.hpp"
 #include "format.hpp"
 
+#include "testing_utilities.hpp"
+
 namespace {
 
 template <typename T>
@@ -28,8 +30,73 @@ void test_huffman_identity(std::default_random_engine &gen,
   delete[] decompressed;
 }
 
+void test_huffman_compression_regression(long int *const src,
+                                         const std::size_t srcLen) {
+  long int *const src_ = new long int[srcLen];
+  std::copy(src, src + srcLen, src_);
+
+  const mgard::MemoryBuffer<unsigned char> out =
+      mgard::compress_memory_huffman(src, srcLen);
+  const mgard::MemoryBuffer<unsigned char> out_ =
+      mgard::compress_memory_huffman(src_, srcLen);
+
+  REQUIRE(out.size == out_.size);
+  unsigned char const *const p = out.data.get();
+  unsigned char const *const p_ = out_.data.get();
+  REQUIRE(std::equal(p, p + out.size, p_));
+
+  delete[] src_;
+}
+
+void test_hcr_constant(const std::size_t srcLen, const long int q) {
+  long int *const src = new long int[srcLen];
+  std::fill(src, src + srcLen, q);
+  test_huffman_compression_regression(src, srcLen);
+  delete[] src;
+}
+
+void test_hcr_periodic(const std::size_t srcLen, const long int initial,
+                       const std::size_t period) {
+  long int *const src = new long int[srcLen];
+  std::generate(src, src + srcLen, PeriodicGenerator(period, initial));
+  test_huffman_compression_regression(src, srcLen);
+  delete[] src;
+}
+
+void test_hcr_random(const std::size_t srcLen, const long int a,
+                     const long int b, std::default_random_engine &gen) {
+  std::uniform_int_distribution<long int> dis(a, b);
+  long int *const src = new long int[srcLen];
+  std::generate(src, src + srcLen, [&] { return dis(gen); });
+  test_huffman_compression_regression(src, srcLen);
+  delete[] src;
+}
+
 } // namespace
 
+TEST_CASE("Huffman compression regression", "[compressors] [regression]") {
+  SECTION("constant data") {
+    test_hcr_constant(5, -3);
+    test_hcr_constant(25, 0);
+    test_hcr_constant(625, 81);
+  }
+
+  SECTION("periodic data") {
+    test_hcr_periodic(5, 0, 5);
+    test_hcr_periodic(25, -4, 6);
+    test_hcr_periodic(625, 22, 20);
+  }
+
+  SECTION("random data") {
+    std::default_random_engine gen(131051);
+    test_hcr_random(50, 0, 1, gen);
+    test_hcr_random(25, -8, 16, gen);
+    test_hcr_random(625, std::numeric_limits<int>::min(),
+                    std::numeric_limits<int>::max(), gen);
+    test_hcr_random(3125, -100, 100, gen);
+  }
+}
+
 TEST_CASE("Huffman compression", "[compressors] [!mayfail]") {
   std::default_random_engine gen(257100);
   const std::size_t n = 5000;
diff --git a/tests/src/test_huffman.cpp b/tests/src/test_huffman.cpp
index 2b31eb4c9e..d2c6cdfd3c 100644
--- a/tests/src/test_huffman.cpp
+++ b/tests/src/test_huffman.cpp
@@ -45,29 +45,6 @@ void test_encoding_regression_constant(const std::size_t N, const long int q) {
   delete[] quantized;
 }
 
-//! Function object to generate periodict data.
-struct PeriodicGenerator {
-  //! Constructor.
-  //!
-  //!\param value Starting value.
-  //!\param period Generator period.
-  PeriodicGenerator(const std::size_t period, const long int value)
-      : period(period), value(value), ncalls(0) {}
-
-  //! Generator period.
-  std::size_t period;
-
-  //! Starting value.
-  long int value;
-
-  //! Number of times `operator()` has been called.
-  std::size_t ncalls;
-
-  long int operator()() {
-    return value + static_cast<long int>(ncalls++ % period);
-  }
-};
-
 void test_encoding_regression_periodic(const std::size_t N, const long int q,
                                        const std::size_t period) {
   long int *const quantized = new long int[N];
diff --git a/tests/src/testing_utilities.cpp b/tests/src/testing_utilities.cpp
index 822c6c87e3..d84210d5fd 100644
--- a/tests/src/testing_utilities.cpp
+++ b/tests/src/testing_utilities.cpp
@@ -20,3 +20,11 @@ std::ostream &operator<<(std::ostream &os, const TrialTracker &tracker) {
   return os << tracker.nsuccesses << " successes and " << tracker.nfailures
             << " failures out of " << tracker.ntrials << " trials";
 }
+
+PeriodicGenerator::PeriodicGenerator(const std::size_t period,
+                                     const long int value)
+    : period(period), value(value), ncalls(0) {}
+
+long int PeriodicGenerator::operator()() {
+  return value + static_cast<long int>(ncalls++ % period);
+}

From 58c13e3c9fc693a7244df73164cdbdb47dda2d15 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Thu, 2 Jun 2022 16:58:06 -0400
Subject: [PATCH 20/58] Reimplement Huffman compression with constituents.

---
 include/compressors.hpp        |   8 +++
 src/compressors.cpp            | 104 +++++++++++++++++++++++++++++++--
 tests/src/test_compressors.cpp |   2 +-
 3 files changed, 109 insertions(+), 5 deletions(-)

diff --git a/include/compressors.hpp b/include/compressors.hpp
index 17cd7f7ce3..8e0952022a 100644
--- a/include/compressors.hpp
+++ b/include/compressors.hpp
@@ -23,6 +23,14 @@ namespace mgard {
 MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
                                                     const std::size_t srcLen);
 
+//! Compress an array using a Huffman tree.
+//!
+//!\param[in] src Array to be compressed.
+//!\param[in] srcLen Size of array (number of elements) to be compressed.
+MemoryBuffer<unsigned char>
+compress_memory_huffman_rewritten(long int *const src,
+                                  const std::size_t srcLen);
+
 //! Decompress an array compressed with `compress_memory_huffman`.
 //!
 //!\param[in] src Compressed array.
diff --git a/src/compressors.cpp b/src/compressors.cpp
index bf5bd7c57f..84ebe9caf7 100644
--- a/src/compressors.cpp
+++ b/src/compressors.cpp
@@ -7,6 +7,7 @@
 #include <algorithm>
 #include <numeric>
 #include <stdexcept>
+#include <utility>
 #include <vector>
 
 #include <zlib.h>
@@ -68,6 +69,27 @@ void decompress_memory_huffman(unsigned char *const src,
   free(huffman_encoding_p);
 }
 
+namespace {
+
+using Constituent = std::pair<unsigned char const *, std::size_t>;
+
+MemoryBuffer<unsigned char>
+gather_constituents(const std::vector<Constituent> &constituents) {
+  std::size_t nbuffer = 0;
+  for (const Constituent &constituent : constituents) {
+    nbuffer += constituent.second;
+  }
+  MemoryBuffer<unsigned char> buffer(nbuffer);
+  unsigned char *p = buffer.data.get();
+  for (const Constituent &constituent : constituents) {
+    std::memcpy(p, constituent.first, constituent.second);
+    p += constituent.second;
+  }
+  return buffer;
+}
+
+} // namespace
+
 MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
                                                     const std::size_t srcLen) {
 #ifdef MGARD_TIMING
@@ -89,13 +111,13 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
                 "code written assuming `sizeof(unsigned int) == 4`");
   const std::size_t offset = encoded.nbits % (CHAR_BIT * sizeof(unsigned int));
   // Number of hit buffer padding bytes.
-  const std::size_t nhpb = offset ? offset / CHAR_BIT : sizeof(unsigned int);
+  const std::size_t nhbpb = offset ? offset / CHAR_BIT : sizeof(unsigned int);
 
-  assert(encoded.hit.size + nhpb ==
+  assert(encoded.hit.size + nhbpb ==
          encoded.nbits / CHAR_BIT + sizeof(unsigned int));
 
   const size_t npayload =
-      encoded.hit.size + nhpb + encoded.missed.size + encoded.frequencies.size;
+      encoded.hit.size + nhbpb + encoded.missed.size + encoded.frequencies.size;
   unsigned char *const payload = new unsigned char[npayload];
   unsigned char *bufp = payload;
 
@@ -107,7 +129,7 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
 
   {
     const unsigned char zero{0};
-    for (std::size_t i = 0; i < nhpb; ++i) {
+    for (std::size_t i = 0; i < nhbpb; ++i) {
       std::memcpy(bufp, &zero, 1);
       bufp += 1;
     }
@@ -166,6 +188,80 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
   return MemoryBuffer<unsigned char>(buffer, bufferLen);
 }
 
+MemoryBuffer<unsigned char>
+compress_memory_huffman_rewritten(long int *const src,
+                                  const std::size_t srcLen) {
+#ifdef MGARD_TIMING
+  auto huff_time1 = std::chrono::high_resolution_clock::now();
+#endif
+  HuffmanEncodedStream encoded = huffman_encoding(src, srcLen);
+
+  assert(not(encoded.hit.size % sizeof(unsigned int)));
+
+#ifdef MGARD_TIMING
+  auto huff_time2 = std::chrono::high_resolution_clock::now();
+  auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
+      huff_time2 - huff_time1);
+  std::cout << "Huffman tree time = " << (double)duration.count() / 1000000
+            << "\n";
+#endif
+  static_assert(CHAR_BIT == 8, "code written assuming `CHAR_BIT == 8`");
+  static_assert(sizeof(unsigned int) == 4,
+                "code written assuming `sizeof(unsigned int) == 4`");
+  const std::size_t offset = encoded.nbits % (CHAR_BIT * sizeof(unsigned int));
+  // Number of hit buffer padding bytes.
+  const std::size_t nhbpb = offset ? offset / CHAR_BIT : sizeof(unsigned int);
+
+  assert(encoded.hit.size + nhbpb ==
+         encoded.nbits / CHAR_BIT + sizeof(unsigned int));
+
+  unsigned char const *hbpb = new unsigned char[nhbpb]();
+  MemoryBuffer<unsigned char> payload = gather_constituents({
+      {encoded.frequencies.data.get(), encoded.frequencies.size},
+      {encoded.hit.data.get(), encoded.hit.size},
+      {hbpb, nhbpb},
+      {encoded.missed.data.get(), encoded.missed.size},
+  });
+  delete[] hbpb;
+
+#ifndef MGARD_ZSTD
+#ifdef MGARD_TIMING
+  auto z_time1 = std::chrono::high_resolution_clock::now();
+#endif
+  const MemoryBuffer<unsigned char> out_data =
+      compress_memory_z(payload.data.get(), payload.size);
+#ifdef MGARD_TIMING
+  auto z_time2 = std::chrono::high_resolution_clock::now();
+  auto z_duration =
+      std::chrono::duration_cast<std::chrono::microseconds>(z_time2 - z_time1);
+  std::cout << "ZLIB compression time = "
+            << (double)z_duration.count() / 1000000 << "\n";
+#endif
+#else
+#ifdef MGARD_TIMING
+  auto zstd_time1 = std::chrono::high_resolution_clock::now();
+#endif
+  const MemoryBuffer<unsigned char> out_data =
+      compress_memory_zstd(payload.data.get(), payload.size);
+#ifdef MGARD_TIMING
+  auto zstd_time2 = std::chrono::high_resolution_clock::now();
+  auto zstd_duration = std::chrono::duration_cast<std::chrono::microseconds>(
+      zstd_time2 - zstd_time1);
+  std::cout << "ZSTD compression time = "
+            << (double)zstd_duration.count() / 1000000 << "\n";
+#endif
+#endif
+
+  return gather_constituents(
+      {{reinterpret_cast<unsigned char const *>(&encoded.frequencies.size),
+        sizeof(encoded.frequencies.size)},
+       {reinterpret_cast<unsigned char const *>(&encoded.nbits),
+        sizeof(encoded.nbits)},
+       {reinterpret_cast<unsigned char const *>(&encoded.missed.size),
+        sizeof(encoded.missed.size)},
+       {out_data.data.get(), out_data.size}});
+}
+
 #ifdef MGARD_ZSTD
 /*! CHECK
  * Check that the condition holds. If it doesn't print a message and die.
diff --git a/tests/src/test_compressors.cpp b/tests/src/test_compressors.cpp
index 24795a0c4e..74da33a86b 100644
--- a/tests/src/test_compressors.cpp
+++ b/tests/src/test_compressors.cpp
@@ -38,7 +38,7 @@ void test_huffman_compression_regression(long int *const src,
   const mgard::MemoryBuffer<unsigned char> out =
       mgard::compress_memory_huffman(src, srcLen);
   const mgard::MemoryBuffer<unsigned char> out_ =
-      mgard::compress_memory_huffman(src_, srcLen);
+      mgard::compress_memory_huffman_rewritten(src_, srcLen);
 
   REQUIRE(out.size == out_.size);
   unsigned char const *const p = out.data.get();

From 69d0e72c4817446f0c6e6bd1dc827571c2b10c54 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Fri, 3 Jun 2022 10:50:40 -0400
Subject: [PATCH 21/58] Remove timing statements.

---
 src/compressors.cpp | 65 ---------------------------------------------
 1 file changed, 65 deletions(-)

diff --git a/src/compressors.cpp b/src/compressors.cpp
index 84ebe9caf7..7e8064b8f2 100644
--- a/src/compressors.cpp
+++ b/src/compressors.cpp
@@ -16,11 +16,6 @@
 #include "huffman.hpp"
 #include "utilities.hpp"
 
-#ifdef MGARD_TIMING
-#include <chrono>
-#include <iostream>
-#endif
-
 #ifdef MGARD_ZSTD
 #include <zstd.h>
 #endif
@@ -92,20 +87,10 @@ gather_constituents(const std::vector<Constituent> &constituents) {
 
 MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
                                                     const std::size_t srcLen) {
-#ifdef MGARD_TIMING
-  auto huff_time1 = std::chrono::high_resolution_clock::now();
-#endif
   HuffmanEncodedStream encoded = huffman_encoding(src, srcLen);
 
   assert(not(encoded.hit.size % sizeof(unsigned int)));
 
-#ifdef MGARD_TIMING
-  auto huff_time2 = std::chrono::high_resolution_clock::now();
-  auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
-      huff_time2 - huff_time1);
-  std::cout << "Huffman tree time = " << (double)duration.count() / 1000000
-            << "\n";
-#endif
   static_assert(CHAR_BIT == 8, "code written assuming `CHAR_BIT == 8`");
   static_assert(sizeof(unsigned int) == 4,
                 "code written assuming `sizeof(unsigned int) == 4`");
@@ -139,31 +124,11 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
   bufp += encoded.missed.size;
 
 #ifndef MGARD_ZSTD
-#ifdef MGARD_TIMING
-  auto z_time1 = std::chrono::high_resolution_clock::now();
-#endif
   const MemoryBuffer<unsigned char> out_data =
       compress_memory_z(payload, npayload);
-#ifdef MGARD_TIMING
-  auto z_time2 = std::chrono::high_resolution_clock::now();
-  auto z_duration =
-      std::chrono::duration_cast<std::chrono::microseconds>(z_time2 - z_time1);
-  std::cout << "ZLIB compression time = "
-            << (double)z_duration.count() / 1000000 << "\n";
-#endif
 #else
-#ifdef MGARD_TIMING
-  auto zstd_time1 = std::chrono::high_resolution_clock::now();
-#endif
   const MemoryBuffer<unsigned char> out_data =
       compress_memory_zstd(payload, npayload);
-#ifdef MGARD_TIMING
-  auto zstd_time2 = std::chrono::high_resolution_clock::now();
-  auto zstd_duration = std::chrono::duration_cast<std::chrono::microseconds>(
-      zstd_time2 - zstd_time1);
-  std::cout << "ZSTD compression time = "
-            << (double)zstd_duration.count() / 1000000 << "\n";
-#endif
 #endif
   delete[] payload;
   bufp = nullptr;
@@ -191,20 +156,10 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
 MemoryBuffer<unsigned char>
 compress_memory_huffman_rewritten(long int *const src,
                                   const std::size_t srcLen) {
-#ifdef MGARD_TIMING
-  auto huff_time1 = std::chrono::high_resolution_clock::now();
-#endif
   HuffmanEncodedStream encoded = huffman_encoding(src, srcLen);
 
   assert(not(encoded.hit.size % sizeof(unsigned int)));
 
-#ifdef MGARD_TIMING
-  auto huff_time2 = std::chrono::high_resolution_clock::now();
-  auto duration = std::chrono::duration_cast<std::chrono::microseconds>(
-      huff_time2 - huff_time1);
-  std::cout << "Huffman tree time = " << (double)duration.count() / 1000000
-            << "\n";
-#endif
   static_assert(CHAR_BIT == 8, "code written assuming `CHAR_BIT == 8`");
   static_assert(sizeof(unsigned int) == 4,
                 "code written assuming `sizeof(unsigned int) == 4`");
@@ -225,31 +180,11 @@ compress_memory_huffman_rewritten(long int *const src,
   delete[] hbpb;
 
 #ifndef MGARD_ZSTD
-#ifdef MGARD_TIMING
-  auto z_time1 = std::chrono::high_resolution_clock::now();
-#endif
   const MemoryBuffer<unsigned char> out_data =
       compress_memory_z(payload.data.get(), payload.size);
-#ifdef MGARD_TIMING
-  auto z_time2 = std::chrono::high_resolution_clock::now();
-  auto z_duration =
-      std::chrono::duration_cast<std::chrono::microseconds>(z_time2 - z_time1);
-  std::cout << "ZLIB compression time = "
-            << (double)z_duration.count() / 1000000 << "\n";
-#endif
 #else
-#ifdef MGARD_TIMING
-  auto zstd_time1 = std::chrono::high_resolution_clock::now();
-#endif
   const MemoryBuffer<unsigned char> out_data =
       compress_memory_zstd(payload.data.get(), payload.size);
-#ifdef MGARD_TIMING
-  auto zstd_time2 = std::chrono::high_resolution_clock::now();
-  auto zstd_duration = std::chrono::duration_cast<std::chrono::microseconds>(
-      zstd_time2 - zstd_time1);
-  std::cout << "ZSTD compression time = "
-            << (double)zstd_duration.count() / 1000000 << "\n";
-#endif
 #endif
 
   return gather_constituents(

From f049be1af7042faa4df8abf67c3bae74fb906ec8 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Fri, 3 Jun 2022 12:03:30 -0400
Subject: [PATCH 22/58] Return struct from original Huffman decoder.

---
 include/huffman.hpp        | 15 +------
 src/compressors.cpp        | 91 ++++++++++++++++++++++----------------
 src/huffman.cpp            | 36 ++++++++-------
 tests/src/test_huffman.cpp |  2 +-
 4 files changed, 76 insertions(+), 68 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index 58a47d02b5..3749cf288d 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -56,19 +56,8 @@ huffman_encoding_rewritten(long int const *const quantized_data,
 
 //! Decode a stream encoded using a Huffman code.
 //!
-//!\param[out] quantized_data Output buffer (quantized coefficients).
-//!\param[in] quantized_data_size Size *in bytes* of output buffer.
-//!\param[in] out_data_hit Compressed buffer.
-//!\param[in] out_data_hit_size Size *in bits* of compressed buffer.
-//!\param[in] out_data_miss 'Missed' buffer (input symbols not assigned codes).
-//!\param[in] out_data_miss_size Size *in bytes* of 'missed' buffer.
-//!\param[in] out_tree Frequency table for input buffer.
-//!\param[in] out_tree_size Size *in bytes* of the frequency table.
-void huffman_decoding(
-    long int *const quantized_data, const std::size_t quantized_data_size,
-    unsigned char const *const out_data_hit, const size_t out_data_hit_size,
-    unsigned char const *const out_data_miss, const size_t out_data_miss_size,
-    unsigned char const *const out_tree, const size_t out_tree_size);
+//!\param[in] encoded Input buffer (Huffman-encoded stream).
+MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded);
 
 //! Codeword (in progress) associated to a node in a Huffman code creation tree.
 struct HuffmanCodeword {
diff --git a/src/compressors.cpp b/src/compressors.cpp
index 7e8064b8f2..151712e298 100644
--- a/src/compressors.cpp
+++ b/src/compressors.cpp
@@ -22,46 +22,65 @@
 
 namespace mgard {
 
+namespace {
+
+std::size_t hit_buffer_size(const std::size_t nbits) {
+  return nbits / CHAR_BIT + sizeof(unsigned int);
+}
+
+} // namespace
+
 void decompress_memory_huffman(unsigned char *const src,
                                const std::size_t srcLen, long int *const dst,
                                const std::size_t dstLen) {
-  unsigned char *out_data_hit = 0;
-  size_t out_data_hit_size;
-  unsigned char *out_data_miss = 0;
-  size_t out_data_miss_size;
-  unsigned char *out_tree = 0;
-  size_t out_tree_size;
-
-  unsigned char *buf = src;
-
-  out_tree_size = *(size_t *)buf;
-  buf += sizeof(size_t);
-
-  out_data_hit_size = *(size_t *)buf;
-  buf += sizeof(size_t);
-
-  out_data_miss_size = *(size_t *)buf;
-  buf += sizeof(size_t);
-  size_t total_huffman_size = out_tree_size + out_data_hit_size / CHAR_BIT +
-                              sizeof(unsigned int) + out_data_miss_size;
-  unsigned char *huffman_encoding_p =
-      (unsigned char *)malloc(total_huffman_size);
+  std::size_t const *const sizes = reinterpret_cast<std::size_t const *>(src);
+  const std::size_t nfrequencies = sizes[0];
+  const std::size_t nbits = sizes[1];
+  const std::size_t nmissed = sizes[2];
+  const std::size_t nhit = hit_buffer_size(nbits);
+
+  MemoryBuffer<unsigned char> buffer(nfrequencies + nhit + nmissed);
+  {
+    const std::size_t offset = 3 * sizeof(std::size_t);
+    unsigned char const *const src_ = src + offset;
+    const std::size_t srcLen_ = srcLen - offset;
+    unsigned char *const dst_ = buffer.data.get();
+    const std::size_t dstLen_ = buffer.size;
+
 #ifndef MGARD_ZSTD
-  decompress_memory_z(buf, srcLen - 3 * sizeof(size_t), huffman_encoding_p,
-                      total_huffman_size);
+    decompress_memory_z(src_, srcLen_, dst_, dstLen_);
 #else
-  decompress_memory_zstd(buf, srcLen - 3 * sizeof(size_t), huffman_encoding_p,
-                         total_huffman_size);
+    decompress_memory_zstd(src_, srcLen_, dst_, dstLen_);
 #endif
-  out_tree = huffman_encoding_p;
-  out_data_hit = huffman_encoding_p + out_tree_size;
-  out_data_miss = huffman_encoding_p + out_tree_size +
-                  out_data_hit_size / CHAR_BIT + sizeof(unsigned int);
+  }
 
-  huffman_decoding(dst, dstLen, out_data_hit, out_data_hit_size, out_data_miss,
-                   out_data_miss_size, out_tree, out_tree_size);
+  HuffmanEncodedStream encoded(nbits, nhit, nmissed, nfrequencies);
+  {
+    unsigned char const *begin;
+    unsigned char const *end;
+
+    begin = buffer.data.get();
+    end = begin + nfrequencies;
+    std::copy(begin, end, encoded.frequencies.data.get());
+
+    begin = end;
+    end = begin + nhit;
+    std::copy(begin, end, encoded.hit.data.get());
+
+    begin = end;
+    end = begin + nmissed;
+    std::copy(begin, end, encoded.missed.data.get());
+  }
 
-  free(huffman_encoding_p);
+  const MemoryBuffer<long int> decoded = huffman_decoding(encoded);
+  {
+    long int const *const p = decoded.data.get();
+    if (decoded.size * sizeof(*p) != dstLen) {
+      throw std::runtime_error(
+          "mismatch between expected and obtained decompressed buffer sizes");
+    }
+    std::copy(p, p + decoded.size, dst);
+  }
 }
 
 namespace {
@@ -98,8 +117,7 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
   // Number of hit buffer padding bytes.
   const std::size_t nhbpb = offset ? offset / CHAR_BIT : sizeof(unsigned int);
 
-  assert(encoded.hit.size + nhbpb ==
-         encoded.nbits / CHAR_BIT + sizeof(unsigned int));
+  assert(encoded.hit.size + nhbpb == hit_buffer_size(encoded.nbits));
 
   const size_t npayload =
       encoded.hit.size + nhbpb + encoded.missed.size + encoded.frequencies.size;
@@ -156,7 +174,7 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
 MemoryBuffer<unsigned char>
 compress_memory_huffman_rewritten(long int *const src,
                                   const std::size_t srcLen) {
-  HuffmanEncodedStream encoded = huffman_encoding(src, srcLen);
+  const HuffmanEncodedStream encoded = huffman_encoding(src, srcLen);
 
   assert(not(encoded.hit.size % sizeof(unsigned int)));
 
@@ -167,8 +185,7 @@ compress_memory_huffman_rewritten(long int *const src,
   // Number of hit buffer padding bytes.
   const std::size_t nhbpb = offset ? offset / CHAR_BIT : sizeof(unsigned int);
 
-  assert(encoded.hit.size + nhbpb ==
-         encoded.nbits / CHAR_BIT + sizeof(unsigned int));
+  assert(encoded.hit.size + nhbpb == hit_buffer_size(encoded.nbits));
 
   unsigned char const *hbpb = new unsigned char[nhbpb]();
   MemoryBuffer<unsigned char> payload = gather_constituents({
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 937c5d9b6a..b53e072237 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -422,23 +422,28 @@ huffman_encoding_rewritten(long int const *const quantized_data,
   return out;
 }
 
-void huffman_decoding(long int *const quantized_data,
-                      const std::size_t quantized_data_size,
-                      unsigned char const *const out_data_hit,
-                      const std::size_t out_data_hit_size,
-                      unsigned char const *const out_data_miss,
-                      const std::size_t out_data_miss_size,
-                      unsigned char const *const out_tree,
-                      const std::size_t out_tree_size) {
+MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded) {
+  const std::size_t out_data_miss_size = encoded.missed.size;
+  const std::size_t out_tree_size = encoded.frequencies.size;
+  unsigned char const *const out_data_hit = encoded.hit.data.get();
+  unsigned char const *const out_data_miss = encoded.missed.data.get();
+  unsigned char const *const out_tree = encoded.frequencies.data.get();
+
   std::size_t const *const cft = (std::size_t const *)out_tree;
-  const int nonZeros = out_tree_size / (2 * sizeof(std::size_t));
+  const std::size_t nnz = out_tree_size / (2 * sizeof(std::size_t));
   // The elements of the array are value-initialized (here, zero-initialized).
   std::size_t *const ft = new std::size_t[nql]();
 
-  for (int j = 0; j < nonZeros; j++) {
-    ft[cft[2 * j]] = cft[2 * j + 1];
+  std::size_t nquantized = 0;
+  for (std::size_t j = 0; j < nnz; ++j) {
+    const std::size_t frequency = cft[2 * j + 1];
+    nquantized += frequency;
+    ft[cft[2 * j]] = frequency;
   }
 
+  MemoryBuffer<long int> out(nquantized);
+  long int *const quantized_data = out.data.get();
+
   my_priority_queue<htree_node> *const phtree = build_tree(ft);
   delete[] ft;
 
@@ -460,7 +465,7 @@ void huffman_decoding(long int *const quantized_data,
   long int *q = quantized_data;
   std::size_t i = 0;
   std::size_t num_missed = 0;
-  while (q < (quantized_data + (quantized_data_size / sizeof(*q)))) {
+  while (q < quantized_data + nquantized) {
     htree_node const *root = phtree->top();
     assert(root);
 
@@ -504,13 +509,10 @@ void huffman_decoding(long int *const quantized_data,
   assert(start_bit == out_data_hit_size);
   assert(sizeof(int) * num_missed == out_data_miss_size);
 
-  // Avoid unused argument warning. If NDEBUG is defined, then the assert
-  // becomes empty and out_data_hit_size is unused. Tell the compiler that
-  // is OK and expected.
-  (void)out_data_hit_size;
-
   delete[] miss_buf;
   free_tree(phtree);
+
+  return out;
 }
 
 } // namespace mgard
diff --git a/tests/src/test_huffman.cpp b/tests/src/test_huffman.cpp
index d2c6cdfd3c..b271581ceb 100644
--- a/tests/src/test_huffman.cpp
+++ b/tests/src/test_huffman.cpp
@@ -65,7 +65,7 @@ void test_encoding_regression_random(const std::size_t N, const long int a,
 
 } // namespace
 
-TEST_CASE("encoding regression", "[huffman]") {
+TEST_CASE("encoding regression", "[huffman] [regression]") {
   SECTION("constant data") {
     test_encoding_regression_constant(10, 0);
     test_encoding_regression_constant(100, 732);

From 81cc00bcb22f637fa13b8996dce0aec4a12fbecf Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Fri, 3 Jun 2022 12:32:54 -0400
Subject: [PATCH 23/58] Add Huffman decoding regression tests.

---
 tests/src/test_huffman.cpp | 69 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/tests/src/test_huffman.cpp b/tests/src/test_huffman.cpp
index b271581ceb..9d1829b290 100644
--- a/tests/src/test_huffman.cpp
+++ b/tests/src/test_huffman.cpp
@@ -38,6 +38,27 @@ void test_encoding_regression(long int *const quantized, const std::size_t N) {
   delete[] quantized_new;
 }
 
+void test_decoding_regression(long int *const quantized, const std::size_t N) {
+  long int *const quantized_new = new long int[N];
+  std::copy(quantized, quantized + N, quantized_new);
+
+  const mgard::HuffmanEncodedStream encoded =
+      mgard::huffman_encoding(quantized, N);
+  const mgard::HuffmanEncodedStream encoded_new =
+      mgard::huffman_encoding(quantized_new, N);
+
+  delete[] quantized_new;
+
+  const mgard::MemoryBuffer<long int> out = mgard::huffman_decoding(encoded);
+  const mgard::MemoryBuffer<long int> out_new =
+      mgard::huffman_decoding(encoded);
+
+  REQUIRE(out.size == out_new.size);
+  long int const *const p = out.data.get();
+  long int const *const p_new = out_new.data.get();
+  REQUIRE(std::equal(p, p + out.size, p_new));
+}
+
 void test_encoding_regression_constant(const std::size_t N, const long int q) {
   long int *const quantized = new long int[N];
   std::fill(quantized, quantized + N, q);
@@ -63,6 +84,31 @@ void test_encoding_regression_random(const std::size_t N, const long int a,
   delete[] quantized;
 }
 
+void test_decoding_regression_constant(const std::size_t N, const long int q) {
+  long int *const quantized = new long int[N];
+  std::fill(quantized, quantized + N, q);
+  test_decoding_regression(quantized, N);
+  delete[] quantized;
+}
+
+void test_decoding_regression_periodic(const std::size_t N, const long int q,
+                                       const std::size_t period) {
+  long int *const quantized = new long int[N];
+  std::generate(quantized, quantized + N, PeriodicGenerator(period, q));
+  test_decoding_regression(quantized, N);
+  delete[] quantized;
+}
+
+void test_decoding_regression_random(const std::size_t N, const long int a,
+                                     const long int b,
+                                     std::default_random_engine &gen) {
+  std::uniform_int_distribution<long int> dis(a, b);
+  long int *const quantized = new long int[N];
+  std::generate(quantized, quantized + N, [&] { return dis(gen); });
+  test_decoding_regression(quantized, N);
+  delete[] quantized;
+}
+
 } // namespace
 
 TEST_CASE("encoding regression", "[huffman] [regression]") {
@@ -87,3 +133,26 @@ TEST_CASE("encoding regression", "[huffman] [regression]") {
     test_encoding_regression_random(10000, -100, 100, gen);
   }
 }
+
+TEST_CASE("decoding regression", "[huffman] [regression]") {
+  SECTION("constant data") {
+    test_decoding_regression_constant(10, -11);
+    test_decoding_regression_constant(100, 79);
+    test_decoding_regression_constant(1000, -7296);
+  }
+
+  SECTION("periodic data") {
+    test_decoding_regression_periodic(10, 12, 4);
+    test_decoding_regression_periodic(100, -71, 9);
+    test_decoding_regression_periodic(1000, 3280, 23);
+  }
+
+  SECTION("random data") {
+    std::default_random_engine gen(363022);
+    test_decoding_regression_random(10, 0, 1, gen);
+    test_decoding_regression_random(100, -15, -5, gen);
+    test_decoding_regression_random(1000, std::numeric_limits<int>::min(),
+                                    std::numeric_limits<int>::max(), gen);
+    test_decoding_regression_random(10000, -100, 100, gen);
+  }
+}

From 0da8b2c0e1f7ca3b89ffc106205fa1fbc6f78935 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Mon, 6 Jun 2022 11:19:01 -0400
Subject: [PATCH 24/58] Reimplement Huffman decoding with `Bits`.

---
 include/huffman.hpp        |   6 ++
 src/huffman.cpp            | 114 ++++++++++++++++++++++++++++++++-----
 tests/src/test_huffman.cpp |   2 +-
 3 files changed, 107 insertions(+), 15 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index 3749cf288d..07792cdab6 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -59,6 +59,12 @@ huffman_encoding_rewritten(long int const *const quantized_data,
 //!\param[in] encoded Input buffer (Huffman-encoded stream).
 MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded);
 
+//! Decode a stream encoded using a Huffman code.
+//!
+//!\param[in] encoded Input buffer (Huffman-encoded stream).
+MemoryBuffer<long int>
+huffman_decoding_rewritten(const HuffmanEncodedStream &encoded);
+
 //! Codeword (in progress) associated to a node in a Huffman code creation tree.
 struct HuffmanCodeword {
   //! Bytes containing the bits of the codeword.
diff --git a/src/huffman.cpp b/src/huffman.cpp
index b53e072237..11725d219b 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -240,6 +240,28 @@ HuffmanCodec<N> build_huffman_codec(long int *const quantized_data,
   return codec;
 }
 
+namespace {
+
+void endianness_shuffle(unsigned char *const buffer, const std::size_t nbytes) {
+  if (nbytes % sizeof(unsigned int)) {
+    throw std::runtime_error(
+        "buffer size not a multiple of `sizeof(unsigned int)`");
+  }
+  const unsigned int one{1};
+  const bool little_endian = *reinterpret_cast<unsigned char const *>(&one);
+  if (little_endian) {
+    for (std::size_t i = 0; i < nbytes; i += sizeof(unsigned int)) {
+      unsigned char *a = buffer + i;
+      unsigned char *b = a + sizeof(unsigned int) - 1;
+      for (std::size_t j = 0; j < sizeof(unsigned int) / 2; ++j) {
+        std::swap(*a++, *b--);
+      }
+    }
+  }
+}
+
+} // namespace
+
 HuffmanEncodedStream huffman_encoding(long int *const quantized_data,
                                       const std::size_t n) {
   const HuffmanCodec<nql> codec = build_huffman_codec<nql>(quantized_data, n);
@@ -405,20 +427,7 @@ huffman_encoding_rewritten(long int const *const quantized_data,
     }
   }
 
-  {
-    const unsigned int one{1};
-    const bool little_endian = *reinterpret_cast<unsigned char const *>(&one);
-    if (little_endian) {
-      for (std::size_t i = 0; i < nbytes; i += sizeof(unsigned int)) {
-        unsigned char *a = buffer + i;
-        unsigned char *b = a + sizeof(unsigned int) - 1;
-        for (std::size_t j = 0; j < sizeof(unsigned int) / 2; ++j) {
-          std::swap(*a++, *b--);
-        }
-      }
-    }
-  }
-
+  endianness_shuffle(buffer, nbytes);
   return out;
 }
 
@@ -515,4 +524,81 @@ MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded) {
   return out;
 }
 
+MemoryBuffer<long int>
+huffman_decoding_rewritten(const HuffmanEncodedStream &encoded) {
+  std::size_t const *const cft =
+      reinterpret_cast<std::size_t const *>(encoded.frequencies.data.get());
+  const std::size_t nnz = encoded.frequencies.size / (2 * sizeof(std::size_t));
+  // The elements of the array are value-initialized (here, zero-initialized).
+  std::size_t *const ft = new std::size_t[nql]();
+
+  std::size_t nquantized = 0;
+  for (std::size_t j = 0; j < nnz; ++j) {
+    const std::size_t frequency = cft[2 * j + 1];
+    nquantized += frequency;
+    ft[cft[2 * j]] = frequency;
+  }
+
+  MemoryBuffer<long int> out(nquantized);
+  long int *q = out.data.get();
+
+  my_priority_queue<htree_node> *const phtree = build_tree(ft);
+  delete[] ft;
+
+  // The encoded.missed.data.get() may not be aligned. Therefore, the code
+  // here makes a new buffer.
+  assert(not(encoded.missed.size % sizeof(int)));
+  int *const missed = new int[encoded.missed.size / sizeof(int)];
+  std::memcpy(missed, encoded.missed.data.get(), encoded.missed.size);
+
+  int const *p_missed = missed;
+
+  const std::size_t nbytes = encoded.hit.size;
+  unsigned char *const buffer = new unsigned char[nbytes];
+  {
+    unsigned char const *const p = encoded.hit.data.get();
+    std::copy(p, p + nbytes, buffer);
+  }
+  endianness_shuffle(buffer, nbytes);
+  const Bits bits(buffer, buffer + encoded.nbits / CHAR_BIT,
+                  encoded.nbits % CHAR_BIT);
+
+  std::size_t nbits = 0;
+  std::size_t nmissed = 0;
+  htree_node const *const root = phtree->top();
+  assert(root);
+  Bits::iterator p_ = bits.begin();
+  for (std::size_t i = 0; i < nquantized; ++i) {
+    htree_node const *node = root;
+
+    std::size_t len = 0;
+    while (node->left) {
+      node = *p_++ ? node->right : node->left;
+      ++len;
+    }
+
+    if (node->q) {
+      *q = node->q - nql / 2;
+    } else {
+      *q = *p_missed - nql / 2;
+
+      ++p_missed;
+      ++nmissed;
+    }
+
+    ++q;
+    nbits += len;
+  }
+
+  assert(nbits == encoded.nbits);
+  assert(sizeof(int) * nmissed == encoded.missed.size);
+
+  delete[] missed;
+  free_tree(phtree);
+
+  delete[] buffer;
+
+  return out;
+}
+
 } // namespace mgard
diff --git a/tests/src/test_huffman.cpp b/tests/src/test_huffman.cpp
index 9d1829b290..5fed4bedec 100644
--- a/tests/src/test_huffman.cpp
+++ b/tests/src/test_huffman.cpp
@@ -51,7 +51,7 @@ void test_decoding_regression(long int *const quantized, const std::size_t N) {
 
   const mgard::MemoryBuffer<long int> out = mgard::huffman_decoding(encoded);
   const mgard::MemoryBuffer<long int> out_new =
-      mgard::huffman_decoding(encoded);
+      mgard::huffman_decoding_rewritten(encoded);
 
   REQUIRE(out.size == out_new.size);
   long int const *const p = out.data.get();

From 55dc22dd4b3858f770a171c04e0c4fbb43318ae7 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Mon, 6 Jun 2022 19:52:48 -0400
Subject: [PATCH 25/58] Use `HuffmanCode` in decoding reimplementation.

---
 include/huffman.hpp |  76 +++++++++++++++++++++--
 include/huffman.tpp | 147 +++++++++++++++++++++++++++-----------------
 src/huffman.cpp     |  98 +++++++++++++++--------------
 3 files changed, 213 insertions(+), 108 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index 07792cdab6..d98f5b27e7 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -6,6 +6,8 @@
 #include <cstddef>
 
 #include <memory>
+#include <queue>
+#include <type_traits>
 #include <vector>
 
 #include "utilities.hpp"
@@ -118,8 +120,17 @@ struct CodeCreationTreeNode {
 };
 
 //! Huffman code generated from/for an input stream.
+//!
+//!\note The construction of this class is a little convoluted.
 template <typename Symbol> class HuffmanCode {
 public:
+  static_assert(std::is_integral<Symbol>::value and
+                    std::is_signed<Symbol>::value,
+                "symbol type must be signed and integral");
+
+  //! Shared pointer to node in Huffman code creation tree.
+  using Node = std::shared_ptr<CodeCreationTreeNode>;
+
   //! Constructor.
   //!
   //!\param ncodewords Number of symbols that will be assigned codewords.
@@ -128,6 +139,13 @@ template <typename Symbol> class HuffmanCode {
   HuffmanCode(const std::size_t ncodewords, Symbol const *const begin,
               Symbol const *const end);
 
+  //! Constructor.
+  //!
+  //!\param ncodewords Number of symbols that will be assigned codewords.
+  //!\param pairs Index–frequency pairs for frequency table.
+  HuffmanCode(const std::size_t ncodewords,
+              const std::vector<std::pair<std::size_t, std::size_t>> &pairs);
+
   //! Number of symbols that will be assigned codewords.
   std::size_t ncodewords;
 
@@ -137,7 +155,8 @@ template <typename Symbol> class HuffmanCode {
   //! Codewords associated to the symbols.
   std::vector<HuffmanCodeword> codewords;
 
-  //! Report the number of out-of-range symbols encountered in the stream.
+  //! Report the number of out-of-range symbols encountered in the stream or
+  //! given in the frequency table pairs.
   std::size_t nmissed() const;
 
   //! Check whether a symbol is eligible for a codeword.
@@ -147,11 +166,58 @@ template <typename Symbol> class HuffmanCode {
   std::size_t index(const Symbol symbol) const;
 
 private:
-  //! Smallest symbol (inclusive) to receive a codeword.
-  Symbol min_symbol;
+  //! Function object used to compare code creation tree nodes.
+  struct HeldCountGreater {
+    bool operator()(const Node &a, const Node &b) const;
+  };
+
+public:
+  //! Huffman code creation tree.
+  std::priority_queue<Node, std::vector<Node>, HeldCountGreater> queue;
+
+  //! Decode a codeword (identified by associated leaf) to a symbol.
+  //!
+  //!\pre `leaf` must be a leaf (rather than an interior node) of the code
+  //! creation tree.
+  //!
+  //!\param leaf Leaf (associated to a codeword) to decode.
+  //!\param missed Pointer to next out-of-range symbol. If `leaf` is associated
+  //! to the out-of-range codeword, this pointer will be dereferenced and
+  //! incremented.
+  Symbol decode(const Node &leaf, Symbol const *&missed) const;
 
-  //! Largest symbol (inclusive) to receive a codeword.
-  Symbol max_symbol;
+private:
+  //! Smallest and largest symbols (inclusive) to receive codewords.
+  std::pair<Symbol, Symbol> endpoints;
+
+  //! Set the range of symbols that will be assigned codewords.
+  //!
+  //!\note This function depends on `ncodewords`.
+  void set_endpoints();
+
+  //! Populate the frequency table using a stream of symbols.
+  //!
+  //!\pre `frequencies` should have length `ncodewords` and all entries should
+  //! be zero.
+  //!
+  //!\param begin Beginning of stream of symbols.
+  //!\param end End of stream of symbols.
+  void populate_frequencies(Symbol const *const begin, Symbol const *const end);
+
+  //! Populate the frequency table from a collection of index–frequency pairs.
+  //!
+  //!\pre `frequencies` should have length `ncodewords` and all entries should
+  //! be zero.
+  //!
+  //!\param pairs Index–frequency pairs from which to populate the frequency
+  //! table.
+  void populate_frequencies(
+      const std::vector<std::pair<std::size_t, std::size_t>> &pairs);
+
+  //! Create the Huffman code creation tree.
+  //!
+  //!\note This function depends on `frequencies`.
+  void create_code_creation_tree();
 
   // TODO: Check that frequency count ties aren't going to hurt us here. Stable
   // sorting algorithm in `priority_queue`?
diff --git a/include/huffman.tpp b/include/huffman.tpp
index da11ac5d97..8d6a7cc9f5 100644
--- a/include/huffman.tpp
+++ b/include/huffman.tpp
@@ -1,76 +1,64 @@
 #include "utilities.hpp"
 
 #include <cassert>
+#include <cstddef>
 
 #include <limits>
-#include <queue>
 #include <stdexcept>
-#include <type_traits>
 
 namespace mgard {
 
-//! This is used in the instantization of `std::priority_queue`.
-template <typename T> struct HeldCountGreater {
-  bool operator()(const T &a, const T &b) const { return a->count > b->count; }
-};
-
 template <typename Symbol>
-HuffmanCode<Symbol>::HuffmanCode(const std::size_t ncodewords,
-                                 Symbol const *const begin,
-                                 Symbol const *const end)
-    : ncodewords(ncodewords), frequencies(ncodewords), codewords(ncodewords) {
-  static_assert(std::is_integral<Symbol>::value and
-                    std::is_signed<Symbol>::value,
-                "symbol type must be signed and integral");
+bool HuffmanCode<Symbol>::HeldCountGreater::
+operator()(const typename HuffmanCode<Symbol>::Node &a,
+           const typename HuffmanCode<Symbol>::Node &b) const {
+  return a->count > b->count;
+}
+
+template <typename Symbol> void HuffmanCode<Symbol>::set_endpoints() {
   // Haven't carefully checked what the minimum acceptable value is.
   if (not ncodewords) {
     throw std::invalid_argument("`ncodewords` must be positive.");
   }
-  {
-    const Symbol SYMBOL_MAX = std::numeric_limits<Symbol>::max();
-    const Symbol SYMBOL_MIN = std::numeric_limits<Symbol>::min();
-
-    const std::size_t max_symbol_ = (ncodewords + 1) / 2 - 1;
-    const std::size_t opp_min_symbol_ = ncodewords / 2;
-
-    // TODO: There is surely a better way of doing this. Lots of potential
-    // issues with directly comparing `opp_min_symbol_` and `-SYMBOL_MIN`.
-    // `-SYMBOL_MIN` can't necessarily be represented as a `Symbol`, for
-    // example. Trying to avoid overflows.
-    std::size_t a = opp_min_symbol_;
-    Symbol b = SYMBOL_MIN;
-    while (a) {
-      a /= 2;
-      b /= 2;
-    }
-    if (not b) {
-      // Only a "risk" because we haven't actually established that
-      // `opp_min_symbol_` is greater in magnitude than `SYMBOL_MIN`.
-      throw std::overflow_error(
-          "risk that minimum symbol cannot be represented in symbol type");
-    } else if (opp_min_symbol_ > SYMBOL_MAX) {
-      throw std::overflow_error(
-          "opposite of minimum symbol canont be represented in symbol type");
-    } else {
-      min_symbol = -static_cast<Symbol>(opp_min_symbol_);
-    }
-
-    // `opp_min_symbol_` is either equal to or one greater than `max_symbol_`,
-    // and we checked above that `opp_min_symbol <= SYMBOL_MAX`. So, we know
-    // that `max_symbol_ <= SYMBOL_MAX` here.
-    max_symbol = max_symbol_;
+  const Symbol SYMBOL_MAX = std::numeric_limits<Symbol>::max();
+  const Symbol SYMBOL_MIN = std::numeric_limits<Symbol>::min();
+
+  const std::size_t max_symbol_ = (ncodewords + 1) / 2 - 1;
+  const std::size_t opp_min_symbol_ = ncodewords / 2;
+
+  // There is surely a better way of doing this. Lots of potential issues with
+  // directly comparing `opp_min_symbol_` and `-SYMBOL_MIN`. `-SYMBOL_MIN`
+  // can't necessarily be represented as a `Symbol`, for example. Trying to
+  // avoid overflows.
+  std::size_t a = opp_min_symbol_;
+  Symbol b = SYMBOL_MIN;
+  while (a) {
+    a /= 2;
+    b /= 2;
   }
-  for (const Symbol symbol :
-       RangeSlice<Symbol const *const>{.begin_ = begin, .end_ = end}) {
-    ++frequencies.at(index(symbol));
+  if (not b) {
+    // Only a "risk" because we haven't actually established that
+    // `opp_min_symbol_` is greater in magnitude than `SYMBOL_MIN`.
+    throw std::overflow_error(
+        "risk that minimum symbol cannot be represented in symbol type");
+  } else if (opp_min_symbol_ > SYMBOL_MAX) {
+    throw std::overflow_error(
+        "opposite of minimum symbol canont be represented in symbol type");
+  } else {
+    endpoints.first = -static_cast<Symbol>(opp_min_symbol_);
   }
 
-  using T = std::shared_ptr<CodeCreationTreeNode>;
-  std::priority_queue<T, std::vector<T>, HeldCountGreater<T>> queue;
+  // `opp_min_symbol_` is either equal to or one greater than `max_symbol_`,
+  // and we checked above that `opp_min_symbol_ <= SYMBOL_MAX`. So, we know
+  // that `max_symbol_ <= SYMBOL_MAX` here.
+  endpoints.second = max_symbol_;
+}
 
+template <typename Symbol>
+void HuffmanCode<Symbol>::create_code_creation_tree() {
   // We can't quite use a `ZippedRange` here, I think, because
-  // `ZippedRange::iterator` doesn't expose the underlying iterators and
-  // we want a pointer to the codeword.
+  // `ZippedRange::iterator` doesn't expose the underlying iterators and we want
+  // a pointer to the codeword.
   typename std::vector<std::size_t>::const_iterator p = frequencies.cbegin();
   HuffmanCodeword *q = codewords.data();
   for (std::size_t i = 0; i < ncodewords; ++i) {
@@ -89,7 +77,54 @@ HuffmanCode<Symbol>::HuffmanCode(const std::size_t ncodewords,
 
     queue.push(std::make_shared<CodeCreationTreeNode>(a, b));
   }
+}
 
+template <typename Symbol>
+void HuffmanCode<Symbol>::populate_frequencies(Symbol const *const begin,
+                                               Symbol const *const end) {
+  for (const Symbol symbol :
+       RangeSlice<Symbol const *const>{.begin_ = begin, .end_ = end}) {
+    ++frequencies.at(index(symbol));
+  }
+}
+
+template <typename Symbol>
+Symbol
+HuffmanCode<Symbol>::decode(const typename HuffmanCode<Symbol>::Node &leaf,
+                            Symbol const *&missed) const {
+  const std::ptrdiff_t offset = leaf->codeword - codewords.data();
+  // If `offset == 0`, this is the leaf corresponding to out-of-range symbols.
+  assert(offset >= 0);
+  return offset ? endpoints.first + (offset - 1) : *missed++;
+}
+
+template <typename Symbol>
+void HuffmanCode<Symbol>::populate_frequencies(
+    const std::vector<std::pair<std::size_t, std::size_t>> &pairs) {
+  for (auto [index, frequency] : pairs) {
+    frequencies.at(index) = frequency;
+  }
+}
+
+template <typename Symbol>
+HuffmanCode<Symbol>::HuffmanCode(const std::size_t ncodewords,
+                                 Symbol const *const begin,
+                                 Symbol const *const end)
+    : ncodewords(ncodewords), frequencies(ncodewords), codewords(ncodewords) {
+  set_endpoints();
+  populate_frequencies(begin, end);
+  create_code_creation_tree();
+  recursively_set_codewords(queue.top(), {});
+}
+
+template <typename Symbol>
+HuffmanCode<Symbol>::HuffmanCode(
+    const std::size_t ncodewords,
+    const std::vector<std::pair<std::size_t, std::size_t>> &pairs)
+    : ncodewords(ncodewords), frequencies(ncodewords), codewords(ncodewords) {
+  set_endpoints();
+  populate_frequencies(pairs);
+  create_code_creation_tree();
   recursively_set_codewords(queue.top(), {});
 }
 
@@ -99,12 +134,12 @@ template <typename Symbol> std::size_t HuffmanCode<Symbol>::nmissed() const {
 
 template <typename Symbol>
 bool HuffmanCode<Symbol>::out_of_range(const Symbol symbol) const {
-  return symbol < min_symbol or symbol > max_symbol;
+  return symbol < endpoints.first or symbol > endpoints.second;
 }
 
 template <typename Symbol>
 std::size_t HuffmanCode<Symbol>::index(const Symbol symbol) const {
-  return out_of_range(symbol) ? 0 : 1 + symbol - min_symbol;
+  return out_of_range(symbol) ? 0 : 1 + symbol - endpoints.first;
 }
 
 template <typename Symbol>
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 11725d219b..e0b250c48f 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -5,12 +5,13 @@
 #include <cstring>
 
 #include <algorithm>
-
 #include <array>
 #include <numeric>
 #include <queue>
 #include <vector>
 
+#include <iostream>
+
 #include "huffman.hpp"
 
 namespace mgard {
@@ -524,34 +525,55 @@ MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded) {
   return out;
 }
 
+namespace {
+
+long int decode(const HuffmanCode<long int> &code,
+                const typename HuffmanCode<long int>::Node &leaf,
+                long int const *&missed) {
+  long int const *const start = missed;
+  long int decoded = code.decode(leaf, missed);
+  if (missed != start) {
+    decoded -= nql / 2;
+  }
+  return decoded;
+}
+
+} // namespace
+
 MemoryBuffer<long int>
 huffman_decoding_rewritten(const HuffmanEncodedStream &encoded) {
-  std::size_t const *const cft =
-      reinterpret_cast<std::size_t const *>(encoded.frequencies.data.get());
-  const std::size_t nnz = encoded.frequencies.size / (2 * sizeof(std::size_t));
-  // The elements of the array are value-initialized (here, zero-initialized).
-  std::size_t *const ft = new std::size_t[nql]();
+  using Symbol = long int;
+  using MissedSymbol = int;
 
+  const std::size_t nnz = encoded.frequencies.size / (2 * sizeof(std::size_t));
+  std::vector<std::pair<std::size_t, std::size_t>> pairs(nnz);
   std::size_t nquantized = 0;
-  for (std::size_t j = 0; j < nnz; ++j) {
-    const std::size_t frequency = cft[2 * j + 1];
-    nquantized += frequency;
-    ft[cft[2 * j]] = frequency;
+  {
+    std::size_t const *p =
+        reinterpret_cast<std::size_t const *>(encoded.frequencies.data.get());
+    for (std::pair<std::size_t, std::size_t> &pair : pairs) {
+      const std::size_t index = *p++;
+      const std::size_t frequency = *p++;
+      pair = {index, frequency};
+      nquantized += frequency;
+    }
   }
 
-  MemoryBuffer<long int> out(nquantized);
-  long int *q = out.data.get();
-
-  my_priority_queue<htree_node> *const phtree = build_tree(ft);
-  delete[] ft;
+  const std::size_t ncodewords = nql - 1;
+  HuffmanCode<Symbol> code(ncodewords, pairs);
 
-  // The encoded.missed.data.get() may not be aligned. Therefore, the code
-  // here makes a new buffer.
-  assert(not(encoded.missed.size % sizeof(int)));
-  int *const missed = new int[encoded.missed.size / sizeof(int)];
-  std::memcpy(missed, encoded.missed.data.get(), encoded.missed.size);
+  MemoryBuffer<Symbol> out(nquantized);
+  Symbol *q = out.data.get();
 
-  int const *p_missed = missed;
+  assert(not(encoded.missed.size % sizeof(MissedSymbol)));
+  const std::size_t nmissed = encoded.missed.size / sizeof(MissedSymbol);
+  Symbol *const missed = new Symbol[nmissed];
+  {
+    MissedSymbol const *const p =
+        reinterpret_cast<MissedSymbol const *>(encoded.missed.data.get());
+    std::copy(p, p + nmissed, missed);
+  }
+  Symbol const *p_missed = missed;
 
   const std::size_t nbytes = encoded.hit.size;
   unsigned char *const buffer = new unsigned char[nbytes];
@@ -564,38 +586,20 @@ huffman_decoding_rewritten(const HuffmanEncodedStream &encoded) {
                   encoded.nbits % CHAR_BIT);
 
   std::size_t nbits = 0;
-  std::size_t nmissed = 0;
-  htree_node const *const root = phtree->top();
+  const HuffmanCode<Symbol>::Node root = code.queue.top();
   assert(root);
-  Bits::iterator p_ = bits.begin();
+  Bits::iterator b = bits.begin();
   for (std::size_t i = 0; i < nquantized; ++i) {
-    htree_node const *node = root;
-
-    std::size_t len = 0;
-    while (node->left) {
-      node = *p_++ ? node->right : node->left;
-      ++len;
-    }
-
-    if (node->q) {
-      *q = node->q - nql / 2;
-    } else {
-      *q = *p_missed - nql / 2;
-
-      ++p_missed;
-      ++nmissed;
-    }
-
-    ++q;
-    nbits += len;
+    HuffmanCode<Symbol>::Node node;
+    for (node = root; node->left;
+         node = *b++ ? node->right : node->left, ++nbits)
+      ;
+    *q++ = decode(code, node, p_missed);
   }
-
   assert(nbits == encoded.nbits);
-  assert(sizeof(int) * nmissed == encoded.missed.size);
+  assert(sizeof(MissedSymbol) * (p_missed - missed) == encoded.missed.size);
 
   delete[] missed;
-  free_tree(phtree);
-
   delete[] buffer;
 
   return out;

From e50685dee9980e082a20970ec6480acdf9f7555a Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Tue, 7 Jun 2022 11:00:59 -0400
Subject: [PATCH 26/58] Add `sizeof` checks to Huffman reimplementations.

---
 src/huffman.cpp | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/huffman.cpp b/src/huffman.cpp
index e0b250c48f..8acf634c26 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -346,9 +346,28 @@ HuffmanEncodedStream huffman_encoding(long int *const quantized_data,
   return out;
 }
 
+namespace {
+
+void check_type_sizes() {
+  static_assert(CHAR_BIT == 8,
+                "code written with assumption that `CHAR_BIT == 8`");
+  static_assert(
+      sizeof(unsigned int) == 4,
+      "code written with assumption that `sizeof(unsigned int) == 4`");
+  static_assert(sizeof(int) == 4,
+                "code written with assumption that `sizeof(int) == 4`");
+  static_assert(
+      sizeof(std::size_t) == 8,
+      "code written with assumption that `sizeof(std::size_t) == 8`");
+}
+
+} // namespace
+
 HuffmanEncodedStream
 huffman_encoding_rewritten(long int const *const quantized_data,
                            const std::size_t n) {
+  check_type_sizes();
+
   const std::size_t ncodewords = nql - 1;
   const HuffmanCode<long int> code(ncodewords, quantized_data,
                                    quantized_data + n);
@@ -542,6 +561,8 @@ long int decode(const HuffmanCode<long int> &code,
 
 MemoryBuffer<long int>
 huffman_decoding_rewritten(const HuffmanEncodedStream &encoded) {
+  check_type_sizes();
+
   using Symbol = long int;
   using MissedSymbol = int;
 

From 8be8f8798e8ea5d668d1c71a60308ecfa59244bf Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Tue, 7 Jun 2022 14:28:09 -0400
Subject: [PATCH 27/58] Remove `compress_memory_huffman` from library.

---
 include/compressors.hpp                  |  7 +-
 src/compressors.cpp                      | 73 +-------------------
 tests/CMakeLists.txt                     |  1 +
 tests/include/compressors_regression.hpp | 21 ++++++
 tests/src/compressors_regression.cpp     | 85 ++++++++++++++++++++++++
 tests/src/test_compressors.cpp           |  1 +
 6 files changed, 112 insertions(+), 76 deletions(-)
 create mode 100644 tests/include/compressors_regression.hpp
 create mode 100644 tests/src/compressors_regression.cpp

diff --git a/include/compressors.hpp b/include/compressors.hpp
index 8e0952022a..b27865a8be 100644
--- a/include/compressors.hpp
+++ b/include/compressors.hpp
@@ -18,12 +18,7 @@ namespace mgard {
 
 //! Compress an array using a Huffman tree.
 //!
-//!\param[in] src Array to be compressed.
-//!\param[in] srcLen Size of array (number of elements) to be compressed.
-MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
-                                                    const std::size_t srcLen);
-
-//! Compress an array using a Huffman tree.
+//!\deprecated
 //!
 //!\param[in] src Array to be compressed.
 //!\param[in] srcLen Size of array (number of elements) to be compressed.
diff --git a/src/compressors.cpp b/src/compressors.cpp
index 151712e298..2096b749e1 100644
--- a/src/compressors.cpp
+++ b/src/compressors.cpp
@@ -104,77 +104,10 @@ gather_constituents(const std::vector<Constituent> &constituents) {
 
 } // namespace
 
-MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
-                                                    const std::size_t srcLen) {
-  HuffmanEncodedStream encoded = huffman_encoding(src, srcLen);
-
-  assert(not(encoded.hit.size % sizeof(unsigned int)));
-
-  static_assert(CHAR_BIT == 8, "code written assuming `CHAR_BIT == 8`");
-  static_assert(sizeof(unsigned int) == 4,
-                "code written assuming `sizeof(unsigned int) == 4`");
-  const std::size_t offset = encoded.nbits % (CHAR_BIT * sizeof(unsigned int));
-  // Number of hit buffer padding bytes.
-  const std::size_t nhbpb = offset ? offset / CHAR_BIT : sizeof(unsigned int);
-
-  assert(encoded.hit.size + nhbpb == hit_buffer_size(encoded.nbits));
-
-  const size_t npayload =
-      encoded.hit.size + nhbpb + encoded.missed.size + encoded.frequencies.size;
-  unsigned char *const payload = new unsigned char[npayload];
-  unsigned char *bufp = payload;
-
-  std::memcpy(bufp, encoded.frequencies.data.get(), encoded.frequencies.size);
-  bufp += encoded.frequencies.size;
-
-  std::memcpy(bufp, encoded.hit.data.get(), encoded.hit.size);
-  bufp += encoded.hit.size;
-
-  {
-    const unsigned char zero{0};
-    for (std::size_t i = 0; i < nhbpb; ++i) {
-      std::memcpy(bufp, &zero, 1);
-      bufp += 1;
-    }
-  }
-
-  std::memcpy(bufp, encoded.missed.data.get(), encoded.missed.size);
-  bufp += encoded.missed.size;
-
-#ifndef MGARD_ZSTD
-  const MemoryBuffer<unsigned char> out_data =
-      compress_memory_z(payload, npayload);
-#else
-  const MemoryBuffer<unsigned char> out_data =
-      compress_memory_zstd(payload, npayload);
-#endif
-  delete[] payload;
-  bufp = nullptr;
-
-  const std::size_t bufferLen = 3 * sizeof(size_t) + out_data.size;
-  unsigned char *const buffer = new unsigned char[bufferLen];
-
-  bufp = buffer;
-  *(size_t *)bufp = encoded.frequencies.size;
-  bufp += sizeof(size_t);
-
-  *(size_t *)bufp = encoded.nbits;
-  bufp += sizeof(size_t);
-
-  *(size_t *)bufp = encoded.missed.size;
-  bufp += sizeof(size_t);
-
-  {
-    unsigned char const *const p = out_data.data.get();
-    std::copy(p, p + out_data.size, bufp);
-  }
-  return MemoryBuffer<unsigned char>(buffer, bufferLen);
-}
-
 MemoryBuffer<unsigned char>
 compress_memory_huffman_rewritten(long int *const src,
                                   const std::size_t srcLen) {
-  const HuffmanEncodedStream encoded = huffman_encoding(src, srcLen);
+  const HuffmanEncodedStream encoded = huffman_encoding_rewritten(src, srcLen);
 
   assert(not(encoded.hit.size % sizeof(unsigned int)));
 
@@ -358,8 +291,8 @@ MemoryBuffer<unsigned char> compress(const pb::Header &header, void *const src,
     if (srcLen % qts) {
       throw std::runtime_error("incorrect quantization buffer size");
     }
-    return compress_memory_huffman(reinterpret_cast<long int *>(src),
-                                   srcLen / qts);
+    return compress_memory_huffman_rewritten(reinterpret_cast<long int *>(src),
+                                             srcLen / qts);
   }
 #else
     throw std::runtime_error("MGARD compiled without ZSTD support");
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 427b2e4546..cfa61b4382 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -18,6 +18,7 @@ set(
 	"src/test_decompose.cpp"
 	"src/test_format.cpp"
 	"src/test_quantize.cpp"
+	"src/compressors_regression.cpp"
 	"src/test_compressors.cpp"
 	"src/test_CompressedDataset.cpp"
 	"src/test_huffman.cpp"
diff --git a/tests/include/compressors_regression.hpp b/tests/include/compressors_regression.hpp
new file mode 100644
index 0000000000..fc2b58577d
--- /dev/null
+++ b/tests/include/compressors_regression.hpp
@@ -0,0 +1,21 @@
+#ifndef TESTING_COMPRESSORS_REGRESSION_HPP
+#define TESTING_COMPRESSORS_REGRESSION_HPP
+//!\file
+//!\brief Huffman compression and decompression functions for regression tests.
+
+#include <cstddef>
+
+#include "utilities.hpp"
+
+namespace mgard {
+
+//! Compress an array using a Huffman tree.
+//!
+//!\param[in] src Array to be compressed.
+//!\param[in] srcLen Size of array (number of elements) to be compressed.
+MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
+                                                    const std::size_t srcLen);
+
+} // namespace mgard
+
+#endif
diff --git a/tests/src/compressors_regression.cpp b/tests/src/compressors_regression.cpp
new file mode 100644
index 0000000000..740a504528
--- /dev/null
+++ b/tests/src/compressors_regression.cpp
@@ -0,0 +1,85 @@
+#include "compressors_regression.hpp"
+
+#include <climits>
+#include <cstring>
+
+#include "compressors.hpp"
+#include "huffman.hpp"
+
+namespace mgard {
+
+static_assert(CHAR_BIT == 8, "code written assuming `CHAR_BIT == 8`");
+
+static_assert(sizeof(unsigned int) == 4,
+              "code written assuming `sizeof(unsigned int) == 4`");
+
+static_assert(sizeof(std::size_t) == 8,
+              "code written assuming `sizeof(std::size_t) == 8`");
+
+// This code also makes endianness assumptions.
+
+MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
+                                                    const std::size_t srcLen) {
+  HuffmanEncodedStream encoded = huffman_encoding(src, srcLen);
+
+  assert(not(encoded.hit.size % sizeof(unsigned int)));
+
+  const std::size_t offset = encoded.nbits % (CHAR_BIT * sizeof(unsigned int));
+  // Number of hit buffer padding bytes.
+  const std::size_t nhbpb = offset ? offset / CHAR_BIT : sizeof(unsigned int);
+
+  assert(encoded.hit.size + nhbpb == hit_buffer_size(encoded.nbits));
+
+  const std::size_t npayload =
+      encoded.hit.size + nhbpb + encoded.missed.size + encoded.frequencies.size;
+  unsigned char *const payload = new unsigned char[npayload];
+  unsigned char *bufp = payload;
+
+  std::memcpy(bufp, encoded.frequencies.data.get(), encoded.frequencies.size);
+  bufp += encoded.frequencies.size;
+
+  std::memcpy(bufp, encoded.hit.data.get(), encoded.hit.size);
+  bufp += encoded.hit.size;
+
+  {
+    const unsigned char zero{0};
+    for (std::size_t i = 0; i < nhbpb; ++i) {
+      std::memcpy(bufp, &zero, 1);
+      bufp += 1;
+    }
+  }
+
+  std::memcpy(bufp, encoded.missed.data.get(), encoded.missed.size);
+  bufp += encoded.missed.size;
+
+#ifndef MGARD_ZSTD
+  const MemoryBuffer<unsigned char> out_data =
+      compress_memory_z(payload, npayload);
+#else
+  const MemoryBuffer<unsigned char> out_data =
+      compress_memory_zstd(payload, npayload);
+#endif
+  delete[] payload;
+  bufp = nullptr;
+
+  const std::size_t bufferLen = 3 * sizeof(std::size_t) + out_data.size;
+  unsigned char *const buffer = new unsigned char[bufferLen];
+
+  bufp = buffer;
+  *(std::size_t *)bufp = encoded.frequencies.size;
+  bufp += sizeof(std::size_t);
+
+  *(std::size_t *)bufp = encoded.nbits;
+  bufp += sizeof(std::size_t);
+
+  *(std::size_t *)bufp = encoded.missed.size;
+  bufp += sizeof(std::size_t);
+
+  {
+    unsigned char const *const p = out_data.data.get();
+    std::copy(p, p + out_data.size, bufp);
+  }
+  return MemoryBuffer<unsigned char>(buffer, bufferLen);
+}
+
+} // namespace mgard
diff --git a/tests/src/test_compressors.cpp b/tests/src/test_compressors.cpp
index 74da33a86b..11f7ff3acb 100644
--- a/tests/src/test_compressors.cpp
+++ b/tests/src/test_compressors.cpp
@@ -7,6 +7,7 @@
 #include <random>
 
 #include "compressors.hpp"
+#include "compressors_regression.hpp"
 #include "format.hpp"
 
 #include "testing_utilities.hpp"

From e5eb83ffc40dc0a1b4f96e5492ec35197d0acd0d Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Tue, 7 Jun 2022 15:39:50 -0400
Subject: [PATCH 28/58] Add Huffman decompression regression tests.

---
 tests/src/test_compressors.cpp | 75 ++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/tests/src/test_compressors.cpp b/tests/src/test_compressors.cpp
index 11f7ff3acb..391bc46de3 100644
--- a/tests/src/test_compressors.cpp
+++ b/tests/src/test_compressors.cpp
@@ -49,6 +49,34 @@ void test_huffman_compression_regression(long int *const src,
   delete[] src_;
 }
 
+void test_huffman_decompression_regression(long int *const src,
+                                           const std::size_t srcLen) {
+  long int *const src_ = new long int[srcLen];
+  std::copy(src, src + srcLen, src_);
+
+  const mgard::MemoryBuffer<unsigned char> compressed =
+      mgard::compress_memory_huffman(src, srcLen);
+  const mgard::MemoryBuffer<unsigned char> compressed_ =
+      mgard::compress_memory_huffman(src_, srcLen);
+
+  delete[] src_;
+
+  mgard::MemoryBuffer<long int> out(srcLen);
+  mgard::MemoryBuffer<long int> out_(srcLen);
+
+  unsigned char *const q = compressed.data.get();
+  unsigned char *const q_ = compressed_.data.get();
+  long int *const p = out.data.get();
+  long int *const p_ = out_.data.get();
+
+  mgard::decompress_memory_huffman(q, compressed.size, p,
+                                   out.size * sizeof(long int));
+  mgard::decompress_memory_huffman(q_, compressed_.size, p_,
+                                   out_.size * sizeof(long int));
+
+  REQUIRE(std::equal(p, p + srcLen, p_));
+}
+
 void test_hcr_constant(const std::size_t srcLen, const long int q) {
   long int *const src = new long int[srcLen];
   std::fill(src, src + srcLen, q);
@@ -73,6 +101,30 @@ void test_hcr_random(const std::size_t srcLen, const long int a,
   delete[] src;
 }
 
+void test_hdr_constant(const std::size_t srcLen, const long int q) {
+  long int *const src = new long int[srcLen];
+  std::fill(src, src + srcLen, q);
+  test_huffman_decompression_regression(src, srcLen);
+  delete[] src;
+}
+
+void test_hdr_periodic(const std::size_t srcLen, const long int initial,
+                       const std::size_t period) {
+  long int *const src = new long int[srcLen];
+  std::generate(src, src + srcLen, PeriodicGenerator(period, initial));
+  test_huffman_decompression_regression(src, srcLen);
+  delete[] src;
+}
+
+void test_hdr_random(const std::size_t srcLen, const long int a,
+                     const long int b, std::default_random_engine &gen) {
+  std::uniform_int_distribution<long int> dis(a, b);
+  long int *const src = new long int[srcLen];
+  std::generate(src, src + srcLen, [&] { return dis(gen); });
+  test_huffman_decompression_regression(src, srcLen);
+  delete[] src;
+}
+
 } // namespace
 
 TEST_CASE("Huffman compression regression", "[compressors] [regression]") {
@@ -98,6 +150,29 @@ TEST_CASE("Huffman compression regression", "[compressors] [regression]") {
   }
 }
 
+TEST_CASE("Huffman decompression regression", "[compressors] [regression]") {
+  SECTION("constant data") {
+    test_hdr_constant(4, -143485);
+    test_hdr_constant(64, 0);
+    test_hdr_constant(256, 67486);
+  }
+
+  SECTION("periodic data") {
+    test_hdr_periodic(10, 0, 3);
+    test_hdr_periodic(100, -570, 10);
+    test_hdr_periodic(1000, 394, 19);
+  }
+
+  SECTION("random data") {
+    std::default_random_engine gen(566222);
+    test_hdr_random(100, 1, 2, gen);
+    test_hdr_random(30, -7, 7, gen);
+    test_hdr_random(900, std::numeric_limits<int>::min(),
+                    std::numeric_limits<int>::max(), gen);
+    test_hdr_random(2700, -60, 40, gen);
+  }
+}
+
 TEST_CASE("Huffman compression", "[compressors] [!mayfail]") {
   std::default_random_engine gen(257100);
   const std::size_t n = 5000;

From a59ebebe7d7d039916dbb40f616ba60b273f4d3b Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Tue, 7 Jun 2022 15:53:01 -0400
Subject: [PATCH 29/58] Add Huffman decompression reimplementation.

---
 include/compressors.hpp                  |  9 ++--
 src/compressors.cpp                      | 26 +++++++---
 src/huffman.cpp                          |  2 +-
 tests/include/compressors_regression.hpp | 10 ++++
 tests/src/compressors_regression.cpp     | 61 ++++++++++++++++++++++++
 tests/src/test_compressors.cpp           |  8 ++--
 tests/src/test_huffman.cpp               | 43 +++++++++--------
 7 files changed, 123 insertions(+), 36 deletions(-)

diff --git a/include/compressors.hpp b/include/compressors.hpp
index b27865a8be..09f8c53c22 100644
--- a/include/compressors.hpp
+++ b/include/compressors.hpp
@@ -28,13 +28,16 @@ compress_memory_huffman_rewritten(long int *const src,
 
 //! Decompress an array compressed with `compress_memory_huffman`.
 //!
+//!\deprecated
+//!
 //!\param[in] src Compressed array.
 //!\param[in] srcLen Size in bytes of the compressed array.
 //!\param[out] dst Decompressed array.
 //!\param[in] dstLen Size in bytes of the decompressed array.
-void decompress_memory_huffman(unsigned char *const src,
-                               const std::size_t srcLen, long int *const dst,
-                               const std::size_t dstLen);
+void decompress_memory_huffman_rewritten(unsigned char *const src,
+                                         const std::size_t srcLen,
+                                         long int *const dst,
+                                         const std::size_t dstLen);
 
 #ifdef MGARD_ZSTD
 //! Compress an array using `zstd`.
diff --git a/src/compressors.cpp b/src/compressors.cpp
index 2096b749e1..f97528d3d6 100644
--- a/src/compressors.cpp
+++ b/src/compressors.cpp
@@ -30,9 +30,10 @@ std::size_t hit_buffer_size(const std::size_t nbits) {
 
 } // namespace
 
-void decompress_memory_huffman(unsigned char *const src,
-                               const std::size_t srcLen, long int *const dst,
-                               const std::size_t dstLen) {
+void decompress_memory_huffman_rewritten(unsigned char *const src,
+                                         const std::size_t srcLen,
+                                         long int *const dst,
+                                         const std::size_t dstLen) {
   std::size_t const *const sizes = reinterpret_cast<std::size_t const *>(src);
   const std::size_t nfrequencies = sizes[0];
   const std::size_t nbits = sizes[1];
@@ -54,7 +55,12 @@ void decompress_memory_huffman(unsigned char *const src,
 #endif
   }
 
-  HuffmanEncodedStream encoded(nbits, nhit, nmissed, nfrequencies);
+  // `huffman_decoding_rewritten` expects the size of the hit buffer to be a
+  // multiple of `sizeof(unsigned int)`. We'll zero out any extra bytes below.
+  const std::size_t nbytes =
+      sizeof(unsigned int) *
+      ((nhit + sizeof(unsigned int) - 1) / sizeof(unsigned int));
+  HuffmanEncodedStream encoded(nbits, nbytes, nmissed, nfrequencies);
   {
     unsigned char const *begin;
     unsigned char const *end;
@@ -67,12 +73,17 @@ void decompress_memory_huffman(unsigned char *const src,
     end = begin + nhit;
     std::copy(begin, end, encoded.hit.data.get());
 
+    {
+      unsigned char *const p = encoded.hit.data.get();
+      std::fill(p + nhit, p + nbytes, 0);
+    }
+
     begin = end;
     end = begin + nmissed;
     std::copy(begin, end, encoded.missed.data.get());
   }
 
-  const MemoryBuffer<long int> decoded = huffman_decoding(encoded);
+  const MemoryBuffer<long int> decoded = huffman_decoding_rewritten(encoded);
   {
     long int const *const p = decoded.data.get();
     if (decoded.size * sizeof(*p) != dstLen) {
@@ -325,8 +336,9 @@ void decompress(const pb::Header &header, void *const src,
     break;
   case pb::Encoding::CPU_HUFFMAN_ZSTD:
 #ifdef MGARD_ZSTD
-    decompress_memory_huffman(static_cast<unsigned char *>(src), srcLen,
-                              static_cast<long int *>(dst), dstLen);
+    decompress_memory_huffman_rewritten(static_cast<unsigned char *>(src),
+                                        srcLen, static_cast<long int *>(dst),
+                                        dstLen);
     break;
 #else
     throw std::runtime_error("MGARD compiled without ZSTD support");
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 8acf634c26..abe7000b29 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -535,7 +535,7 @@ MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded) {
     start_bit += len;
   }
 
-  assert(start_bit == out_data_hit_size);
+  assert(start_bit == encoded.nbits);
   assert(sizeof(int) * num_missed == out_data_miss_size);
 
   delete[] miss_buf;
diff --git a/tests/include/compressors_regression.hpp b/tests/include/compressors_regression.hpp
index fc2b58577d..cc1815a3a7 100644
--- a/tests/include/compressors_regression.hpp
+++ b/tests/include/compressors_regression.hpp
@@ -16,6 +16,16 @@ namespace mgard {
 MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
                                                     const std::size_t srcLen);
 
+//! Decompress an array compressed with `compress_memory_huffman`.
+//!
+//!\param[in] src Compressed array.
+//!\param[in] srcLen Size in bytes of the compressed array.
+//!\param[out] dst Decompressed array.
+//!\param[in] dstLen Size in bytes of the decompressed array.
+void decompress_memory_huffman(unsigned char *const src,
+                               const std::size_t srcLen, long int *const dst,
+                               const std::size_t dstLen);
+
 } // namespace mgard
 
 #endif
diff --git a/tests/src/compressors_regression.cpp b/tests/src/compressors_regression.cpp
index 740a504528..eb4d53761b 100644
--- a/tests/src/compressors_regression.cpp
+++ b/tests/src/compressors_regression.cpp
@@ -16,6 +16,14 @@ static_assert(sizeof(unsigned int) == 4,
 static_assert(sizeof(std::size_t) == 8,
               "code written assuming `sizeof(std::size_t) == 8`");
 
+namespace {
+
+std::size_t hit_buffer_size(const std::size_t nbits) {
+  return nbits / CHAR_BIT + sizeof(unsigned int);
+}
+
+} // namespace
+
 // This code also makes endianness assumptions.
 
 MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
@@ -82,4 +90,57 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
   return MemoryBuffer<unsigned char>(buffer, bufferLen);
 }
 
+void decompress_memory_huffman(unsigned char *const src,
+                               const std::size_t srcLen, long int *const dst,
+                               const std::size_t dstLen) {
+  std::size_t const *const sizes = reinterpret_cast<std::size_t const *>(src);
+  const std::size_t nfrequencies = sizes[0];
+  const std::size_t nbits = sizes[1];
+  const std::size_t nmissed = sizes[2];
+  const std::size_t nhit = hit_buffer_size(nbits);
+
+  MemoryBuffer<unsigned char> buffer(nfrequencies + nhit + nmissed);
+  {
+    const std::size_t offset = 3 * sizeof(std::size_t);
+    unsigned char const *const src_ = src + offset;
+    const std::size_t srcLen_ = srcLen - offset;
+    unsigned char *const dst_ = buffer.data.get();
+    const std::size_t dstLen_ = buffer.size;
+
+#ifndef MGARD_ZSTD
+    decompress_memory_z(src_, srcLen_, dst_, dstLen_);
+#else
+    decompress_memory_zstd(src_, srcLen_, dst_, dstLen_);
+#endif
+  }
+
+  HuffmanEncodedStream encoded(nbits, nhit, nmissed, nfrequencies);
+  {
+    unsigned char const *begin;
+    unsigned char const *end;
+
+    begin = buffer.data.get();
+    end = begin + nfrequencies;
+    std::copy(begin, end, encoded.frequencies.data.get());
+
+    begin = end;
+    end = begin + nhit;
+    std::copy(begin, end, encoded.hit.data.get());
+
+    begin = end;
+    end = begin + nmissed;
+    std::copy(begin, end, encoded.missed.data.get());
+  }
+
+  const MemoryBuffer<long int> decoded = huffman_decoding(encoded);
+  {
+    long int const *const p = decoded.data.get();
+    if (decoded.size * sizeof(*p) != dstLen) {
+      throw std::runtime_error(
+          "mismatch between expected and obtained decompressed buffer sizes");
+    }
+    std::copy(p, p + decoded.size, dst);
+  }
+}
+
 } // namespace mgard
diff --git a/tests/src/test_compressors.cpp b/tests/src/test_compressors.cpp
index 391bc46de3..af9de92915 100644
--- a/tests/src/test_compressors.cpp
+++ b/tests/src/test_compressors.cpp
@@ -41,12 +41,12 @@ void test_huffman_compression_regression(long int *const src,
   const mgard::MemoryBuffer<unsigned char> out_ =
       mgard::compress_memory_huffman_rewritten(src_, srcLen);
 
+  delete[] src_;
+
   REQUIRE(out.size == out_.size);
   unsigned char const *const p = out.data.get();
   unsigned char const *const p_ = out_.data.get();
   REQUIRE(std::equal(p, p + out.size, p_));
-
-  delete[] src_;
 }
 
 void test_huffman_decompression_regression(long int *const src,
@@ -71,8 +71,8 @@ void test_huffman_decompression_regression(long int *const src,
 
   mgard::decompress_memory_huffman(q, compressed.size, p,
                                    out.size * sizeof(long int));
-  mgard::decompress_memory_huffman(q_, compressed_.size, p_,
-                                   out_.size * sizeof(long int));
+  mgard::decompress_memory_huffman_rewritten(q_, compressed_.size, p_,
+                                             out_.size * sizeof(long int));
 
   REQUIRE(std::equal(p, p + srcLen, p_));
 }
diff --git a/tests/src/test_huffman.cpp b/tests/src/test_huffman.cpp
index 5fed4bedec..d984c8caf9 100644
--- a/tests/src/test_huffman.cpp
+++ b/tests/src/test_huffman.cpp
@@ -12,51 +12,52 @@
 namespace {
 
 void test_encoding_regression(long int *const quantized, const std::size_t N) {
-  long int *const quantized_new = new long int[N];
-  std::copy(quantized, quantized + N, quantized_new);
+  long int *const quantized_ = new long int[N];
+  std::copy(quantized, quantized + N, quantized_);
 
   const mgard::HuffmanEncodedStream out = mgard::huffman_encoding(quantized, N);
-  const mgard::HuffmanEncodedStream out_new =
-      mgard::huffman_encoding_rewritten(quantized_new, N);
+  const mgard::HuffmanEncodedStream out_ =
+      mgard::huffman_encoding_rewritten(quantized_, N);
 
   unsigned char const *const hit = out.hit.data.get();
-  REQUIRE(out_new.nbits == out.nbits);
+  REQUIRE(out_.nbits == out.nbits);
   const std::size_t nbytes = (out.nbits + CHAR_BIT - 1) / CHAR_BIT;
-  REQUIRE(std::equal(hit, hit + nbytes, out_new.hit.data.get()));
+  REQUIRE(std::equal(hit, hit + nbytes, out_.hit.data.get()));
 
   unsigned char const *const missed = out.missed.data.get();
   const std::size_t nmissed = out.missed.size;
-  REQUIRE(out_new.missed.size == nmissed);
-  REQUIRE(std::equal(missed, missed + nmissed, out_new.missed.data.get()));
+  REQUIRE(out_.missed.size == nmissed);
+  REQUIRE(std::equal(missed, missed + nmissed, out_.missed.data.get()));
 
   unsigned char const *const frequencies = out.frequencies.data.get();
   const std::size_t nfrequencies = out.frequencies.size;
-  REQUIRE(out_new.frequencies.size == nfrequencies);
+  REQUIRE(out_.frequencies.size == nfrequencies);
   REQUIRE(std::equal(frequencies, frequencies + nfrequencies,
-                     out_new.frequencies.data.get()));
+                     out_.frequencies.data.get()));
 
-  delete[] quantized_new;
+  delete[] quantized_;
 }
 
 void test_decoding_regression(long int *const quantized, const std::size_t N) {
-  long int *const quantized_new = new long int[N];
-  std::copy(quantized, quantized + N, quantized_new);
+  long int *const quantized_ = new long int[N];
+  std::copy(quantized, quantized + N, quantized_);
 
   const mgard::HuffmanEncodedStream encoded =
       mgard::huffman_encoding(quantized, N);
-  const mgard::HuffmanEncodedStream encoded_new =
-      mgard::huffman_encoding(quantized_new, N);
+  const mgard::HuffmanEncodedStream encoded_ =
+      mgard::huffman_encoding(quantized_, N);
 
-  delete[] quantized_new;
+  delete[] quantized_;
 
   const mgard::MemoryBuffer<long int> out = mgard::huffman_decoding(encoded);
-  const mgard::MemoryBuffer<long int> out_new =
-      mgard::huffman_decoding_rewritten(encoded);
+  const mgard::MemoryBuffer<long int> out_ =
+      mgard::huffman_decoding_rewritten(encoded_);
 
-  REQUIRE(out.size == out_new.size);
+  REQUIRE(out.size == out_.size);
+  REQUIRE(out.size == N);
   long int const *const p = out.data.get();
-  long int const *const p_new = out_new.data.get();
-  REQUIRE(std::equal(p, p + out.size, p_new));
+  long int const *const p_ = out_.data.get();
+  REQUIRE(std::equal(p, p + out.size, p_));
 }
 
 void test_encoding_regression_constant(const std::size_t N, const long int q) {

From cc78644b8ef0ab4253b5892eb9773b9d43acf550 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 8 Jun 2022 09:30:25 -0400
Subject: [PATCH 30/58] Remove `huffman_{en,de}coding` from library.

---
 include/huffman.hpp                  |  54 ++--
 src/huffman.cpp                      | 365 --------------------------
 tests/CMakeLists.txt                 |   1 +
 tests/include/huffman_regression.hpp |  28 ++
 tests/src/compressors_regression.cpp |   2 +
 tests/src/huffman_regression.cpp     | 373 +++++++++++++++++++++++++++
 tests/src/test_huffman.cpp           |   1 +
 7 files changed, 430 insertions(+), 394 deletions(-)
 create mode 100644 tests/include/huffman_regression.hpp
 create mode 100644 tests/src/huffman_regression.cpp

diff --git a/include/huffman.hpp b/include/huffman.hpp
index d98f5b27e7..24a4be22af 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -14,6 +14,12 @@
 
 namespace mgard {
 
+//! One more than the number of symbols assigned codewords in the deprecated
+//! Huffman encoding and decoding functions.
+//!
+//!\deprecated
+inline constexpr std::size_t nql = 32768 * 4;
+
 //! A stream compressed using a Huffman code.
 struct HuffmanEncodedStream {
   //! Constructor.
@@ -38,35 +44,6 @@ struct HuffmanEncodedStream {
   MemoryBuffer<unsigned char> frequencies;
 };
 
-//! Encode quantized coefficients using a Huffman code.
-//!
-//!\param[in, out] quantized_data Input buffer (quantized coefficients). This
-//! buffer will be changed by the encoding process.
-//!\param[in] n Number of symbols (`long int` quantized coefficients) in the
-//! input buffer.
-HuffmanEncodedStream huffman_encoding(long int *const quantized_data,
-                                      const std::size_t n);
-
-//! Encode quantized coefficients using a Huffman code.
-//!
-//!\param[in] quantized_data Input buffer (quantized coefficients).
-//!\param[in] n Number of symbols (`long int` quantized coefficients) in the
-//! input buffer.
-HuffmanEncodedStream
-huffman_encoding_rewritten(long int const *const quantized_data,
-                           const std::size_t n);
-
-//! Decode a stream encoded using a Huffman code.
-//!
-//!\param[in] encoded Input buffer (Huffman-encoded stream).
-MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded);
-
-//! Decode a stream encoded using a Huffman code.
-//!
-//!\param[in] encoded Input buffer (Huffman-encoded stream).
-MemoryBuffer<long int>
-huffman_decoding_rewritten(const HuffmanEncodedStream &encoded);
-
 //! Codeword (in progress) associated to a node in a Huffman code creation tree.
 struct HuffmanCodeword {
   //! Bytes containing the bits of the codeword.
@@ -228,6 +205,25 @@ template <typename Symbol> class HuffmanCode {
                             const HuffmanCodeword codeword);
 };
 
+//! Encode quantized coefficients using a Huffman code.
+//!
+//!\deprecated
+//!
+//!\param[in] quantized_data Input buffer (quantized coefficients).
+//!\param[in] n Number of symbols (`long int` quantized coefficients) in the
+//! input buffer.
+HuffmanEncodedStream
+huffman_encoding_rewritten(long int const *const quantized_data,
+                           const std::size_t n);
+
+//! Decode a stream encoded using a Huffman code.
+//!
+//!\deprecated
+//!
+//!\param[in] encoded Input buffer (Huffman-encoded stream).
+MemoryBuffer<long int>
+huffman_decoding_rewritten(const HuffmanEncodedStream &encoded);
+
 } // namespace mgard
 
 #include "huffman.tpp"
diff --git a/src/huffman.cpp b/src/huffman.cpp
index abe7000b29..7e4429c6e7 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -16,8 +16,6 @@
 
 namespace mgard {
 
-const int nql = 32768 * 4;
-
 HuffmanEncodedStream::HuffmanEncodedStream(const std::size_t nbits,
                                            const std::size_t ncompressed,
                                            const std::size_t nmissed,
@@ -55,192 +53,6 @@ CodeCreationTreeNode::CodeCreationTreeNode(
     const std::shared_ptr<CodeCreationTreeNode> &right)
     : count(left->count + right->count), left(left), right(right) {}
 
-//! Node in the Huffman code creation tree.
-struct htree_node {
-  //! Constructor.
-  //!
-  //!\param q (Transformed) symbol.
-  //!\param cnt Number of occurences of the (transformed) symbol in the source.
-  htree_node(const int q, const std::size_t cnt)
-      : q(q), cnt(cnt), code(0), len(0), left(nullptr), right(nullptr) {}
-
-  //! (Transformed) symbol.
-  int q;
-
-  //! Number of occurences of the (transformed) symbol in the source.
-  std::size_t cnt;
-
-  //! Codeword associated to the (transformed) symbol.
-  unsigned int code;
-
-  //! Length in bits of the codeword.
-  std::size_t len;
-
-  //! Left child in the code creation tree.
-  htree_node *left;
-
-  //! Right child in the code creation tree.
-  htree_node *right;
-};
-
-//! Input symbol–Huffman code pair.
-struct huffman_codec {
-  //! (Transformed) symbol.
-  int q;
-
-  //! Codeword associated to the (transformed) symbol.
-  unsigned int code;
-
-  //! Length in bits of the codeword.
-  std::size_t len;
-};
-
-//! Frequency table and symbol–code mappings for encoding source.
-template <std::size_t NQL> struct HuffmanCodec {
-  // The arrays are value-initialized, which leads to each of their elements
-  // being value-initialized (ultimately zero-initialized).
-
-  //! Input symbol–Huffman code pairs.
-  std::array<huffman_codec, NQL> codec{};
-
-  //! Frequency table for encoding source.
-  std::array<std::size_t, NQL> frequency_table{};
-};
-
-//! Function object for comparing Huffman code creation nodes.
-struct LessThanByCnt {
-  //! Return whether the first node has a larger count than the second.
-  //!
-  //!\param lhs First node.
-  //!\param rhs Second node.
-  bool operator()(htree_node const *const lhs,
-                  htree_node const *const rhs) const {
-    return lhs->cnt > rhs->cnt;
-  }
-};
-
-template <class T>
-using my_priority_queue =
-    std::priority_queue<T *, std::vector<T *>, LessThanByCnt>;
-
-void initialize_codec(HuffmanCodec<nql> &codec, htree_node *const root,
-                      const unsigned int code, const std::size_t len) {
-  std::array<huffman_codec, nql> &codewords = codec.codec;
-
-  root->code = code;
-  root->len = len;
-
-  if (!root->left && !root->right) {
-    const std::size_t index = root->q;
-    codewords.at(index) = {root->q, code, len};
-  }
-
-  if (root->left) {
-    initialize_codec(codec, root->left, code << 1, len + 1);
-  }
-
-  if (root->right) {
-    initialize_codec(codec, root->right, code << 1 | 0x1, len + 1);
-  }
-}
-
-my_priority_queue<htree_node> *build_tree(std::size_t const *const cnt) {
-  my_priority_queue<htree_node> *const phtree =
-      new my_priority_queue<htree_node>;
-  for (int i = 0; i < nql; i++) {
-    if (cnt[i] != 0) {
-      htree_node *const new_node = new htree_node(i, cnt[i]);
-      phtree->push(new_node);
-    }
-  }
-
-  while (phtree->size() > 1) {
-    htree_node *const top_node1 = phtree->top();
-    phtree->pop();
-    htree_node *const top_node2 = phtree->top();
-    phtree->pop();
-
-    htree_node *const new_node =
-        new htree_node(-1, top_node1->cnt + top_node2->cnt);
-    new_node->left = top_node1;
-    new_node->right = top_node2;
-    phtree->push(new_node);
-  }
-  return phtree;
-}
-
-void free_htree_node(htree_node *const node) {
-  if (node->left) {
-    free_htree_node(node->left);
-    node->left = nullptr;
-  }
-
-  if (node->right) {
-    free_htree_node(node->right);
-    node->right = nullptr;
-  }
-
-  delete node;
-}
-
-void free_tree(my_priority_queue<htree_node> *const phtree) {
-  if (phtree) {
-    free_htree_node(phtree->top());
-
-    phtree->pop();
-
-    delete phtree;
-  }
-}
-
-//! Populate the frequency table of a `HuffmanCodec`.
-//!
-//!\note This function will change the quantized data.
-//!
-//!\param[in, out] quantized_data Input buffer (quantized coefficients). This
-//! buffer will be changed by the codec-building process.
-//\param[in] n Number of symbols (`long int` quantized coefficients) in the
-//! input buffer.
-void initialize_frequency_table(HuffmanCodec<nql> &codec,
-                                long int *const quantized_data,
-                                const std::size_t n) {
-  assert(*std::max_element(codec.frequency_table.begin(),
-                           codec.frequency_table.end()) == 0);
-
-  for (std::size_t i = 0; i < n; i++) {
-    // Convert quantization level to positive so that counting freq can be
-    // easily done. Level 0 is reserved a out-of-range flag.
-    quantized_data[i] = quantized_data[i] + nql / 2;
-    ++codec.frequency_table[quantized_data[i] > 0 &&
-                                    quantized_data[i] <
-                                        static_cast<long int>(nql)
-                                ? quantized_data[i]
-                                : 0];
-  }
-}
-
-//! Build a Huffman codec for an input buffer.
-//!
-//!\param[in, out] quantized_data Input buffer (quantized coefficients). This
-//! buffer will be changed by the codec-building process.
-//\param[in] n Number of symbols (`long int` quantized coefficients) in the
-//! input buffer.
-template <std::size_t N>
-HuffmanCodec<N> build_huffman_codec(long int *const quantized_data,
-                                    const std::size_t n) {
-  HuffmanCodec<N> codec;
-  initialize_frequency_table(codec, quantized_data, n);
-
-  my_priority_queue<htree_node> *const phtree =
-      build_tree(codec.frequency_table.data());
-
-  initialize_codec(codec, phtree->top(), 0, 0);
-
-  free_tree(phtree);
-
-  return codec;
-}
-
 namespace {
 
 void endianness_shuffle(unsigned char *const buffer, const std::size_t nbytes) {
@@ -262,90 +74,6 @@ void endianness_shuffle(unsigned char *const buffer, const std::size_t nbytes) {
 }
 
 } // namespace
-
-HuffmanEncodedStream huffman_encoding(long int *const quantized_data,
-                                      const std::size_t n) {
-  const HuffmanCodec<nql> codec = build_huffman_codec<nql>(quantized_data, n);
-  const std::size_t num_miss = codec.frequency_table[0];
-
-  assert(n >= num_miss);
-
-  std::size_t nnz = 0;
-  std::size_t nbits = 0;
-  for (std::size_t i = 0; i < nql; ++i) {
-    const huffman_codec &codec_ = codec.codec.at(i);
-    const std::size_t frequency = codec.frequency_table.at(i);
-    nbits += frequency * codec_.len;
-    nnz += frequency ? 1 : 0;
-  }
-
-  const std::size_t nbytes =
-      sizeof(unsigned int) * ((nbits + CHAR_BIT * sizeof(unsigned int) - 1) /
-                              (CHAR_BIT * sizeof(unsigned int)));
-  HuffmanEncodedStream out(nbits, nbytes, num_miss * sizeof(int),
-                           2 * nnz * sizeof(std::size_t));
-
-  unsigned int *const hit =
-      reinterpret_cast<unsigned int *>(out.hit.data.get());
-  std::fill(hit, hit + nbytes / sizeof(unsigned int), 0u);
-
-  int *missed = reinterpret_cast<int *>(out.missed.data.get());
-
-  // write frequency table to buffer
-  std::size_t *const cft =
-      reinterpret_cast<std::size_t *>(out.frequencies.data.get());
-  std::size_t off = 0;
-  for (std::size_t i = 0; i < nql; ++i) {
-    if (codec.frequency_table[i] > 0) {
-      cft[2 * off] = i;
-      cft[2 * off + 1] = codec.frequency_table[i];
-      off++;
-    }
-  }
-
-  std::size_t start_bit = 0;
-  for (std::size_t i = 0; i < n; i++) {
-    const int q = quantized_data[i];
-    unsigned int code;
-    std::size_t len;
-
-    if (q > 0 && q < nql) {
-      // for those that are within the range
-      code = codec.codec[q].code;
-      len = codec.codec[q].len;
-    } else {
-      // for those that are out of the range, q is set to 0
-      code = codec.codec[0].code;
-      len = codec.codec[0].len;
-
-      *missed++ = q;
-    }
-
-    // Note that if len == 0, then that means that either the data is all the
-    // same number or (more likely) all data are outside the quantization
-    // range. Either way, the code contains no information and is therefore 0
-    // bits.
-
-    if (32 - start_bit % 32 < len) {
-      // current unsigned int cannot hold the code
-      // copy 32 - start_bit % 32 bits to the current int
-      // and copy  the rest len - (32 - start_bit % 32) to the next int
-      const std::size_t rshift = len - (32 - start_bit % 32);
-      const std::size_t lshift = 32 - rshift;
-      *(hit + start_bit / 32) = (*(hit + start_bit / 32)) | (code >> rshift);
-      *(hit + start_bit / 32 + 1) =
-          (*(hit + start_bit / 32 + 1)) | (code << lshift);
-    } else if (len) {
-      code = code << (32 - start_bit % 32 - len);
-      *(hit + start_bit / 32) = (*(hit + start_bit / 32)) | code;
-    }
-    // No effect if `len == 0`.
-    start_bit += len;
-  }
-
-  return out;
-}
-
 namespace {
 
 void check_type_sizes() {
@@ -451,99 +179,6 @@ huffman_encoding_rewritten(long int const *const quantized_data,
   return out;
 }
 
-MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded) {
-  const std::size_t out_data_miss_size = encoded.missed.size;
-  const std::size_t out_tree_size = encoded.frequencies.size;
-  unsigned char const *const out_data_hit = encoded.hit.data.get();
-  unsigned char const *const out_data_miss = encoded.missed.data.get();
-  unsigned char const *const out_tree = encoded.frequencies.data.get();
-
-  std::size_t const *const cft = (std::size_t const *)out_tree;
-  const std::size_t nnz = out_tree_size / (2 * sizeof(std::size_t));
-  // The elements of the array are value-initialized (here, zero-initialized).
-  std::size_t *const ft = new std::size_t[nql]();
-
-  std::size_t nquantized = 0;
-  for (std::size_t j = 0; j < nnz; ++j) {
-    const std::size_t frequency = cft[2 * j + 1];
-    nquantized += frequency;
-    ft[cft[2 * j]] = frequency;
-  }
-
-  MemoryBuffer<long int> out(nquantized);
-  long int *const quantized_data = out.data.get();
-
-  my_priority_queue<htree_node> *const phtree = build_tree(ft);
-  delete[] ft;
-
-  unsigned int const *const buf = (unsigned int const *)out_data_hit;
-
-  // The out_data_miss may not be aligned. Therefore, the code
-  // here makes a new buffer.
-  assert(not(out_data_miss_size % sizeof(int)));
-  int *const miss_buf = new int[out_data_miss_size / sizeof(int)];
-  if (out_data_miss_size) {
-    std::memcpy(miss_buf, out_data_miss, out_data_miss_size);
-  }
-
-  int const *miss_bufp = miss_buf;
-
-  std::size_t start_bit = 0;
-  unsigned int mask = 0x80000000;
-
-  long int *q = quantized_data;
-  std::size_t i = 0;
-  std::size_t num_missed = 0;
-  while (q < quantized_data + nquantized) {
-    htree_node const *root = phtree->top();
-    assert(root);
-
-    std::size_t len = 0;
-    int offset = 0;
-    while (root->left) {
-      int flag = *(buf + start_bit / 32 + offset) & mask;
-      if (!flag) {
-        root = root->left;
-      } else {
-        root = root->right;
-      }
-
-      len++;
-
-      mask >>= 1;
-      if (!mask) {
-        mask = 0x80000000;
-        offset = 1;
-      } else {
-        //        offset = 0;
-      }
-    }
-
-    if (root->q != 0) {
-      *q = root->q - nql / 2;
-
-    } else {
-      *q = *miss_bufp - nql / 2;
-
-      miss_bufp++;
-      num_missed++;
-    }
-
-    q++;
-    i++;
-
-    start_bit += len;
-  }
-
-  assert(start_bit == encoded.nbits);
-  assert(sizeof(int) * num_missed == out_data_miss_size);
-
-  delete[] miss_buf;
-  free_tree(phtree);
-
-  return out;
-}
-
 namespace {
 
 long int decode(const HuffmanCode<long int> &code,
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index cfa61b4382..f625d0a148 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -21,6 +21,7 @@ set(
 	"src/compressors_regression.cpp"
 	"src/test_compressors.cpp"
 	"src/test_CompressedDataset.cpp"
+	"src/huffman_regression.cpp"
 	"src/test_huffman.cpp"
 )
 
diff --git a/tests/include/huffman_regression.hpp b/tests/include/huffman_regression.hpp
new file mode 100644
index 0000000000..f10919d2ea
--- /dev/null
+++ b/tests/include/huffman_regression.hpp
@@ -0,0 +1,28 @@
+#ifndef TESTING_HUFFMAN_REGRESSION_HPP
+#define TESTING_HUFFMAN_REGRESSION_HPP
+//!\file
+//!\brief Huffman encoding and decoding functions for regression tests.
+
+#include <cstddef>
+
+#include "huffman.hpp"
+
+namespace mgard {
+
+//! Encode quantized coefficients using a Huffman code.
+//!
+//!\param[in, out] quantized_data Input buffer (quantized coefficients). This
+//! buffer will be changed by the encoding process.
+//!\param[in] n Number of symbols (`long int` quantized coefficients) in the
+//! input buffer.
+HuffmanEncodedStream huffman_encoding(long int *const quantized_data,
+                                      const std::size_t n);
+
+//! Decode a stream encoded using a Huffman code.
+//!
+//!\param[in] encoded Input buffer (Huffman-encoded stream).
+MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded);
+
+} // namespace mgard
+
+#endif
diff --git a/tests/src/compressors_regression.cpp b/tests/src/compressors_regression.cpp
index eb4d53761b..c3dfdc9bc2 100644
--- a/tests/src/compressors_regression.cpp
+++ b/tests/src/compressors_regression.cpp
@@ -4,7 +4,9 @@
 #include <cstring>
 
 #include "compressors.hpp"
+#include "compressors_regression.hpp"
 #include "huffman.hpp"
+#include "huffman_regression.hpp"
 
 namespace mgard {
 
diff --git a/tests/src/huffman_regression.cpp b/tests/src/huffman_regression.cpp
new file mode 100644
index 0000000000..1bd397b3b6
--- /dev/null
+++ b/tests/src/huffman_regression.cpp
@@ -0,0 +1,373 @@
+#include "huffman_regression.hpp"
+
+#include <climits>
+#include <cstring>
+
+#include <array>
+#include <queue>
+
+namespace mgard {
+
+//! Node in the Huffman code creation tree.
+struct htree_node {
+  //! Constructor.
+  //!
+  //!\param q (Transformed) symbol.
+  //!\param cnt Number of occurrences of the (transformed) symbol in the source.
+  htree_node(const int q, const std::size_t cnt)
+      : q(q), cnt(cnt), code(0), len(0), left(nullptr), right(nullptr) {}
+
+  //! (Transformed) symbol.
+  int q;
+
+  //! Number of occurrences of the (transformed) symbol in the source.
+  std::size_t cnt;
+
+  //! Codeword associated to the (transformed) symbol.
+  unsigned int code;
+
+  //! Length in bits of the codeword.
+  std::size_t len;
+
+  //! Left child in the code creation tree.
+  htree_node *left;
+
+  //! Right child in the code creation tree.
+  htree_node *right;
+};
+
+//! Input symbol–Huffman code pair.
+struct huffman_codec {
+  //! (Transformed) symbol.
+  int q;
+
+  //! Codeword associated to the (transformed) symbol.
+  unsigned int code;
+
+  //! Length in bits of the codeword.
+  std::size_t len;
+};
+
+//! Frequency table and symbol–code mappings for encoding source.
+template <std::size_t NQL> struct HuffmanCodec {
+  // The arrays are value-initialized, which leads to each of their elements
+  // being value-initialized (ultimately zero-initialized).
+
+  //! Input symbol–Huffman code pairs.
+  std::array<huffman_codec, NQL> codec{};
+
+  //! Frequency table for encoding source.
+  std::array<std::size_t, NQL> frequency_table{};
+};
+
+//! Function object for comparing Huffman code creation nodes.
+struct LessThanByCnt {
+  //! Return whether the first node has a larger count than the second.
+  //!
+  //!\param lhs First node.
+  //!\param rhs Second node.
+  bool operator()(htree_node const *const lhs,
+                  htree_node const *const rhs) const {
+    return lhs->cnt > rhs->cnt;
+  }
+};
+
+template <class T>
+using my_priority_queue =
+    std::priority_queue<T *, std::vector<T *>, LessThanByCnt>;
+
+void initialize_codec(HuffmanCodec<nql> &codec, htree_node *const root,
+                      const unsigned int code, const std::size_t len) {
+  std::array<huffman_codec, nql> &codewords = codec.codec;
+
+  root->code = code;
+  root->len = len;
+
+  if (!root->left && !root->right) {
+    const std::size_t index = root->q;
+    codewords.at(index) = {root->q, code, len};
+  }
+
+  if (root->left) {
+    initialize_codec(codec, root->left, code << 1, len + 1);
+  }
+
+  if (root->right) {
+    initialize_codec(codec, root->right, code << 1 | 0x1, len + 1);
+  }
+}
+
+my_priority_queue<htree_node> *build_tree(std::size_t const *const cnt) {
+  my_priority_queue<htree_node> *const phtree =
+      new my_priority_queue<htree_node>;
+  for (std::size_t i = 0; i < nql; i++) {
+    if (cnt[i] != 0) {
+      htree_node *const new_node = new htree_node(i, cnt[i]);
+      phtree->push(new_node);
+    }
+  }
+
+  while (phtree->size() > 1) {
+    htree_node *const top_node1 = phtree->top();
+    phtree->pop();
+    htree_node *const top_node2 = phtree->top();
+    phtree->pop();
+
+    htree_node *const new_node =
+        new htree_node(-1, top_node1->cnt + top_node2->cnt);
+    new_node->left = top_node1;
+    new_node->right = top_node2;
+    phtree->push(new_node);
+  }
+  return phtree;
+}
+
+void free_htree_node(htree_node *const node) {
+  if (node->left) {
+    free_htree_node(node->left);
+    node->left = nullptr;
+  }
+
+  if (node->right) {
+    free_htree_node(node->right);
+    node->right = nullptr;
+  }
+
+  delete node;
+}
+
+void free_tree(my_priority_queue<htree_node> *const phtree) {
+  if (phtree) {
+    free_htree_node(phtree->top());
+
+    phtree->pop();
+
+    delete phtree;
+  }
+}
+
+//! Populate the frequency table of a `HuffmanCodec`.
+//!
+//!\note This function will change the quantized data.
+//!
+//!\param[in, out] quantized_data Input buffer (quantized coefficients). This
+//! buffer will be changed by the codec-building process.
+//!\param[in] n Number of symbols (`long int` quantized coefficients) in the
+//! input buffer.
+void initialize_frequency_table(HuffmanCodec<nql> &codec,
+                                long int *const quantized_data,
+                                const std::size_t n) {
+  assert(*std::max_element(codec.frequency_table.begin(),
+                           codec.frequency_table.end()) == 0);
+
+  for (std::size_t i = 0; i < n; i++) {
+    // Convert quantization level to positive so that counting freq can be
+    // easily done. Level 0 is reserved as an out-of-range flag.
+    quantized_data[i] = quantized_data[i] + nql / 2;
+    ++codec.frequency_table[quantized_data[i] > 0 &&
+                                    quantized_data[i] <
+                                        static_cast<long int>(nql)
+                                ? quantized_data[i]
+                                : 0];
+  }
+}
+
+//! Build a Huffman codec for an input buffer.
+//!
+//!\param[in, out] quantized_data Input buffer (quantized coefficients). This
+//! buffer will be changed by the codec-building process.
+//!\param[in] n Number of symbols (`long int` quantized coefficients) in the
+//! input buffer.
+template <std::size_t N>
+HuffmanCodec<N> build_huffman_codec(long int *const quantized_data,
+                                    const std::size_t n) {
+  HuffmanCodec<N> codec;
+  initialize_frequency_table(codec, quantized_data, n);
+
+  my_priority_queue<htree_node> *const phtree =
+      build_tree(codec.frequency_table.data());
+
+  initialize_codec(codec, phtree->top(), 0, 0);
+
+  free_tree(phtree);
+
+  return codec;
+}
+
+HuffmanEncodedStream huffman_encoding(long int *const quantized_data,
+                                      const std::size_t n) {
+  const HuffmanCodec<nql> codec = build_huffman_codec<nql>(quantized_data, n);
+  const std::size_t num_miss = codec.frequency_table[0];
+
+  assert(n >= num_miss);
+
+  std::size_t nnz = 0;
+  std::size_t nbits = 0;
+  for (std::size_t i = 0; i < nql; ++i) {
+    const huffman_codec &codec_ = codec.codec.at(i);
+    const std::size_t frequency = codec.frequency_table.at(i);
+    nbits += frequency * codec_.len;
+    nnz += frequency ? 1 : 0;
+  }
+
+  const std::size_t nbytes =
+      sizeof(unsigned int) * ((nbits + CHAR_BIT * sizeof(unsigned int) - 1) /
+                              (CHAR_BIT * sizeof(unsigned int)));
+  HuffmanEncodedStream out(nbits, nbytes, num_miss * sizeof(int),
+                           2 * nnz * sizeof(std::size_t));
+
+  unsigned int *const hit =
+      reinterpret_cast<unsigned int *>(out.hit.data.get());
+  std::fill(hit, hit + nbytes / sizeof(unsigned int), 0u);
+
+  int *missed = reinterpret_cast<int *>(out.missed.data.get());
+
+  // write frequency table to buffer
+  std::size_t *const cft =
+      reinterpret_cast<std::size_t *>(out.frequencies.data.get());
+  std::size_t off = 0;
+  for (std::size_t i = 0; i < nql; ++i) {
+    if (codec.frequency_table[i] > 0) {
+      cft[2 * off] = i;
+      cft[2 * off + 1] = codec.frequency_table[i];
+      off++;
+    }
+  }
+
+  std::size_t start_bit = 0;
+  for (std::size_t i = 0; i < n; i++) {
+    const int q = quantized_data[i];
+    unsigned int code;
+    std::size_t len;
+
+    if (q > 0 && q < static_cast<int>(nql)) {
+      // for those that are within the range
+      code = codec.codec[q].code;
+      len = codec.codec[q].len;
+    } else {
+      // out of range: emit the codeword of symbol 0 and record q as missed
+      code = codec.codec[0].code;
+      len = codec.codec[0].len;
+
+      *missed++ = q;
+    }
+
+    // Note that if len == 0, then that means that either the data is all the
+    // same number or (more likely) all data are outside the quantization
+    // range. Either way, the code contains no information and is therefore 0
+    // bits.
+
+    if (32 - start_bit % 32 < len) {
+      // current unsigned int cannot hold the code
+      // copy 32 - start_bit % 32 bits to the current int
+      // and copy the rest len - (32 - start_bit % 32) to the next int
+      const std::size_t rshift = len - (32 - start_bit % 32);
+      const std::size_t lshift = 32 - rshift;
+      *(hit + start_bit / 32) = (*(hit + start_bit / 32)) | (code >> rshift);
+      *(hit + start_bit / 32 + 1) =
+          (*(hit + start_bit / 32 + 1)) | (code << lshift);
+    } else if (len) {
+      code = code << (32 - start_bit % 32 - len);
+      *(hit + start_bit / 32) = (*(hit + start_bit / 32)) | code;
+    }
+    // No effect if `len == 0`.
+    start_bit += len;
+  }
+
+  return out;
+}
+
+MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded) {
+  const std::size_t out_data_miss_size = encoded.missed.size;
+  const std::size_t out_tree_size = encoded.frequencies.size;
+  unsigned char const *const out_data_hit = encoded.hit.data.get();
+  unsigned char const *const out_data_miss = encoded.missed.data.get();
+  unsigned char const *const out_tree = encoded.frequencies.data.get();
+
+  std::size_t const *const cft = (std::size_t const *)out_tree;
+  const std::size_t nnz = out_tree_size / (2 * sizeof(std::size_t));
+  // The elements of the array are value-initialized (here, zero-initialized).
+  std::size_t *const ft = new std::size_t[nql]();
+
+  std::size_t nquantized = 0;
+  for (std::size_t j = 0; j < nnz; ++j) {
+    const std::size_t frequency = cft[2 * j + 1];
+    nquantized += frequency;
+    ft[cft[2 * j]] = frequency;
+  }
+
+  MemoryBuffer<long int> out(nquantized);
+  long int *const quantized_data = out.data.get();
+
+  my_priority_queue<htree_node> *const phtree = build_tree(ft);
+  delete[] ft;
+
+  unsigned int const *const buf = (unsigned int const *)out_data_hit;
+
+  // The out_data_miss may not be aligned. Therefore, the code
+  // here makes a new buffer.
+  assert(not(out_data_miss_size % sizeof(int)));
+  int *const miss_buf = new int[out_data_miss_size / sizeof(int)];
+  if (out_data_miss_size) {
+    std::memcpy(miss_buf, out_data_miss, out_data_miss_size);
+  }
+
+  int const *miss_bufp = miss_buf;
+
+  std::size_t start_bit = 0;
+  unsigned int mask = 0x80000000;
+
+  long int *q = quantized_data;
+  std::size_t i = 0;
+  std::size_t num_missed = 0;
+  while (q < quantized_data + nquantized) {
+    htree_node const *root = phtree->top();
+    assert(root);
+
+    std::size_t len = 0;
+    int offset = 0;
+    while (root->left) {
+      int flag = *(buf + start_bit / 32 + offset) & mask;
+      if (!flag) {
+        root = root->left;
+      } else {
+        root = root->right;
+      }
+
+      len++;
+
+      mask >>= 1;
+      if (!mask) {
+        mask = 0x80000000;
+        offset = 1;
+      } else {
+        //        offset = 0;
+      }
+    }
+
+    if (root->q != 0) {
+      *q = root->q - nql / 2;
+
+    } else {
+      *q = *miss_bufp - nql / 2;
+
+      miss_bufp++;
+      num_missed++;
+    }
+
+    q++;
+    i++;
+
+    start_bit += len;
+  }
+
+  assert(start_bit == encoded.nbits);
+  assert(sizeof(int) * num_missed == out_data_miss_size);
+
+  delete[] miss_buf;
+  free_tree(phtree);
+
+  return out;
+}
+
+} // namespace mgard
diff --git a/tests/src/test_huffman.cpp b/tests/src/test_huffman.cpp
index d984c8caf9..44cec46b67 100644
--- a/tests/src/test_huffman.cpp
+++ b/tests/src/test_huffman.cpp
@@ -8,6 +8,7 @@
 #include "testing_utilities.hpp"
 
 #include "huffman.hpp"
+#include "huffman_regression.hpp"
 
 namespace {
 

From 2a4619274c26595defae76bbac0759928207312f Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 8 Jun 2022 09:58:58 -0400
Subject: [PATCH 31/58] Rename reimplemented Huffman functions.

---
 include/compressors.hpp                  | 12 ++++------
 include/huffman.hpp                      |  8 +++----
 src/compressors.cpp                      | 29 +++++++++++-------------
 src/huffman.cpp                          |  8 +++----
 tests/include/compressors_regression.hpp |  4 ++++
 tests/include/huffman_regression.hpp     |  4 ++++
 tests/src/compressors_regression.cpp     | 10 ++++++--
 tests/src/huffman_regression.cpp         |  4 ++++
 tests/src/test_compressors.cpp           | 22 +++++++++---------
 tests/src/test_huffman.cpp               | 15 ++++++------
 10 files changed, 63 insertions(+), 53 deletions(-)

diff --git a/include/compressors.hpp b/include/compressors.hpp
index 09f8c53c22..a8048966fa 100644
--- a/include/compressors.hpp
+++ b/include/compressors.hpp
@@ -22,9 +22,8 @@ namespace mgard {
 //!
 //!\param[in] src Array to be compressed.
 //!\param[in] srcLen Size of array (number of elements) to be compressed.
-MemoryBuffer<unsigned char>
-compress_memory_huffman_rewritten(long int *const src,
-                                  const std::size_t srcLen);
+MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
+                                                    const std::size_t srcLen);
 
 //! Decompress an array compressed with `compress_memory_huffman`.
 //!
@@ -34,10 +33,9 @@ compress_memory_huffman_rewritten(long int *const src,
 //!\param[in] srcLen Size in bytes of the compressed array.
 //!\param[out] dst Decompressed array.
 //!\param[in] dstLen Size in bytes of the decompressed array.
-void decompress_memory_huffman_rewritten(unsigned char *const src,
-                                         const std::size_t srcLen,
-                                         long int *const dst,
-                                         const std::size_t dstLen);
+void decompress_memory_huffman(unsigned char *const src,
+                               const std::size_t srcLen, long int *const dst,
+                               const std::size_t dstLen);
 
 #ifdef MGARD_ZSTD
 //! Compress an array using `zstd`.
diff --git a/include/huffman.hpp b/include/huffman.hpp
index 24a4be22af..0c8b0a5b93 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -212,17 +212,15 @@ template <typename Symbol> class HuffmanCode {
 //!\param[in] quantized_data Input buffer (quantized coefficients).
 //!\param[in] n Number of symbols (`long int` quantized coefficients) in the
 //! input buffer.
-HuffmanEncodedStream
-huffman_encoding_rewritten(long int const *const quantized_data,
-                           const std::size_t n);
+HuffmanEncodedStream huffman_encoding(long int const *const quantized_data,
+                                      const std::size_t n);
 
 //! Decode a stream encoded using a Huffman code.
 //!
 //!\deprecated
 //!
 //!\param[in] encoded Input buffer (Huffman-encoded stream).
-MemoryBuffer<long int>
-huffman_decoding_rewritten(const HuffmanEncodedStream &encoded);
+MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded);
 
 } // namespace mgard
 
diff --git a/src/compressors.cpp b/src/compressors.cpp
index f97528d3d6..d31a24a06a 100644
--- a/src/compressors.cpp
+++ b/src/compressors.cpp
@@ -30,10 +30,9 @@ std::size_t hit_buffer_size(const std::size_t nbits) {
 
 } // namespace
 
-void decompress_memory_huffman_rewritten(unsigned char *const src,
-                                         const std::size_t srcLen,
-                                         long int *const dst,
-                                         const std::size_t dstLen) {
+void decompress_memory_huffman(unsigned char *const src,
+                               const std::size_t srcLen, long int *const dst,
+                               const std::size_t dstLen) {
   std::size_t const *const sizes = reinterpret_cast<std::size_t const *>(src);
   const std::size_t nfrequencies = sizes[0];
   const std::size_t nbits = sizes[1];
@@ -55,8 +54,8 @@ void decompress_memory_huffman_rewritten(unsigned char *const src,
 #endif
   }
 
-  // `huffman_decoding_rewritten` expects the size of the hit buffer to be a
-  // multiple of `sizeof(unsigned int)`. We'll zero out any extra bytes below.
+  // `huffman_decoding` expects the size of the hit buffer to be a multiple of
+  // `sizeof(unsigned int)`. We'll zero out any extra bytes below.
   const std::size_t nbytes =
       sizeof(unsigned int) *
       ((nhit + sizeof(unsigned int) - 1) / sizeof(unsigned int));
@@ -83,7 +82,7 @@ void decompress_memory_huffman_rewritten(unsigned char *const src,
     std::copy(begin, end, encoded.missed.data.get());
   }
 
-  const MemoryBuffer<long int> decoded = huffman_decoding_rewritten(encoded);
+  const MemoryBuffer<long int> decoded = huffman_decoding(encoded);
   {
     long int const *const p = decoded.data.get();
     if (decoded.size * sizeof(*p) != dstLen) {
@@ -115,10 +114,9 @@ gather_constituents(const std::vector<Constituent> &constituents) {
 
 } // namespace
 
-MemoryBuffer<unsigned char>
-compress_memory_huffman_rewritten(long int *const src,
-                                  const std::size_t srcLen) {
-  const HuffmanEncodedStream encoded = huffman_encoding_rewritten(src, srcLen);
+MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
+                                                    const std::size_t srcLen) {
+  const HuffmanEncodedStream encoded = huffman_encoding(src, srcLen);
 
   assert(not(encoded.hit.size % sizeof(unsigned int)));
 
@@ -302,8 +300,8 @@ MemoryBuffer<unsigned char> compress(const pb::Header &header, void *const src,
     if (srcLen % qts) {
       throw std::runtime_error("incorrect quantization buffer size");
     }
-    return compress_memory_huffman_rewritten(reinterpret_cast<long int *>(src),
-                                             srcLen / qts);
+    return compress_memory_huffman(reinterpret_cast<long int *>(src),
+                                   srcLen / qts);
   }
 #else
     throw std::runtime_error("MGARD compiled without ZSTD support");
@@ -336,9 +334,8 @@ void decompress(const pb::Header &header, void *const src,
     break;
   case pb::Encoding::CPU_HUFFMAN_ZSTD:
 #ifdef MGARD_ZSTD
-    decompress_memory_huffman_rewritten(static_cast<unsigned char *>(src),
-                                        srcLen, static_cast<long int *>(dst),
-                                        dstLen);
+    decompress_memory_huffman(static_cast<unsigned char *>(src), srcLen,
+                              static_cast<long int *>(dst), dstLen);
     break;
 #else
     throw std::runtime_error("MGARD compiled without ZSTD support");
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 7e4429c6e7..fd5ecd6a33 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -91,9 +91,8 @@ void check_type_sizes() {
 
 } // namespace
 
-HuffmanEncodedStream
-huffman_encoding_rewritten(long int const *const quantized_data,
-                           const std::size_t n) {
+HuffmanEncodedStream huffman_encoding(long int const *const quantized_data,
+                                      const std::size_t n) {
   check_type_sizes();
 
   const std::size_t ncodewords = nql - 1;
@@ -194,8 +193,7 @@ long int decode(const HuffmanCode<long int> &code,
 
 } // namespace
 
-MemoryBuffer<long int>
-huffman_decoding_rewritten(const HuffmanEncodedStream &encoded) {
+MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded) {
   check_type_sizes();
 
   using Symbol = long int;
diff --git a/tests/include/compressors_regression.hpp b/tests/include/compressors_regression.hpp
index cc1815a3a7..bfb2426a5d 100644
--- a/tests/include/compressors_regression.hpp
+++ b/tests/include/compressors_regression.hpp
@@ -9,6 +9,8 @@
 
 namespace mgard {
 
+namespace regression {
+
 //! Compress an array using a Huffman tree.
 //!
 //!\param[in] src Array to be compressed.
@@ -26,6 +28,8 @@ void decompress_memory_huffman(unsigned char *const src,
                                const std::size_t srcLen, long int *const dst,
                                const std::size_t dstLen);
 
+} // namespace regression
+
 } // namespace mgard
 
 #endif
diff --git a/tests/include/huffman_regression.hpp b/tests/include/huffman_regression.hpp
index f10919d2ea..d67cd6b4ad 100644
--- a/tests/include/huffman_regression.hpp
+++ b/tests/include/huffman_regression.hpp
@@ -9,6 +9,8 @@
 
 namespace mgard {
 
+namespace regression {
+
 //! Encode quantized coefficients using a Huffman code.
 //!
 //!\param[in, out] quantized_data Input buffer (quantized coefficients). This
@@ -23,6 +25,8 @@ HuffmanEncodedStream huffman_encoding(long int *const quantized_data,
 //!\param[in] encoded Input buffer (Huffman-encoded stream).
 MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded);
 
+} // namespace regression
+
 } // namespace mgard
 
 #endif
diff --git a/tests/src/compressors_regression.cpp b/tests/src/compressors_regression.cpp
index c3dfdc9bc2..dd98384898 100644
--- a/tests/src/compressors_regression.cpp
+++ b/tests/src/compressors_regression.cpp
@@ -10,6 +10,8 @@
 
 namespace mgard {
 
+namespace regression {
+
 static_assert(CHAR_BIT == 8, "code written assuming `CHAR_BIT == 8`");
 
 static_assert(sizeof(unsigned int) == 4,
@@ -30,7 +32,8 @@ std::size_t hit_buffer_size(const std::size_t nbits) {
 
 MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
                                                     const std::size_t srcLen) {
-  HuffmanEncodedStream encoded = huffman_encoding(src, srcLen);
+  HuffmanEncodedStream encoded =
+      mgard::regression::huffman_encoding(src, srcLen);
 
   assert(not(encoded.hit.size % sizeof(unsigned int)));
 
@@ -134,7 +137,8 @@ void decompress_memory_huffman(unsigned char *const src,
     std::copy(begin, end, encoded.missed.data.get());
   }
 
-  const MemoryBuffer<long int> decoded = huffman_decoding(encoded);
+  const MemoryBuffer<long int> decoded =
+      mgard::regression::huffman_decoding(encoded);
   {
     long int const *const p = decoded.data.get();
     if (decoded.size * sizeof(*p) != dstLen) {
@@ -145,4 +149,6 @@ void decompress_memory_huffman(unsigned char *const src,
   }
 }
 
+} // namespace regression
+
 } // namespace mgard
diff --git a/tests/src/huffman_regression.cpp b/tests/src/huffman_regression.cpp
index 1bd397b3b6..5fbc4b74dd 100644
--- a/tests/src/huffman_regression.cpp
+++ b/tests/src/huffman_regression.cpp
@@ -8,6 +8,8 @@
 
 namespace mgard {
 
+namespace regression {
+
 //! Node in the Huffman code creation tree.
 struct htree_node {
   //! Constructor.
@@ -370,4 +372,6 @@ MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded) {
   return out;
 }
 
+} // namespace regression
+
 } // namespace mgard
diff --git a/tests/src/test_compressors.cpp b/tests/src/test_compressors.cpp
index af9de92915..4d5a42048b 100644
--- a/tests/src/test_compressors.cpp
+++ b/tests/src/test_compressors.cpp
@@ -37,9 +37,9 @@ void test_huffman_compression_regression(long int *const src,
   std::copy(src, src + srcLen, src_);
 
   const mgard::MemoryBuffer<unsigned char> out =
-      mgard::compress_memory_huffman(src, srcLen);
+      mgard::regression::compress_memory_huffman(src, srcLen);
   const mgard::MemoryBuffer<unsigned char> out_ =
-      mgard::compress_memory_huffman_rewritten(src_, srcLen);
+      mgard::compress_memory_huffman(src_, srcLen);
 
   delete[] src_;
 
@@ -55,9 +55,9 @@ void test_huffman_decompression_regression(long int *const src,
   std::copy(src, src + srcLen, src_);
 
   const mgard::MemoryBuffer<unsigned char> compressed =
-      mgard::compress_memory_huffman(src, srcLen);
+      mgard::regression::compress_memory_huffman(src, srcLen);
   const mgard::MemoryBuffer<unsigned char> compressed_ =
-      mgard::compress_memory_huffman(src_, srcLen);
+      mgard::regression::compress_memory_huffman(src_, srcLen);
 
   delete[] src_;
 
@@ -69,10 +69,10 @@ void test_huffman_decompression_regression(long int *const src,
   long int *const p = out.data.get();
   long int *const p_ = out_.data.get();
 
-  mgard::decompress_memory_huffman(q, compressed.size, p,
-                                   out.size * sizeof(long int));
-  mgard::decompress_memory_huffman_rewritten(q_, compressed_.size, p_,
-                                             out_.size * sizeof(long int));
+  mgard::regression::decompress_memory_huffman(q, compressed.size, p,
+                                               out.size * sizeof(long int));
+  mgard::decompress_memory_huffman(q_, compressed_.size, p_,
+                                   out_.size * sizeof(long int));
 
   REQUIRE(std::equal(p, p + srcLen, p_));
 }
@@ -268,8 +268,8 @@ TEST_CASE("compression with header configuration", "[compressors]") {
   REQUIRE(e.preprocessor() == mgard::pb::Encoding::SHUFFLE);
 #ifdef MGARD_ZSTD
   REQUIRE(e.compressor() == mgard::pb::Encoding::CPU_HUFFMAN_ZSTD);
-  mgard::decompress_memory_huffman(compressed.data.get(), compressed.size, dst,
-                                   quantizedLen);
+  mgard::regression::decompress_memory_huffman(
+      compressed.data.get(), compressed.size, dst, quantizedLen);
 #else
   REQUIRE(e.compressor() == mgard::pb::Encoding::CPU_HUFFMAN_ZLIB);
   mgard::decompress_memory_z(compressed.data.get(), compressed.size, dst,
@@ -339,7 +339,7 @@ TEST_CASE("decompression with header configuration", "[compressors]") {
     std::int64_t *const quantized_ = new std::int64_t[ndof];
     std::copy(quantized, quantized + ndof, quantized_);
     const mgard::MemoryBuffer<unsigned char> out =
-        mgard::compress_memory_huffman(quantized_, ndof);
+        mgard::regression::compress_memory_huffman(quantized_, ndof);
     delete[] quantized_;
 
     const std::size_t srcLen = out.size;
diff --git a/tests/src/test_huffman.cpp b/tests/src/test_huffman.cpp
index 44cec46b67..95eeb1af0b 100644
--- a/tests/src/test_huffman.cpp
+++ b/tests/src/test_huffman.cpp
@@ -16,9 +16,10 @@ void test_encoding_regression(long int *const quantized, const std::size_t N) {
   long int *const quantized_ = new long int[N];
   std::copy(quantized, quantized + N, quantized_);
 
-  const mgard::HuffmanEncodedStream out = mgard::huffman_encoding(quantized, N);
+  const mgard::HuffmanEncodedStream out =
+      mgard::regression::huffman_encoding(quantized, N);
   const mgard::HuffmanEncodedStream out_ =
-      mgard::huffman_encoding_rewritten(quantized_, N);
+      mgard::huffman_encoding(quantized_, N);
 
   unsigned char const *const hit = out.hit.data.get();
   REQUIRE(out_.nbits == out.nbits);
@@ -44,15 +45,15 @@ void test_decoding_regression(long int *const quantized, const std::size_t N) {
   std::copy(quantized, quantized + N, quantized_);
 
   const mgard::HuffmanEncodedStream encoded =
-      mgard::huffman_encoding(quantized, N);
+      mgard::regression::huffman_encoding(quantized, N);
   const mgard::HuffmanEncodedStream encoded_ =
-      mgard::huffman_encoding(quantized_, N);
+      mgard::regression::huffman_encoding(quantized_, N);
 
   delete[] quantized_;
 
-  const mgard::MemoryBuffer<long int> out = mgard::huffman_decoding(encoded);
-  const mgard::MemoryBuffer<long int> out_ =
-      mgard::huffman_decoding_rewritten(encoded_);
+  const mgard::MemoryBuffer<long int> out =
+      mgard::regression::huffman_decoding(encoded);
+  const mgard::MemoryBuffer<long int> out_ = mgard::huffman_decoding(encoded_);
 
   REQUIRE(out.size == out_.size);
   REQUIRE(out.size == N);

From 7d2825b442ccde216bdc53585184859b26cd130e Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 8 Jun 2022 10:37:58 -0400
Subject: [PATCH 32/58] Copy input buffer in legacy Huffman encoder.

---
 include/compressors.hpp                  |  4 ++--
 src/compressors.cpp                      |  4 ++--
 tests/include/compressors_regression.hpp |  4 ++--
 tests/include/huffman_regression.hpp     |  4 +++-
 tests/src/compressors_regression.cpp     |  4 ++--
 tests/src/huffman_regression.cpp         | 11 ++++++++---
 tests/src/test_compressors.cpp           | 18 ++++--------------
 tests/src/test_huffman.cpp               | 20 ++++++--------------
 8 files changed, 29 insertions(+), 40 deletions(-)

diff --git a/include/compressors.hpp b/include/compressors.hpp
index a8048966fa..1542d3eeb2 100644
--- a/include/compressors.hpp
+++ b/include/compressors.hpp
@@ -22,7 +22,7 @@ namespace mgard {
 //!
 //!\param[in] src Array to be compressed.
 //!\param[in] srcLen Size of array (number of elements) to be compressed.
-MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
+MemoryBuffer<unsigned char> compress_memory_huffman(long int const *const src,
                                                     const std::size_t srcLen);
 
 //! Decompress an array compressed with `compress_memory_huffman`.
@@ -33,7 +33,7 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
 //!\param[in] srcLen Size in bytes of the compressed array.
 //!\param[out] dst Decompressed array.
 //!\param[in] dstLen Size in bytes of the decompressed array.
-void decompress_memory_huffman(unsigned char *const src,
+void decompress_memory_huffman(unsigned char const *const src,
                                const std::size_t srcLen, long int *const dst,
                                const std::size_t dstLen);
 
diff --git a/src/compressors.cpp b/src/compressors.cpp
index d31a24a06a..daa6bc8f2a 100644
--- a/src/compressors.cpp
+++ b/src/compressors.cpp
@@ -30,7 +30,7 @@ std::size_t hit_buffer_size(const std::size_t nbits) {
 
 } // namespace
 
-void decompress_memory_huffman(unsigned char *const src,
+void decompress_memory_huffman(unsigned char const *const src,
                                const std::size_t srcLen, long int *const dst,
                                const std::size_t dstLen) {
   std::size_t const *const sizes = reinterpret_cast<std::size_t const *>(src);
@@ -114,7 +114,7 @@ gather_constituents(const std::vector<Constituent> &constituents) {
 
 } // namespace
 
-MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
+MemoryBuffer<unsigned char> compress_memory_huffman(long int const *const src,
                                                     const std::size_t srcLen) {
   const HuffmanEncodedStream encoded = huffman_encoding(src, srcLen);
 
diff --git a/tests/include/compressors_regression.hpp b/tests/include/compressors_regression.hpp
index bfb2426a5d..07f632eec4 100644
--- a/tests/include/compressors_regression.hpp
+++ b/tests/include/compressors_regression.hpp
@@ -15,7 +15,7 @@ namespace regression {
 //!
 //!\param[in] src Array to be compressed.
 //!\param[in] srcLen Size of array (number of elements) to be compressed.
-MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
+MemoryBuffer<unsigned char> compress_memory_huffman(long int const *const src,
                                                     const std::size_t srcLen);
 
 //! Decompress an array compressed with `compress_memory_huffman`.
@@ -24,7 +24,7 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
 //!\param[in] srcLen Size in bytes of the compressed array.
 //!\param[out] dst Decompressed array.
 //!\param[in] dstLen Size in bytes of the decompressed array.
-void decompress_memory_huffman(unsigned char *const src,
+void decompress_memory_huffman(unsigned char const *const src,
                                const std::size_t srcLen, long int *const dst,
                                const std::size_t dstLen);
 
diff --git a/tests/include/huffman_regression.hpp b/tests/include/huffman_regression.hpp
index d67cd6b4ad..e6c00b092f 100644
--- a/tests/include/huffman_regression.hpp
+++ b/tests/include/huffman_regression.hpp
@@ -13,11 +13,13 @@ namespace regression {
 
 //! Encode quantized coefficients using a Huffman code.
 //!
+//! The algorithm modifies the quantized data, so the input buffer is copied.
+//!
 //!\param[in, out] quantized_data Input buffer (quantized coefficients). This
 //! buffer will be changed by the encoding process.
 //!\param[in] n Number of symbols (`long int` quantized coefficients) in the
 //! input buffer.
-HuffmanEncodedStream huffman_encoding(long int *const quantized_data,
+HuffmanEncodedStream huffman_encoding(long int const *const quantized_data,
                                       const std::size_t n);
 
 //! Decode a stream encoded using a Huffman code.
diff --git a/tests/src/compressors_regression.cpp b/tests/src/compressors_regression.cpp
index dd98384898..c65bac9cd1 100644
--- a/tests/src/compressors_regression.cpp
+++ b/tests/src/compressors_regression.cpp
@@ -30,7 +30,7 @@ std::size_t hit_buffer_size(const std::size_t nbits) {
 
 // This code also makes endianness assumptions.
 
-MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
+MemoryBuffer<unsigned char> compress_memory_huffman(long int const *const src,
                                                     const std::size_t srcLen) {
   HuffmanEncodedStream encoded =
       mgard::regression::huffman_encoding(src, srcLen);
@@ -95,7 +95,7 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int *const src,
   return MemoryBuffer<unsigned char>(buffer, bufferLen);
 }
 
-void decompress_memory_huffman(unsigned char *const src,
+void decompress_memory_huffman(unsigned char const *const src,
                                const std::size_t srcLen, long int *const dst,
                                const std::size_t dstLen) {
   std::size_t const *const sizes = reinterpret_cast<std::size_t const *>(src);
diff --git a/tests/src/huffman_regression.cpp b/tests/src/huffman_regression.cpp
index 5fbc4b74dd..c2c58bdc95 100644
--- a/tests/src/huffman_regression.cpp
+++ b/tests/src/huffman_regression.cpp
@@ -196,9 +196,12 @@ HuffmanCodec<N> build_huffman_codec(long int *const quantized_data,
   return codec;
 }
 
-HuffmanEncodedStream huffman_encoding(long int *const quantized_data,
+HuffmanEncodedStream huffman_encoding(long int const *const quantized_data,
                                       const std::size_t n) {
-  const HuffmanCodec<nql> codec = build_huffman_codec<nql>(quantized_data, n);
+  long int *const quantized_data_ = new long int[n];
+  std::copy(quantized_data, quantized_data + n, quantized_data_);
+
+  const HuffmanCodec<nql> codec = build_huffman_codec<nql>(quantized_data_, n);
   const std::size_t num_miss = codec.frequency_table[0];
 
   assert(n >= num_miss);
@@ -238,7 +241,7 @@ HuffmanEncodedStream huffman_encoding(long int *const quantized_data,
 
   std::size_t start_bit = 0;
   for (std::size_t i = 0; i < n; i++) {
-    const int q = quantized_data[i];
+    const int q = quantized_data_[i];
     unsigned int code;
     std::size_t len;
 
@@ -276,6 +279,8 @@ HuffmanEncodedStream huffman_encoding(long int *const quantized_data,
     start_bit += len;
   }
 
+  delete[] quantized_data_;
+
   return out;
 }
 
diff --git a/tests/src/test_compressors.cpp b/tests/src/test_compressors.cpp
index 4d5a42048b..501f085039 100644
--- a/tests/src/test_compressors.cpp
+++ b/tests/src/test_compressors.cpp
@@ -31,17 +31,12 @@ void test_huffman_identity(std::default_random_engine &gen,
   delete[] decompressed;
 }
 
-void test_huffman_compression_regression(long int *const src,
+void test_huffman_compression_regression(long int const *const src,
                                          const std::size_t srcLen) {
-  long int *const src_ = new long int[srcLen];
-  std::copy(src, src + srcLen, src_);
-
   const mgard::MemoryBuffer<unsigned char> out =
       mgard::regression::compress_memory_huffman(src, srcLen);
   const mgard::MemoryBuffer<unsigned char> out_ =
-      mgard::compress_memory_huffman(src_, srcLen);
-
-  delete[] src_;
+      mgard::compress_memory_huffman(src, srcLen);
 
   REQUIRE(out.size == out_.size);
   unsigned char const *const p = out.data.get();
@@ -49,17 +44,12 @@ void test_huffman_compression_regression(long int *const src,
   REQUIRE(std::equal(p, p + out.size, p_));
 }
 
-void test_huffman_decompression_regression(long int *const src,
+void test_huffman_decompression_regression(long int const *const src,
                                            const std::size_t srcLen) {
-  long int *const src_ = new long int[srcLen];
-  std::copy(src, src + srcLen, src_);
-
   const mgard::MemoryBuffer<unsigned char> compressed =
       mgard::regression::compress_memory_huffman(src, srcLen);
   const mgard::MemoryBuffer<unsigned char> compressed_ =
-      mgard::regression::compress_memory_huffman(src_, srcLen);
-
-  delete[] src_;
+      mgard::regression::compress_memory_huffman(src, srcLen);
 
   mgard::MemoryBuffer<long int> out(srcLen);
   mgard::MemoryBuffer<long int> out_(srcLen);
diff --git a/tests/src/test_huffman.cpp b/tests/src/test_huffman.cpp
index 95eeb1af0b..a035be7173 100644
--- a/tests/src/test_huffman.cpp
+++ b/tests/src/test_huffman.cpp
@@ -12,14 +12,12 @@
 
 namespace {
 
-void test_encoding_regression(long int *const quantized, const std::size_t N) {
-  long int *const quantized_ = new long int[N];
-  std::copy(quantized, quantized + N, quantized_);
-
+void test_encoding_regression(long int const *const quantized,
+                              const std::size_t N) {
   const mgard::HuffmanEncodedStream out =
       mgard::regression::huffman_encoding(quantized, N);
   const mgard::HuffmanEncodedStream out_ =
-      mgard::huffman_encoding(quantized_, N);
+      mgard::huffman_encoding(quantized, N);
 
   unsigned char const *const hit = out.hit.data.get();
   REQUIRE(out_.nbits == out.nbits);
@@ -36,20 +34,14 @@ void test_encoding_regression(long int *const quantized, const std::size_t N) {
   REQUIRE(out_.frequencies.size == nfrequencies);
   REQUIRE(std::equal(frequencies, frequencies + nfrequencies,
                      out_.frequencies.data.get()));
-
-  delete[] quantized_;
 }
 
-void test_decoding_regression(long int *const quantized, const std::size_t N) {
-  long int *const quantized_ = new long int[N];
-  std::copy(quantized, quantized + N, quantized_);
-
+void test_decoding_regression(long int const *const quantized,
+                              const std::size_t N) {
   const mgard::HuffmanEncodedStream encoded =
       mgard::regression::huffman_encoding(quantized, N);
   const mgard::HuffmanEncodedStream encoded_ =
-      mgard::regression::huffman_encoding(quantized_, N);
-
-  delete[] quantized_;
+      mgard::regression::huffman_encoding(quantized, N);
 
   const mgard::MemoryBuffer<long int> out =
       mgard::regression::huffman_decoding(encoded);

From 4fd5399d47d747e3e260a553499f4d80f3a21d13 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 8 Jun 2022 11:40:09 -0400
Subject: [PATCH 33/58] Directly set `HuffmanCode` endpoints.

---
 include/huffman.hpp | 20 +++++++------
 include/huffman.tpp | 73 +++++++++++++++++----------------------------
 src/huffman.cpp     | 31 ++++++++++++-------
 3 files changed, 58 insertions(+), 66 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index 0c8b0a5b93..5890d2fb88 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -18,7 +18,7 @@ namespace mgard {
 //! Huffman encoding and decoding functions.
 //!
 //!\deprecated
-inline constexpr std::size_t nql = 32768 * 4;
+inline constexpr std::size_t nql = 1 << 17;
 
 //! A stream compressed using a Huffman code.
 struct HuffmanEncodedStream {
@@ -110,19 +110,24 @@ template <typename Symbol> class HuffmanCode {
 
   //! Constructor.
   //!
-  //!\param ncodewords Number of symbols that will be assigned codewords.
+  //!\param endpoints Smallest and largest symbols (inclusive) to receive
+  //! codewords.
   //!\param begin Beginning of input stream.
   //!\param end End of output stream.
-  HuffmanCode(const std::size_t ncodewords, Symbol const *const begin,
-              Symbol const *const end);
+  HuffmanCode(const std::pair<Symbol, Symbol> &endpoints,
+              Symbol const *const begin, Symbol const *const end);
 
   //! Constructor.
   //!
-  //!\param ncodewords Number of symbols that will be assigned codewords.
+  //!\param endpoints Smallest and largest symbols (inclusive) to receive
+  //! codewords.
   //!\param pairs Index–frequency pairs for frequency table.
-  HuffmanCode(const std::size_t ncodewords,
+  HuffmanCode(const std::pair<Symbol, Symbol> &endpoints,
               const std::vector<std::pair<std::size_t, std::size_t>> &pairs);
 
+  //! Smallest and largest symbols (inclusive) to receive codewords.
+  std::pair<Symbol, Symbol> endpoints;
+
   //! Number of symbols that will be assigned codewords.
   std::size_t ncodewords;
 
@@ -164,9 +169,6 @@ template <typename Symbol> class HuffmanCode {
   Symbol decode(const Node &leaf, Symbol const *&missed) const;
 
 private:
-  //! Smallest and largest symbols (inclusive) to receive codewords.
-  std::pair<Symbol, Symbol> endpoints;
-
   //! Set the range of symbols that will be assigned codewords.
   //!
   //!\note This function depends on `ncodewords`.
diff --git a/include/huffman.tpp b/include/huffman.tpp
index 8d6a7cc9f5..6d4b906fc4 100644
--- a/include/huffman.tpp
+++ b/include/huffman.tpp
@@ -3,7 +3,6 @@
 #include <cassert>
 #include <cstddef>
 
-#include <limits>
 #include <stdexcept>
 
 namespace mgard {
@@ -15,45 +14,6 @@ operator()(const typename HuffmanCode<Symbol>::Node &a,
   return a->count > b->count;
 }
 
-template <typename Symbol> void HuffmanCode<Symbol>::set_endpoints() {
-  // Haven't carefully checked what the minimum acceptable value is.
-  if (not ncodewords) {
-    throw std::invalid_argument("`ncodewords` must be positive.");
-  }
-  const Symbol SYMBOL_MAX = std::numeric_limits<Symbol>::max();
-  const Symbol SYMBOL_MIN = std::numeric_limits<Symbol>::min();
-
-  const std::size_t max_symbol_ = (ncodewords + 1) / 2 - 1;
-  const std::size_t opp_min_symbol_ = ncodewords / 2;
-
-  // There is surely a better way of doing this. Lots of potential issues with
-  // directly comparing `opp_min_symbol_` and `-SYMBOL_MIN`. `-SYMBOL_MIN`
-  // can't necessarily be represented as a `Symbol`, for example. Trying to
-  // avoid overflows.
-  std::size_t a = opp_min_symbol_;
-  Symbol b = SYMBOL_MIN;
-  while (a) {
-    a /= 2;
-    b /= 2;
-  }
-  if (not b) {
-    // Only a "risk" because we haven't actually established that
-    // `opp_min_symbol_` is greater in magnitude than `SYMBOL_MIN`.
-    throw std::overflow_error(
-        "risk that minimum symbol cannot be represented in symbol type");
-  } else if (opp_min_symbol_ > SYMBOL_MAX) {
-    throw std::overflow_error(
-        "opposite of minimum symbol canont be represented in symbol type");
-  } else {
-    endpoints.first = -static_cast<Symbol>(opp_min_symbol_);
-  }
-
-  // `opp_min_symbol_` is either equal to or one greater than `max_symbol_`,
-  // and we checked above that `opp_min_symbol <= SYMBOL_MAX`. So, we know
-  // that `max_symbol_ <= SYMBOL_MAX` here.
-  endpoints.second = max_symbol_;
-}
-
 template <typename Symbol>
 void HuffmanCode<Symbol>::create_code_creation_tree() {
   // We can't quite use a `ZippedRange` here, I think, because
@@ -106,12 +66,33 @@ void HuffmanCode<Symbol>::populate_frequencies(
   }
 }
 
+namespace {
+
+template <typename Symbol>
+std::size_t
+ncodewords_from_endpoints(const std::pair<Symbol, Symbol> &endpoints) {
+  if (endpoints.first > endpoints.second) {
+    throw std::invalid_argument(
+        "maximum symbol must be greater than or equal to minimum symbol");
+  }
+  // The endpoints are inclusive.
+  // Overflow possible in the subtraction.
+  const std::size_t ncodewords = endpoints.second - endpoints.first + 1;
+  // Haven't carefully checked what the minimum acceptable value is.
+  if (not ncodewords) {
+    throw std::invalid_argument("`ncodewords` must be positive.");
+  }
+  return ncodewords;
+}
+
+} // namespace
+
 template <typename Symbol>
-HuffmanCode<Symbol>::HuffmanCode(const std::size_t ncodewords,
+HuffmanCode<Symbol>::HuffmanCode(const std::pair<Symbol, Symbol> &endpoints,
                                  Symbol const *const begin,
                                  Symbol const *const end)
-    : ncodewords(ncodewords), frequencies(ncodewords), codewords(ncodewords) {
-  set_endpoints();
+    : endpoints(endpoints), ncodewords(ncodewords_from_endpoints(endpoints)),
+      frequencies(ncodewords), codewords(ncodewords) {
   populate_frequencies(begin, end);
   create_code_creation_tree();
   recursively_set_codewords(queue.top(), {});
@@ -119,10 +100,10 @@ HuffmanCode<Symbol>::HuffmanCode(const std::size_t ncodewords,
 
 template <typename Symbol>
 HuffmanCode<Symbol>::HuffmanCode(
-    const std::size_t ncodewords,
+    const std::pair<Symbol, Symbol> &endpoints,
     const std::vector<std::pair<std::size_t, std::size_t>> &pairs)
-    : ncodewords(ncodewords), frequencies(ncodewords), codewords(ncodewords) {
-  set_endpoints();
+    : endpoints(endpoints), ncodewords(ncodewords_from_endpoints(endpoints)),
+      frequencies(ncodewords), codewords(ncodewords) {
   populate_frequencies(pairs);
   create_code_creation_tree();
   recursively_set_codewords(queue.top(), {});
diff --git a/src/huffman.cpp b/src/huffman.cpp
index fd5ecd6a33..bc0a4c6a46 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -91,13 +91,21 @@ void check_type_sizes() {
 
 } // namespace
 
+namespace {
+
+const std::pair<long int, long int> nql_endpoints{
+    -static_cast<long int>((nql - 1) / 2), nql / 2 - 1};
+}
+
 HuffmanEncodedStream huffman_encoding(long int const *const quantized_data,
                                       const std::size_t n) {
   check_type_sizes();
 
-  const std::size_t ncodewords = nql - 1;
-  const HuffmanCode<long int> code(ncodewords, quantized_data,
-                                   quantized_data + n);
+  using Symbol = long int;
+  using MissedSymbol = int;
+
+  const HuffmanCode<Symbol> code(nql_endpoints, quantized_data,
+                                 quantized_data + n);
 
   std::vector<std::size_t> lengths;
   for (const HuffmanCodeword &codeword : code.codewords) {
@@ -114,10 +122,11 @@ HuffmanEncodedStream huffman_encoding(long int const *const quantized_data,
         "`nbytes` not bumped up to nearest multiple of `unsigned int` size");
   }
 
-  const std::size_t nnz = ncodewords - std::count(code.frequencies.begin(),
-                                                  code.frequencies.end(), 0);
+  const std::size_t nnz =
+      code.ncodewords -
+      std::count(code.frequencies.begin(), code.frequencies.end(), 0);
 
-  HuffmanEncodedStream out(nbits, nbytes, code.nmissed() * sizeof(int),
+  HuffmanEncodedStream out(nbits, nbytes, code.nmissed() * sizeof(MissedSymbol),
                            2 * nnz * sizeof(std::size_t));
 
   // Write frequency table.
@@ -125,7 +134,7 @@ HuffmanEncodedStream huffman_encoding(long int const *const quantized_data,
     std::size_t *p =
         reinterpret_cast<std::size_t *>(out.frequencies.data.get());
     const std::vector<std::size_t> &frequencies = code.frequencies;
-    for (std::size_t i = 0; i < ncodewords; ++i) {
+    for (std::size_t i = 0; i < code.ncodewords; ++i) {
       const std::size_t frequency = frequencies.at(i);
       if (frequency) {
         *p++ = i;
@@ -141,10 +150,11 @@ HuffmanEncodedStream huffman_encoding(long int const *const quantized_data,
   }
   unsigned char *hit = buffer;
 
-  int *missed = reinterpret_cast<int *>(out.missed.data.get());
+  MissedSymbol *missed =
+      reinterpret_cast<MissedSymbol *>(out.missed.data.get());
 
   unsigned char offset = 0;
-  for (const long int q : PseudoArray(quantized_data, n)) {
+  for (const Symbol q : PseudoArray(quantized_data, n)) {
     if (code.out_of_range(q)) {
       // Remember that `missed` is an `int` rather than a `long int`.
       *missed++ = q + nql / 2;
@@ -213,8 +223,7 @@ MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded) {
     }
   }
 
-  const std::size_t ncodewords = nql - 1;
-  HuffmanCode<Symbol> code(ncodewords, pairs);
+  HuffmanCode<Symbol> code(nql_endpoints, pairs);
 
   MemoryBuffer<Symbol> out(nquantized);
   Symbol *q = out.data.get();

From 9f112da1950e673b89e53debe40d69b14ae1c40a Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Thu, 9 Jun 2022 12:06:40 -0400
Subject: [PATCH 34/58] Fix calculation of `HuffmanCode::ncodewords`.

---
 include/huffman.hpp |  3 ++-
 include/huffman.tpp | 15 ++++++++-------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index 5890d2fb88..a3b89fc476 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -128,7 +128,8 @@ template <typename Symbol> class HuffmanCode {
   //! Smallest and largest symbols (inclusive) to receive codewords.
   std::pair<Symbol, Symbol> endpoints;
 
-  //! Number of symbols that will be assigned codewords.
+  //! Number of symbols that will be assigned codewords (including one for the
+  //! 'missed' symbol).
   std::size_t ncodewords;
 
   //! Frequencies of the symbols in the input stream.
diff --git a/include/huffman.tpp b/include/huffman.tpp
index 6d4b906fc4..40cd9e7ff7 100644
--- a/include/huffman.tpp
+++ b/include/huffman.tpp
@@ -75,13 +75,14 @@ ncodewords_from_endpoints(const std::pair<Symbol, Symbol> &endpoints) {
     throw std::invalid_argument(
         "maximum symbol must be greater than or equal to minimum symbol");
   }
-  // The endpoints are inclusive.
-  // Overflow possible in the subtraction.
-  const std::size_t ncodewords = endpoints.second - endpoints.first + 1;
-  // Haven't carefully checked what the minimum acceptable value is.
-  if (not ncodewords) {
-    throw std::invalid_argument("`ncodewords` must be positive.");
-  }
+  // One for the 'missed' symbol, and the endpoints are inclusive.
+  // Overflow is possible in the subtraction `endpoints.second -
+  // endpoints.first` (suppose `Symbol` is `char` and `endpoints` is `{CHAR_MIN,
+  // CHAR_MAX}`). Casting to `std::int64_t` should avoid the problem in all
+  // practical cases.
+  const std::size_t ncodewords = 1 +
+                                 static_cast<std::int64_t>(endpoints.second) -
+                                 static_cast<std::int64_t>(endpoints.first) + 1;
   return ncodewords;
 }
 

From 6d619800504221188282f341370f393709e58787 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Thu, 9 Jun 2022 14:03:45 -0400
Subject: [PATCH 35/58] Generalize function to parse header from buffer.

---
 include/format.hpp | 17 ++++++++++-------
 include/format.tpp | 22 ++++++++++++++++++++++
 src/format.cpp     | 24 +-----------------------
 3 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/include/format.hpp b/include/format.hpp
index e7821e64e6..0a817e78f6 100644
--- a/include/format.hpp
+++ b/include/format.hpp
@@ -165,16 +165,19 @@ pb::Header read_metadata(BufferWindow &window);
 //!\param header Header of the self-describing buffer.
 void write_metadata(std::ostream &ostream, const pb::Header &header);
 
-//! Parse the header of a self-describing buffer.
+template <typename T>
+//! Parse a message from a buffer window.
 //!
 //! The buffer pointer will be advanced past the header.
 //!
-//!\param window Window into the self-describing buffer. The current position
-//! should be the start of the header.
-//!\param header_size Size in bytes of the header.
-//!\return Header of the self-describing buffer.
-pb::Header read_header(BufferWindow &window,
-                       const std::uint_least64_t header_size);
+//! This function was originally written to parse the header from a
+//! self-describing buffer.
+//!
+//!\param window Buffer window containing the serialized message. The current
+//! position should be the start of the message.
+//!\param nmessage Size in bytes of the message.
+//!\return Parsed message.
+T read_message(BufferWindow &window, const std::uint_least64_t nmessage);
 
 //! Check that a dataset was compressed with a compatible version of MGARD.
 //!
diff --git a/include/format.tpp b/include/format.tpp
index e223235a41..14b33bb77b 100644
--- a/include/format.tpp
+++ b/include/format.tpp
@@ -61,4 +61,26 @@ template <typename Int> bool big_endian() {
   return not*reinterpret_cast<unsigned char const *>(&n);
 }
 
+template <typename T>
+T read_message(BufferWindow &window, const std::uint_least64_t nmessage) {
+  // The `CodedInputStream` constructor takes an `int`.
+  if (nmessage > std::numeric_limits<int>::max()) {
+    throw std::runtime_error("message is too large (size would overflow)");
+  }
+  // Check that the read will stay in the buffer.
+  unsigned char const *const next = window.next(nmessage);
+  T message;
+  google::protobuf::io::CodedInputStream stream(
+      static_cast<google::protobuf::uint8 const *>(window.current), nmessage);
+  if (not message.ParseFromCodedStream(&stream)) {
+    throw std::runtime_error(
+        "message parsing encountered read or format error");
+  }
+  if (not stream.ConsumedEntireMessage()) {
+    throw std::runtime_error("part of message left unparsed");
+  }
+  window.current = next;
+  return message;
+}
+
 } // namespace mgard
diff --git a/src/format.cpp b/src/format.cpp
index f9c4c62aa9..0464e4b791 100644
--- a/src/format.cpp
+++ b/src/format.cpp
@@ -204,7 +204,7 @@ pb::Header read_metadata(BufferWindow &window) {
   const uint_least64_t header_size = read_header_size(window);
   const uint_least32_t header_crc32 = read_header_crc32(window);
   check_header_crc32(window, header_size, header_crc32);
-  return read_header(window, header_size);
+  return read_message<pb::Header>(window, header_size);
 }
 
 namespace {
@@ -232,28 +232,6 @@ void write_metadata(std::ostream &ostream, const pb::Header &header) {
   delete[] header_bytes;
 }
 
-pb::Header read_header(BufferWindow &window,
-                       const std::uint_least64_t header_size) {
-  // The `CodedInputStream` constructor takes an `int`.
-  if (header_size > std::numeric_limits<int>::max()) {
-    throw std::runtime_error("header is too large (size would overflow)");
-  }
-  // Check that the read will stay in the buffer.
-  unsigned char const *const next = window.next(header_size);
-  mgard::pb::Header header;
-  google::protobuf::io::CodedInputStream stream(
-      static_cast<google::protobuf::uint8 const *>(window.current),
-      header_size);
-  if (not header.ParseFromCodedStream(&stream)) {
-    throw std::runtime_error("header parsing encountered read or format error");
-  }
-  if (not stream.ConsumedEntireMessage()) {
-    throw std::runtime_error("part of header left unparsed");
-  }
-  window.current = next;
-  return header;
-}
-
 void check_mgard_version(const pb::Header &header) {
   const pb::VersionNumber &mgard_version = header.mgard_version();
   if (mgard_version.major_() > MGARD_VERSION_MAJOR) {

From 501183c7fb3eb6530e025423a6d9d6ea33d2b397 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Thu, 9 Jun 2022 14:13:12 -0400
Subject: [PATCH 36/58] Add Huffman encoding with protocol buffer header.

---
 include/huffman.hpp         |  20 +++-
 include/huffman.tpp         | 221 +++++++++++++++++++++++++++++++++++-
 src/huffman.cpp             |  20 +++-
 src/mgard.proto             |  71 +++++++++++-
 tests/src/test_compress.cpp |   2 +-
 tests/src/test_huffman.cpp  |  66 ++++++++++-
 6 files changed, 385 insertions(+), 15 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index a3b89fc476..ee8f662e2d 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -158,16 +158,18 @@ template <typename Symbol> class HuffmanCode {
   //! Huffman code creation tree.
   std::priority_queue<Node, std::vector<Node>, HeldCountGreater> queue;
 
+  // TODO: Just indicate in return value whether symbol was missed.
+
   //! Decode a codeword (identified by associated leaf) to a symbol.
   //!
   //!\pre `leaf` must be a leaf (rather than an interior node) of the code
-  //! creation tree.
+  //! creation tree. `It::value_type` must be convertible to `Symbol`.
   //!
   //!\param leaf Leaf (associated to a codeword) to decode.
   //!\param missed Pointer to next out-of-range symbol. If `leaf` is associated
   //! to the out-of-range codeword, this pointer will be dereferenced and
   //! incremented.
-  Symbol decode(const Node &leaf, Symbol const *&missed) const;
+  template <typename It> Symbol decode(const Node &leaf, It &missed) const;
 
 private:
   //! Set the range of symbols that will be assigned codewords.
@@ -225,6 +227,20 @@ HuffmanEncodedStream huffman_encoding(long int const *const quantized_data,
 //!\param[in] encoded Input buffer (Huffman-encoded stream).
 MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded);
 
+//! Encode quantized coefficients using a Huffman code.
+//!
+//!\param begin Input buffer (quantized coefficients).
+//!\param n Number of symbols in the input buffer.
+template <typename Symbol>
+MemoryBuffer<unsigned char> huffman_encode(Symbol const *const begin,
+                                           const std::size_t n);
+
+//! Decode a stream encoded using a Huffman code.
+//!
+//!\param buffer Input buffer (Huffman-encoded stream).
+template <typename Symbol>
+MemoryBuffer<Symbol> huffman_decode(const MemoryBuffer<unsigned char> &buffer);
+
 } // namespace mgard
 
 #include "huffman.tpp"
diff --git a/include/huffman.tpp b/include/huffman.tpp
index 40cd9e7ff7..97ead4c4c9 100644
--- a/include/huffman.tpp
+++ b/include/huffman.tpp
@@ -1,12 +1,30 @@
 #include "utilities.hpp"
 
 #include <cassert>
+#include <climits>
 #include <cstddef>
+#include <cstdint>
 
+#include <limits>
+#include <numeric>
 #include <stdexcept>
 
+#include "format.hpp"
+
+#include "proto/mgard.pb.h"
+
 namespace mgard {
 
+// Aliases for compound message field types.
+namespace {
+
+using Endpoints = google::protobuf::RepeatedField<google::protobuf::int64>;
+using Missed = google::protobuf::RepeatedField<google::protobuf::int64>;
+using Frequencies =
+    google::protobuf::Map<google::protobuf::uint64, google::protobuf::uint64>;
+
+} // namespace
+
 template <typename Symbol>
 bool HuffmanCode<Symbol>::HeldCountGreater::
 operator()(const typename HuffmanCode<Symbol>::Node &a,
@@ -49,9 +67,10 @@ void HuffmanCode<Symbol>::populate_frequencies(Symbol const *const begin,
 }
 
 template <typename Symbol>
+template <typename It>
 Symbol
 HuffmanCode<Symbol>::decode(const typename HuffmanCode<Symbol>::Node &leaf,
-                            Symbol const *&missed) const {
+                            It &missed) const {
   const std::ptrdiff_t offset = leaf->codeword - codewords.data();
   // If `offset == 0`, this is the leaf corresponding to out-of-range symbols.
   assert(offset >= 0);
@@ -138,4 +157,204 @@ void HuffmanCode<Symbol>::recursively_set_codewords(
   }
 }
 
+namespace {
+
+//! Generate the default symbol endpoints for a Huffman encoder.
+template <typename Symbol> std::pair<Symbol, Symbol> endpoints();
+
+template <typename Symbol> std::pair<Symbol, Symbol> extreme_endpoints() {
+  return {std::numeric_limits<Symbol>::min(),
+          std::numeric_limits<Symbol>::max()};
+}
+
+template <typename Symbol> std::pair<Symbol, Symbol> capped_endpoints() {
+  return {-static_cast<Symbol>(1 << 17), static_cast<Symbol>(1 << 17) - 1};
+}
+
+template <> std::pair<std::int8_t, std::int8_t> endpoints() {
+  return extreme_endpoints<std::int8_t>();
+}
+
+template <> std::pair<std::int16_t, std::int16_t> endpoints() {
+  return extreme_endpoints<std::int16_t>();
+}
+
+template <> std::pair<std::int32_t, std::int32_t> endpoints() {
+  return capped_endpoints<std::int32_t>();
+}
+
+template <> std::pair<std::int64_t, std::int64_t> endpoints() {
+  return capped_endpoints<std::int64_t>();
+}
+
+} // namespace
+
+template <typename Symbol>
+MemoryBuffer<unsigned char> huffman_encode(Symbol const *const begin,
+                                           const std::size_t n) {
+  const HuffmanCode<Symbol> code(endpoints<Symbol>(), begin, begin + n);
+
+  std::vector<std::size_t> lengths;
+  for (const HuffmanCodeword &codeword : code.codewords) {
+    lengths.push_back(codeword.length);
+  }
+  const std::size_t nbits =
+      std::inner_product(code.frequencies.begin(), code.frequencies.end(),
+                         lengths.begin(), static_cast<std::size_t>(0));
+  const std::size_t nbytes = (nbits + CHAR_BIT - 1) / CHAR_BIT;
+
+  pb::HuffmanHeader header;
+  header.set_index_mapping(pb::HuffmanHeader::INCLUSIVE_RANGE);
+  header.set_codeword_mapping(pb::HuffmanHeader::INDEX_FREQUENCY_PAIRS);
+  header.set_missed_encoding(pb::HuffmanHeader::LITERAL);
+  header.set_hit_encoding(pb::HuffmanHeader::RUN_TOGETHER);
+
+  header.add_endpoints(code.endpoints.first);
+  header.add_endpoints(code.endpoints.second);
+  header.set_nbits(nbits);
+
+  Frequencies &frequencies = *header.mutable_frequencies();
+  {
+    std::size_t i = 0;
+    for (const std::size_t frequency : code.frequencies) {
+      if (frequency) {
+        frequencies.insert({i, frequency});
+      }
+      ++i;
+    }
+  }
+
+  Missed &missed_ = *header.mutable_missed();
+  missed_.Resize(code.nmissed(), 0);
+  Missed::iterator missed = missed_.begin();
+
+  // Zero-initialize the bytes.
+  unsigned char *const hit_ = new unsigned char[nbytes]();
+  unsigned char *hit = hit_;
+
+  unsigned char offset = 0;
+  for (const Symbol q : PseudoArray(begin, n)) {
+    if (code.out_of_range(q)) {
+      *missed++ = q;
+    }
+
+    const HuffmanCodeword codeword = code.codewords.at(code.index(q));
+    std::size_t NREMAINING = codeword.length;
+    for (unsigned char byte : codeword.bytes) {
+      // Number of bits of `byte` left to write.
+      unsigned char nremaining =
+          std::min(static_cast<std::size_t>(CHAR_BIT), NREMAINING);
+      // Premature, but this will hold when we're done with `byte`.
+      NREMAINING -= nremaining;
+
+      while (nremaining) {
+        *hit |= byte >> offset;
+        // Number of bits of `byte` just written (not cumulative).
+        const unsigned char nwritten = std::min(
+            nremaining, static_cast<unsigned char>(
+                            static_cast<unsigned char>(CHAR_BIT) - offset));
+        offset += nwritten;
+        hit += offset / CHAR_BIT;
+        offset %= CHAR_BIT;
+        nremaining -= nwritten;
+        byte <<= nwritten;
+      }
+    }
+  }
+
+  const std::uint_least64_t nheader = header.ByteSize();
+  MemoryBuffer<unsigned char> out(HEADER_SIZE_SIZE + nheader + nbytes);
+  {
+    unsigned char *p = out.data.get();
+    const std::array<unsigned char, HEADER_SIZE_SIZE> nheader_ =
+        serialize_header_size(nheader);
+    std::copy(nheader_.begin(), nheader_.end(), p);
+    p += HEADER_SIZE_SIZE;
+
+    header.SerializeToArray(p, nheader);
+    p += nheader;
+
+    std::copy(hit_, hit_ + nbytes, p);
+    p += nbytes;
+  }
+
+  delete[] hit_;
+
+  return out;
+}
+
+template <typename Symbol>
+MemoryBuffer<Symbol> huffman_decode(const MemoryBuffer<unsigned char> &buffer) {
+  BufferWindow window(buffer.data.get(), buffer.size);
+  const std::uint_least64_t nheader = read_header_size(window);
+  pb::HuffmanHeader header = read_message<pb::HuffmanHeader>(window, nheader);
+
+  if (header.index_mapping() != pb::HuffmanHeader::INCLUSIVE_RANGE) {
+    throw std::runtime_error("unrecognized Huffman index mapping");
+  }
+  const Endpoints &endpoints_ = header.endpoints();
+  if (endpoints_.size() != 2) {
+    throw std::runtime_error("received an unexpected number of endpoints");
+  }
+  const std::pair<std::size_t, std::size_t> endpoints(endpoints_.Get(0),
+                                                      endpoints_.Get(1));
+
+  if (header.codeword_mapping() != pb::HuffmanHeader::INDEX_FREQUENCY_PAIRS) {
+    throw std::runtime_error("unrecognized Huffman codeword mapping");
+  }
+  const Frequencies &frequencies_ = header.frequencies();
+  // TODO: Change `HuffmanCode` constructor so it can take a pair of iterators
+  // dereferencing to (something convertible to)
+  // `std::pair<std::size_t, std::size_t>`s directly.
+  const std::vector<std::pair<std::size_t, std::size_t>> pairs(
+      frequencies_.begin(), frequencies_.end());
+
+  if (header.missed_encoding() != pb::HuffmanHeader::LITERAL) {
+    throw std::runtime_error("unrecognized Huffman missed buffer encoding");
+  }
+  const Missed &missed_ = header.missed();
+  Missed::const_iterator missed = missed_.cbegin();
+
+  if (header.hit_encoding() != pb::HuffmanHeader::RUN_TOGETHER) {
+    throw std::runtime_error("unrecognized Huffman hit buffer encoding");
+  }
+
+  const std::size_t nbits = header.nbits();
+  const std::size_t nbytes = (nbits + CHAR_BIT - 1) / CHAR_BIT;
+  if (window.current + nbytes != window.end) {
+    throw std::runtime_error("number of bits in hit buffer inconsistent with "
+                             "number of bytes in hit buffer");
+  }
+
+  const HuffmanCode<Symbol> code(endpoints, pairs);
+  // TODO: Maybe add a member function for this.
+  const std::size_t nout =
+      std::accumulate(code.frequencies.begin(), code.frequencies.end(),
+                      static_cast<std::size_t>(0));
+  MemoryBuffer<Symbol> out(nout);
+  Symbol *q = out.data.get();
+
+  const Bits bits(window.current, window.current + nbits / CHAR_BIT,
+                  nbits % CHAR_BIT);
+  std::size_t nbits_read = 0;
+  const typename HuffmanCode<Symbol>::Node root = code.queue.top();
+  assert(root);
+  Bits::iterator b = bits.begin();
+  for (std::size_t i = 0; i < nout; ++i) {
+    typename HuffmanCode<Symbol>::Node node;
+    for (node = root; node->left;
+         node = *b++ ? node->right : node->left, ++nbits_read)
+      ;
+    // TODO: Make sure `HuffmanCode::decode` can properly take `missed` (not
+    // relying on `google::protobuf::uint64` being the same as `std::size_t` or
+    // anything).
+    const Symbol decoded = code.decode(node, missed);
+    *q++ = decoded;
+  }
+  assert(nbits_read == nbits);
+  assert(missed == missed_.cend());
+
+  return out;
+}
+
 } // namespace mgard
diff --git a/src/huffman.cpp b/src/huffman.cpp
index bc0a4c6a46..2e16de3e96 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -190,9 +190,21 @@ HuffmanEncodedStream huffman_encoding(long int const *const quantized_data,
 
 namespace {
 
-long int decode(const HuffmanCode<long int> &code,
-                const typename HuffmanCode<long int>::Node &leaf,
-                long int const *&missed) {
+//! Decode a codeword (identified by associated leaf) to a symbol and shift.
+//!
+//!\pre `leaf` must be a leaf (rather than an interior node) of the code
+//! creation tree.
+//!
+//!\deprecated
+//!
+//!\param code Code containing the code creation tree.
+//!\param leaf Leaf (associated to a codeword) to decode.
+//!\param missed Pointer to next out-of-range symbol. If `leaf` is associated
+//! to the out-of-range codeword, this pointer will be dereferenced and
+//! incremented.
+long int decode_and_shift(const HuffmanCode<long int> &code,
+                          const typename HuffmanCode<long int>::Node &leaf,
+                          long int const *&missed) {
   long int const *const start = missed;
   long int decoded = code.decode(leaf, missed);
   if (missed != start) {
@@ -257,7 +269,7 @@ MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded) {
     for (node = root; node->left;
          node = *b++ ? node->right : node->left, ++nbits)
       ;
-    *q++ = decode(code, node, p_missed);
+    *q++ = decode_and_shift(code, node, p_missed);
   }
   assert(nbits == encoded.nbits);
   assert(sizeof(MissedSymbol) * (p_missed - missed) == encoded.missed.size);
diff --git a/src/mgard.proto b/src/mgard.proto
index f5c6fd6fa9..a96fd67c4f 100644
--- a/src/mgard.proto
+++ b/src/mgard.proto
@@ -14,10 +14,14 @@ message CartesianGridTopology {
   repeated uint64 shape = 2;
 }
 
-message ExplicitCubeGeometry { repeated double coordinates = 2; }
+message ExplicitCubeGeometry {
+  repeated double coordinates = 2;
+}
 
 message Domain {
-  enum Topology { CARTESIAN_GRID = 0; }
+  enum Topology {
+    CARTESIAN_GRID = 0;
+  }
   enum Geometry {
     UNIT_CUBE = 0;
     EXPLICIT_CUBE = 1;
@@ -78,7 +82,9 @@ message DomainDecomposition {
 }
 
 message FunctionDecomposition {
-  enum Transform { MULTILEVEL_COEFFICIENTS = 0; }
+  enum Transform {
+    MULTILEVEL_COEFFICIENTS = 0;
+  }
   enum Hierarchy {
     POWER_OF_TWO_PLUS_ONE = 0;
     MULTIDIMENSION_WITH_GHOST_NODES = 1;
@@ -92,7 +98,9 @@ message FunctionDecomposition {
 }
 
 message Quantization {
-  enum Method { COEFFICIENTWISE_LINEAR = 0; }
+  enum Method {
+    COEFFICIENTWISE_LINEAR = 0;
+  }
   enum BinWidths {
     PER_COEFFICIENT = 0;
     PER_LEVEL = 1;
@@ -123,13 +131,64 @@ message Encoding {
     X_HUFFMAN_LZ4 = 4;
     X_HUFFMAN_ZSTD = 5;
   }
+  enum HuffmanSerialization {
+    // Original method, with 'raw' buffer serialization.
+    DEPRECATED = 0;
+    // Symbol range, frequency table, missed table, and hit buffer.
+    RFMH = 1;
+  }
 
   Preprocessor preprocessor = 1;
   Compressor compressor = 2;
-  // Only relevant when `compressor == X_HUFFMAN` or `lossless_compressor ==
-  // X_HUFFMAN_LZ4` or `compressor == X_HUFFMAN_ZSTD`
+  // Only relevant when `compressor == X_HUFFMAN` or `compressor ==
+  // X_HUFFMAN_LZ4` or `compressor == X_HUFFMAN_ZSTD`.
   uint64 huffman_dictionary_size = 3;
   uint64 huffman_block_size = 4;
+
+  // Only relevant when `compressor == CPU_HUFFMAN_ZLIB` or
+  // `compressor == CPU_HUFFMAN_ZSTD`.
+  HuffmanSerialization serialization = 5;
+
+}
+
+message HuffmanHeader {
+  enum IndexMapping {
+    // Codewords are (potentially) assigned to the symbols `{min, …, max}`.
+    // Index `0` is reserved for missed symbols.  Then `min` is assigned
+    // index `1`, `min + 1` is assigned index `2`, and so on.
+    INCLUSIVE_RANGE = 0;
+  }
+  enum CodewordMapping {
+    // A frequency table is stored as a sequence of index–frequency pairs.
+    // This table is used to construct a Huffman code creation tree.
+    INDEX_FREQUENCY_PAIRS = 0;
+  }
+  enum MissedEncoding {
+    // The missed symbols (rather than their indices, for example) are encoded.
+    LITERAL = 0;
+  }
+  enum HitEncoding {
+    // The codeword bits are run together into a single byte array.
+    RUN_TOGETHER = 0;
+  }
+
+  // How each (eligible) symbol is assigned an index.
+  IndexMapping index_mapping = 1;
+  // How each (encountered) index is assigned a codeword.
+  CodewordMapping codeword_mapping = 2;
+  // How the missed buffer is encoded.
+  MissedEncoding missed_encoding = 3;
+  // How the hit (codeword) buffer is encoded.
+  HitEncoding hit_encoding = 4;
+
+  // Minimum and maximum symbols eligible for codewords.
+  repeated sint64 endpoints = 5;
+  // Index–frequency pairs for frequency table.
+  map<uint64, uint64> frequencies = 6;
+  // Encountered symbols that were not assigned codewords.
+  repeated sint64 missed = 7;
+  // Size of the hit buffer in bits.
+  uint64 nbits = 8;
 }
 
 message Device {
diff --git a/tests/src/test_compress.cpp b/tests/src/test_compress.cpp
index ebb41eecfd..b59c05eff0 100644
--- a/tests/src/test_compress.cpp
+++ b/tests/src/test_compress.cpp
@@ -398,7 +398,7 @@ void test_self_describing_decompression(
 
 TEMPLATE_TEST_CASE("decompressing self-describing buffer", "[compress]", float,
                    double) {
-  std::default_random_engine gen(32094);
+  std::default_random_engine gen(361656);
   const std::vector<TestType> smoothness_parameters = {
       -1.5, -0.5, 0.0, 0.5, 1.5, std::numeric_limits<TestType>::infinity()};
   const std::vector<TestType> tolerances = {1, 0.1, 0.01, 0.001};
diff --git a/tests/src/test_huffman.cpp b/tests/src/test_huffman.cpp
index a035be7173..0921f4538b 100644
--- a/tests/src/test_huffman.cpp
+++ b/tests/src/test_huffman.cpp
@@ -1,6 +1,8 @@
+#include "catch2/catch_template_test_macros.hpp"
 #include "catch2/catch_test_macros.hpp"
 
 #include <climits>
+#include <cstdint>
 
 #include <algorithm>
 #include <random>
@@ -54,6 +56,15 @@ void test_decoding_regression(long int const *const quantized,
   REQUIRE(std::equal(p, p + out.size, p_));
 }
 
+template <typename T> void test_inversion(T const *const q, std::size_t N) {
+  const mgard::MemoryBuffer<unsigned char> compressed =
+      mgard::huffman_encode<T>(q, N);
+  const mgard::MemoryBuffer<T> decompressed =
+      mgard::huffman_decode<T>(compressed);
+  REQUIRE(N == decompressed.size);
+  REQUIRE(std::equal(q, q + N, decompressed.data.get()));
+}
+
 void test_encoding_regression_constant(const std::size_t N, const long int q) {
   long int *const quantized = new long int[N];
   std::fill(quantized, quantized + N, q);
@@ -104,6 +115,33 @@ void test_decoding_regression_random(const std::size_t N, const long int a,
   delete[] quantized;
 }
 
+template <typename T>
+void test_inversion_constant(const std::size_t N, const T q) {
+  T *const quantized = new T[N];
+  std::fill(quantized, quantized + N, q);
+  test_inversion(quantized, N);
+  delete[] quantized;
+}
+
+template <typename T>
+void test_inversion_periodic(const std::size_t N, const T q,
+                             const std::size_t period) {
+  T *const quantized = new T[N];
+  std::generate(quantized, quantized + N, PeriodicGenerator(period, q));
+  test_inversion(quantized, N);
+  delete[] quantized;
+}
+
+template <typename T>
+void test_inversion_random(const std::size_t N, const T a, const T b,
+                           std::default_random_engine &gen) {
+  std::uniform_int_distribution<T> dis(a, b);
+  T *const quantized = new T[N];
+  std::generate(quantized, quantized + N, [&] { return dis(gen); });
+  test_inversion(quantized, N);
+  delete[] quantized;
+}
+
 } // namespace
 
 TEST_CASE("encoding regression", "[huffman] [regression]") {
@@ -120,7 +158,7 @@ TEST_CASE("encoding regression", "[huffman] [regression]") {
   }
 
   SECTION("random data") {
-    std::default_random_engine gen(131051);
+    std::default_random_engine gen(726847);
     test_encoding_regression_random(10, 0, 1, gen);
     test_encoding_regression_random(100, -15, -5, gen);
     test_encoding_regression_random(1000, std::numeric_limits<int>::min(),
@@ -151,3 +189,29 @@ TEST_CASE("decoding regression", "[huffman] [regression]") {
     test_decoding_regression_random(10000, -100, 100, gen);
   }
 }
+
+TEMPLATE_TEST_CASE("Huffman inversion", "[huffman]", std::int8_t, std::int16_t,
+                   std::int32_t, std::int64_t) {
+  std::default_random_engine gen_(454114);
+  std::uniform_int_distribution<TestType> dis;
+  SECTION("constant data") {
+    test_inversion_constant<TestType>(10, dis(gen_));
+    test_inversion_constant<TestType>(100, -dis(gen_));
+    test_inversion_constant<TestType>(1000, dis(gen_));
+  }
+
+  SECTION("periodic data") {
+    test_inversion_periodic<TestType>(10, -dis(gen_), 11);
+    test_inversion_periodic<TestType>(100, dis(gen_), 10);
+    test_inversion_periodic<TestType>(1000, -dis(gen_), 9);
+  }
+
+  SECTION("random data") {
+    std::default_random_engine gen(950142);
+    test_inversion_random<TestType>(10, 0, 1, gen);
+    test_inversion_random<TestType>(100, -12, 11, gen);
+    test_inversion_random<TestType>(1000, std::numeric_limits<TestType>::min(),
+                                    std::numeric_limits<TestType>::max(), gen);
+    test_inversion_random<TestType>(10000, -100, 100, gen);
+  }
+}

From fec4c94c85ec535ff6e1d0b2ff0d9ba5ed0f2ed2 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Thu, 9 Jun 2022 16:56:05 -0400
Subject: [PATCH 37/58] Add static data member for default symbol range.

---
 include/huffman.hpp | 14 ++++++++----
 include/huffman.tpp | 54 ++++++++++++++++++---------------------------
 src/huffman.cpp     | 12 ++++++++++
 3 files changed, 43 insertions(+), 37 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index ee8f662e2d..7c78b07d67 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -117,6 +117,14 @@ template <typename Symbol> class HuffmanCode {
   HuffmanCode(const std::pair<Symbol, Symbol> &endpoints,
               Symbol const *const begin, Symbol const *const end);
 
+  //! Constructor.
+  //!
+  //! The endpoints will be set to `default_endpoints`.
+  //!
+  //!\param begin Beginning of input stream.
+  //!\param end End of input stream.
+  HuffmanCode(Symbol const *const begin, Symbol const *const end);
+
   //! Constructor.
   //!
   //!\param endpoints Smallest and largest symbols (inclusive) to receive
@@ -172,10 +180,8 @@ template <typename Symbol> class HuffmanCode {
   template <typename It> Symbol decode(const Node &leaf, It &missed) const;
 
 private:
-  //! Set the range of symbols that will be assigned codewords.
-  //!
-  //!\note This function depends on `ncodewords`.
-  void set_endpoints();
+  //! Default symbol range.
+  const static std::pair<Symbol, Symbol> default_endpoints;
 
   //! Populate the frequency table using a stream of symbols.
   //!
diff --git a/include/huffman.tpp b/include/huffman.tpp
index 97ead4c4c9..b4718c814e 100644
--- a/include/huffman.tpp
+++ b/include/huffman.tpp
@@ -57,6 +57,21 @@ void HuffmanCode<Symbol>::create_code_creation_tree() {
   }
 }
 
+// This default will be used for `std::int{8,16}_t`. We'll specialize the
+// default for `std::int{32,64}_t` in the implementation file.
+template <typename Symbol>
+const std::pair<Symbol, Symbol> HuffmanCode<Symbol>::default_endpoints = {
+    std::numeric_limits<Symbol>::min(), std::numeric_limits<Symbol>::max()};
+
+// I believe these are called 'template specialization declarations.'
+template <>
+const std::pair<std::int32_t, std::int32_t>
+    HuffmanCode<std::int32_t>::default_endpoints;
+
+template <>
+const std::pair<std::int64_t, std::int64_t>
+    HuffmanCode<std::int64_t>::default_endpoints;
+
 template <typename Symbol>
 void HuffmanCode<Symbol>::populate_frequencies(Symbol const *const begin,
                                                Symbol const *const end) {
@@ -118,6 +133,11 @@ HuffmanCode<Symbol>::HuffmanCode(const std::pair<Symbol, Symbol> &endpoints,
   recursively_set_codewords(queue.top(), {});
 }
 
+template <typename Symbol>
+HuffmanCode<Symbol>::HuffmanCode(Symbol const *const begin,
+                                 Symbol const *const end)
+    : HuffmanCode(default_endpoints, begin, end) {}
+
 template <typename Symbol>
 HuffmanCode<Symbol>::HuffmanCode(
     const std::pair<Symbol, Symbol> &endpoints,
@@ -157,42 +177,10 @@ void HuffmanCode<Symbol>::recursively_set_codewords(
   }
 }
 
-namespace {
-
-//! Generate the default symbol endpoints for a Huffman encoder.
-template <typename Symbol> std::pair<Symbol, Symbol> endpoints();
-
-template <typename Symbol> std::pair<Symbol, Symbol> extreme_endpoints() {
-  return {std::numeric_limits<Symbol>::min(),
-          std::numeric_limits<Symbol>::max()};
-}
-
-template <typename Symbol> std::pair<Symbol, Symbol> capped_endpoints() {
-  return {-static_cast<Symbol>(1 << 17), static_cast<Symbol>(1 << 17) - 1};
-}
-
-template <> std::pair<std::int8_t, std::int8_t> endpoints() {
-  return extreme_endpoints<std::int8_t>();
-}
-
-template <> std::pair<std::int16_t, std::int16_t> endpoints() {
-  return extreme_endpoints<std::int16_t>();
-}
-
-template <> std::pair<std::int32_t, std::int32_t> endpoints() {
-  return capped_endpoints<std::int32_t>();
-}
-
-template <> std::pair<std::int64_t, std::int64_t> endpoints() {
-  return capped_endpoints<std::int64_t>();
-}
-
-} // namespace
-
 template <typename Symbol>
 MemoryBuffer<unsigned char> huffman_encode(Symbol const *const begin,
                                            const std::size_t n) {
-  const HuffmanCode<Symbol> code(endpoints<Symbol>(), begin, begin + n);
+  const HuffmanCode<Symbol> code(begin, begin + n);
 
   std::vector<std::size_t> lengths;
   for (const HuffmanCodeword &codeword : code.codewords) {
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 2e16de3e96..8331630657 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -280,4 +280,16 @@ MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded) {
   return out;
 }
 
+template <>
+const std::pair<std::int32_t, std::int32_t>
+    HuffmanCode<std::int32_t>::default_endpoints = {
+        -static_cast<std::int32_t>(1 << 17),
+        static_cast<std::int32_t>(1 << 17) - 1};
+
+template <>
+const std::pair<std::int64_t, std::int64_t>
+    HuffmanCode<std::int64_t>::default_endpoints = {
+        -static_cast<std::int64_t>(1 << 17),
+        static_cast<std::int64_t>(1 << 17) - 1};
+
 } // namespace mgard

From ebf3c34adb80b90edb98e43a7eb9e6d48736bc20 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Thu, 9 Jun 2022 17:13:48 -0400
Subject: [PATCH 38/58] Separate codeword decoding, missed buffer lookup.

---
 include/huffman.hpp | 11 ++++-------
 include/huffman.tpp | 16 ++++++----------
 src/huffman.cpp     |  8 ++------
 3 files changed, 12 insertions(+), 23 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index 7c78b07d67..09326c996c 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -166,18 +166,15 @@ template <typename Symbol> class HuffmanCode {
   //! Huffman code creation tree.
   std::priority_queue<Node, std::vector<Node>, HeldCountGreater> queue;
 
-  // TODO: Just indicate in return value whether symbol was missed.
-
   //! Decode a codeword (identified by associated leaf) to a symbol.
   //!
   //!\pre `leaf` must be a leaf (rather than an interior node) of the code
-  //! creation tree. `It::value_type` must be convertible to `Symbol`.
+  //! creation tree.
   //!
   //!\param leaf Leaf (associated to a codeword) to decode.
-  //!\param missed Pointer to next out-of-range symbol. If `leaf` is associated
-  //! to the out-of-range codeword, this pointer will be dereferenced and
-  //! incremented.
-  template <typename It> Symbol decode(const Node &leaf, It &missed) const;
+  //!\return A boolean indicating whether the original symbol was 'hit' and the
+  //! symbol itself (junk if the original symbol was 'missed').
+  std::pair<bool, Symbol> decode(const Node &leaf) const;
 
 private:
   //! Default symbol range.
diff --git a/include/huffman.tpp b/include/huffman.tpp
index b4718c814e..fdaaf4473c 100644
--- a/include/huffman.tpp
+++ b/include/huffman.tpp
@@ -82,14 +82,13 @@ void HuffmanCode<Symbol>::populate_frequencies(Symbol const *const begin,
 }
 
 template <typename Symbol>
-template <typename It>
-Symbol
-HuffmanCode<Symbol>::decode(const typename HuffmanCode<Symbol>::Node &leaf,
-                            It &missed) const {
+std::pair<bool, Symbol> HuffmanCode<Symbol>::decode(
+    const typename HuffmanCode<Symbol>::Node &leaf) const {
   const std::ptrdiff_t offset = leaf->codeword - codewords.data();
   // If `offset == 0`, this is the leaf corresponding to out-of-range symbols.
   assert(offset >= 0);
-  return offset ? endpoints.first + (offset - 1) : *missed++;
+  return offset ? std::pair<bool, Symbol>(true, endpoints.first + (offset - 1))
+                : std::pair<bool, Symbol>(false, {});
 }
 
 template <typename Symbol>
@@ -333,11 +332,8 @@ MemoryBuffer<Symbol> huffman_decode(const MemoryBuffer<unsigned char> &buffer) {
     for (node = root; node->left;
          node = *b++ ? node->right : node->left, ++nbits_read)
       ;
-    // TODO: Make sure `HuffmanCode::decode` can properly take `missed` (not
-    // relying on `google::protobuf::uint64` being the same as `std::size_t` or
-    // anything).
-    const Symbol decoded = code.decode(node, missed);
-    *q++ = decoded;
+    const std::pair<bool, Symbol> decoded = code.decode(node);
+    *q++ = decoded.first ? decoded.second : *missed++;
   }
   assert(nbits_read == nbits);
   assert(missed == missed_.cend());
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 8331630657..2ccd49b8cf 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -205,12 +205,8 @@ namespace {
 long int decode_and_shift(const HuffmanCode<long int> &code,
                           const typename HuffmanCode<long int>::Node &leaf,
                           long int const *&missed) {
-  long int const *const start = missed;
-  long int decoded = code.decode(leaf, missed);
-  if (missed != start) {
-    decoded -= nql / 2;
-  }
-  return decoded;
+  const std::pair<bool, long int> pair = code.decode(leaf);
+  return pair.first ? pair.second : *missed++ - nql / 2;
 }
 
 } // namespace

From 1c44399a9fea3e42ee7443e86468841fc837499c Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Thu, 9 Jun 2022 17:36:19 -0400
Subject: [PATCH 39/58] =?UTF-8?q?Pass=20index=E2=80=93frequency=20pair=20r?=
 =?UTF-8?q?ange=20as=20iterator=20pair.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/huffman.hpp | 24 +++++++++++++-----------
 include/huffman.tpp | 22 +++++++++-------------
 src/huffman.cpp     |  2 +-
 3 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index 09326c996c..b23d19b933 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -127,11 +127,16 @@ template <typename Symbol> class HuffmanCode {
 
   //! Constructor.
   //!
+  //! `It::value_type` should be (convertible to)
+  //! `std::pair<std::size_t, std::size_t>`.
+  //!
   //!\param endpoints Smallest and largest symbols (inclusive) to receive
   //! codewords.
-  //!\param pairs Index–frequency pairs for frequency table.
-  HuffmanCode(const std::pair<Symbol, Symbol> &endpoints,
-              const std::vector<std::pair<std::size_t, std::size_t>> &pairs);
+  //!\param begin Beginning of index–frequency pair range for frequency table.
+  //!\param end End of index–frequency pair range for frequency table.
+  template <typename It>
+  HuffmanCode(const std::pair<Symbol, Symbol> &endpoints, const It begin,
+              const It end);
 
   //! Smallest and largest symbols (inclusive) to receive codewords.
   std::pair<Symbol, Symbol> endpoints;
@@ -191,13 +196,10 @@ template <typename Symbol> class HuffmanCode {
 
   //! Populate the frequency table from a collection of index–frequency pairs.
   //!
-  //!\pre `frequencies` should have length `ncodewords` and all entries should
-  //! be zero.
-  //!
-  //!\param pairs Beginning of stream of symbols.
-  //!\param end End of stream of symbols.
-  void populate_frequencies(
-      const std::vector<std::pair<std::size_t, std::size_t>> &pairs);
+  //!\param begin Beginning of index–frequency pair range.
+  //!\param end End of index–frequency pair range.
+  template <typename It>
+  void populate_frequencies(const It begin, const It end);
 
   //! Create the Huffman code creation tree.
   //!
@@ -240,7 +242,7 @@ MemoryBuffer<unsigned char> huffman_encode(Symbol const *const begin,
 
 //! Decode a stream encoded using a Huffman code.
 //!
-//!\param encoded Input buffer (Huffman-encoded stream).
+//!\param buffer Input buffer (Huffman-encoded stream).
 template <typename Symbol>
 MemoryBuffer<Symbol> huffman_decode(const MemoryBuffer<unsigned char> &buffer);
 
diff --git a/include/huffman.tpp b/include/huffman.tpp
index fdaaf4473c..904c23e24f 100644
--- a/include/huffman.tpp
+++ b/include/huffman.tpp
@@ -92,9 +92,9 @@ std::pair<bool, Symbol> HuffmanCode<Symbol>::decode(
 }
 
 template <typename Symbol>
-void HuffmanCode<Symbol>::populate_frequencies(
-    const std::vector<std::pair<std::size_t, std::size_t>> &pairs) {
-  for (auto [index, frequency] : pairs) {
+template <typename It>
+void HuffmanCode<Symbol>::populate_frequencies(const It begin, const It end) {
+  for (auto [index, frequency] : RangeSlice<It>{.begin_ = begin, .end_ = end}) {
     frequencies.at(index) = frequency;
   }
 }
@@ -138,12 +138,12 @@ HuffmanCode<Symbol>::HuffmanCode(Symbol const *const begin,
     : HuffmanCode(default_endpoints, begin, end) {}
 
 template <typename Symbol>
-HuffmanCode<Symbol>::HuffmanCode(
-    const std::pair<Symbol, Symbol> &endpoints,
-    const std::vector<std::pair<std::size_t, std::size_t>> &pairs)
+template <typename It>
+HuffmanCode<Symbol>::HuffmanCode(const std::pair<Symbol, Symbol> &endpoints,
+                                 const It begin, const It end)
     : endpoints(endpoints), ncodewords(ncodewords_from_endpoints(endpoints)),
       frequencies(ncodewords), codewords(ncodewords) {
-  populate_frequencies(pairs);
+  populate_frequencies(begin, end);
   create_code_creation_tree();
   recursively_set_codewords(queue.top(), {});
 }
@@ -290,11 +290,6 @@ MemoryBuffer<Symbol> huffman_decode(const MemoryBuffer<unsigned char> &buffer) {
     throw std::runtime_error("unrecognized Huffman codeword mapping");
   }
   const Frequencies &frequencies_ = header.frequencies();
-  // TODO: Change `HuffmanCode` constructor so it can take a pair of iterators
-  // dereferencing to (something convertible to)
-  // `std::pair<std::size_t, std::size_t>`s directly.
-  const std::vector<std::pair<std::size_t, std::size_t>> pairs(
-      frequencies_.begin(), frequencies_.end());
 
   if (header.missed_encoding() != pb::HuffmanHeader::LITERAL) {
     throw std::runtime_error("unrecognized Huffman missed buffer encoding");
@@ -313,7 +308,8 @@ MemoryBuffer<Symbol> huffman_decode(const MemoryBuffer<unsigned char> &buffer) {
                              "number of bytes in hit buffer");
   }
 
-  const HuffmanCode<Symbol> code(endpoints, pairs);
+  const HuffmanCode<Symbol> code(endpoints, frequencies_.begin(),
+                                 frequencies_.end());
   // TODO: Maybe add a member function for this.
   const std::size_t nout =
       std::accumulate(code.frequencies.begin(), code.frequencies.end(),
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 2ccd49b8cf..ea5a23bf68 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -231,7 +231,7 @@ MemoryBuffer<long int> huffman_decoding(const HuffmanEncodedStream &encoded) {
     }
   }
 
-  HuffmanCode<Symbol> code(nql_endpoints, pairs);
+  HuffmanCode<Symbol> code(nql_endpoints, pairs.begin(), pairs.end());
 
   MemoryBuffer<Symbol> out(nquantized);
   Symbol *q = out.data.get();

From 1532c6661ff0e480949e65735dd152e076f778ec Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Fri, 10 Jun 2022 12:48:35 -0400
Subject: [PATCH 40/58] Add function to check quantization buffer size.

---
 include/format.hpp        |  8 ++++++
 src/format.cpp            | 29 ++++++++++++++++++++++
 tests/src/test_format.cpp | 52 +++++++++++++++------------------------
 3 files changed, 57 insertions(+), 32 deletions(-)

diff --git a/include/format.hpp b/include/format.hpp
index 0a817e78f6..f3b166ffac 100644
--- a/include/format.hpp
+++ b/include/format.hpp
@@ -66,6 +66,14 @@ serialize_header_crc32(std::uint_least64_t crc32);
 //!\param p Pointer whose alignment will be checked.
 template <typename T> void check_alignment(void const *const p);
 
+//! Check that a quantization buffer has the right alignment and a valid size.
+//!
+//!\param header Self-describing dataset header.
+//!\param p Quantization buffer.
+//!\param n Size in bytes of quantization buffer.
+void check_quantization_buffer(const pb::Header &header, void const *const p,
+                               const std::size_t n);
+
 //! Determine whether an integral type is big endian.
 template <typename Int> bool big_endian();
 
diff --git a/src/format.cpp b/src/format.cpp
index 0464e4b791..83b138db81 100644
--- a/src/format.cpp
+++ b/src/format.cpp
@@ -45,6 +45,35 @@ serialize_header_crc32(std::uint_least64_t crc32) {
   return serialize<std::uint_least32_t, HEADER_CRC32_SIZE>(crc32);
 }
 
+namespace {
+
+template <typename Int>
+void check_quantization_buffer_(void const *const p, const std::size_t n) {
+  if (n % sizeof(Int)) {
+    throw std::runtime_error(
+        "quantization buffer size not a multiple of quantization type size");
+  }
+  check_alignment<Int>(p);
+}
+
+} // namespace
+
+void check_quantization_buffer(const pb::Header &header, void const *const p,
+                               const std::size_t n) {
+  switch (header.quantization().type()) {
+  case pb::Quantization::INT8_T:
+    return check_quantization_buffer_<std::int8_t>(p, n);
+  case pb::Quantization::INT16_T:
+    return check_quantization_buffer_<std::int16_t>(p, n);
+  case pb::Quantization::INT32_T:
+    return check_quantization_buffer_<std::int32_t>(p, n);
+  case pb::Quantization::INT64_T:
+    return check_quantization_buffer_<std::int64_t>(p, n);
+  default:
+    throw std::runtime_error("unrecognized quantization type");
+  }
+}
+
 template <> pb::Dataset::Type type_to_dataset_type<float>() {
   return pb::Dataset::FLOAT;
 }
diff --git a/tests/src/test_format.cpp b/tests/src/test_format.cpp
index 64943b2421..970eb87fc3 100644
--- a/tests/src/test_format.cpp
+++ b/tests/src/test_format.cpp
@@ -180,41 +180,29 @@ TEST_CASE("dataset types", "[format]") {
   REQUIRE(mgard::type_to_dataset_type<double>() == mgard::pb::Dataset::DOUBLE);
 }
 
-TEST_CASE("quantization type sizes", "[format]") {
-  mgard::pb::Header header;
-  mgard::pb::Quantization &quantization = *header.mutable_quantization();
-  const std::size_t ndof = 1;
-
-  quantization.set_type(mgard::pb::Quantization::INT8_T);
-  {
-    const mgard::MemoryBuffer<unsigned char> buffer =
-        mgard::quantization_buffer(header, ndof);
-    REQUIRE_NOTHROW(mgard::check_alignment<std::int8_t>(buffer.data.get()));
-    REQUIRE(buffer.size == 1);
-  }
+namespace {
 
-  quantization.set_type(mgard::pb::Quantization::INT16_T);
-  {
-    const mgard::MemoryBuffer<unsigned char> buffer =
-        mgard::quantization_buffer(header, ndof);
-    REQUIRE_NOTHROW(mgard::check_alignment<std::int16_t>(buffer.data.get()));
-    REQUIRE(buffer.size == 2);
-  }
+void test_quantization_buffer(const mgard::pb::Quantization::Type type,
+                              const std::size_t size) {
+  mgard::pb::Header header;
+  header.mutable_quantization()->set_type(type);
+  const mgard::MemoryBuffer<unsigned char> buffer =
+      mgard::quantization_buffer(header, 1);
+  REQUIRE_NOTHROW(
+      mgard::check_quantization_buffer(header, buffer.data.get(), buffer.size));
+  REQUIRE(buffer.size == size);
+}
 
-  quantization.set_type(mgard::pb::Quantization::INT32_T);
-  {
-    const mgard::MemoryBuffer<unsigned char> buffer =
-        mgard::quantization_buffer(header, ndof);
-    REQUIRE_NOTHROW(mgard::check_alignment<std::int32_t>(buffer.data.get()));
-    REQUIRE(buffer.size == 4);
-  }
+} // namespace
 
-  quantization.set_type(mgard::pb::Quantization::INT64_T);
-  {
-    const mgard::MemoryBuffer<unsigned char> buffer =
-        mgard::quantization_buffer(header, ndof);
-    REQUIRE_NOTHROW(mgard::check_alignment<std::int64_t>(buffer.data.get()));
-    REQUIRE(buffer.size == 8);
+TEST_CASE("quantization buffers", "[format]") {
+  const std::vector<std::pair<mgard::pb::Quantization::Type, std::size_t>>
+      pairs{{mgard::pb::Quantization::INT8_T, 1},
+            {mgard::pb::Quantization::INT16_T, 2},
+            {mgard::pb::Quantization::INT32_T, 4},
+            {mgard::pb::Quantization::INT64_T, 8}};
+  for (const auto [type, size] : pairs) {
+    test_quantization_buffer(type, size);
   }
 }
 

From 56004d2ee01c4fc9adbc5423c544e41d6211f8cd Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 15 Jun 2022 12:46:19 -0400
Subject: [PATCH 41/58] Automatically calculate Huffman hit buffer size.

---
 include/huffman.hpp                  |  5 ++---
 src/compressors.cpp                  | 18 +++++-------------
 src/huffman.cpp                      | 27 +++++++++++----------------
 tests/src/compressors_regression.cpp |  8 +++++---
 tests/src/huffman_regression.cpp     |  7 ++-----
 5 files changed, 25 insertions(+), 40 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index b23d19b933..97300f91ef 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -25,11 +25,10 @@ struct HuffmanEncodedStream {
   //! Constructor.
   //!
   //!\param nbits Length in bits of the compressed stream.
-  //!\param ncompressed Length in bytes of the compressed stream.
   //!\param nmissed Length in bytes of the missed array.
   //!\param ntable Length in bytes of the frequency table.
-  HuffmanEncodedStream(const std::size_t nbits, const std::size_t ncompressed,
-                       const std::size_t nmissed, const std::size_t ntable);
+  HuffmanEncodedStream(const std::size_t nbits, const std::size_t nmissed,
+                       const std::size_t ntable);
 
   //! Length in bits of the compressed stream.
   std::size_t nbits;
diff --git a/src/compressors.cpp b/src/compressors.cpp
index daa6bc8f2a..f9ff0254d6 100644
--- a/src/compressors.cpp
+++ b/src/compressors.cpp
@@ -54,12 +54,7 @@ void decompress_memory_huffman(unsigned char const *const src,
 #endif
   }
 
-  // `huffman_decoding` expects the size of the hit buffer to be a multiple of
-  // `sizeof(unsigned int)`. We'll zero out any extra bytes below.
-  const std::size_t nbytes =
-      sizeof(unsigned int) *
-      ((nhit + sizeof(unsigned int) - 1) / sizeof(unsigned int));
-  HuffmanEncodedStream encoded(nbits, nbytes, nmissed, nfrequencies);
+  HuffmanEncodedStream encoded(nbits, nmissed, nfrequencies);
   {
     unsigned char const *begin;
     unsigned char const *end;
@@ -69,15 +64,12 @@ void decompress_memory_huffman(unsigned char const *const src,
     std::copy(begin, end, encoded.frequencies.data.get());
 
     begin = end;
-    end = begin + nhit;
+    assert(encoded.hit.size <= nhit);
+    end = begin + encoded.hit.size;
     std::copy(begin, end, encoded.hit.data.get());
 
-    {
-      unsigned char *const p = encoded.hit.data.get();
-      std::fill(p + nhit, p + nbytes, 0);
-    }
-
-    begin = end;
+    // Skip any bytes between `begin + encoded.hit.size` and `begin + nhit`.
+    begin = end + nhit - encoded.hit.size;
     end = begin + nmissed;
     std::copy(begin, end, encoded.missed.data.get());
   }
diff --git a/src/huffman.cpp b/src/huffman.cpp
index ea5a23bf68..7dc1d77fbe 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -10,18 +10,21 @@
 #include <queue>
 #include <vector>
 
-#include <iostream>
-
 #include "huffman.hpp"
 
 namespace mgard {
 
 HuffmanEncodedStream::HuffmanEncodedStream(const std::size_t nbits,
-                                           const std::size_t ncompressed,
                                            const std::size_t nmissed,
-                                           const std::size_t nfrequencies)
-    : nbits(nbits), hit(ncompressed), missed(nmissed),
-      frequencies(nfrequencies) {}
+                                           const std::size_t ntable)
+    : nbits(nbits), hit(sizeof(unsigned int) *
+                        ((nbits + CHAR_BIT * sizeof(unsigned int) - 1) /
+                         (CHAR_BIT * sizeof(unsigned int)))),
+      missed(nmissed), frequencies(ntable) {
+  unsigned char *const p = hit.data.get();
+  // Zero out the bits/bytes we won't write to.
+  std::fill(p + (nbits + CHAR_BIT - 1) / CHAR_BIT, p + hit.size, 0);
+}
 
 void HuffmanCodeword::push_back(const bool bit) {
   const unsigned char offset = length % CHAR_BIT;
@@ -114,19 +117,11 @@ HuffmanEncodedStream huffman_encoding(long int const *const quantized_data,
   const std::size_t nbits =
       std::inner_product(code.frequencies.begin(), code.frequencies.end(),
                          lengths.begin(), static_cast<std::size_t>(0));
-  const std::size_t nbytes =
-      sizeof(unsigned int) * ((nbits + CHAR_BIT * sizeof(unsigned int) - 1) /
-                              (CHAR_BIT * sizeof(unsigned int)));
-  if (nbytes % sizeof(unsigned int)) {
-    throw std::runtime_error(
-        "`nbytes` not bumped up to nearest multiple of `unsigned int` size");
-  }
-
   const std::size_t nnz =
       code.ncodewords -
       std::count(code.frequencies.begin(), code.frequencies.end(), 0);
 
-  HuffmanEncodedStream out(nbits, nbytes, code.nmissed() * sizeof(MissedSymbol),
+  HuffmanEncodedStream out(nbits, code.nmissed() * sizeof(MissedSymbol),
                            2 * nnz * sizeof(std::size_t));
 
   // Write frequency table.
@@ -184,7 +179,7 @@ HuffmanEncodedStream huffman_encoding(long int const *const quantized_data,
     }
   }
 
-  endianness_shuffle(buffer, nbytes);
+  endianness_shuffle(buffer, out.hit.size);
   return out;
 }
 
diff --git a/tests/src/compressors_regression.cpp b/tests/src/compressors_regression.cpp
index c65bac9cd1..7f7a3ecdf1 100644
--- a/tests/src/compressors_regression.cpp
+++ b/tests/src/compressors_regression.cpp
@@ -119,7 +119,7 @@ void decompress_memory_huffman(unsigned char const *const src,
 #endif
   }
 
-  HuffmanEncodedStream encoded(nbits, nhit, nmissed, nfrequencies);
+  HuffmanEncodedStream encoded(nbits, nmissed, nfrequencies);
   {
     unsigned char const *begin;
     unsigned char const *end;
@@ -129,10 +129,12 @@ void decompress_memory_huffman(unsigned char const *const src,
     std::copy(begin, end, encoded.frequencies.data.get());
 
     begin = end;
-    end = begin + nhit;
+    assert(encoded.hit.size <= nhit);
+    end = begin + encoded.hit.size;
     std::copy(begin, end, encoded.hit.data.get());
 
-    begin = end;
+    // Skip any bytes between `begin + encoded.hit.size` and `begin + nhit`.
+    begin = end + nhit - encoded.hit.size;
     end = begin + nmissed;
     std::copy(begin, end, encoded.missed.data.get());
   }
diff --git a/tests/src/huffman_regression.cpp b/tests/src/huffman_regression.cpp
index c2c58bdc95..6d8739dbde 100644
--- a/tests/src/huffman_regression.cpp
+++ b/tests/src/huffman_regression.cpp
@@ -215,15 +215,12 @@ HuffmanEncodedStream huffman_encoding(long int const *const quantized_data,
     nnz += frequency ? 1 : 0;
   }
 
-  const std::size_t nbytes =
-      sizeof(unsigned int) * ((nbits + CHAR_BIT * sizeof(unsigned int) - 1) /
-                              (CHAR_BIT * sizeof(unsigned int)));
-  HuffmanEncodedStream out(nbits, nbytes, num_miss * sizeof(int),
+  HuffmanEncodedStream out(nbits, num_miss * sizeof(int),
                            2 * nnz * sizeof(std::size_t));
 
   unsigned int *const hit =
       reinterpret_cast<unsigned int *>(out.hit.data.get());
-  std::fill(hit, hit + nbytes / sizeof(unsigned int), 0u);
+  std::fill(hit, hit + out.hit.size / sizeof(unsigned int), 0u);
 
   int *missed = reinterpret_cast<int *>(out.missed.data.get());
 

From 25e639de39e4891701bcae03e40c0a945590f8dd Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 15 Jun 2022 11:31:53 -0400
Subject: [PATCH 42/58] Add `HuffmanEncodedStream` {,de}serializer.

---
 include/huffman.hpp                  |  24 +++++
 src/compressors.cpp                  | 109 +------------------
 src/huffman.cpp                      | 155 +++++++++++++++++++++++----
 tests/src/compressors_regression.cpp |   3 +-
 tests/src/test_compressors.cpp       |   5 +-
 tests/src/test_format.cpp            |   2 +
 tests/src/test_huffman.cpp           |  47 ++++++++
 7 files changed, 213 insertions(+), 132 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index 97300f91ef..6a746deead 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -43,6 +43,30 @@ struct HuffmanEncodedStream {
   MemoryBuffer<unsigned char> frequencies;
 };
 
+//! Serialize a Huffman-encoded stream and then compress.
+//!
+//!\deprecated
+//!
+//! The serialized stream will be compressed with ZSTD if `MGARD_ZSTD` is
+//! defined and with `zlib` otherwise.
+//!
+//!\param encoded Huffman-encoded stream to serialize and compress.
+MemoryBuffer<unsigned char>
+serialize_compress(const HuffmanEncodedStream &encoded);
+
+//! Decompress and then deserialize a Huffman-encoded stream.
+//!
+//!\deprecated
+//!
+//! The buffer will be decompressed with ZSTD if `MGARD_ZSTD` is defined and
+//! with `zlib` otherwise.
+//!
+//!\param src Buffer containing serialized and compressed Huffman-encoded
+//! stream.
+//!\param srcLen Size in bytes of the buffer.
+HuffmanEncodedStream decompress_deserialize(unsigned char const *const src,
+                                            const std::size_t srcLen);
+
 //! Codeword (in progress) associated to a node in a Huffman code creation tree.
 struct HuffmanCodeword {
   //! Bytes containing the bits of the codeword.
diff --git a/src/compressors.cpp b/src/compressors.cpp
index f9ff0254d6..85e2930512 100644
--- a/src/compressors.cpp
+++ b/src/compressors.cpp
@@ -22,58 +22,10 @@
 
 namespace mgard {
 
-namespace {
-
-std::size_t hit_buffer_size(const std::size_t nbits) {
-  return nbits / CHAR_BIT + sizeof(unsigned int);
-}
-
-} // namespace
-
 void decompress_memory_huffman(unsigned char const *const src,
                                const std::size_t srcLen, long int *const dst,
                                const std::size_t dstLen) {
-  std::size_t const *const sizes = reinterpret_cast<std::size_t const *>(src);
-  const std::size_t nfrequencies = sizes[0];
-  const std::size_t nbits = sizes[1];
-  const std::size_t nmissed = sizes[2];
-  const std::size_t nhit = hit_buffer_size(nbits);
-
-  MemoryBuffer<unsigned char> buffer(nfrequencies + nhit + nmissed);
-  {
-    const std::size_t offset = 3 * sizeof(std::size_t);
-    unsigned char const *const src_ = src + offset;
-    const std::size_t srcLen_ = srcLen - offset;
-    unsigned char *const dst_ = buffer.data.get();
-    const std::size_t dstLen_ = buffer.size;
-
-#ifndef MGARD_ZSTD
-    decompress_memory_z(src_, srcLen_, dst_, dstLen_);
-#else
-    decompress_memory_zstd(src_, srcLen_, dst_, dstLen_);
-#endif
-  }
-
-  HuffmanEncodedStream encoded(nbits, nmissed, nfrequencies);
-  {
-    unsigned char const *begin;
-    unsigned char const *end;
-
-    begin = buffer.data.get();
-    end = begin + nfrequencies;
-    std::copy(begin, end, encoded.frequencies.data.get());
-
-    begin = end;
-    assert(encoded.hit.size <= nhit);
-    end = begin + encoded.hit.size;
-    std::copy(begin, end, encoded.hit.data.get());
-
-    // Skip any bytes between `begin + encoded.hit.size` and `begin + nhit`.
-    begin = end + nhit - encoded.hit.size;
-    end = begin + nmissed;
-    std::copy(begin, end, encoded.missed.data.get());
-  }
-
+  const HuffmanEncodedStream encoded = decompress_deserialize(src, srcLen);
   const MemoryBuffer<long int> decoded = huffman_decoding(encoded);
   {
     long int const *const p = decoded.data.get();
@@ -85,67 +37,10 @@ void decompress_memory_huffman(unsigned char const *const src,
   }
 }
 
-namespace {
-
-using Constituent = std::pair<unsigned char const *, std::size_t>;
-
-MemoryBuffer<unsigned char>
-gather_constituents(const std::vector<Constituent> &constituents) {
-  std::size_t nbuffer = 0;
-  for (const Constituent &constituent : constituents) {
-    nbuffer += constituent.second;
-  }
-  MemoryBuffer<unsigned char> buffer(nbuffer);
-  unsigned char *p = buffer.data.get();
-  for (const Constituent &constituent : constituents) {
-    std::memcpy(p, constituent.first, constituent.second);
-    p += constituent.second;
-  }
-  return buffer;
-}
-
-} // namespace
-
 MemoryBuffer<unsigned char> compress_memory_huffman(long int const *const src,
                                                     const std::size_t srcLen) {
   const HuffmanEncodedStream encoded = huffman_encoding(src, srcLen);
-
-  assert(not(encoded.hit.size % sizeof(unsigned int)));
-
-  static_assert(CHAR_BIT == 8, "code written assuming `CHAR_BIT == 8`");
-  static_assert(sizeof(unsigned int) == 4,
-                "code written assuming `sizeof(unsigned int) == 4`");
-  const std::size_t offset = encoded.nbits % (CHAR_BIT * sizeof(unsigned int));
-  // Number of hit buffer padding bytes.
-  const std::size_t nhbpb = offset ? offset / CHAR_BIT : sizeof(unsigned int);
-
-  assert(encoded.hit.size + nhbpb == hit_buffer_size(encoded.nbits));
-
-  unsigned char const *hbpb = new unsigned char[nhbpb]();
-  MemoryBuffer<unsigned char> payload = gather_constituents({
-      {encoded.frequencies.data.get(), encoded.frequencies.size},
-      {encoded.hit.data.get(), encoded.hit.size},
-      {hbpb, nhbpb},
-      {encoded.missed.data.get(), encoded.missed.size},
-  });
-  delete[] hbpb;
-
-#ifndef MGARD_ZSTD
-  const MemoryBuffer<unsigned char> out_data =
-      compress_memory_z(payload.data.get(), payload.size);
-#else
-  const MemoryBuffer<unsigned char> out_data =
-      compress_memory_zstd(payload.data.get(), payload.size);
-#endif
-
-  return gather_constituents(
-      {{reinterpret_cast<unsigned char const *>(&encoded.frequencies.size),
-        sizeof(encoded.frequencies.size)},
-       {reinterpret_cast<unsigned char const *>(&encoded.nbits),
-        sizeof(encoded.nbits)},
-       {reinterpret_cast<unsigned char const *>(&encoded.missed.size),
-        sizeof(encoded.missed.size)},
-       {out_data.data.get(), out_data.size}});
+  return serialize_compress(encoded);
 }
 
 #ifdef MGARD_ZSTD
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 7dc1d77fbe..369497cabc 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -8,8 +8,12 @@
 #include <array>
 #include <numeric>
 #include <queue>
+#include <stdexcept>
 #include <vector>
 
+#include <iostream>
+
+#include "compressors.hpp"
 #include "huffman.hpp"
 
 namespace mgard {
@@ -22,10 +26,136 @@ HuffmanEncodedStream::HuffmanEncodedStream(const std::size_t nbits,
                          (CHAR_BIT * sizeof(unsigned int)))),
       missed(nmissed), frequencies(ntable) {
   unsigned char *const p = hit.data.get();
-  // Zero out the bits/bytes we won't write to.
+  // Zero out the bytes we won't write to. If `nbits % CHAR_BIT`, there will
+  // still be bits in the final byte that aren't zeroed out.
   std::fill(p + (nbits + CHAR_BIT - 1) / CHAR_BIT, p + hit.size, 0);
 }
 
+namespace {
+
+void check_type_sizes() {
+  static_assert(CHAR_BIT == 8,
+                "code written with assumption that `CHAR_BIT == 8`");
+  static_assert(
+      sizeof(unsigned int) == 4,
+      "code written with assumption that `sizeof(unsigned int) == 4`");
+  static_assert(sizeof(int) == 4,
+                "code written with assumption that `sizeof(int) == 4`");
+  static_assert(
+      sizeof(std::size_t) == 8,
+      "code written with assumption that `sizeof(std::size_t) == 8`");
+}
+
+using Constituent = std::pair<unsigned char const *, std::size_t>;
+
+MemoryBuffer<unsigned char>
+gather(const std::vector<Constituent> &constituents) {
+  std::size_t nbuffer = 0;
+  for (const Constituent &constituent : constituents) {
+    nbuffer += constituent.second;
+  }
+  MemoryBuffer<unsigned char> buffer(nbuffer);
+  unsigned char *p = buffer.data.get();
+  for (const Constituent &constituent : constituents) {
+    std::memcpy(p, constituent.first, constituent.second);
+    p += constituent.second;
+  }
+  return buffer;
+}
+
+} // namespace
+
+MemoryBuffer<unsigned char>
+serialize_compress(const HuffmanEncodedStream &encoded) {
+  check_type_sizes();
+
+  assert(not(encoded.hit.size % sizeof(unsigned int)));
+
+  const std::size_t offset = encoded.nbits % (CHAR_BIT * sizeof(unsigned int));
+  // Number of hit buffer padding bytes.
+  const std::size_t nhbpb = offset ? offset / CHAR_BIT : sizeof(unsigned int);
+
+  // The righthand side is how the size in bytes of the padded hit buffer was
+  // originally calculated.
+  assert(encoded.hit.size + nhbpb ==
+         encoded.nbits / CHAR_BIT + sizeof(unsigned int));
+
+  unsigned char const *hbpb = new unsigned char[nhbpb]();
+  MemoryBuffer<unsigned char> payload = gather({
+      {encoded.frequencies.data.get(), encoded.frequencies.size},
+      {encoded.hit.data.get(), encoded.hit.size},
+      {hbpb, nhbpb},
+      {encoded.missed.data.get(), encoded.missed.size},
+  });
+  delete[] hbpb;
+
+#ifndef MGARD_ZSTD
+  const MemoryBuffer<unsigned char> out_data = compress_memory_z(
+      const_cast<unsigned char z_const *>(payload.data.get()), payload.size);
+#else
+  const MemoryBuffer<unsigned char> out_data =
+      compress_memory_zstd(payload.data.get(), payload.size);
+#endif
+
+  return gather(
+      {{reinterpret_cast<unsigned char const *>(&encoded.frequencies.size),
+        sizeof(encoded.frequencies.size)},
+       {reinterpret_cast<unsigned char const *>(&encoded.nbits),
+        sizeof(encoded.nbits)},
+       {reinterpret_cast<unsigned char const *>(&encoded.missed.size),
+        sizeof(encoded.missed.size)},
+       {out_data.data.get(), out_data.size}});
+}
+
+HuffmanEncodedStream decompress_deserialize(unsigned char const *const src,
+                                            const std::size_t srcLen) {
+  std::size_t const *const sizes = reinterpret_cast<std::size_t const *>(src);
+  const std::size_t nfrequencies = sizes[0];
+  const std::size_t nbits = sizes[1];
+  const std::size_t nmissed = sizes[2];
+  // This is how the size in bytes of the padded hit buffer was calculated
+  // in `decompress_memory_huffman` before this function was introduced.
+  const std::size_t nhit = nbits / CHAR_BIT + sizeof(unsigned int);
+
+  MemoryBuffer<unsigned char> buffer(nfrequencies + nhit + nmissed);
+  {
+    const std::size_t offset = 3 * sizeof(std::size_t);
+    unsigned char const *const src_ = src + offset;
+    const std::size_t srcLen_ = srcLen - offset;
+    unsigned char *const dst_ = buffer.data.get();
+    const std::size_t dstLen_ = buffer.size;
+
+#ifndef MGARD_ZSTD
+    decompress_memory_z(const_cast<unsigned char z_const *>(src_), srcLen_,
+                        dst_, dstLen_);
+#else
+    decompress_memory_zstd(src_, srcLen_, dst_, dstLen_);
+#endif
+  }
+
+  HuffmanEncodedStream encoded(nbits, nmissed, nfrequencies);
+  {
+    unsigned char const *begin;
+    unsigned char const *end;
+
+    begin = buffer.data.get();
+    end = begin + nfrequencies;
+    std::copy(begin, end, encoded.frequencies.data.get());
+
+    begin = end;
+    assert(encoded.hit.size <= nhit);
+    end = begin + encoded.hit.size;
+    std::copy(begin, end, encoded.hit.data.get());
+
+    // Skip any bytes between `begin + encoded.hit.size` and `begin + nhit`.
+    begin = end + nhit - encoded.hit.size;
+    end = begin + nmissed;
+    std::copy(begin, end, encoded.missed.data.get());
+  }
+
+  return encoded;
+}
+
 void HuffmanCodeword::push_back(const bool bit) {
   const unsigned char offset = length % CHAR_BIT;
   if (not offset) {
@@ -76,29 +206,10 @@ void endianness_shuffle(unsigned char *const buffer, const std::size_t nbytes) {
   }
 }
 
-} // namespace
-namespace {
-
-void check_type_sizes() {
-  static_assert(CHAR_BIT == 8,
-                "code written with assumption that `CHAR_BIT == 8`");
-  static_assert(
-      sizeof(unsigned int) == 4,
-      "code written with assumption that `sizeof(unsigned int) == 4`");
-  static_assert(sizeof(int) == 4,
-                "code written with assumption that `sizeof(int) == 4`");
-  static_assert(
-      sizeof(std::size_t) == 8,
-      "code written with assumption that `sizeof(unsigned int) == 8`");
-}
-
-} // namespace
-
-namespace {
-
 const std::pair<long int, long int> nql_endpoints{
     -static_cast<long int>((nql - 1) / 2), nql / 2 - 1};
-}
+
+} // namespace
 
 HuffmanEncodedStream huffman_encoding(long int const *const quantized_data,
                                       const std::size_t n) {
diff --git a/tests/src/compressors_regression.cpp b/tests/src/compressors_regression.cpp
index 7f7a3ecdf1..4284f31b31 100644
--- a/tests/src/compressors_regression.cpp
+++ b/tests/src/compressors_regression.cpp
@@ -113,7 +113,8 @@ void decompress_memory_huffman(unsigned char const *const src,
     const std::size_t dstLen_ = buffer.size;
 
 #ifndef MGARD_ZSTD
-    decompress_memory_z(src_, srcLen_, dst_, dstLen_);
+    decompress_memory_z(const_cast<unsigned char z_const *>(src_), srcLen_,
+                        dst_, dstLen_);
 #else
     decompress_memory_zstd(src_, srcLen_, dst_, dstLen_);
 #endif
diff --git a/tests/src/test_compressors.cpp b/tests/src/test_compressors.cpp
index 501f085039..3f5568dda7 100644
--- a/tests/src/test_compressors.cpp
+++ b/tests/src/test_compressors.cpp
@@ -172,6 +172,7 @@ TEST_CASE("Huffman compression", "[compressors] [!mayfail]") {
   SECTION("long integers") { test_huffman_identity<long int>(gen, n); }
 }
 
+#ifdef MGARD_ZSTD
 namespace {
 
 void test_zstd_identity(std::uniform_int_distribution<unsigned char> &dis,
@@ -192,7 +193,6 @@ void test_zstd_identity(std::uniform_int_distribution<unsigned char> &dis,
 
 } // namespace
 
-#ifdef MGARD_ZSTD
 TEST_CASE("zstd compression", "[compressors]") {
   std::uniform_int_distribution<unsigned char> dis;
   std::default_random_engine gen(158648);
@@ -262,7 +262,8 @@ TEST_CASE("compression with header configuration", "[compressors]") {
       compressed.data.get(), compressed.size, dst, quantizedLen);
 #else
   REQUIRE(e.compressor() == mgard::pb::Encoding::CPU_HUFFMAN_ZLIB);
-  mgard::decompress_memory_z(compressed.data.get(), compressed.size, dst,
+  mgard::decompress_memory_z(compressed.data.get(), compressed.size,
+                             reinterpret_cast<unsigned char *>(dst),
                              quantizedLen);
 #endif
   REQUIRE(std::equal(quantized, quantized + ndof, dst));
diff --git a/tests/src/test_format.cpp b/tests/src/test_format.cpp
index 970eb87fc3..47055e6ac1 100644
--- a/tests/src/test_format.cpp
+++ b/tests/src/test_format.cpp
@@ -350,11 +350,13 @@ TEST_CASE("reading encoding compressor", "[format]") {
     e.set_compressor(mgard::pb::Encoding::X_HUFFMAN_LZ4);
     REQUIRE_THROWS(mgard::read_encoding_compressor(header));
   }
+#ifdef MGARD_ZSTD
   {
     e.set_compressor(mgard::pb::Encoding::CPU_HUFFMAN_ZSTD);
     REQUIRE(mgard::read_encoding_compressor(header) ==
             mgard::pb::Encoding::CPU_HUFFMAN_ZSTD);
   }
+#endif
 }
 
 namespace {
diff --git a/tests/src/test_huffman.cpp b/tests/src/test_huffman.cpp
index 0921f4538b..cc6da1523e 100644
--- a/tests/src/test_huffman.cpp
+++ b/tests/src/test_huffman.cpp
@@ -5,6 +5,7 @@
 #include <cstdint>
 
 #include <algorithm>
+#include <numeric>
 #include <random>
 
 #include "testing_utilities.hpp"
@@ -215,3 +216,49 @@ TEMPLATE_TEST_CASE("Huffman inversion", "[huffman]", std::int8_t, std::int16_t,
     test_inversion_random<TestType>(10000, -100, 100, gen);
   }
 }
+
+TEST_CASE("`HuffmanEncodedStream` serialization inversion", "[huffman]") {
+  // This is not intended to be a valid `HuffmanEncodedStream`.
+  const std::size_t nbits = 2718;
+  const std::size_t nmissed = 896 * sizeof(int);
+  const std::size_t ntable = 681 * 2 * sizeof(std::size_t);
+  const mgard::HuffmanEncodedStream original(nbits, nmissed, ntable);
+  {
+    unsigned char *const p = original.hit.data.get();
+    std::iota(p, p + original.hit.size, 1u);
+  }
+  {
+    unsigned char *const p = original.missed.data.get();
+    std::iota(p, p + nmissed, 90u);
+  }
+  {
+    unsigned char *const p = original.frequencies.data.get();
+    std::iota(p, p + ntable, 51u);
+  }
+
+  const mgard::MemoryBuffer<unsigned char> serialized =
+      mgard::serialize_compress(original);
+  const mgard::HuffmanEncodedStream deserialized =
+      mgard::decompress_deserialize(serialized.data.get(), serialized.size);
+
+  REQUIRE(original.nbits == deserialized.nbits);
+  REQUIRE(original.hit.size == deserialized.hit.size);
+  REQUIRE(original.missed.size == deserialized.missed.size);
+  REQUIRE(original.frequencies.size == deserialized.frequencies.size);
+
+  {
+    unsigned char const *const p = original.hit.data.get();
+    unsigned char const *const q = deserialized.hit.data.get();
+    REQUIRE(std::equal(p, p + original.hit.size, q));
+  }
+  {
+    unsigned char const *const p = original.missed.data.get();
+    unsigned char const *const q = deserialized.missed.data.get();
+    REQUIRE(std::equal(p, p + nmissed, q));
+  }
+  {
+    unsigned char const *const p = original.frequencies.data.get();
+    unsigned char const *const q = deserialized.frequencies.data.get();
+    REQUIRE(std::equal(p, p + ntable, q));
+  }
+}

From a05703d3e4557e8194ad97ec60e68b6a57669563 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Wed, 15 Jun 2022 14:51:10 -0400
Subject: [PATCH 43/58] Select serialization compressor at runtime.

---
 include/huffman.hpp        | 15 +++++----
 src/compressors.cpp        | 23 ++++++++++++--
 src/huffman.cpp            | 65 +++++++++++++++++++++++++++-----------
 tests/src/test_huffman.cpp | 25 +++++++++++++--
 4 files changed, 99 insertions(+), 29 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index 6a746deead..e86841df57 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -10,6 +10,7 @@
 #include <type_traits>
 #include <vector>
 
+#include "format.hpp"
 #include "utilities.hpp"
 
 namespace mgard {
@@ -47,24 +48,26 @@ struct HuffmanEncodedStream {
 //!
 //!\deprecated
 //!
-//! The serialized stream will be compressed with ZSTD if `MGARD_ZSTD` is
-//! defined and with `zlib` otherwise.
+//! The header will determine which compressor is used.
 //!
+//!\param header Header for the self-describing buffer.
 //!\param encoded Huffman-encoded stream to serialize and compress.
 MemoryBuffer<unsigned char>
-serialize_compress(const HuffmanEncodedStream &encoded);
+serialize_compress(const pb::Header &header,
+                   const HuffmanEncodedStream &encoded);
 
 //! Decompress and then deserialize a Huffman-encoded stream.
 //!
 //!\deprecated
 //!
-//! The buffer will be decompressed with ZSTD if `MGARD_ZSTD` if defined and
-//! with `zlib` otherwise.
+//! The header will determine which decompressor is used.
 //!
+//!\param header Header of the self-describing buffer.
 //!\param src Buffer containing serialized and compressed Huffman-encoded
 //! stream.
 //!\param srcLen Size in bytes of the buffer.
-HuffmanEncodedStream decompress_deserialize(unsigned char const *const src,
+HuffmanEncodedStream decompress_deserialize(const pb::Header &header,
+                                            unsigned char const *const src,
                                             const std::size_t srcLen);
 
 //! Codeword (in progress) associated to a node in a Huffman code creation tree.
diff --git a/src/compressors.cpp b/src/compressors.cpp
index 85e2930512..35d16a52c2 100644
--- a/src/compressors.cpp
+++ b/src/compressors.cpp
@@ -25,7 +25,17 @@ namespace mgard {
 void decompress_memory_huffman(unsigned char const *const src,
                                const std::size_t srcLen, long int *const dst,
                                const std::size_t dstLen) {
-  const HuffmanEncodedStream encoded = decompress_deserialize(src, srcLen);
+  // Dummy header until we change the signature of `decompress_memory_huffman`.
+  pb::Header header;
+  header.mutable_encoding()->set_compressor(
+#ifdef MGARD_ZSTD
+      pb::Encoding::CPU_HUFFMAN_ZSTD
+#else
+      pb::Encoding::CPU_HUFFMAN_ZLIB
+#endif
+  );
+  const HuffmanEncodedStream encoded =
+      decompress_deserialize(header, src, srcLen);
   const MemoryBuffer<long int> decoded = huffman_decoding(encoded);
   {
     long int const *const p = decoded.data.get();
@@ -40,7 +50,16 @@ void decompress_memory_huffman(unsigned char const *const src,
 MemoryBuffer<unsigned char> compress_memory_huffman(long int const *const src,
                                                     const std::size_t srcLen) {
   const HuffmanEncodedStream encoded = huffman_encoding(src, srcLen);
-  return serialize_compress(encoded);
+  // Dummy header until we change the signature of `compress_memory_huffman`.
+  pb::Header header;
+  header.mutable_encoding()->set_compressor(
+#ifdef MGARD_ZSTD
+      pb::Encoding::CPU_HUFFMAN_ZSTD
+#else
+      pb::Encoding::CPU_HUFFMAN_ZLIB
+#endif
+  );
+  return serialize_compress(header, encoded);
 }
 
 #ifdef MGARD_ZSTD
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 369497cabc..1ebb2fbded 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -11,8 +11,6 @@
 #include <stdexcept>
 #include <vector>
 
-#include <iostream>
-
 #include "compressors.hpp"
 #include "huffman.hpp"
 
@@ -63,13 +61,35 @@ gather(const std::vector<Constituent> &constituents) {
   return buffer;
 }
 
+MemoryBuffer<unsigned char>
+compress_serialized_huffman(const pb::Header &header,
+                            const MemoryBuffer<unsigned char> &payload) {
+  switch (header.encoding().compressor()) {
+  case pb::Encoding::CPU_HUFFMAN_ZLIB:
+    return compress_memory_z(
+        const_cast<unsigned char z_const *>(payload.data.get()), payload.size);
+  case pb::Encoding::CPU_HUFFMAN_ZSTD:
+#ifdef MGARD_ZSTD
+    return compress_memory_zstd(payload.data.get(), payload.size);
+#else
+    throw std::runtime_error("MGARD compiled without ZSTD support");
+#endif
+  default:
+    throw std::runtime_error("unrecognized lossless compressor");
+  }
+}
+
 } // namespace
 
 MemoryBuffer<unsigned char>
-serialize_compress(const HuffmanEncodedStream &encoded) {
+serialize_compress(const pb::Header &header,
+                   const HuffmanEncodedStream &encoded) {
   check_type_sizes();
 
-  assert(not(encoded.hit.size % sizeof(unsigned int)));
+  if (header.encoding().serialization() != pb::Encoding::DEPRECATED) {
+    throw std::runtime_error(
+        "Huffman tree not to be serialized with deprecated method");
+  }
 
   const std::size_t offset = encoded.nbits % (CHAR_BIT * sizeof(unsigned int));
   // Number of hit buffer padding bytes.
@@ -89,14 +109,8 @@ serialize_compress(const HuffmanEncodedStream &encoded) {
   });
   delete[] hbpb;
 
-#ifndef MGARD_ZSTD
-  const MemoryBuffer<unsigned char> out_data = compress_memory_z(
-      const_cast<unsigned char z_const *>(payload.data.get()), payload.size);
-#else
-  const MemoryBuffer<unsigned char> out_data =
-      compress_memory_zstd(payload.data.get(), payload.size);
-#endif
-
+  const MemoryBuffer<unsigned char> compressed =
+      compress_serialized_huffman(header, payload);
   return gather(
       {{reinterpret_cast<unsigned char const *>(&encoded.frequencies.size),
         sizeof(encoded.frequencies.size)},
@@ -104,11 +118,17 @@ serialize_compress(const HuffmanEncodedStream &encoded) {
         sizeof(encoded.nbits)},
        {reinterpret_cast<unsigned char const *>(&encoded.missed.size),
         sizeof(encoded.missed.size)},
-       {out_data.data.get(), out_data.size}});
+       {compressed.data.get(), compressed.size}});
 }
 
-HuffmanEncodedStream decompress_deserialize(unsigned char const *const src,
+HuffmanEncodedStream decompress_deserialize(const pb::Header &header,
+                                            unsigned char const *const src,
                                             const std::size_t srcLen) {
+  if (header.encoding().serialization() != pb::Encoding::DEPRECATED) {
+    throw std::runtime_error(
+        "Huffman tree not serialized with deprecated method");
+  }
+
   std::size_t const *const sizes = reinterpret_cast<std::size_t const *>(src);
   const std::size_t nfrequencies = sizes[0];
   const std::size_t nbits = sizes[1];
@@ -125,12 +145,21 @@ HuffmanEncodedStream decompress_deserialize(unsigned char const *const src,
     unsigned char *const dst_ = buffer.data.get();
     const std::size_t dstLen_ = buffer.size;
 
-#ifndef MGARD_ZSTD
-    decompress_memory_z(const_cast<unsigned char z_const *>(src_), srcLen_,
-                        dst_, dstLen_);
+    switch (header.encoding().compressor()) {
+    case pb::Encoding::CPU_HUFFMAN_ZLIB:
+      decompress_memory_z(const_cast<unsigned char z_const *>(src_), srcLen_,
+                          dst_, dstLen_);
+      break;
+    case pb::Encoding::CPU_HUFFMAN_ZSTD:
+#ifdef MGARD_ZSTD
+      decompress_memory_zstd(src_, srcLen_, dst_, dstLen_);
+      break;
 #else
-    decompress_memory_zstd(src_, srcLen_, dst_, dstLen_);
+      throw std::runtime_error("MGARD compiled without ZSTD support");
 #endif
+    default:
+      throw std::runtime_error("unrecognized lossless compressor");
+    }
   }
 
   HuffmanEncodedStream encoded(nbits, nmissed, nfrequencies);
diff --git a/tests/src/test_huffman.cpp b/tests/src/test_huffman.cpp
index cc6da1523e..a444d0c6c1 100644
--- a/tests/src/test_huffman.cpp
+++ b/tests/src/test_huffman.cpp
@@ -10,6 +10,7 @@
 
 #include "testing_utilities.hpp"
 
+#include "format.hpp"
 #include "huffman.hpp"
 #include "huffman_regression.hpp"
 
@@ -217,7 +218,15 @@ TEMPLATE_TEST_CASE("Huffman inversion", "[huffman]", std::int8_t, std::int16_t,
   }
 }
 
-TEST_CASE("`HuffmanEncodedStream` serialization inversion", "[huffman]") {
+namespace {
+
+void test_hes_serialization_inversion(
+    const mgard::pb::Encoding::Compressor compressor) {
+  mgard::pb::Header header;
+  mgard::pb::Encoding &encoding = *header.mutable_encoding();
+  encoding.set_compressor(compressor);
+  encoding.set_serialization(mgard::pb::Encoding::DEPRECATED);
+
   // This is not intended to be a valid `HuffmanEncodedStream`.
   const std::size_t nbits = 2718;
   const std::size_t nmissed = 896 * sizeof(int);
@@ -237,9 +246,10 @@ TEST_CASE("`HuffmanEncodedStream` serialization inversion", "[huffman]") {
   }
 
   const mgard::MemoryBuffer<unsigned char> serialized =
-      mgard::serialize_compress(original);
+      mgard::serialize_compress(header, original);
   const mgard::HuffmanEncodedStream deserialized =
-      mgard::decompress_deserialize(serialized.data.get(), serialized.size);
+      mgard::decompress_deserialize(header, serialized.data.get(),
+                                    serialized.size);
 
   REQUIRE(original.nbits == deserialized.nbits);
   REQUIRE(original.hit.size == deserialized.hit.size);
@@ -262,3 +272,12 @@ TEST_CASE("`HuffmanEncodedStream` serialization inversion", "[huffman]") {
     REQUIRE(std::equal(p, p + ntable, q));
   }
 }
+
+} // namespace
+
+TEST_CASE("`HuffmanEncodedStream` serialization inversion", "[huffman]") {
+  test_hes_serialization_inversion(mgard::pb::Encoding::CPU_HUFFMAN_ZLIB);
+#ifdef MGARD_ZSTD
+  test_hes_serialization_inversion(mgard::pb::Encoding::CPU_HUFFMAN_ZSTD);
+#endif
+}

From 3e53ce41e824c7bde59a0c3bd5abc761198ca805 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Tue, 14 Jun 2022 13:38:52 -0400
Subject: [PATCH 44/58] Enable `RFMH` in `{,de}compress`.

---
 CMakeLists.txt          |   4 +-
 include/compress.tpp    |   2 -
 include/compressors.hpp |  21 --
 include/huffman.hpp     |   2 +
 src/compressors.cpp     | 441 +++++++++++++++++++++++++++++++++-------
 src/format.cpp          |   1 +
 src/mgard.proto         |   6 +-
 7 files changed, 374 insertions(+), 103 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9a2902e6db..c92cca399a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,11 +11,11 @@ endif()
 list(INSERT CMAKE_MODULE_PATH 0 "${CMAKE_CURRENT_LIST_DIR}/cmake")
 
 set(MGARD_VERSION_MAJOR "1")
-set(MGARD_VERSION_MINOR "2")
+set(MGARD_VERSION_MINOR "3")
 set(MGARD_VERSION_PATCH "0")
 
 set(MGARD_FILE_VERSION_MAJOR "1")
-set(MGARD_FILE_VERSION_MINOR "0")
+set(MGARD_FILE_VERSION_MINOR "1")
 set(MGARD_FILE_VERSION_PATCH "0")
 
 project(
diff --git a/include/compress.tpp b/include/compress.tpp
index ebd9a76e83..867b6cfa9f 100644
--- a/include/compress.tpp
+++ b/include/compress.tpp
@@ -28,8 +28,6 @@
 
 namespace mgard {
 
-using DEFAULT_INT_T = std::int64_t;
-
 template <std::size_t N, typename Real>
 CompressedDataset<N, Real>
 compress(const TensorMeshHierarchy<N, Real> &hierarchy, Real *const v,
diff --git a/include/compressors.hpp b/include/compressors.hpp
index 1542d3eeb2..c946538b50 100644
--- a/include/compressors.hpp
+++ b/include/compressors.hpp
@@ -16,27 +16,6 @@
 
 namespace mgard {
 
-//! Compress an array using a Huffman tree.
-//!
-//!\deprecated
-//!
-//!\param[in] src Array to be compressed.
-//!\param[in] srcLen Size of array (number of elements) to be compressed.
-MemoryBuffer<unsigned char> compress_memory_huffman(long int const *const src,
-                                                    const std::size_t srcLen);
-
-//! Decompress an array compressed with `compress_memory_huffman`.
-//!
-//!\deprecated
-//!
-//!\param[in] src Compressed array.
-//!\param[in] srcLen Size in bytes of the compressed array.
-//!\param[out] dst Decompressed array.
-//!\param[in] dstLen Size in bytes of the decompressed array.
-void decompress_memory_huffman(unsigned char const *const src,
-                               const std::size_t srcLen, long int *const dst,
-                               const std::size_t dstLen);
-
 #ifdef MGARD_ZSTD
 //! Compress an array using `zstd`.
 //!
diff --git a/include/huffman.hpp b/include/huffman.hpp
index e86841df57..7233f6e6a7 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -22,6 +22,8 @@ namespace mgard {
 inline constexpr std::size_t nql = 1 << 17;
 
 //! A stream compressed using a Huffman code.
+//!
+//!\deprecated
 struct HuffmanEncodedStream {
   //! Constructor.
   //!
diff --git a/src/compressors.cpp b/src/compressors.cpp
index 35d16a52c2..a67279292f 100644
--- a/src/compressors.cpp
+++ b/src/compressors.cpp
@@ -7,6 +7,7 @@
 #include <algorithm>
 #include <numeric>
 #include <stdexcept>
+#include <type_traits>
 #include <utility>
 #include <vector>
 
@@ -22,46 +23,6 @@
 
 namespace mgard {
 
-void decompress_memory_huffman(unsigned char const *const src,
-                               const std::size_t srcLen, long int *const dst,
-                               const std::size_t dstLen) {
-  // Dummy header until we change the signature of `decompress_memory_huffman`.
-  pb::Header header;
-  header.mutable_encoding()->set_compressor(
-#ifdef MGARD_ZSTD
-      pb::Encoding::CPU_HUFFMAN_ZSTD
-#else
-      pb::Encoding::CPU_HUFFMAN_ZLIB
-#endif
-  );
-  const HuffmanEncodedStream encoded =
-      decompress_deserialize(header, src, srcLen);
-  const MemoryBuffer<long int> decoded = huffman_decoding(encoded);
-  {
-    long int const *const p = decoded.data.get();
-    if (decoded.size * sizeof(*p) != dstLen) {
-      throw std::runtime_error(
-          "mismatch between expected and obtained decompressed buffer sizes");
-    }
-    std::copy(p, p + decoded.size, dst);
-  }
-}
-
-MemoryBuffer<unsigned char> compress_memory_huffman(long int const *const src,
-                                                    const std::size_t srcLen) {
-  const HuffmanEncodedStream encoded = huffman_encoding(src, srcLen);
-  // Dummy header until we change the signature of `compress_memory_huffman`.
-  pb::Header header;
-  header.mutable_encoding()->set_compressor(
-#ifdef MGARD_ZSTD
-      pb::Encoding::CPU_HUFFMAN_ZSTD
-#else
-      pb::Encoding::CPU_HUFFMAN_ZLIB
-#endif
-  );
-  return serialize_compress(header, encoded);
-}
-
 #ifdef MGARD_ZSTD
 /*! CHECK
  * Check that the condition holds. If it doesn't print a message and die.
@@ -76,10 +37,6 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int const *const src,
     }                                                                          \
   } while (0)
 
-/*! CHECK_ZSTD
- * Check the zstd error code and die if an error occurred after printing a
- * message.
- */
 /*! CHECK_ZSTD
  * Check the zstd error code and die if an error occurred after printing a
  * message.
@@ -191,58 +148,388 @@ void decompress_memory_zstd(void const *const src, const std::size_t srcLen,
 }
 #endif
 
+namespace {
+
+template <typename Int>
+MemoryBuffer<unsigned char> compress_huffman_C_rfmh_(const pb::Header &header,
+                                                     void const *const src,
+                                                     const std::size_t srcLen) {
+  check_quantization_buffer(header, src, srcLen);
+
+  assert(header.encoding().serialization() == pb::Encoding::RFMH);
+
+  return huffman_encode(static_cast<Int const *>(src), srcLen / sizeof(Int));
+}
+
+// `C` being either ZSTD or `zlib`.
+MemoryBuffer<unsigned char> compress_huffman_C_rfmh(const pb::Header &header,
+                                                    void *const src,
+                                                    const std::size_t srcLen) {
+  assert(header.encoding().serialization() == pb::Encoding::RFMH);
+
+  switch (header.quantization().type()) {
+  case pb::Quantization::INT8_T:
+    return compress_huffman_C_rfmh_<std::int8_t>(header, src, srcLen);
+  case pb::Quantization::INT16_T:
+    return compress_huffman_C_rfmh_<std::int16_t>(header, src, srcLen);
+  case pb::Quantization::INT32_T:
+    return compress_huffman_C_rfmh_<std::int32_t>(header, src, srcLen);
+  case pb::Quantization::INT64_T:
+    return compress_huffman_C_rfmh_<std::int64_t>(header, src, srcLen);
+  default:
+    throw std::runtime_error("unrecognized quantization type");
+  }
+}
+
+MemoryBuffer<unsigned char>
+compress_huffman_C_deprecated(const pb::Header &header, void *const src,
+                              const std::size_t srcLen) {
+  check_quantization_buffer(header, src, srcLen);
+
+  assert(header.encoding().serialization() == pb::Encoding::DEPRECATED);
+  if (header.quantization().type() != mgard::pb::Quantization::INT64_T) {
+    throw std::runtime_error(
+        "deprecated Huffman coding not implemented for quantization "
+        "types other than `std::int64_t`");
+  }
+  // I don't think it's strictly necessary that `std::int64_t` and `long int`
+  // are the same type. We could think of `long int` as a generic byte type,
+  // like `unsigned char`. Worth more attention if this assertion ever fails,
+  // though. That might be a good time to remove the deprecated Huffman coding
+  // functions.
+  static_assert(std::is_same<std::int64_t, long int>::value,
+                "deprecated Huffman coding written with assumption that "
+                "`std::int64_t` is `long int`");
+
+  return serialize_compress(
+      header, huffman_encoding(reinterpret_cast<long int const *>(src),
+                               srcLen / sizeof(long int)));
+}
+
+MemoryBuffer<unsigned char>
+compress_huffman_zlib_deprecated(const pb::Header &header, void *const src,
+                                 const std::size_t srcLen) {
+  assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZLIB);
+
+  return compress_huffman_C_deprecated(header, src, srcLen);
+}
+
+#ifdef MGARD_ZSTD
+MemoryBuffer<unsigned char>
+compress_huffman_zstd_deprecated(const pb::Header &header, void *const src,
+                                 const std::size_t srcLen) {
+  assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZSTD);
+
+  return compress_huffman_C_deprecated(header, src, srcLen);
+}
+#endif
+
+namespace {
+
+// `decompress_memory_z` and `decompress_memory_zstd` need to know the size of
+// the decompressed buffer before they can decompress. So, in addition to the
+// compressed serialized Huffman tree (`compressed`), we need to store the size
+// in bytes of the serialized Huffman tree (`nhuffman`).
+MemoryBuffer<unsigned char> concatenate_nhuffman_and_compressed(
+    const std::size_t nhuffman, const MemoryBuffer<unsigned char> &compressed) {
+  MemoryBuffer<unsigned char> out(HEADER_SIZE_SIZE + compressed.size);
+  unsigned char *p = out.data.get();
+
+  // Size in bytes of the serialized Huffman tree.
+  const std::array<unsigned char, HEADER_SIZE_SIZE> nhuffman_ =
+      serialize_header_size(nhuffman);
+  std::copy(nhuffman_.begin(), nhuffman_.end(), p);
+  p += HEADER_SIZE_SIZE;
+
+  unsigned char const *const q = compressed.data.get();
+  std::copy(q, q + compressed.size, p);
+  return out;
+}
+
+} // namespace
+
+MemoryBuffer<unsigned char>
+compress_huffman_zlib_rfmh(const pb::Header &header, void *const src,
+                           const std::size_t srcLen) {
+  assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZLIB);
+  assert(header.encoding().serialization() == pb::Encoding::RFMH);
+
+  const MemoryBuffer<unsigned char> encoded =
+      compress_huffman_C_rfmh(header, src, srcLen);
+  const MemoryBuffer<unsigned char> compressed =
+      compress_memory_z(encoded.data.get(), encoded.size);
+  return concatenate_nhuffman_and_compressed(encoded.size, compressed);
+}
+
+#ifdef MGARD_ZSTD
+MemoryBuffer<unsigned char>
+compress_huffman_zstd_rfmh(const pb::Header &header, void *const src,
+                           const std::size_t srcLen) {
+  assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZSTD);
+  assert(header.encoding().serialization() == pb::Encoding::RFMH);
+
+  const MemoryBuffer<unsigned char> encoded =
+      compress_huffman_C_rfmh(header, src, srcLen);
+  return concatenate_nhuffman_and_compressed(
+      encoded.size, compress_memory_zstd(encoded.data.get(), encoded.size));
+}
+#endif
+
+MemoryBuffer<unsigned char> compress_huffman_zlib(const pb::Header &header,
+                                                  void *const src,
+                                                  const std::size_t srcLen) {
+  assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZLIB);
+
+  switch (header.encoding().serialization()) {
+  case pb::Encoding::DEPRECATED:
+    return compress_huffman_zlib_deprecated(header, src, srcLen);
+  case pb::Encoding::RFMH:
+    return compress_huffman_zlib_rfmh(header, src, srcLen);
+  default:
+    throw std::runtime_error("unrecognized Huffman serialization");
+  }
+}
+
+#ifdef MGARD_ZSTD
+MemoryBuffer<unsigned char> compress_huffman_zstd(const pb::Header &header,
+                                                  void *const src,
+                                                  const std::size_t srcLen) {
+  assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZSTD);
+
+  switch (header.encoding().serialization()) {
+  case pb::Encoding::DEPRECATED:
+    return compress_huffman_zstd_deprecated(header, src, srcLen);
+  case pb::Encoding::RFMH:
+    return compress_huffman_zstd_rfmh(header, src, srcLen);
+  default:
+    throw std::runtime_error("unrecognized Huffman serialization");
+  }
+}
+#endif
+
+} // namespace
+
 MemoryBuffer<unsigned char> compress(const pb::Header &header, void *const src,
                                      const std::size_t srcLen) {
   switch (header.encoding().compressor()) {
-  case pb::Encoding::CPU_HUFFMAN_ZSTD:
+  case pb::Encoding::CPU_ZLIB:
+    return compress_memory_z(src, srcLen);
+  case pb::Encoding::CPU_ZSTD:
 #ifdef MGARD_ZSTD
-  {
-    if (header.quantization().type() != mgard::pb::Quantization::INT64_T) {
-      throw std::runtime_error("Huffman tree not implemented for quantization "
-                               "types other than `std::int64_t`");
-    }
-    // Quantization type size.
-    const std::size_t qts = quantization_buffer(header, 1).size;
-    if (srcLen % qts) {
-      throw std::runtime_error("incorrect quantization buffer size");
-    }
-    return compress_memory_huffman(reinterpret_cast<long int *>(src),
-                                   srcLen / qts);
-  }
+    return compress_memory_zstd(src, srcLen);
 #else
     throw std::runtime_error("MGARD compiled without ZSTD support");
 #endif
   case pb::Encoding::CPU_HUFFMAN_ZLIB:
-    return compress_memory_z(src, srcLen);
+    return compress_huffman_zlib(header, src, srcLen);
+  case pb::Encoding::CPU_HUFFMAN_ZSTD:
+#ifdef MGARD_ZSTD
+    return compress_huffman_zstd(header, src, srcLen);
+#else
+    throw std::runtime_error("MGARD compiled without ZSTD support");
+#endif
   default:
     throw std::runtime_error("unrecognized lossless compressor");
   }
 }
 
+void decompress_noop(void *const src, const std::size_t srcLen, void *const dst,
+                     const std::size_t dstLen) {
+  if (srcLen != dstLen) {
+    throw std::invalid_argument("source and destination lengths must be equal");
+  }
+  {
+    unsigned char const *const p = static_cast<unsigned char const *>(src);
+    unsigned char *const q = static_cast<unsigned char *>(dst);
+    std::copy(p, p + srcLen, q);
+  }
+}
+
+namespace {
+
+template <typename Int>
+void decompress_huffman_C_rfmh_(const pb::Header &header,
+                                const MemoryBuffer<unsigned char> &encoded,
+                                void *const dst, const std::size_t dstLen) {
+  check_quantization_buffer(header, dst, dstLen);
+
+  assert(header.encoding().serialization() == pb::Encoding::RFMH);
+
+  const MemoryBuffer<Int> decoded = huffman_decode<Int>(encoded);
+  if (sizeof(Int) * decoded.size != dstLen) {
+    throw std::runtime_error("size of destination buffer is incorrect");
+  }
+  unsigned char const *const p =
+      reinterpret_cast<unsigned char const *>(decoded.data.get());
+  std::copy(p, p + dstLen, static_cast<unsigned char *>(dst));
+}
+
+// `C` being either ZSTD or `zlib`.
+void decompress_huffman_C_rfmh(const pb::Header &header,
+                               const MemoryBuffer<unsigned char> &encoded,
+                               void *const dst, const std::size_t dstLen) {
+  assert(header.encoding().serialization() == pb::Encoding::RFMH);
+
+  switch (header.quantization().type()) {
+  case pb::Quantization::INT8_T:
+    return decompress_huffman_C_rfmh_<std::int8_t>(header, encoded, dst,
+                                                   dstLen);
+  case pb::Quantization::INT16_T:
+    return decompress_huffman_C_rfmh_<std::int16_t>(header, encoded, dst,
+                                                    dstLen);
+  case pb::Quantization::INT32_T:
+    return decompress_huffman_C_rfmh_<std::int32_t>(header, encoded, dst,
+                                                    dstLen);
+  case pb::Quantization::INT64_T:
+    return decompress_huffman_C_rfmh_<std::int64_t>(header, encoded, dst,
+                                                    dstLen);
+  default:
+    throw std::runtime_error("unrecognized quantization type");
+  }
+}
+
+void decompress_huffman_C_deprecated(const pb::Header &header, void *const src,
+                                     const std::size_t srcLen, void *const dst,
+                                     const std::size_t dstLen) {
+  check_quantization_buffer(header, dst, dstLen);
+
+  assert(header.encoding().serialization() == pb::Encoding::DEPRECATED);
+  if (header.quantization().type() != mgard::pb::Quantization::INT64_T) {
+    throw std::runtime_error(
+        "deprecated Huffman coding not implemented for quantization "
+        "types other than `std::int64_t`");
+  }
+  // I don't think it's strictly necessary that `std::int64_t` and `long int`
+  // are the same type. We could think of `long int` as a generic byte type,
+  // like `unsigned char`. Worth more attention if this assertion ever fails,
+  // though. That might be a good time to remove the deprecated Huffman coding
+  // functions.
+  static_assert(std::is_same<std::int64_t, long int>::value,
+                "deprecated Huffman coding written with assumption that "
+                "`std::int64_t` is `long int`");
+
+  const MemoryBuffer<long int> decoded =
+      huffman_decoding(decompress_deserialize(
+          header, reinterpret_cast<unsigned char const *>(src), srcLen));
+  if (sizeof(long int) * decoded.size != dstLen) {
+    throw std::runtime_error("size of destination buffer is incorrect");
+  }
+  {
+    unsigned char const *const p =
+        reinterpret_cast<unsigned char const *>(decoded.data.get());
+    std::copy(p, p + dstLen, static_cast<unsigned char *>(dst));
+  }
+}
+
+void decompress_huffman_zlib_deprecated(const pb::Header &header,
+                                        void *const src,
+                                        const std::size_t srcLen,
+                                        void *const dst,
+                                        const std::size_t dstLen) {
+  assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZLIB);
+
+  return decompress_huffman_C_deprecated(header, src, srcLen, dst, dstLen);
+}
+
+#ifdef MGARD_ZSTD
+void decompress_huffman_zstd_deprecated(const pb::Header &header,
+                                        void *const src,
+                                        const std::size_t srcLen,
+                                        void *const dst,
+                                        const std::size_t dstLen) {
+  assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZSTD);
+
+  return decompress_huffman_C_deprecated(header, src, srcLen, dst, dstLen);
+}
+#endif
+
+void decompress_huffman_zlib_rfmh(const pb::Header &header, void *const src,
+                                  const std::size_t srcLen, void *const dst,
+                                  const std::size_t dstLen) {
+  assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZLIB);
+  assert(header.encoding().serialization() == pb::Encoding::RFMH);
+
+  BufferWindow window(src, srcLen);
+  // Read the size in bytes of the serialized Huffman tree.
+  MemoryBuffer<unsigned char> encoded(read_header_size(window));
+  decompress_memory_z(const_cast<unsigned char z_const *>(window.current),
+                      window.end - window.current, encoded.data.get(),
+                      encoded.size);
+
+  return decompress_huffman_C_rfmh(header, encoded, dst, dstLen);
+}
+
+#ifdef MGARD_ZSTD
+void decompress_huffman_zstd_rfmh(const pb::Header &header, void *const src,
+                                  const std::size_t srcLen, void *const dst,
+                                  const std::size_t dstLen) {
+  assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZSTD);
+  assert(header.encoding().serialization() == pb::Encoding::RFMH);
+
+  BufferWindow window(src, srcLen);
+  // Read the size in bytes of the serialized Huffman tree.
+  MemoryBuffer<unsigned char> encoded(read_header_size(window));
+  decompress_memory_zstd(const_cast<unsigned char z_const *>(window.current),
+                         window.end - window.current, encoded.data.get(),
+                         encoded.size);
+
+  return decompress_huffman_C_rfmh(header, encoded, dst, dstLen);
+}
+#endif
+
+void decompress_huffman_zlib(const pb::Header &header, void *const src,
+                             const std::size_t srcLen, void *const dst,
+                             const std::size_t dstLen) {
+  assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZLIB);
+
+  switch (header.encoding().serialization()) {
+  case pb::Encoding::DEPRECATED:
+    return decompress_huffman_zlib_deprecated(header, src, srcLen, dst, dstLen);
+  case pb::Encoding::RFMH:
+    return decompress_huffman_zlib_rfmh(header, src, srcLen, dst, dstLen);
+  default:
+    throw std::runtime_error("unrecognized Huffman serialization");
+  }
+}
+
+#ifdef MGARD_ZSTD
+void decompress_huffman_zstd(const pb::Header &header, void *const src,
+                             const std::size_t srcLen, void *const dst,
+                             const std::size_t dstLen) {
+  assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZSTD);
+
+  switch (header.encoding().serialization()) {
+  case pb::Encoding::DEPRECATED:
+    return decompress_huffman_zstd_deprecated(header, src, srcLen, dst, dstLen);
+  case pb::Encoding::RFMH:
+    return decompress_huffman_zstd_rfmh(header, src, srcLen, dst, dstLen);
+  default:
+    throw std::runtime_error("unrecognized Huffman serialization");
+  }
+}
+#endif
+
+} // namespace
+
 void decompress(const pb::Header &header, void *const src,
                 const std::size_t srcLen, void *const dst,
                 const std::size_t dstLen) {
-  switch (read_encoding_compressor(header)) {
-  case pb::Encoding::NOOP_COMPRESSOR:
-    if (srcLen != dstLen) {
-      throw std::invalid_argument(
-          "source and destination lengths must be equal");
-    }
-    {
-      unsigned char const *const p = static_cast<unsigned char const *>(src);
-      unsigned char *const q = static_cast<unsigned char *>(dst);
-      std::copy(p, p + srcLen, q);
-    }
-    break;
+  switch (header.encoding().compressor()) {
+  case pb::Encoding::CPU_ZLIB:
+    return decompress_memory_z(const_cast<void z_const *>(src), srcLen,
+                               static_cast<unsigned char *>(dst), dstLen);
+  case pb::Encoding::CPU_ZSTD:
+#ifdef MGARD_ZSTD
+    return decompress_memory_zstd(
+        src, srcLen, reinterpret_cast<unsigned char *>(dst), dstLen);
+#else
+    throw std::runtime_error("MGARD compiled without ZSTD support");
+#endif
   case pb::Encoding::CPU_HUFFMAN_ZLIB:
-    decompress_memory_z(const_cast<void z_const *>(src), srcLen,
-                        static_cast<unsigned char *>(dst), dstLen);
-    break;
+    return decompress_huffman_zlib(header, src, srcLen, dst, dstLen);
   case pb::Encoding::CPU_HUFFMAN_ZSTD:
 #ifdef MGARD_ZSTD
-    decompress_memory_huffman(static_cast<unsigned char *>(src), srcLen,
-                              static_cast<long int *>(dst), dstLen);
-    break;
+    return decompress_huffman_zstd(header, src, srcLen, dst, dstLen);
 #else
     throw std::runtime_error("MGARD compiled without ZSTD support");
 #endif
diff --git a/src/format.cpp b/src/format.cpp
index 83b138db81..e9cda8e756 100644
--- a/src/format.cpp
+++ b/src/format.cpp
@@ -161,6 +161,7 @@ void populate_defaults(pb::Header &header) {
         pb::Encoding::CPU_HUFFMAN_ZLIB
 #endif
     );
+    e.set_serialization(pb::Encoding::RFMH);
   }
   {
     pb::Device &device = *header.mutable_device();
diff --git a/src/mgard.proto b/src/mgard.proto
index a96fd67c4f..2407a78f7e 100644
--- a/src/mgard.proto
+++ b/src/mgard.proto
@@ -125,7 +125,11 @@ message Encoding {
   }
   enum Compressor {
     NOOP_COMPRESSOR = 0;
-    CPU_HUFFMAN_ZLIB = 1;
+    // Explanation for the wonky numbering: this first case was originally called `CPU_HUFFMAN_ZLIB`,
+    // but the relevant code didn't actually call the Huffman encoder.
+    CPU_ZLIB = 1;
+    CPU_ZSTD = 7;
+    CPU_HUFFMAN_ZLIB = 6;
     CPU_HUFFMAN_ZSTD = 2;
     X_HUFFMAN = 3;
     X_HUFFMAN_LZ4 = 4;

From d0fe35f0e383533ffd15bf07384674ce615395ae Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Thu, 16 Jun 2022 17:53:36 -0400
Subject: [PATCH 45/58] Add tests for `RFMH` in `{,de}compress`.

---
 tests/include/compressors_regression.hpp |   9 +-
 tests/src/compressors_regression.cpp     |  52 ++-
 tests/src/test_compressors.cpp           | 398 +++++++++++++----------
 3 files changed, 276 insertions(+), 183 deletions(-)

diff --git a/tests/include/compressors_regression.hpp b/tests/include/compressors_regression.hpp
index 07f632eec4..a1adfe0ee7 100644
--- a/tests/include/compressors_regression.hpp
+++ b/tests/include/compressors_regression.hpp
@@ -5,6 +5,7 @@
 
 #include <cstddef>
 
+#include "format.hpp"
 #include "utilities.hpp"
 
 namespace mgard {
@@ -13,18 +14,22 @@ namespace regression {
 
 //! Compress an array using a Huffman tree.
 //!
+//!\param[in] header Header for the self-describing buffer.
 //!\param[in] src Array to be compressed.
 //!\param[in] srcLen Size of array (number of elements) to be compressed.
-MemoryBuffer<unsigned char> compress_memory_huffman(long int const *const src,
+MemoryBuffer<unsigned char> compress_memory_huffman(const pb::Header &header,
+                                                    long int const *const src,
                                                     const std::size_t srcLen);
 
 //! Decompress an array compressed with `compress_memory_huffman`.
 //!
+//!\param[in] header Header parsed from the original self-describing buffer.
 //!\param[in] src Compressed array.
 //!\param[in] srcLen Size in bytes of the compressed array.
 //!\param[out] dst Decompressed array.
 //!\param[in] dstLen Size in bytes of the decompressed array.
-void decompress_memory_huffman(unsigned char const *const src,
+void decompress_memory_huffman(const pb::Header &header,
+                               unsigned char const *const src,
                                const std::size_t srcLen, long int *const dst,
                                const std::size_t dstLen);
 
diff --git a/tests/src/compressors_regression.cpp b/tests/src/compressors_regression.cpp
index 4284f31b31..a34a192004 100644
--- a/tests/src/compressors_regression.cpp
+++ b/tests/src/compressors_regression.cpp
@@ -26,11 +26,31 @@ std::size_t hit_buffer_size(const std::size_t nbits) {
   return nbits / CHAR_BIT + sizeof(unsigned int);
 }
 
+MemoryBuffer<unsigned char> compress_serialized(const pb::Header &header,
+                                                unsigned char const *const p,
+                                                const std::size_t n) {
+  assert(header.encoding().serialization() == pb::Encoding::DEPRECATED);
+
+  switch (header.encoding().compressor()) {
+  case pb::Encoding::CPU_HUFFMAN_ZLIB:
+    return compress_memory_z(const_cast<unsigned char z_const *>(p), n);
+  case pb::Encoding::CPU_HUFFMAN_ZSTD:
+#ifdef MGARD_ZSTD
+    return compress_memory_zstd(p, n);
+#else
+    throw std::runtime_error("MGARD compiled without ZSTD support");
+#endif
+  default:
+    throw std::runtime_error("unrecognized lossless compressor");
+  }
+}
+
 } // namespace
 
 // This code also makes endianness assumptions.
 
-MemoryBuffer<unsigned char> compress_memory_huffman(long int const *const src,
+MemoryBuffer<unsigned char> compress_memory_huffman(const pb::Header &header,
+                                                    long int const *const src,
                                                     const std::size_t srcLen) {
   HuffmanEncodedStream encoded =
       mgard::regression::huffman_encoding(src, srcLen);
@@ -65,13 +85,9 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int const *const src,
   std::memcpy(bufp, encoded.missed.data.get(), encoded.missed.size);
   bufp += encoded.missed.size;
 
-#ifndef MGARD_ZSTD
   const MemoryBuffer<unsigned char> out_data =
-      compress_memory_z(payload, npayload);
-#else
-  const MemoryBuffer<unsigned char> out_data =
-      compress_memory_zstd(payload, npayload);
-#endif
+      compress_serialized(header, payload, npayload);
+
   delete[] payload;
   bufp = nullptr;
 
@@ -95,9 +111,12 @@ MemoryBuffer<unsigned char> compress_memory_huffman(long int const *const src,
   return MemoryBuffer<unsigned char>(buffer, bufferLen);
 }
 
-void decompress_memory_huffman(unsigned char const *const src,
+void decompress_memory_huffman(const pb::Header &header,
+                               unsigned char const *const src,
                                const std::size_t srcLen, long int *const dst,
                                const std::size_t dstLen) {
+  assert(header.encoding().serialization() == pb::Encoding::DEPRECATED);
+
   std::size_t const *const sizes = reinterpret_cast<std::size_t const *>(src);
   const std::size_t nfrequencies = sizes[0];
   const std::size_t nbits = sizes[1];
@@ -112,12 +131,21 @@ void decompress_memory_huffman(unsigned char const *const src,
     unsigned char *const dst_ = buffer.data.get();
     const std::size_t dstLen_ = buffer.size;
 
-#ifndef MGARD_ZSTD
-    decompress_memory_z(const_cast<unsigned char z_const *>(src_), srcLen_,
-                        dst_, dstLen_);
+    switch (header.encoding().compressor()) {
+    case pb::Encoding::CPU_HUFFMAN_ZLIB:
+      decompress_memory_z(const_cast<unsigned char z_const *>(src_), srcLen_,
+                          dst_, dstLen_);
+      break;
+    case pb::Encoding::CPU_HUFFMAN_ZSTD:
+#ifdef MGARD_ZSTD
+      decompress_memory_zstd(src_, srcLen_, dst_, dstLen_);
+      break;
 #else
-    decompress_memory_zstd(src_, srcLen_, dst_, dstLen_);
+      throw std::runtime_error("MGARD compiled without ZSTD support");
 #endif
+    default:
+      throw std::runtime_error("unrecognized lossless compressor");
+    }
   }
 
   HuffmanEncodedStream encoded(nbits, nmissed, nfrequencies);
diff --git a/tests/src/test_compressors.cpp b/tests/src/test_compressors.cpp
index 3f5568dda7..9b7f28cc09 100644
--- a/tests/src/test_compressors.cpp
+++ b/tests/src/test_compressors.cpp
@@ -1,3 +1,4 @@
+#include "catch2/catch_template_test_macros.hpp"
 #include "catch2/catch_test_macros.hpp"
 
 #include <cstdint>
@@ -14,57 +15,72 @@
 
 namespace {
 
-template <typename T>
-void test_huffman_identity(std::default_random_engine &gen,
-                           const std::size_t n) {
-  std::uniform_int_distribution<T> dis(std::numeric_limits<T>::min());
-  const auto f = [&]() -> T { return dis(gen); };
-  std::vector<long int> src(n);
-  std::generate(src.begin(), src.end(), f);
-  std::vector<long int> src_(src);
-  mgard::MemoryBuffer<unsigned char> compressed =
-      mgard::compress_memory_huffman(src_.data(), n);
-  long int *const decompressed = new long int[n];
-  mgard::decompress_memory_huffman(compressed.data.get(), compressed.size,
-                                   decompressed, n * sizeof(long int));
-  REQUIRE(std::equal(src.begin(), src.end(), decompressed));
-  delete[] decompressed;
+// Generate a header for use with the deprecated Huffman serialization method.
+mgard::pb::Header
+deprecated_header(const mgard::pb::Encoding::Compressor compressor) {
+  mgard::pb::Header header;
+  header.mutable_quantization()->set_type(mgard::pb::Quantization::INT64_T);
+  header.mutable_encoding()->set_preprocessor(mgard::pb::Encoding::SHUFFLE);
+  header.mutable_encoding()->set_compressor(compressor);
+  header.mutable_encoding()->set_serialization(mgard::pb::Encoding::DEPRECATED);
+  return header;
 }
 
 void test_huffman_compression_regression(long int const *const src,
                                          const std::size_t srcLen) {
-  const mgard::MemoryBuffer<unsigned char> out =
-      mgard::regression::compress_memory_huffman(src, srcLen);
-  const mgard::MemoryBuffer<unsigned char> out_ =
-      mgard::compress_memory_huffman(src, srcLen);
-
-  REQUIRE(out.size == out_.size);
-  unsigned char const *const p = out.data.get();
-  unsigned char const *const p_ = out_.data.get();
-  REQUIRE(std::equal(p, p + out.size, p_));
+  std::vector<mgard::pb::Encoding::Compressor> compressors;
+  compressors.push_back(mgard::pb::Encoding::CPU_HUFFMAN_ZLIB);
+#ifdef MGARD_ZSTD
+  compressors.push_back(mgard::pb::Encoding::CPU_HUFFMAN_ZSTD);
+#endif
+
+  for (mgard::pb::Encoding::Compressor compressor : compressors) {
+    const mgard::pb::Header header = deprecated_header(compressor);
+    const mgard::MemoryBuffer<unsigned char> out =
+        mgard::regression::compress_memory_huffman(header, src, srcLen);
+    unsigned char const *const p = out.data.get();
+
+    const mgard::MemoryBuffer<unsigned char> out_ = mgard::compress(
+        header, const_cast<long int *>(src), srcLen * sizeof(long int));
+    unsigned char const *const p_ = out_.data.get();
+
+    REQUIRE(out.size == out_.size);
+    REQUIRE(std::equal(p, p + out.size, p_));
+  }
 }
 
 void test_huffman_decompression_regression(long int const *const src,
                                            const std::size_t srcLen) {
-  const mgard::MemoryBuffer<unsigned char> compressed =
-      mgard::regression::compress_memory_huffman(src, srcLen);
-  const mgard::MemoryBuffer<unsigned char> compressed_ =
-      mgard::regression::compress_memory_huffman(src, srcLen);
+  std::vector<mgard::pb::Encoding::Compressor> compressors;
+  compressors.push_back(mgard::pb::Encoding::CPU_HUFFMAN_ZLIB);
+#ifdef MGARD_ZSTD
+  compressors.push_back(mgard::pb::Encoding::CPU_HUFFMAN_ZSTD);
+#endif
+
+  for (const mgard::pb::Encoding::Compressor compressor : compressors) {
+    const mgard::pb::Header header = deprecated_header(compressor);
+
+    const mgard::MemoryBuffer<unsigned char> compressed =
+        mgard::regression::compress_memory_huffman(header, src, srcLen);
+    const mgard::MemoryBuffer<unsigned char> compressed_(compressed.size);
 
-  mgard::MemoryBuffer<long int> out(srcLen);
-  mgard::MemoryBuffer<long int> out_(srcLen);
+    unsigned char *const q = compressed.data.get();
+    unsigned char *const q_ = compressed_.data.get();
+    std::copy(q, q + compressed.size, q_);
 
-  unsigned char *const q = compressed.data.get();
-  unsigned char *const q_ = compressed_.data.get();
-  long int *const p = out.data.get();
-  long int *const p_ = out_.data.get();
+    mgard::MemoryBuffer<long int> out(srcLen);
+    mgard::MemoryBuffer<long int> out_(srcLen);
 
-  mgard::regression::decompress_memory_huffman(q, compressed.size, p,
-                                               out.size * sizeof(long int));
-  mgard::decompress_memory_huffman(q_, compressed_.size, p_,
-                                   out_.size * sizeof(long int));
+    long int *const p = out.data.get();
+    long int *const p_ = out_.data.get();
 
-  REQUIRE(std::equal(p, p + srcLen, p_));
+    mgard::regression::decompress_memory_huffman(header, q, compressed.size, p,
+                                                 out.size * sizeof(long int));
+
+    mgard::decompress(header, q_, compressed_.size, out_.data.get(),
+                      out_.size * sizeof(long int));
+    REQUIRE(std::equal(p, p + srcLen, p_));
+  }
 }
 
 void test_hcr_constant(const std::size_t srcLen, const long int q) {
@@ -163,15 +179,6 @@ TEST_CASE("Huffman decompression regression", "[compressors] [regression]") {
   }
 }
 
-TEST_CASE("Huffman compression", "[compressors] [!mayfail]") {
-  std::default_random_engine gen(257100);
-  const std::size_t n = 5000;
-  SECTION("signed characters") { test_huffman_identity<signed char>(gen, n); }
-  SECTION("short integers") { test_huffman_identity<short int>(gen, n); }
-  SECTION("integers") { test_huffman_identity<int>(gen, n); }
-  SECTION("long integers") { test_huffman_identity<long int>(gen, n); }
-}
-
 #ifdef MGARD_ZSTD
 namespace {
 
@@ -232,150 +239,203 @@ TEST_CASE("zlib compression", "[compressors]") {
   }
 }
 
-TEST_CASE("compression with header configuration", "[compressors]") {
-  mgard::pb::Header header;
-  // TODO: Once Huffman trees can be built for types other than `long int`, use
-  // something other than `std::int64_t` here.
-  mgard::populate_defaults(header);
+namespace {
 
-  const std::size_t ndof = 10000;
-  std::int64_t *const quantized = new std::int64_t[ndof];
-  std::uniform_int_distribution<std::int64_t> dis(-250, 250);
-  std::default_random_engine gen(419643);
-  const auto f = [&]() -> std::int64_t { return dis(gen); };
-  std::generate(quantized, quantized + ndof, f);
-  const std::size_t quantizedLen = ndof * sizeof(*quantized);
-  // `dst` must have the correct alignment for the quantization type.
-  std::int64_t *const dst = new std::int64_t[ndof];
-
-  std::int64_t *const quantized_ = new std::int64_t[ndof];
-  std::copy(quantized, quantized + ndof, quantized_);
+template <typename Int>
+void test_cd_inversion(const mgard::pb::Header &header,
+                       Int const *const quantized, const std::size_t n) {
+  const std::size_t nbytes = sizeof(Int) * n;
+
+  Int *const quantized_ = new Int[n];
+  std::copy(quantized, quantized + n, quantized_);
   const mgard::MemoryBuffer<unsigned char> compressed =
-      mgard::compress(header, quantized_, quantizedLen);
+      mgard::compress(header, quantized_, nbytes);
   delete[] quantized_;
 
-  const mgard::pb::Encoding &e = header.encoding();
-  REQUIRE(e.preprocessor() == mgard::pb::Encoding::SHUFFLE);
-#ifdef MGARD_ZSTD
-  REQUIRE(e.compressor() == mgard::pb::Encoding::CPU_HUFFMAN_ZSTD);
-  mgard::regression::decompress_memory_huffman(
-      compressed.data.get(), compressed.size, dst, quantizedLen);
-#else
-  REQUIRE(e.compressor() == mgard::pb::Encoding::CPU_HUFFMAN_ZLIB);
-  mgard::decompress_memory_z(compressed.data.get(), compressed.size,
-                             reinterpret_cast<unsigned char *>(dst),
-                             quantizedLen);
-#endif
-  REQUIRE(std::equal(quantized, quantized + ndof, dst));
-  delete[] dst;
+  Int *const decompressed = new Int[n];
+  mgard::decompress(header, compressed.data.get(), compressed.size,
+                    decompressed, nbytes);
+  REQUIRE(std::equal(quantized, quantized + n, decompressed));
+  delete[] decompressed;
+}
+
+template <typename Int>
+void test_cd_inversion_constant(const mgard::pb::Header &header,
+                                const std::size_t N, const Int q) {
+  Int *const quantized = new Int[N];
+  std::fill(quantized, quantized + N, q);
+  test_cd_inversion(header, quantized, N);
   delete[] quantized;
 }
 
-TEST_CASE("decompression with header configuration", "[compressors]") {
-  mgard::pb::Header header;
-  // TODO: Once Huffman trees can be built for types other than `long int`, use
-  // something other than `std::int64_t` here.
-  mgard::populate_defaults(header);
+template <typename Int>
+void test_cd_inversion_periodic(const mgard::pb::Header &header,
+                                const std::size_t N, const Int q,
+                                const std::size_t period) {
+  Int *const quantized = new Int[N];
+  std::generate(quantized, quantized + N, PeriodicGenerator(period, q));
+  test_cd_inversion(header, quantized, N);
+  delete[] quantized;
+}
+
+template <typename Int>
+void test_cd_inversion_random(const mgard::pb::Header &header,
+                              const std::size_t N, const Int a, const Int b,
+                              std::default_random_engine &gen) {
+  std::uniform_int_distribution<Int> dis(a, b);
+  Int *const quantized = new Int[N];
+  std::generate(quantized, quantized + N, [&] { return dis(gen); });
+  test_cd_inversion(header, quantized, N);
+  delete[] quantized;
+}
+
+template <typename Int>
+mgard::pb::Quantization::Type type_to_quantization_type();
 
-  const std::size_t ndof = 5000;
-  std::int64_t *const quantized = new std::int64_t[ndof];
-  std::uniform_int_distribution<std::int64_t> dis(-500, 500);
-  std::default_random_engine gen(489063);
-  const auto f = [&]() -> std::int64_t { return dis(gen); };
-  std::generate(quantized, quantized + ndof, f);
-  const std::size_t quantizedLen = ndof * sizeof(*quantized);
-  // `dst` must have the correct alignment for the quantization type.
-  std::int64_t *const dst = new std::int64_t[ndof];
+template <>
+mgard::pb::Quantization::Type type_to_quantization_type<std::int8_t>() {
+  return mgard::pb::Quantization::INT8_T;
+}
+
+template <>
+mgard::pb::Quantization::Type type_to_quantization_type<std::int16_t>() {
+  return mgard::pb::Quantization::INT16_T;
+}
 
+template <>
+mgard::pb::Quantization::Type type_to_quantization_type<std::int32_t>() {
+  return mgard::pb::Quantization::INT32_T;
+}
+
+template <>
+mgard::pb::Quantization::Type type_to_quantization_type<std::int64_t>() {
+  return mgard::pb::Quantization::INT64_T;
+}
+
+template <typename Int>
+void test_cd_inversion_constant(const mgard::pb::Header &header) {
+  test_cd_inversion_constant<Int>(header, 100, 98);
+  test_cd_inversion_constant<Int>(header, 1000, 0);
+  test_cd_inversion_constant<Int>(header, 10000, -62);
+}
+
+template <typename Int>
+void test_cd_inversion_periodic(const mgard::pb::Header &header) {
+  test_cd_inversion_periodic<Int>(header, 100, -5, 3);
+  test_cd_inversion_periodic<Int>(header, 1000, 86, 60);
+  test_cd_inversion_periodic<Int>(header, 10000, 7, 62);
+}
+
+template <typename Int>
+void test_cd_inversion_random(const mgard::pb::Header &header) {
+  std::default_random_engine gen(894584);
+  test_cd_inversion_random<Int>(header, 100, 0, 3, gen);
+  test_cd_inversion_random<Int>(header, 1000, std::numeric_limits<Int>::min(),
+                                std::numeric_limits<Int>::max(), gen);
+  test_cd_inversion_random<Int>(header, 10000, -110, 110, gen);
+}
+
+template <>
+void test_cd_inversion_random<std::int64_t>(const mgard::pb::Header &header) {
+  std::default_random_engine gen(952426);
+  test_cd_inversion_random<std::int64_t>(header, 100, -1, 1, gen);
+  // In the deprecated Huffman encoding function, the missed symbols are cast
+  // from `long int` to `int`.
+  test_cd_inversion_random<std::int64_t>(header, 1000,
+                                         std::numeric_limits<int>::min(),
+                                         std::numeric_limits<int>::max(), gen);
+  test_cd_inversion_random<std::int64_t>(header, 10000, 0, 250, gen);
+}
+
+template <typename Int>
+void test_cd_inversion(const mgard::pb::Header &header) {
+  SECTION("constant data") { test_cd_inversion_constant<Int>(header); }
+  SECTION("periodic data") { test_cd_inversion_periodic<Int>(header); }
+  SECTION("random data") { test_cd_inversion_random<Int>(header); }
+}
+
+} // namespace
+
+TEMPLATE_TEST_CASE("`compress`/`decompress` inversion", "[compressors]",
+                   std::int8_t, std::int16_t, std::int32_t, std::int64_t) {
+  mgard::pb::Header header;
+  mgard::populate_defaults(header);
+  mgard::pb::Quantization &q = *header.mutable_quantization();
   mgard::pb::Encoding &e = *header.mutable_encoding();
-  SECTION("noop") {
-    e.set_compressor(mgard::pb::Encoding::NOOP_COMPRESSOR);
-
-    const std::size_t srcLen = quantizedLen;
-    unsigned char *const src = new unsigned char[srcLen];
-    {
-      unsigned char const *const p =
-          reinterpret_cast<unsigned char const *>(quantized);
-      std::copy(p, p + quantizedLen, src);
-    }
 
-    mgard::decompress(header, src, srcLen,
-                      reinterpret_cast<unsigned char *>(dst), quantizedLen);
-    delete[] src;
-    REQUIRE(std::equal(quantized, quantized + ndof, dst));
+  const mgard::pb::Quantization::Type qtype =
+      type_to_quantization_type<TestType>();
+  q.set_type(qtype);
+
+  SECTION("`CPU_ZLIB`") {
+    e.set_compressor(mgard::pb::Encoding::CPU_ZLIB);
+    test_cd_inversion<TestType>(header);
   }
 
-  SECTION("zlib") {
-    e.set_compressor(mgard::pb::Encoding::CPU_HUFFMAN_ZLIB);
+#ifdef MGARD_ZSTD
+  SECTION("`CPU_ZSTD`") {
+    e.set_compressor(mgard::pb::Encoding::CPU_ZSTD);
+    test_cd_inversion<TestType>(header);
+  }
+#endif
 
-    const mgard::MemoryBuffer<unsigned char> out =
-        mgard::compress_memory_z(quantized, quantizedLen);
+  // The deprecated Huffman serialization method requires the quantization type
+  // to be `std::int64_t`.
+  if (qtype == mgard::pb::Quantization::INT64_T) {
+    SECTION("`CPU_HUFFMAN_ZLIB` with `DEPRECATED`") {
+      e.set_compressor(mgard::pb::Encoding::CPU_HUFFMAN_ZLIB);
+      e.set_serialization(mgard::pb::Encoding::DEPRECATED);
+      test_cd_inversion<TestType>(header);
+    }
 
-    const std::size_t srcLen = out.size * sizeof(*out.data.get());
-    unsigned char *const src = new unsigned char[srcLen];
-    {
-      unsigned char const *const p = out.data.get();
-      std::copy(p, p + srcLen, src);
+#ifdef MGARD_ZSTD
+    SECTION("`CPU_HUFFMAN_ZSTD` with `DEPRECATED`") {
+      e.set_compressor(mgard::pb::Encoding::CPU_HUFFMAN_ZSTD);
+      e.set_serialization(mgard::pb::Encoding::DEPRECATED);
+      test_cd_inversion<TestType>(header);
+    }
-    mgard::decompress(header, src, srcLen,
-                      reinterpret_cast<unsigned char *>(dst), quantizedLen);
-    delete[] src;
-    REQUIRE(std::equal(quantized, quantized + ndof, dst));
+#endif
+  }
+
+  SECTION("`CPU_HUFFMAN_ZLIB` with `RFMH`") {
+    e.set_compressor(mgard::pb::Encoding::CPU_HUFFMAN_ZLIB);
+    e.set_serialization(mgard::pb::Encoding::RFMH);
+    test_cd_inversion<TestType>(header);
   }
 
 #ifdef MGARD_ZSTD
-  SECTION("zstd") {
+  SECTION("`CPU_HUFFMAN_ZSTD` with `RFMH`") {
     e.set_compressor(mgard::pb::Encoding::CPU_HUFFMAN_ZSTD);
-
-    std::int64_t *const quantized_ = new std::int64_t[ndof];
-    std::copy(quantized, quantized + ndof, quantized_);
-    const mgard::MemoryBuffer<unsigned char> out =
-        mgard::regression::compress_memory_huffman(quantized_, ndof);
-    delete[] quantized_;
-
-    const std::size_t srcLen = out.size;
-    unsigned char *const src = new unsigned char[srcLen];
-    {
-      unsigned char const *const p = out.data.get();
-      std::copy(p, p + srcLen, src);
-    }
-    mgard::decompress(header, src, srcLen,
-                      reinterpret_cast<unsigned char *>(dst), quantizedLen);
-    delete[] src;
-    REQUIRE(std::equal(quantized, quantized + ndof, dst));
+    e.set_serialization(mgard::pb::Encoding::RFMH);
+    test_cd_inversion<TestType>(header);
   }
 #endif
-
-  delete[] dst;
-  delete[] quantized;
 }
 
-TEST_CASE("compression and decompression with header", "[compressors]") {
-  mgard::pb::Header header;
-  // TODO: Once Huffman trees can be built for types other than `long int`, use
-  // something other than `std::int64_t` here.
-  mgard::populate_defaults(header);
-
-  const std::size_t ndof = 2500;
-  std::int64_t *const quantized = new std::int64_t[ndof];
-  std::uniform_int_distribution<std::int64_t> dis(-1000, 1000);
-  std::default_random_engine gen(995719);
-  const auto f = [&]() -> std::int64_t { return dis(gen); };
-  std::generate(quantized, quantized + ndof, f);
-  const std::size_t quantizedLen = ndof * sizeof(*quantized);
-  // `dst` must have the correct alignment for the quantization type.
-  std::int64_t *const dst = new std::int64_t[ndof];
-
-  std::int64_t *const quantized_ = new std::int64_t[ndof];
-  std::copy(quantized, quantized + ndof, quantized_);
-  const mgard::MemoryBuffer<unsigned char> compressed =
-      mgard::compress(header, quantized_, quantizedLen);
-  delete[] quantized_;
-
-  mgard::decompress(header, compressed.data.get(), compressed.size, dst,
-                    quantizedLen);
+// In the deprecated Huffman encoding function, the missed symbols are cast from
+// `long int` to `int`.
+TEST_CASE("deprecated Huffman inversion", "[compressors] [!shouldfail]") {
+  std::default_random_engine gen(257100);
+  const std::int64_t a =
+      2 * static_cast<std::int64_t>(std::numeric_limits<int>::min());
+  const std::int64_t b =
+      2 * static_cast<std::int64_t>(std::numeric_limits<int>::max());
+
+  SECTION("`CPU_HUFFMAN_ZLIB` with `DEPRECATED`") {
+    // Conceivably this could pass if all the generated `std::int64_t`s are
+    // representable as `int`s.
+    test_cd_inversion_random<std::int64_t>(
+        deprecated_header(mgard::pb::Encoding::CPU_HUFFMAN_ZLIB), 5000, a, b,
+        gen);
+  }
 
-  REQUIRE(std::equal(quantized, quantized + ndof, dst));
-  delete[] dst;
-  delete[] quantized;
+#ifdef MGARD_ZSTD
+  SECTION("`CPU_HUFFMAN_ZSTD` with `DEPRECATED`") {
+    // Conceivably this could pass if all the generated `std::int64_t`s are
+    // representable as `int`s.
+    test_cd_inversion_random<std::int64_t>(
+        deprecated_header(mgard::pb::Encoding::CPU_HUFFMAN_ZSTD), 5000, a, b,
+        gen);
+  }
+#endif
 }

From 309823d74dac5a3721967eb65fb41590ae7aa627 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Mon, 20 Jun 2022 11:11:14 -0400
Subject: [PATCH 46/58] Rename `compressors.hpp` to `lossless.hpp`.

---
 CMakeLists.txt                                               | 2 +-
 include/compress.tpp                                         | 2 +-
 include/compress_internal.tpp                                | 2 +-
 include/{compressors.hpp => lossless.hpp}                    | 4 ++--
 src/cuda/LosslessCompression.cu                              | 2 +-
 src/huffman.cpp                                              | 2 +-
 src/{compressors.cpp => lossless.cpp}                        | 2 +-
 tests/CMakeLists.txt                                         | 4 ++--
 .../{compressors_regression.hpp => lossless_regression.hpp}  | 0
 .../{compressors_regression.cpp => lossless_regression.cpp}  | 5 ++---
 tests/src/{test_compressors.cpp => test_lossless.cpp}        | 4 ++--
 11 files changed, 14 insertions(+), 15 deletions(-)
 rename include/{compressors.hpp => lossless.hpp} (98%)
 rename src/{compressors.cpp => lossless.cpp} (99%)
 rename tests/include/{compressors_regression.hpp => lossless_regression.hpp} (100%)
 rename tests/src/{compressors_regression.cpp => lossless_regression.cpp} (98%)
 rename tests/src/{test_compressors.cpp => test_lossless.cpp} (99%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c92cca399a..fe65fd743b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -203,7 +203,7 @@ set(
         src/compress_internal.cpp
   src/utilities.cpp
   src/huffman.cpp
-  src/compressors.cpp
+  src/lossless.cpp
   src/format.cpp
 )
 
diff --git a/include/compress.tpp b/include/compress.tpp
index 867b6cfa9f..5d22b1c25a 100644
--- a/include/compress.tpp
+++ b/include/compress.tpp
@@ -20,9 +20,9 @@
 #include "MGARDConfig.hpp"
 #include "TensorMultilevelCoefficientQuantizer.hpp"
 #include "TensorNorms.hpp"
-#include "compressors.hpp"
 #include "decompose.hpp"
 #include "format.hpp"
+#include "lossless.hpp"
 #include "quantize.hpp"
 #include "shuffle.hpp"
 
diff --git a/include/compress_internal.tpp b/include/compress_internal.tpp
index fd16dcd331..6aca8124af 100644
--- a/include/compress_internal.tpp
+++ b/include/compress_internal.tpp
@@ -1,8 +1,8 @@
 #include <cstdlib>
 
 #include "compress.hpp"
-#include "compressors.hpp"
 #include "decompose.hpp"
+#include "lossless.hpp"
 #include "quantize.hpp"
 #include "shuffle.hpp"
 
diff --git a/include/compressors.hpp b/include/lossless.hpp
similarity index 98%
rename from include/compressors.hpp
rename to include/lossless.hpp
index c946538b50..d24cee9a0b 100644
--- a/include/compressors.hpp
+++ b/include/lossless.hpp
@@ -1,5 +1,5 @@
-#ifndef COMPRESSORS_HPP
-#define COMPRESSORS_HPP
+#ifndef LOSSLESS_HPP
+#define LOSSLESS_HPP
 //!\file
 //!\brief Lossless compressors for quantized multilevel coefficients.
 
diff --git a/src/cuda/LosslessCompression.cu b/src/cuda/LosslessCompression.cu
index feb61ab2d4..072d6be598 100644
--- a/src/cuda/LosslessCompression.cu
+++ b/src/cuda/LosslessCompression.cu
@@ -5,7 +5,7 @@
  * Date: September 27, 2021
  */
 
-// #include "compressors.hpp"
+// #include "lossless.hpp"
 #include "cuda/Common.h"
 #include "cuda/CommonInternal.h"
 #include "cuda/LosslessCompression.h"
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 1ebb2fbded..4ffd4a05af 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -11,8 +11,8 @@
 #include <stdexcept>
 #include <vector>
 
-#include "compressors.hpp"
 #include "huffman.hpp"
+#include "lossless.hpp"
 
 namespace mgard {
 
diff --git a/src/compressors.cpp b/src/lossless.cpp
similarity index 99%
rename from src/compressors.cpp
rename to src/lossless.cpp
index a67279292f..3244e35eb2 100644
--- a/src/compressors.cpp
+++ b/src/lossless.cpp
@@ -1,4 +1,4 @@
-#include "compressors.hpp"
+#include "lossless.hpp"
 
 #include <cassert>
 #include <cmath>
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index f625d0a148..80747cefca 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -18,8 +18,8 @@ set(
 	"src/test_decompose.cpp"
 	"src/test_format.cpp"
 	"src/test_quantize.cpp"
-	"src/compressors_regression.cpp"
-	"src/test_compressors.cpp"
+	"src/lossless_regression.cpp"
+	"src/test_lossless.cpp"
 	"src/test_CompressedDataset.cpp"
 	"src/huffman_regression.cpp"
 	"src/test_huffman.cpp"
diff --git a/tests/include/compressors_regression.hpp b/tests/include/lossless_regression.hpp
similarity index 100%
rename from tests/include/compressors_regression.hpp
rename to tests/include/lossless_regression.hpp
diff --git a/tests/src/compressors_regression.cpp b/tests/src/lossless_regression.cpp
similarity index 98%
rename from tests/src/compressors_regression.cpp
rename to tests/src/lossless_regression.cpp
index a34a192004..e84725cfc3 100644
--- a/tests/src/compressors_regression.cpp
+++ b/tests/src/lossless_regression.cpp
@@ -1,12 +1,11 @@
-#include "compressors_regression.hpp"
+#include "lossless_regression.hpp"
 
 #include <climits>
 #include <cstring>
 
-#include "compressors.hpp"
-#include "compressors_regression.hpp"
 #include "huffman.hpp"
 #include "huffman_regression.hpp"
+#include "lossless.hpp"
 
 namespace mgard {
 
diff --git a/tests/src/test_compressors.cpp b/tests/src/test_lossless.cpp
similarity index 99%
rename from tests/src/test_compressors.cpp
rename to tests/src/test_lossless.cpp
index 9b7f28cc09..fa9198709f 100644
--- a/tests/src/test_compressors.cpp
+++ b/tests/src/test_lossless.cpp
@@ -7,9 +7,9 @@
 #include <limits>
 #include <random>
 
-#include "compressors.hpp"
-#include "compressors_regression.hpp"
 #include "format.hpp"
+#include "lossless.hpp"
+#include "lossless_regression.hpp"
 
 #include "testing_utilities.hpp"
 

From 0e6c563d181c4a25d7c65f9436680bd85a97e8e6 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Mon, 20 Jun 2022 11:34:21 -0400
Subject: [PATCH 47/58] Separate lossless compressor implementations.

---
 CMakeLists.txt                                |   6 +-
 src/{lossless.cpp => lossless_dispatcher.cpp} | 131 ------------------
 src/lossless_zlib.cpp                         |  88 ++++++++++++
 src/lossless_zstd.cpp                         |  60 ++++++++
 4 files changed, 153 insertions(+), 132 deletions(-)
 rename src/{lossless.cpp => lossless_dispatcher.cpp} (77%)
 create mode 100644 src/lossless_zlib.cpp
 create mode 100644 src/lossless_zstd.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fe65fd743b..03d1b09a68 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -203,9 +203,13 @@ set(
         src/compress_internal.cpp
   src/utilities.cpp
   src/huffman.cpp
-  src/lossless.cpp
+  src/lossless_zlib.cpp
+  src/lossless_dispatcher.cpp
   src/format.cpp
 )
+if(zstd_FOUND)
+	list(APPEND MGARD_LIBRARY_CPP src/lossless_zstd.cpp)
+endif()
 
 set(MAXIMUM_DIMENSION 4 CACHE STRING "Maximum supported dimension for self-describing decompression.")
 
diff --git a/src/lossless.cpp b/src/lossless_dispatcher.cpp
similarity index 77%
rename from src/lossless.cpp
rename to src/lossless_dispatcher.cpp
index 3244e35eb2..97e565dc25 100644
--- a/src/lossless.cpp
+++ b/src/lossless_dispatcher.cpp
@@ -11,143 +11,12 @@
 #include <utility>
 #include <vector>
 
-#include <zlib.h>
-
 #include "format.hpp"
 #include "huffman.hpp"
 #include "utilities.hpp"
 
-#ifdef MGARD_ZSTD
-#include <zstd.h>
-#endif
-
 namespace mgard {
 
-#ifdef MGARD_ZSTD
-/*! CHECK
- * Check that the condition holds. If it doesn't print a message and die.
- */
-#define CHECK(cond, ...)                                                       \
-  do {                                                                         \
-    if (!(cond)) {                                                             \
-      fprintf(stderr, "%s:%d CHECK(%s) failed: ", __FILE__, __LINE__, #cond);  \
-      fprintf(stderr, "" __VA_ARGS__);                                         \
-      fprintf(stderr, "\n");                                                   \
-      exit(1);                                                                 \
-    }                                                                          \
-  } while (0)
-
-/*! CHECK_ZSTD
- * Check the zstd error code and die if an error occurred after printing a
- * message.
- */
-#define CHECK_ZSTD(fn, ...)                                                    \
-  do {                                                                         \
-    size_t const err = (fn);                                                   \
-    CHECK(!ZSTD_isError(err), "%s", ZSTD_getErrorName(err));                   \
-  } while (0)
-
-MemoryBuffer<unsigned char> compress_memory_zstd(void const *const src,
-                                                 const std::size_t srcLen) {
-  const size_t cBuffSize = ZSTD_compressBound(srcLen);
-  unsigned char *const buffer = new unsigned char[cBuffSize];
-  const std::size_t cSize = ZSTD_compress(buffer, cBuffSize, src, srcLen, 1);
-  CHECK_ZSTD(cSize);
-  return MemoryBuffer<unsigned char>(buffer, cSize);
-}
-#endif
-
-MemoryBuffer<unsigned char> compress_memory_z(void z_const *const src,
-                                              const std::size_t srcLen) {
-  const std::size_t BUFSIZE = 2048 * 1024;
-  std::vector<Bytef *> buffers;
-  std::vector<std::size_t> bufferLengths;
-
-  z_stream strm;
-  strm.zalloc = Z_NULL;
-  strm.zfree = Z_NULL;
-  strm.next_in = static_cast<Bytef z_const *>(src);
-  strm.avail_in = srcLen;
-  buffers.push_back(strm.next_out = new Bytef[BUFSIZE]);
-  bufferLengths.push_back(strm.avail_out = BUFSIZE);
-
-  deflateInit(&strm, Z_BEST_COMPRESSION);
-
-  while (strm.avail_in != 0) {
-    [[maybe_unused]] const int res = deflate(&strm, Z_NO_FLUSH);
-    assert(res == Z_OK);
-    if (strm.avail_out == 0) {
-      buffers.push_back(strm.next_out = new Bytef[BUFSIZE]);
-      bufferLengths.push_back(strm.avail_out = BUFSIZE);
-    }
-  }
-
-  int res = Z_OK;
-  while (res == Z_OK) {
-    if (strm.avail_out == 0) {
-      buffers.push_back(strm.next_out = new Bytef[BUFSIZE]);
-      bufferLengths.push_back(strm.avail_out = BUFSIZE);
-    }
-    res = deflate(&strm, Z_FINISH);
-  }
-
-  assert(res == Z_STREAM_END);
-  bufferLengths.back() -= strm.avail_out;
-  // Could just do `nbuffers * BUFSIZE - strm.avail_out`.
-  const std::size_t bufferLen =
-      std::accumulate(bufferLengths.begin(), bufferLengths.end(), 0);
-  unsigned char *const buffer = new unsigned char[bufferLen];
-  {
-    const std::size_t nbuffers = buffers.size();
-    unsigned char *p = buffer;
-    for (std::size_t i = 0; i < nbuffers; ++i) {
-      unsigned char const *const buffer = buffers.at(i);
-      const std::size_t bufferLength = bufferLengths.at(i);
-      std::copy(buffer, buffer + bufferLength, p);
-      p += bufferLength;
-      delete[] buffer;
-    }
-  }
-  deflateEnd(&strm);
-
-  return MemoryBuffer<unsigned char>(buffer, bufferLen);
-}
-
-void decompress_memory_z(void z_const *const src, const std::size_t srcLen,
-                         unsigned char *const dst, const std::size_t dstLen) {
-  z_stream strm = {};
-  strm.total_in = strm.avail_in = srcLen;
-  strm.total_out = strm.avail_out = dstLen;
-  strm.next_in = static_cast<Bytef z_const *>(src);
-  strm.next_out = reinterpret_cast<Bytef *>(dst);
-
-  strm.zalloc = Z_NULL;
-  strm.zfree = Z_NULL;
-  strm.opaque = Z_NULL;
-
-  [[maybe_unused]] int res;
-  res = inflateInit2(&strm, (15 + 32)); // 15 window bits, and the +32 tells
-                                        // zlib to to detect if using gzip or
-                                        // zlib
-  assert(res == Z_OK);
-  res = inflate(&strm, Z_FINISH);
-  assert(res == Z_STREAM_END);
-  res = inflateEnd(&strm);
-  assert(res == Z_OK);
-}
-
-#ifdef MGARD_ZSTD
-void decompress_memory_zstd(void const *const src, const std::size_t srcLen,
-                            unsigned char *const dst,
-                            const std::size_t dstLen) {
-  size_t const dSize = ZSTD_decompress(dst, dstLen, src, srcLen);
-  CHECK_ZSTD(dSize);
-
-  /* When zstd knows the content size, it will error if it doesn't match. */
-  CHECK(dstLen == dSize, "Impossible because zstd will check this condition!");
-}
-#endif
-
 namespace {
 
 template <typename Int>
diff --git a/src/lossless_zlib.cpp b/src/lossless_zlib.cpp
new file mode 100644
index 0000000000..272cb6e31c
--- /dev/null
+++ b/src/lossless_zlib.cpp
@@ -0,0 +1,88 @@
+#include "lossless.hpp"
+
+#include <algorithm>
+#include <numeric>
+#include <vector>
+
+namespace mgard {
+
+MemoryBuffer<unsigned char> compress_memory_z(void z_const *const src,
+                                              const std::size_t srcLen) {
+  const std::size_t BUFSIZE = 2048 * 1024;
+  std::vector<Bytef *> buffers;
+  std::vector<std::size_t> bufferLengths;
+
+  z_stream strm;
+  strm.zalloc = Z_NULL;
+  strm.zfree = Z_NULL;
+  strm.next_in = static_cast<Bytef z_const *>(src);
+  strm.avail_in = srcLen;
+  buffers.push_back(strm.next_out = new Bytef[BUFSIZE]);
+  bufferLengths.push_back(strm.avail_out = BUFSIZE);
+
+  deflateInit(&strm, Z_BEST_COMPRESSION);
+
+  while (strm.avail_in != 0) {
+    [[maybe_unused]] const int res = deflate(&strm, Z_NO_FLUSH);
+    assert(res == Z_OK);
+    if (strm.avail_out == 0) {
+      buffers.push_back(strm.next_out = new Bytef[BUFSIZE]);
+      bufferLengths.push_back(strm.avail_out = BUFSIZE);
+    }
+  }
+
+  int res = Z_OK;
+  while (res == Z_OK) {
+    if (strm.avail_out == 0) {
+      buffers.push_back(strm.next_out = new Bytef[BUFSIZE]);
+      bufferLengths.push_back(strm.avail_out = BUFSIZE);
+    }
+    res = deflate(&strm, Z_FINISH);
+  }
+
+  assert(res == Z_STREAM_END);
+  bufferLengths.back() -= strm.avail_out;
+  // Seed with `std::size_t`: an `int` seed would sum (and overflow) in `int`.
+  const std::size_t bufferLen = std::accumulate(
+      bufferLengths.begin(), bufferLengths.end(), std::size_t{0});
+  unsigned char *const buffer = new unsigned char[bufferLen];
+  {
+    const std::size_t nbuffers = buffers.size();
+    unsigned char *p = buffer;
+    for (std::size_t i = 0; i < nbuffers; ++i) {
+      unsigned char const *const buffer = buffers.at(i);
+      const std::size_t bufferLength = bufferLengths.at(i);
+      std::copy(buffer, buffer + bufferLength, p);
+      p += bufferLength;
+      delete[] buffer;
+    }
+  }
+  deflateEnd(&strm);
+
+  return MemoryBuffer<unsigned char>(buffer, bufferLen);
+}
+
+void decompress_memory_z(void z_const *const src, const std::size_t srcLen,
+                         unsigned char *const dst, const std::size_t dstLen) {
+  z_stream strm = {};
+  strm.total_in = strm.avail_in = srcLen;
+  strm.total_out = strm.avail_out = dstLen;
+  strm.next_in = static_cast<Bytef z_const *>(src);
+  strm.next_out = reinterpret_cast<Bytef *>(dst);
+
+  strm.zalloc = Z_NULL;
+  strm.zfree = Z_NULL;
+  strm.opaque = Z_NULL;
+
+  [[maybe_unused]] int res;
+  res = inflateInit2(&strm, (15 + 32)); // 15 window bits, and the +32 tells
+                                        // zlib to detect whether the input
+                                        // is gzip or zlib
+  assert(res == Z_OK);
+  res = inflate(&strm, Z_FINISH);
+  assert(res == Z_STREAM_END);
+  res = inflateEnd(&strm);
+  assert(res == Z_OK);
+}
+
+} // namespace mgard
diff --git a/src/lossless_zstd.cpp b/src/lossless_zstd.cpp
new file mode 100644
index 0000000000..4b7fc4bf28
--- /dev/null
+++ b/src/lossless_zstd.cpp
@@ -0,0 +1,60 @@
+#include "lossless.hpp"
+
+#include <cstdio>
+#include <cstdlib>
+
+#ifndef MGARD_ZSTD
+#error "This file requires ZSTD."
+#endif
+
+#include <zstd.h>
+
+namespace mgard {
+
+/*! CHECK
+ * Check that `cond` holds; if not, print a message to stderr and `exit(1)`.
+ */
+#define CHECK(cond, ...)                                                       \
+  do {                                                                         \
+    if (!(cond)) {                                                             \
+      std::fprintf(stderr, "%s:%d CHECK(%s) failed: ", __FILE__, __LINE__,     \
+                   #cond);                                                     \
+      std::fprintf(stderr, "" __VA_ARGS__);                                    \
+      std::fprintf(stderr, "\n");                                              \
+      std::exit(1);                                                            \
+    }                                                                          \
+  } while (0)
+
+/*! CHECK_ZSTD
+ * Check the zstd error code and die if an error occurred after printing a
+ * message.
+ */
+#define CHECK_ZSTD(fn, ...)                                                    \
+  do {                                                                         \
+    std::size_t const err = (fn);                                              \
+    CHECK(!ZSTD_isError(err), "%s", ZSTD_getErrorName(err));                   \
+  } while (0)
+
+MemoryBuffer<unsigned char> compress_memory_zstd(void const *const src,
+                                                 const std::size_t srcLen) {
+  const std::size_t cBuffSize = ZSTD_compressBound(srcLen);
+  unsigned char *const buffer = new unsigned char[cBuffSize];
+  const std::size_t cSize = ZSTD_compress(buffer, cBuffSize, src, srcLen, 1);
+  CHECK_ZSTD(cSize);
+  return MemoryBuffer<unsigned char>(buffer, cSize);
+}
+
+void decompress_memory_zstd(void const *const src, const std::size_t srcLen,
+                            unsigned char *const dst,
+                            const std::size_t dstLen) {
+  std::size_t const dSize = ZSTD_decompress(dst, dstLen, src, srcLen);
+  CHECK_ZSTD(dSize);
+
+  /* When zstd knows the content size, it will error if it doesn't match. */
+  CHECK(dstLen == dSize, "Impossible because zstd will check this condition!");
+}
+
+#undef CHECK_ZSTD
+#undef CHECK
+
+} // namespace mgard

From b697831bb3a3306f03dc6723ae7242ae82b7a467 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Mon, 20 Jun 2022 11:46:19 -0400
Subject: [PATCH 48/58] Contain `z_const` casts to `lossless_zlib.cpp`.

---
 include/lossless.hpp              | 14 +++----
 src/huffman.cpp                   |  6 +--
 src/lossless_dispatcher.cpp       | 62 +++++++++++++++----------------
 src/lossless_zlib.cpp             | 10 +++--
 tests/src/lossless_regression.cpp |  5 +--
 5 files changed, 46 insertions(+), 51 deletions(-)

diff --git a/include/lossless.hpp b/include/lossless.hpp
index d24cee9a0b..3f5a4f3fb8 100644
--- a/include/lossless.hpp
+++ b/include/lossless.hpp
@@ -5,11 +5,6 @@
 
 #include <cstddef>
 
-// For `z_const`.
-#include <zlib.h>
-
-#include <memory>
-
 #include "proto/mgard.pb.h"
 
 #include "utilities.hpp"
@@ -38,7 +33,7 @@ void decompress_memory_zstd(void const *const src, const std::size_t srcLen,
 //!
 //!\param src Array to be compressed.
 //!\param srcLen Size in bytes of the array to be compressed.
-MemoryBuffer<unsigned char> compress_memory_z(void z_const *const src,
+MemoryBuffer<unsigned char> compress_memory_z(void const *const src,
                                               const std::size_t srcLen);
 
 //! Decompress an array with `compress_memory_z`.
@@ -47,7 +42,7 @@ MemoryBuffer<unsigned char> compress_memory_z(void z_const *const src,
 //!\param srcLen Size in bytes of the compressed array data
 //!\param dst Decompressed array.
 //!\param dstLen Size in bytes of the decompressed array.
-void decompress_memory_z(void z_const *const src, const std::size_t srcLen,
+void decompress_memory_z(void const *const src, const std::size_t srcLen,
                          unsigned char *const dst, const std::size_t dstLen);
 
 //! Compress an array of quantized multilevel coefficients.
@@ -57,7 +52,8 @@ void decompress_memory_z(void z_const *const src, const std::size_t srcLen,
 //!\param[in] header Header for the self-describing buffer.
 //!\param[in] src Array of quantized multilevel coefficients.
 //!\param[in] srcLen Size in bytes of the input array.
-MemoryBuffer<unsigned char> compress(const pb::Header &header, void *const src,
+MemoryBuffer<unsigned char> compress(const pb::Header &header,
+                                     void const *const src,
                                      const std::size_t srcLen);
 
 //! Decompress an array of quantized multilevel coefficients.
@@ -69,7 +65,7 @@ MemoryBuffer<unsigned char> compress(const pb::Header &header, void *const src,
 //!\param[in] srcLen Size in bytes of the compressed array.
 //!\param[out] dst Decompressed array.
 //!\param[in] dstLen Size in bytes of the decompressed array.
-void decompress(const pb::Header &header, void *const src,
+void decompress(const pb::Header &header, void const *const src,
                 const std::size_t srcLen, void *const dst,
                 const std::size_t dstLen);
 
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 4ffd4a05af..620da655bc 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -66,8 +66,7 @@ compress_serialized_huffman(const pb::Header &header,
                             const MemoryBuffer<unsigned char> &payload) {
   switch (header.encoding().compressor()) {
   case pb::Encoding::CPU_HUFFMAN_ZLIB:
-    return compress_memory_z(
-        const_cast<unsigned char z_const *>(payload.data.get()), payload.size);
+    return compress_memory_z(payload.data.get(), payload.size);
   case pb::Encoding::CPU_HUFFMAN_ZSTD:
 #ifdef MGARD_ZSTD
     return compress_memory_zstd(payload.data.get(), payload.size);
@@ -147,8 +146,7 @@ HuffmanEncodedStream decompress_deserialize(const pb::Header &header,
 
     switch (header.encoding().compressor()) {
     case pb::Encoding::CPU_HUFFMAN_ZLIB:
-      decompress_memory_z(const_cast<unsigned char z_const *>(src_), srcLen_,
-                          dst_, dstLen_);
+      decompress_memory_z(src_, srcLen_, dst_, dstLen_);
       break;
     case pb::Encoding::CPU_HUFFMAN_ZSTD:
 #ifdef MGARD_ZSTD
diff --git a/src/lossless_dispatcher.cpp b/src/lossless_dispatcher.cpp
index 97e565dc25..33dafb58b2 100644
--- a/src/lossless_dispatcher.cpp
+++ b/src/lossless_dispatcher.cpp
@@ -32,7 +32,7 @@ MemoryBuffer<unsigned char> compress_huffman_C_rfmh_(const pb::Header &header,
 
 // `C` being either ZSTD or `zlib`.
 MemoryBuffer<unsigned char> compress_huffman_C_rfmh(const pb::Header &header,
-                                                    void *const src,
+                                                    void const *const src,
                                                     const std::size_t srcLen) {
   assert(header.encoding().serialization() == pb::Encoding::RFMH);
 
@@ -51,7 +51,7 @@ MemoryBuffer<unsigned char> compress_huffman_C_rfmh(const pb::Header &header,
 }
 
 MemoryBuffer<unsigned char>
-compress_huffman_C_deprecated(const pb::Header &header, void *const src,
+compress_huffman_C_deprecated(const pb::Header &header, void const *const src,
                               const std::size_t srcLen) {
   check_quantization_buffer(header, src, srcLen);
 
@@ -75,18 +75,16 @@ compress_huffman_C_deprecated(const pb::Header &header, void *const src,
                                srcLen / sizeof(long int)));
 }
 
-MemoryBuffer<unsigned char>
-compress_huffman_zlib_deprecated(const pb::Header &header, void *const src,
-                                 const std::size_t srcLen) {
+MemoryBuffer<unsigned char> compress_huffman_zlib_deprecated(
+    const pb::Header &header, void const *const src, const std::size_t srcLen) {
   assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZLIB);
 
   return compress_huffman_C_deprecated(header, src, srcLen);
 }
 
 #ifdef MGARD_ZSTD
-MemoryBuffer<unsigned char>
-compress_huffman_zstd_deprecated(const pb::Header &header, void *const src,
-                                 const std::size_t srcLen) {
+MemoryBuffer<unsigned char> compress_huffman_zstd_deprecated(
+    const pb::Header &header, void const *const src, const std::size_t srcLen) {
   assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZSTD);
 
   return compress_huffman_C_deprecated(header, src, srcLen);
@@ -118,7 +116,7 @@ MemoryBuffer<unsigned char> concatenate_nhuffman_and_compressed(
 } // namespace
 
 MemoryBuffer<unsigned char>
-compress_huffman_zlib_rfmh(const pb::Header &header, void *const src,
+compress_huffman_zlib_rfmh(const pb::Header &header, void const *const src,
                            const std::size_t srcLen) {
   assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZLIB);
   assert(header.encoding().serialization() == pb::Encoding::RFMH);
@@ -132,7 +130,7 @@ compress_huffman_zlib_rfmh(const pb::Header &header, void *const src,
 
 #ifdef MGARD_ZSTD
 MemoryBuffer<unsigned char>
-compress_huffman_zstd_rfmh(const pb::Header &header, void *const src,
+compress_huffman_zstd_rfmh(const pb::Header &header, void const *const src,
                            const std::size_t srcLen) {
   assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZSTD);
   assert(header.encoding().serialization() == pb::Encoding::RFMH);
@@ -145,7 +143,7 @@ compress_huffman_zstd_rfmh(const pb::Header &header, void *const src,
 #endif
 
 MemoryBuffer<unsigned char> compress_huffman_zlib(const pb::Header &header,
-                                                  void *const src,
+                                                  void const *const src,
                                                   const std::size_t srcLen) {
   assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZLIB);
 
@@ -161,7 +159,7 @@ MemoryBuffer<unsigned char> compress_huffman_zlib(const pb::Header &header,
 
 #ifdef MGARD_ZSTD
 MemoryBuffer<unsigned char> compress_huffman_zstd(const pb::Header &header,
-                                                  void *const src,
+                                                  void const *const src,
                                                   const std::size_t srcLen) {
   assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZSTD);
 
@@ -178,7 +176,8 @@ MemoryBuffer<unsigned char> compress_huffman_zstd(const pb::Header &header,
 
 } // namespace
 
-MemoryBuffer<unsigned char> compress(const pb::Header &header, void *const src,
+MemoryBuffer<unsigned char> compress(const pb::Header &header,
+                                     void const *const src,
                                      const std::size_t srcLen) {
   switch (header.encoding().compressor()) {
   case pb::Encoding::CPU_ZLIB:
@@ -202,8 +201,8 @@ MemoryBuffer<unsigned char> compress(const pb::Header &header, void *const src,
   }
 }
 
-void decompress_noop(void *const src, const std::size_t srcLen, void *const dst,
-                     const std::size_t dstLen) {
+void decompress_noop(void const *const src, const std::size_t srcLen,
+                     void *const dst, const std::size_t dstLen) {
   if (srcLen != dstLen) {
     throw std::invalid_argument("source and destination lengths must be equal");
   }
@@ -257,7 +256,8 @@ void decompress_huffman_C_rfmh(const pb::Header &header,
   }
 }
 
-void decompress_huffman_C_deprecated(const pb::Header &header, void *const src,
+void decompress_huffman_C_deprecated(const pb::Header &header,
+                                     void const *const src,
                                      const std::size_t srcLen, void *const dst,
                                      const std::size_t dstLen) {
   check_quantization_buffer(header, dst, dstLen);
@@ -291,7 +291,7 @@ void decompress_huffman_C_deprecated(const pb::Header &header, void *const src,
 }
 
 void decompress_huffman_zlib_deprecated(const pb::Header &header,
-                                        void *const src,
+                                        void const *const src,
                                         const std::size_t srcLen,
                                         void *const dst,
                                         const std::size_t dstLen) {
@@ -302,7 +302,7 @@ void decompress_huffman_zlib_deprecated(const pb::Header &header,
 
 #ifdef MGARD_ZSTD
 void decompress_huffman_zstd_deprecated(const pb::Header &header,
-                                        void *const src,
+                                        void const *const src,
                                         const std::size_t srcLen,
                                         void *const dst,
                                         const std::size_t dstLen) {
@@ -312,7 +312,8 @@ void decompress_huffman_zstd_deprecated(const pb::Header &header,
 }
 #endif
 
-void decompress_huffman_zlib_rfmh(const pb::Header &header, void *const src,
+void decompress_huffman_zlib_rfmh(const pb::Header &header,
+                                  void const *const src,
                                   const std::size_t srcLen, void *const dst,
                                   const std::size_t dstLen) {
   assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZLIB);
@@ -321,15 +322,15 @@ void decompress_huffman_zlib_rfmh(const pb::Header &header, void *const src,
   BufferWindow window(src, srcLen);
   // Read theSsze in bytes of the serialized Huffman tree.
   MemoryBuffer<unsigned char> encoded(read_header_size(window));
-  decompress_memory_z(const_cast<unsigned char z_const *>(window.current),
-                      window.end - window.current, encoded.data.get(),
-                      encoded.size);
+  decompress_memory_z(window.current, window.end - window.current,
+                      encoded.data.get(), encoded.size);
 
   return decompress_huffman_C_rfmh(header, encoded, dst, dstLen);
 }
 
 #ifdef MGARD_ZSTD
-void decompress_huffman_zstd_rfmh(const pb::Header &header, void *const src,
+void decompress_huffman_zstd_rfmh(const pb::Header &header,
+                                  void const *const src,
                                   const std::size_t srcLen, void *const dst,
                                   const std::size_t dstLen) {
   assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZSTD);
@@ -338,15 +339,14 @@ void decompress_huffman_zstd_rfmh(const pb::Header &header, void *const src,
   BufferWindow window(src, srcLen);
   // Read the size in bytes of the serialized Huffman tree.
   MemoryBuffer<unsigned char> encoded(read_header_size(window));
-  decompress_memory_zstd(const_cast<unsigned char z_const *>(window.current),
-                         window.end - window.current, encoded.data.get(),
-                         encoded.size);
+  decompress_memory_zstd(window.current, window.end - window.current,
+                         encoded.data.get(), encoded.size);
 
   return decompress_huffman_C_rfmh(header, encoded, dst, dstLen);
 }
 #endif
 
-void decompress_huffman_zlib(const pb::Header &header, void *const src,
+void decompress_huffman_zlib(const pb::Header &header, void const *const src,
                              const std::size_t srcLen, void *const dst,
                              const std::size_t dstLen) {
   assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZLIB);
@@ -362,7 +362,7 @@ void decompress_huffman_zlib(const pb::Header &header, void *const src,
 }
 
 #ifdef MGARD_ZSTD
-void decompress_huffman_zstd(const pb::Header &header, void *const src,
+void decompress_huffman_zstd(const pb::Header &header, void const *const src,
                              const std::size_t srcLen, void *const dst,
                              const std::size_t dstLen) {
   assert(header.encoding().compressor() == pb::Encoding::CPU_HUFFMAN_ZSTD);
@@ -380,13 +380,13 @@ void decompress_huffman_zstd(const pb::Header &header, void *const src,
 
 } // namespace
 
-void decompress(const pb::Header &header, void *const src,
+void decompress(const pb::Header &header, void const *const src,
                 const std::size_t srcLen, void *const dst,
                 const std::size_t dstLen) {
   switch (header.encoding().compressor()) {
   case pb::Encoding::CPU_ZLIB:
-    return decompress_memory_z(const_cast<void z_const *>(src), srcLen,
-                               static_cast<unsigned char *>(dst), dstLen);
+    return decompress_memory_z(src, srcLen, static_cast<unsigned char *>(dst),
+                               dstLen);
   case pb::Encoding::CPU_ZSTD:
 #ifdef MGARD_ZSTD
     return decompress_memory_zstd(
diff --git a/src/lossless_zlib.cpp b/src/lossless_zlib.cpp
index 272cb6e31c..9bb30c0b77 100644
--- a/src/lossless_zlib.cpp
+++ b/src/lossless_zlib.cpp
@@ -4,9 +4,11 @@
 #include <numeric>
 #include <vector>
 
+#include <zlib.h>
+
 namespace mgard {
 
-MemoryBuffer<unsigned char> compress_memory_z(void z_const *const src,
+MemoryBuffer<unsigned char> compress_memory_z(void const *const src,
                                               const std::size_t srcLen) {
   const std::size_t BUFSIZE = 2048 * 1024;
   std::vector<Bytef *> buffers;
@@ -15,7 +17,7 @@ MemoryBuffer<unsigned char> compress_memory_z(void z_const *const src,
   z_stream strm;
   strm.zalloc = Z_NULL;
   strm.zfree = Z_NULL;
-  strm.next_in = static_cast<Bytef z_const *>(src);
+  strm.next_in = static_cast<Bytef z_const *>(const_cast<void z_const *>(src));
   strm.avail_in = srcLen;
   buffers.push_back(strm.next_out = new Bytef[BUFSIZE]);
   bufferLengths.push_back(strm.avail_out = BUFSIZE);
@@ -62,12 +64,12 @@ MemoryBuffer<unsigned char> compress_memory_z(void z_const *const src,
   return MemoryBuffer<unsigned char>(buffer, bufferLen);
 }
 
-void decompress_memory_z(void z_const *const src, const std::size_t srcLen,
+void decompress_memory_z(void const *const src, const std::size_t srcLen,
                          unsigned char *const dst, const std::size_t dstLen) {
   z_stream strm = {};
   strm.total_in = strm.avail_in = srcLen;
   strm.total_out = strm.avail_out = dstLen;
-  strm.next_in = static_cast<Bytef z_const *>(src);
+  strm.next_in = static_cast<Bytef z_const *>(const_cast<void z_const *>(src));
   strm.next_out = reinterpret_cast<Bytef *>(dst);
 
   strm.zalloc = Z_NULL;
diff --git a/tests/src/lossless_regression.cpp b/tests/src/lossless_regression.cpp
index e84725cfc3..4320bee17a 100644
--- a/tests/src/lossless_regression.cpp
+++ b/tests/src/lossless_regression.cpp
@@ -32,7 +32,7 @@ MemoryBuffer<unsigned char> compress_serialized(const pb::Header &header,
 
   switch (header.encoding().compressor()) {
   case pb::Encoding::CPU_HUFFMAN_ZLIB:
-    return compress_memory_z(const_cast<unsigned char z_const *>(p), n);
+    return compress_memory_z(p, n);
   case pb::Encoding::CPU_HUFFMAN_ZSTD:
 #ifdef MGARD_ZSTD
     return compress_memory_zstd(p, n);
@@ -132,8 +132,7 @@ void decompress_memory_huffman(const pb::Header &header,
 
     switch (header.encoding().compressor()) {
     case pb::Encoding::CPU_HUFFMAN_ZLIB:
-      decompress_memory_z(const_cast<unsigned char z_const *>(src_), srcLen_,
-                          dst_, dstLen_);
+      decompress_memory_z(src_, srcLen_, dst_, dstLen_);
       break;
     case pb::Encoding::CPU_HUFFMAN_ZSTD:
 #ifdef MGARD_ZSTD

From 0848131b9825095498d6b11ab7016e84eb7a4ec9 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Mon, 20 Jun 2022 12:16:55 -0400
Subject: [PATCH 49/58] Rename lossless compression functions.

---
 include/lossless.hpp              | 20 ++++++++++----------
 src/cuda/LosslessCompression.cu   |  4 ++--
 src/huffman.cpp                   |  8 ++++----
 src/lossless_dispatcher.cpp       | 28 ++++++++++++++--------------
 src/lossless_zlib.cpp             |  8 ++++----
 src/lossless_zstd.cpp             |  9 ++++-----
 tests/src/lossless_regression.cpp |  8 ++++----
 tests/src/test_lossless.cpp       |  8 ++++----
 8 files changed, 46 insertions(+), 47 deletions(-)

diff --git a/include/lossless.hpp b/include/lossless.hpp
index 3f5a4f3fb8..b3cafe1175 100644
--- a/include/lossless.hpp
+++ b/include/lossless.hpp
@@ -16,34 +16,34 @@ namespace mgard {
 //!
 //!\param[in] src Array to be compressed.
 //!\param[in] srcLen Size in bytes of the array to be compressed.
-MemoryBuffer<unsigned char> compress_memory_zstd(void const *const src,
-                                                 const std::size_t srcLen);
+MemoryBuffer<unsigned char> compress_zstd(void const *const src,
+                                          const std::size_t srcLen);
 
-//! Decompress an array compressed with `compress_memory_zstd`.
+//! Decompress an array compressed with `compress_zstd`.
 //!
 //!\param[in] src Compressed array.
 //!\param[in] srcLen Size in bytes of the compressed array.
 //!\param[out] dst Decompressed array.
 //!\param[in] dstLen Size in bytes of the decompressed array.
-void decompress_memory_zstd(void const *const src, const std::size_t srcLen,
-                            unsigned char *const dst, const std::size_t dstLen);
+void decompress_zstd(void const *const src, const std::size_t srcLen,
+                     unsigned char *const dst, const std::size_t dstLen);
 #endif
 
 //! Compress an array using `zlib`.
 //!
 //!\param src Array to be compressed.
 //!\param srcLen Size in bytes of the array to be compressed.
-MemoryBuffer<unsigned char> compress_memory_z(void const *const src,
-                                              const std::size_t srcLen);
+MemoryBuffer<unsigned char> compress_zlib(void const *const src,
+                                          const std::size_t srcLen);
 
-//! Decompress an array with `compress_memory_z`.
+//! Decompress an array compressed with `compress_zlib`.
 //!
 //!\param src Compressed array.
 //!\param srcLen Size in bytes of the compressed array data
 //!\param dst Decompressed array.
 //!\param dstLen Size in bytes of the decompressed array.
-void decompress_memory_z(void const *const src, const std::size_t srcLen,
-                         unsigned char *const dst, const std::size_t dstLen);
+void decompress_zlib(void const *const src, const std::size_t srcLen,
+                     unsigned char *const dst, const std::size_t dstLen);
 
 //! Compress an array of quantized multilevel coefficients.
 //!
diff --git a/src/cuda/LosslessCompression.cu b/src/cuda/LosslessCompression.cu
index 072d6be598..3d32d660f7 100644
--- a/src/cuda/LosslessCompression.cu
+++ b/src/cuda/LosslessCompression.cu
@@ -90,7 +90,7 @@ unsigned char *compress_memory_huffman(long int *const src,
   free(out_data_miss);
 
   // const MemoryBuffer<unsigned char> out_data =
-  //     compress_memory_zstd(payload, total_size);
+  //     compress_zstd(payload, total_size);
 
   const size_t cBuffSize = ZSTD_compressBound(total_size);
   unsigned char *const zstd_buffer = new unsigned char[cBuffSize];
@@ -148,7 +148,7 @@ void decompress_memory_huffman(unsigned char *const src,
       out_tree_size + out_data_hit_size / 8 + 4 + out_data_miss_size;
   unsigned char *huffman_encoding_p =
       (unsigned char *)malloc(total_huffman_size);
-  // decompress_memory_zstd(buf, srcLen - 3 * sizeof(size_t),
+  // decompress_zstd(buf, srcLen - 3 * sizeof(size_t),
   // huffman_encoding_p,
   //                        total_huffman_size);
 
diff --git a/src/huffman.cpp b/src/huffman.cpp
index 620da655bc..a9f9fbca1e 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -66,10 +66,10 @@ compress_serialized_huffman(const pb::Header &header,
                             const MemoryBuffer<unsigned char> &payload) {
   switch (header.encoding().compressor()) {
   case pb::Encoding::CPU_HUFFMAN_ZLIB:
-    return compress_memory_z(payload.data.get(), payload.size);
+    return compress_zlib(payload.data.get(), payload.size);
   case pb::Encoding::CPU_HUFFMAN_ZSTD:
 #ifdef MGARD_ZSTD
-    return compress_memory_zstd(payload.data.get(), payload.size);
+    return compress_zstd(payload.data.get(), payload.size);
 #else
     throw std::runtime_error("MGARD compiled without ZSTD support");
 #endif
@@ -146,11 +146,11 @@ HuffmanEncodedStream decompress_deserialize(const pb::Header &header,
 
     switch (header.encoding().compressor()) {
     case pb::Encoding::CPU_HUFFMAN_ZLIB:
-      decompress_memory_z(src_, srcLen_, dst_, dstLen_);
+      decompress_zlib(src_, srcLen_, dst_, dstLen_);
       break;
     case pb::Encoding::CPU_HUFFMAN_ZSTD:
 #ifdef MGARD_ZSTD
-      decompress_memory_zstd(src_, srcLen_, dst_, dstLen_);
+      decompress_zstd(src_, srcLen_, dst_, dstLen_);
       break;
 #else
       throw std::runtime_error("MGARD compiled without ZSTD support");
diff --git a/src/lossless_dispatcher.cpp b/src/lossless_dispatcher.cpp
index 33dafb58b2..175e7495ba 100644
--- a/src/lossless_dispatcher.cpp
+++ b/src/lossless_dispatcher.cpp
@@ -93,8 +93,8 @@ MemoryBuffer<unsigned char> compress_huffman_zstd_deprecated(
 
 namespace {
 
-// `decompress_memory_z` and `decompress_memory_zstd` need to know the size of
-// the decompressed buffer before they can decompress. So, in addition to the
+// `decompress_zlib` and `decompress_zstd` need to know the size of the
+// decompressed buffer before they can decompress. So, in addition to the
 // compressed serialized Huffman tree (`compressed`), we need to store the size
 // in bytes of the serialized Huffman tree (`nhuffman`).
 MemoryBuffer<unsigned char> concatenate_nhuffman_and_compressed(
@@ -124,7 +124,7 @@ compress_huffman_zlib_rfmh(const pb::Header &header, void const *const src,
   const MemoryBuffer<unsigned char> encoded =
       compress_huffman_C_rfmh(header, src, srcLen);
   const MemoryBuffer<unsigned char> compressed =
-      compress_memory_z(encoded.data.get(), encoded.size);
+      compress_zlib(encoded.data.get(), encoded.size);
   return concatenate_nhuffman_and_compressed(encoded.size, compressed);
 }
 
@@ -138,7 +138,7 @@ compress_huffman_zstd_rfmh(const pb::Header &header, void const *const src,
   const MemoryBuffer<unsigned char> encoded =
       compress_huffman_C_rfmh(header, src, srcLen);
   return concatenate_nhuffman_and_compressed(
-      encoded.size, compress_memory_zstd(encoded.data.get(), encoded.size));
+      encoded.size, compress_zstd(encoded.data.get(), encoded.size));
 }
 #endif
 
@@ -181,10 +181,10 @@ MemoryBuffer<unsigned char> compress(const pb::Header &header,
                                      const std::size_t srcLen) {
   switch (header.encoding().compressor()) {
   case pb::Encoding::CPU_ZLIB:
-    return compress_memory_z(src, srcLen);
+    return compress_zlib(src, srcLen);
   case pb::Encoding::CPU_ZSTD:
 #ifdef MGARD_ZSTD
-    return compress_memory_zstd(src, srcLen);
+    return compress_zstd(src, srcLen);
 #else
     throw std::runtime_error("MGARD compiled without ZSTD support");
 #endif
@@ -322,8 +322,8 @@ void decompress_huffman_zlib_rfmh(const pb::Header &header,
   BufferWindow window(src, srcLen);
   // Read theSsze in bytes of the serialized Huffman tree.
   MemoryBuffer<unsigned char> encoded(read_header_size(window));
-  decompress_memory_z(window.current, window.end - window.current,
-                      encoded.data.get(), encoded.size);
+  decompress_zlib(window.current, window.end - window.current,
+                  encoded.data.get(), encoded.size);
 
   return decompress_huffman_C_rfmh(header, encoded, dst, dstLen);
 }
@@ -339,8 +339,8 @@ void decompress_huffman_zstd_rfmh(const pb::Header &header,
   BufferWindow window(src, srcLen);
   // Read the size in bytes of the serialized Huffman tree.
   MemoryBuffer<unsigned char> encoded(read_header_size(window));
-  decompress_memory_zstd(window.current, window.end - window.current,
-                         encoded.data.get(), encoded.size);
+  decompress_zstd(window.current, window.end - window.current,
+                  encoded.data.get(), encoded.size);
 
   return decompress_huffman_C_rfmh(header, encoded, dst, dstLen);
 }
@@ -385,12 +385,12 @@ void decompress(const pb::Header &header, void const *const src,
                 const std::size_t dstLen) {
   switch (header.encoding().compressor()) {
   case pb::Encoding::CPU_ZLIB:
-    return decompress_memory_z(src, srcLen, static_cast<unsigned char *>(dst),
-                               dstLen);
+    return decompress_zlib(src, srcLen, static_cast<unsigned char *>(dst),
+                           dstLen);
   case pb::Encoding::CPU_ZSTD:
 #ifdef MGARD_ZSTD
-    return decompress_memory_zstd(
-        src, srcLen, reinterpret_cast<unsigned char *>(dst), dstLen);
+    return decompress_zstd(src, srcLen, reinterpret_cast<unsigned char *>(dst),
+                           dstLen);
 #else
     throw std::runtime_error("MGARD compiled without ZSTD support");
 #endif
diff --git a/src/lossless_zlib.cpp b/src/lossless_zlib.cpp
index 9bb30c0b77..b41ac643ac 100644
--- a/src/lossless_zlib.cpp
+++ b/src/lossless_zlib.cpp
@@ -8,8 +8,8 @@
 
 namespace mgard {
 
-MemoryBuffer<unsigned char> compress_memory_z(void const *const src,
-                                              const std::size_t srcLen) {
+MemoryBuffer<unsigned char> compress_zlib(void const *const src,
+                                          const std::size_t srcLen) {
   const std::size_t BUFSIZE = 2048 * 1024;
   std::vector<Bytef *> buffers;
   std::vector<std::size_t> bufferLengths;
@@ -64,8 +64,8 @@ MemoryBuffer<unsigned char> compress_memory_z(void const *const src,
   return MemoryBuffer<unsigned char>(buffer, bufferLen);
 }
 
-void decompress_memory_z(void const *const src, const std::size_t srcLen,
-                         unsigned char *const dst, const std::size_t dstLen) {
+void decompress_zlib(void const *const src, const std::size_t srcLen,
+                     unsigned char *const dst, const std::size_t dstLen) {
   z_stream strm = {};
   strm.total_in = strm.avail_in = srcLen;
   strm.total_out = strm.avail_out = dstLen;
diff --git a/src/lossless_zstd.cpp b/src/lossless_zstd.cpp
index 4b7fc4bf28..749c5794a4 100644
--- a/src/lossless_zstd.cpp
+++ b/src/lossless_zstd.cpp
@@ -35,8 +35,8 @@ namespace mgard {
     CHECK(!ZSTD_isError(err), "%s", ZSTD_getErrorName(err));                   \
   } while (0)
 
-MemoryBuffer<unsigned char> compress_memory_zstd(void const *const src,
-                                                 const std::size_t srcLen) {
+MemoryBuffer<unsigned char> compress_zstd(void const *const src,
+                                          const std::size_t srcLen) {
   const std::size_t cBuffSize = ZSTD_compressBound(srcLen);
   unsigned char *const buffer = new unsigned char[cBuffSize];
   const std::size_t cSize = ZSTD_compress(buffer, cBuffSize, src, srcLen, 1);
@@ -44,9 +44,8 @@ MemoryBuffer<unsigned char> compress_memory_zstd(void const *const src,
   return MemoryBuffer<unsigned char>(buffer, cSize);
 }
 
-void decompress_memory_zstd(void const *const src, const std::size_t srcLen,
-                            unsigned char *const dst,
-                            const std::size_t dstLen) {
+void decompress_zstd(void const *const src, const std::size_t srcLen,
+                     unsigned char *const dst, const std::size_t dstLen) {
   std::size_t const dSize = ZSTD_decompress(dst, dstLen, src, srcLen);
   CHECK_ZSTD(dSize);
 
diff --git a/tests/src/lossless_regression.cpp b/tests/src/lossless_regression.cpp
index 4320bee17a..11c04a4a01 100644
--- a/tests/src/lossless_regression.cpp
+++ b/tests/src/lossless_regression.cpp
@@ -32,10 +32,10 @@ MemoryBuffer<unsigned char> compress_serialized(const pb::Header &header,
 
   switch (header.encoding().compressor()) {
   case pb::Encoding::CPU_HUFFMAN_ZLIB:
-    return compress_memory_z(p, n);
+    return compress_zlib(p, n);
   case pb::Encoding::CPU_HUFFMAN_ZSTD:
 #ifdef MGARD_ZSTD
-    return compress_memory_zstd(p, n);
+    return compress_zstd(p, n);
 #else
     throw std::runtime_error("MGARD compiled without ZSTD support");
 #endif
@@ -132,11 +132,11 @@ void decompress_memory_huffman(const pb::Header &header,
 
     switch (header.encoding().compressor()) {
     case pb::Encoding::CPU_HUFFMAN_ZLIB:
-      decompress_memory_z(src_, srcLen_, dst_, dstLen_);
+      decompress_zlib(src_, srcLen_, dst_, dstLen_);
       break;
     case pb::Encoding::CPU_HUFFMAN_ZSTD:
 #ifdef MGARD_ZSTD
-      decompress_memory_zstd(src_, srcLen_, dst_, dstLen_);
+      decompress_zstd(src_, srcLen_, dst_, dstLen_);
       break;
 #else
       throw std::runtime_error("MGARD compiled without ZSTD support");
diff --git a/tests/src/test_lossless.cpp b/tests/src/test_lossless.cpp
index fa9198709f..bcb5b32bb6 100644
--- a/tests/src/test_lossless.cpp
+++ b/tests/src/test_lossless.cpp
@@ -189,10 +189,10 @@ void test_zstd_identity(std::uniform_int_distribution<unsigned char> &dis,
   std::generate(src, src + n, f);
   unsigned char *const src_ = new unsigned char[n];
   std::copy(src, src + n, src_);
-  mgard::MemoryBuffer<unsigned char> dst = mgard::compress_memory_zstd(src_, n);
+  mgard::MemoryBuffer<unsigned char> dst = mgard::compress_zstd(src_, n);
   delete[] src_;
   unsigned char *const decompressed = new unsigned char[n];
-  mgard::decompress_memory_zstd(dst.data.get(), dst.size, decompressed, n);
+  mgard::decompress_zstd(dst.data.get(), dst.size, decompressed, n);
   REQUIRE(std::equal(src, src + n, decompressed));
   delete[] decompressed;
   delete[] src;
@@ -219,10 +219,10 @@ void test_zlib_identity(std::uniform_int_distribution<unsigned char> &dis,
   std::generate(src, src + n, f);
   unsigned char *const src_ = new unsigned char[n];
   std::copy(src, src + n, src_);
-  mgard::MemoryBuffer<unsigned char> dst = mgard::compress_memory_z(src_, n);
+  mgard::MemoryBuffer<unsigned char> dst = mgard::compress_zlib(src_, n);
   delete[] src_;
   unsigned char *const decompressed = new unsigned char[n];
-  mgard::decompress_memory_z(dst.data.get(), dst.size, decompressed, n);
+  mgard::decompress_zlib(dst.data.get(), dst.size, decompressed, n);
   REQUIRE(std::equal(src, src + n, decompressed));
   delete[] decompressed;
   delete[] src;

From 48529f9d2155618955780281595c65be2f503243 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Mon, 20 Jun 2022 12:49:48 -0400
Subject: [PATCH 50/58] Change argument order in periodic data tests.

---
 tests/src/test_huffman.cpp  | 32 +++++++++++++++++---------------
 tests/src/test_lossless.cpp | 30 +++++++++++++++---------------
 2 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/tests/src/test_huffman.cpp b/tests/src/test_huffman.cpp
index a444d0c6c1..8874ddd168 100644
--- a/tests/src/test_huffman.cpp
+++ b/tests/src/test_huffman.cpp
@@ -74,8 +74,9 @@ void test_encoding_regression_constant(const std::size_t N, const long int q) {
   delete[] quantized;
 }
 
-void test_encoding_regression_periodic(const std::size_t N, const long int q,
-                                       const std::size_t period) {
+void test_encoding_regression_periodic(const std::size_t N,
+                                       const std::size_t period,
+                                       const long int q) {
   long int *const quantized = new long int[N];
   std::generate(quantized, quantized + N, PeriodicGenerator(period, q));
   test_encoding_regression(quantized, N);
@@ -99,8 +100,9 @@ void test_decoding_regression_constant(const std::size_t N, const long int q) {
   delete[] quantized;
 }
 
-void test_decoding_regression_periodic(const std::size_t N, const long int q,
-                                       const std::size_t period) {
+void test_decoding_regression_periodic(const std::size_t N,
+                                       const std::size_t period,
+                                       const long int q) {
   long int *const quantized = new long int[N];
   std::generate(quantized, quantized + N, PeriodicGenerator(period, q));
   test_decoding_regression(quantized, N);
@@ -126,8 +128,8 @@ void test_inversion_constant(const std::size_t N, const T q) {
 }
 
 template <typename T>
-void test_inversion_periodic(const std::size_t N, const T q,
-                             const std::size_t period) {
+void test_inversion_periodic(const std::size_t N, const std::size_t period,
+                             const T q) {
   T *const quantized = new T[N];
   std::generate(quantized, quantized + N, PeriodicGenerator(period, q));
   test_inversion(quantized, N);
@@ -154,9 +156,9 @@ TEST_CASE("encoding regression", "[huffman] [regression]") {
   }
 
   SECTION("periodic data") {
-    test_encoding_regression_periodic(10, -3, 3);
-    test_encoding_regression_periodic(100, 0, 10);
-    test_encoding_regression_periodic(1000, 51, 17);
+    test_encoding_regression_periodic(10, 3, -3);
+    test_encoding_regression_periodic(100, 10, 0);
+    test_encoding_regression_periodic(1000, 17, 51);
   }
 
   SECTION("random data") {
@@ -177,9 +179,9 @@ TEST_CASE("decoding regression", "[huffman] [regression]") {
   }
 
   SECTION("periodic data") {
-    test_decoding_regression_periodic(10, 12, 4);
-    test_decoding_regression_periodic(100, -71, 9);
-    test_decoding_regression_periodic(1000, 3280, 23);
+    test_decoding_regression_periodic(10, 4, 12);
+    test_decoding_regression_periodic(100, 9, -71);
+    test_decoding_regression_periodic(1000, 23, 3280);
   }
 
   SECTION("random data") {
@@ -203,9 +205,9 @@ TEMPLATE_TEST_CASE("Huffman inversion", "[huffman]", std::int8_t, std::int16_t,
   }
 
   SECTION("periodic data") {
-    test_inversion_periodic<TestType>(10, -dis(gen_), 11);
-    test_inversion_periodic<TestType>(100, dis(gen_), 10);
-    test_inversion_periodic<TestType>(1000, -dis(gen_), 9);
+    test_inversion_periodic<TestType>(10, 3, -dis(gen_));
+    test_inversion_periodic<TestType>(100, 10, dis(gen_));
+    test_inversion_periodic<TestType>(1000, 9, -dis(gen_));
   }
 
   SECTION("random data") {
diff --git a/tests/src/test_lossless.cpp b/tests/src/test_lossless.cpp
index bcb5b32bb6..b4e9c06661 100644
--- a/tests/src/test_lossless.cpp
+++ b/tests/src/test_lossless.cpp
@@ -90,8 +90,8 @@ void test_hcr_constant(const std::size_t srcLen, const long int q) {
   delete[] src;
 }
 
-void test_hcr_periodic(const std::size_t srcLen, const long int initial,
-                       const std::size_t period) {
+void test_hcr_periodic(const std::size_t srcLen, const std::size_t period,
+                       const long int initial) {
   long int *const src = new long int[srcLen];
   std::generate(src, src + srcLen, PeriodicGenerator(period, initial));
   test_huffman_compression_regression(src, srcLen);
@@ -114,8 +114,8 @@ void test_hdr_constant(const std::size_t srcLen, const long int q) {
   delete[] src;
 }
 
-void test_hdr_periodic(const std::size_t srcLen, const long int initial,
-                       const std::size_t period) {
+void test_hdr_periodic(const std::size_t srcLen, const std::size_t period,
+                       const long int initial) {
   long int *const src = new long int[srcLen];
   std::generate(src, src + srcLen, PeriodicGenerator(period, initial));
   test_huffman_decompression_regression(src, srcLen);
@@ -141,9 +141,9 @@ TEST_CASE("Huffman compression regression", "[compressors] [regression]") {
   }
 
   SECTION("periodic data") {
-    test_hcr_periodic(5, 0, 5);
-    test_hcr_periodic(25, -4, 6);
-    test_hcr_periodic(625, 22, 20);
+    test_hcr_periodic(5, 5, 0);
+    test_hcr_periodic(25, 6, -4);
+    test_hcr_periodic(625, 20, 22);
   }
 
   SECTION("random data") {
@@ -164,9 +164,9 @@ TEST_CASE("Huffman decompression regression", "[compressors] [regression]") {
   }
 
   SECTION("periodic data") {
-    test_hdr_periodic(10, 0, 3);
-    test_hdr_periodic(100, -570, 10);
-    test_hdr_periodic(1000, 394, 19);
+    test_hdr_periodic(10, 3, 0);
+    test_hdr_periodic(100, 10, -570);
+    test_hdr_periodic(1000, 19, 394);
   }
 
   SECTION("random data") {
@@ -270,8 +270,8 @@ void test_cd_inversion_constant(const mgard::pb::Header &header,
 
 template <typename Int>
 void test_cd_inversion_periodic(const mgard::pb::Header &header,
-                                const std::size_t N, const Int q,
-                                const std::size_t period) {
+                                const std::size_t N, const std::size_t period,
+                                const Int q) {
   Int *const quantized = new Int[N];
   std::generate(quantized, quantized + N, PeriodicGenerator(period, q));
   test_cd_inversion(header, quantized, N);
@@ -321,9 +321,9 @@ void test_cd_inversion_constant(const mgard::pb::Header &header) {
 
 template <typename Int>
 void test_cd_inversion_periodic(const mgard::pb::Header &header) {
-  test_cd_inversion_periodic<Int>(header, 100, -5, 3);
-  test_cd_inversion_periodic<Int>(header, 1000, 86, 60);
-  test_cd_inversion_periodic<Int>(header, 10000, 7, 62);
+  test_cd_inversion_periodic<Int>(header, 100, 3, -5);
+  test_cd_inversion_periodic<Int>(header, 1000, 60, 86);
+  test_cd_inversion_periodic<Int>(header, 10000, 62, 7);
 }
 
 template <typename Int>

From 547d4e03e76ce44e38367023b0bcdc8846282ead Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Mon, 20 Jun 2022 12:59:23 -0400
Subject: [PATCH 51/58] Remove unused `NOOP_COMPRESSOR` decompressor.

---
 src/lossless_dispatcher.cpp | 12 ------------
 src/mgard.proto             |  2 ++
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/src/lossless_dispatcher.cpp b/src/lossless_dispatcher.cpp
index 175e7495ba..eb6a71593a 100644
--- a/src/lossless_dispatcher.cpp
+++ b/src/lossless_dispatcher.cpp
@@ -201,18 +201,6 @@ MemoryBuffer<unsigned char> compress(const pb::Header &header,
   }
 }
 
-void decompress_noop(void const *const src, const std::size_t srcLen,
-                     void *const dst, const std::size_t dstLen) {
-  if (srcLen != dstLen) {
-    throw std::invalid_argument("source and destination lengths must be equal");
-  }
-  {
-    unsigned char const *const p = static_cast<unsigned char const *>(src);
-    unsigned char *const q = static_cast<unsigned char *>(dst);
-    std::copy(p, p + srcLen, q);
-  }
-}
-
 namespace {
 
 template <typename Int>
diff --git a/src/mgard.proto b/src/mgard.proto
index 2407a78f7e..2b80d7de0a 100644
--- a/src/mgard.proto
+++ b/src/mgard.proto
@@ -124,6 +124,8 @@ message Encoding {
     SHUFFLE = 1;
   }
   enum Compressor {
+    // Not yet implemented. We'll want to add a message for quantized coefficients stored 'verbatim'
+    // (probably still somewhat compressed because of the varint encoding used for `int64`s).
     NOOP_COMPRESSOR = 0;
     // Explanation for the wonky numbering: this first case was originally called `CPU_HUFFMAN_ZLIB`,
     // but the relevant code didn't actually call the Huffman encoder.

From 021e33000e42c2111eb96033b00dbad876205bcd Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Tue, 21 Jun 2022 12:05:40 -0400
Subject: [PATCH 52/58] Add `Chain` to allow iterator range concatenation.

---
 include/utilities.hpp        | 87 ++++++++++++++++++++++++++++++++++++
 include/utilities.tpp        | 76 +++++++++++++++++++++++++++++++
 tests/src/test_utilities.cpp | 55 +++++++++++++++++++++++
 3 files changed, 218 insertions(+)

diff --git a/include/utilities.hpp b/include/utilities.hpp
index 9f514ef472..43b005a0d4 100644
--- a/include/utilities.hpp
+++ b/include/utilities.hpp
@@ -9,6 +9,7 @@
 #include <iterator>
 #include <memory>
 #include <utility>
+#include <vector>
 
 namespace mgard {
 
@@ -545,6 +546,92 @@ class Bits::iterator {
   unsigned char offset;
 };
 
+//! Concatenated iterator ranges.
+//!
+//! Approximate Python's `itertools.chain` generator.
+template <typename It> class Chain {
+public:
+  //! Constructor.
+  //!
+  //!\param segments Beginnings and lengths of iterator ranges.
+  Chain(const std::vector<std::pair<It, std::size_t>> &segments);
+
+  // Forward declaration.
+  class iterator;
+
+  //! Return an iterator to the beginning of the enumeration.
+  iterator begin() const;
+
+  //! Return an iterator to the end of the enumeration.
+  iterator end() const;
+
+  //! Beginnings and lengths of iterator ranges.
+  std::vector<std::pair<It, std::size_t>> segments;
+};
+
+//! Equality comparison.
+template <typename It> bool operator==(const Chain<It> &a, const Chain<It> &b);
+
+//! Inequality comparison.
+template <typename It> bool operator!=(const Chain<It> &a, const Chain<It> &b);
+
+//! Iterator over concatenated iterator ranges.
+template <typename It> class Chain<It>::iterator {
+public:
+  //! Category of the iterator.
+  using iterator_category = std::forward_iterator_tag;
+  //! Type iterated over.
+  using value_type = typename std::iterator_traits<It>::value_type;
+  //! Type for distance between iterators.
+  using difference_type = typename std::iterator_traits<It>::difference_type;
+  //! Pointer to `value_type`.
+  using pointer = typename std::iterator_traits<It>::pointer;
+  //! Type returned by the dereference operator.
+  using reference = typename std::iterator_traits<It>::reference;
+
+  //! Constructor.
+  //!
+  //!\param iterable Associated chain.
+  //!\param q Iterator to current segment.
+  iterator(
+      const Chain &iterable,
+      const typename std::vector<std::pair<It, std::size_t>>::const_iterator q);
+
+  //! Equality comparison.
+  bool operator==(const iterator &other) const;
+
+  //! Inequality comparison.
+  bool operator!=(const iterator &other) const;
+
+  //! Preincrement.
+  iterator &operator++();
+
+  //! Postincrement.
+  iterator operator++(int);
+
+  //! Dereference.
+  reference operator*() const;
+
+private:
+  //! Associated chain.
+  const Chain &iterable;
+
+  //! Iterator to current segment.
+  typename std::vector<std::pair<It, std::size_t>>::const_iterator q;
+
+  //! Position in the current segment.
+  It p;
+
+  //! Distance from the beginning of the current segment.
+  std::size_t i;
+
+  //! Length of the current segment.
+  std::size_t n;
+
+  //! Zero `i`; populate `p` and `n` from `q` if not at end.
+  void conditionally_start_segment();
+};
+
 } // namespace mgard
 
 #include "utilities.tpp"
diff --git a/include/utilities.tpp b/include/utilities.tpp
index 52fd9b4ba7..6260fa5af4 100644
--- a/include/utilities.tpp
+++ b/include/utilities.tpp
@@ -345,4 +345,80 @@ template <typename T>
 MemoryBuffer<T>::MemoryBuffer(const std::size_t size)
     : MemoryBuffer(new T[size], size) {}
 
+template <typename It>
+Chain<It>::Chain(const std::vector<std::pair<It, std::size_t>> &segments)
+    : segments(segments) {}
+
+template <typename It> bool operator==(const Chain<It> &a, const Chain<It> &b) {
+  return a.segments == b.segments;
+}
+
+template <typename It> bool operator!=(const Chain<It> &a, const Chain<It> &b) {
+  return !operator==(a, b);
+}
+
+template <typename It> typename Chain<It>::iterator Chain<It>::begin() const {
+  return {*this, segments.begin()};
+}
+
+template <typename It> typename Chain<It>::iterator Chain<It>::end() const {
+  return {*this, segments.end()};
+}
+
+// Value-initialize `p`, `i`, and `n` so that end iterators are safe to copy.
+template <typename It> Chain<It>::iterator::iterator(
+    const Chain<It> &iterable,
+    const typename std::vector<std::pair<It, std::size_t>>::const_iterator q)
+    : iterable(iterable), q(q), p(), i(0), n(0) {
+  conditionally_start_segment();
+}
+
+template <typename It> void Chain<It>::iterator::conditionally_start_segment() {
+  i = 0;
+  // Skip empty segments with a loop rather than recursion, so that a long run
+  // of them cannot exhaust the stack. Stop at the first nonempty segment.
+  for (; q != iterable.segments.end(); ++q) {
+    p = q->first;
+    n = q->second;
+    if (n) {
+      break;
+    }
+  }
+}
+
+template <typename It>
+bool Chain<It>::iterator::
+operator==(const typename Chain<It>::iterator &other) const {
+  return i == other.i and q == other.q and iterable == other.iterable;
+}
+
+template <typename It>
+bool Chain<It>::iterator::
+operator!=(const typename Chain<It>::iterator &other) const {
+  return !operator==(other);
+}
+
+template <typename It>
+typename Chain<It>::iterator &Chain<It>::iterator::operator++() {
+  ++p;
+  ++i;
+  if (i == n) {
+    ++q;
+    conditionally_start_segment();
+  }
+  return *this;
+}
+
+template <typename It>
+typename Chain<It>::iterator Chain<It>::iterator::operator++(int) {
+  const iterator tmp = *this;
+  operator++();
+  return tmp;
+}
+
+template <typename It>
+typename Chain<It>::iterator::reference Chain<It>::iterator::operator*() const {
+  return *p;
+}
+
 } // namespace mgard
diff --git a/tests/src/test_utilities.cpp b/tests/src/test_utilities.cpp
index 1e53eec72e..e5d66656e8 100644
--- a/tests/src/test_utilities.cpp
+++ b/tests/src/test_utilities.cpp
@@ -253,3 +253,58 @@ TEST_CASE("Bits iteration", "[utilities]") {
     }
   }
 }
+
+TEST_CASE("Chain iteration", "[utilities]") {
+  SECTION("reading") {
+    const std::size_t N = 5;
+    std::array<std::vector<unsigned char>, N> in;
+    in.at(0) = {0};
+    in.at(1) = {1, 2, 3};
+    in.at(2) = {};
+    in.at(3) = {4, 5, 6};
+    in.at(4) = {7, 8, 9, 10};
+    using It = std::vector<unsigned char>::const_iterator;
+    std::vector<std::pair<It, std::size_t>> segments;
+    for (const std::vector<unsigned char> &in_ : in) {
+      segments.push_back({in_.begin(), in_.size()});
+    }
+    unsigned char expected = 0;
+    TrialTracker tracker;
+    for (const unsigned char read : mgard::Chain(segments)) {
+      tracker += read == expected++;
+    }
+    REQUIRE(tracker);
+    REQUIRE(expected == 11);
+  }
+
+  SECTION("writing") {
+    const std::size_t N = 4;
+    std::array<std::vector<unsigned short int>, N> out;
+    const std::array<std::size_t, N> ns{3, 5, 0, 1};
+    using It = std::vector<unsigned short int>::iterator;
+    std::vector<std::pair<It, std::size_t>> segments;
+    segments.reserve(N);
+    for (std::size_t i = 0; i < N; ++i) {
+      std::vector<unsigned short int> &out_ = out.at(i);
+      const std::size_t n = ns.at(i);
+      out_.resize(n);
+      segments.push_back({out_.begin(), n});
+    }
+
+    unsigned short int a = 1;
+    unsigned short int b = 1;
+    for (unsigned short int &c : mgard::Chain(segments)) {
+      c = a;
+      const unsigned short int tmp = a + b;
+      a = b;
+      b = tmp;
+    }
+
+    std::array<std::vector<unsigned short int>, N> expected;
+    expected.at(0) = {1, 1, 2};
+    expected.at(1) = {3, 5, 8, 13, 21};
+    expected.at(2) = {};
+    expected.at(3) = {34};
+    REQUIRE(out == expected);
+  }
+}

From 1d07149c0d79659cd485b7d75e84024d4966022b Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Tue, 28 Jun 2022 16:11:32 -0400
Subject: [PATCH 53/58] Limit sizes of frequency and 'missed' subtables.

---
 include/huffman.tpp | 285 ++++++++++++++++++++++++++++++++++++++++----
 src/mgard.proto     |  16 ++-
 2 files changed, 273 insertions(+), 28 deletions(-)

diff --git a/include/huffman.tpp b/include/huffman.tpp
index 904c23e24f..cf5706e9b1 100644
--- a/include/huffman.tpp
+++ b/include/huffman.tpp
@@ -22,6 +22,7 @@ using Endpoints = google::protobuf::RepeatedField<google::protobuf::int64>;
 using Missed = google::protobuf::RepeatedField<google::protobuf::int64>;
 using Frequencies =
     google::protobuf::Map<google::protobuf::uint64, google::protobuf::uint64>;
+using SubtableSizes = google::protobuf::RepeatedField<google::protobuf::uint64>;
 
 } // namespace
 
@@ -176,6 +177,221 @@ void HuffmanCode<Symbol>::recursively_set_codewords(
   }
 }
 
+namespace {
+
+//! Maximum number of elements per frequency/missed subtable.
+inline constexpr std::size_t SUBTABLE_MAX_SIZE = 1 << 20;
+
+//! A logical table split into one or more subtables of moderate size.
+//!
+//! The logical table can be read by chaining the subtables.
+template <typename Message, typename It> struct Supertable {
+  // The beginning and size of a subtable.
+  using Segment = std::pair<It, std::size_t>;
+
+  //! Constructor.
+  //!
+  //! Construct an 'empty' `Supertable`. The data members will be given the
+  //! right sizes, but for the most part they will not be populated. That is
+  //! left to derived class constructors or callers.
+  //!
+  //!\param nelements Total number of subtable entries.
+  //!\param nbytes_subtables Sizes in bytes of the subtables (field in
+  //! `pb::HuffmanHeader`). This field will be written to.
+  Supertable(const std::size_t nelements, SubtableSizes &nbytes_subtables)
+      : nsubtables((nelements + SUBTABLE_MAX_SIZE - 1) / SUBTABLE_MAX_SIZE),
+        subtables(nsubtables), segments(nsubtables),
+        nbytes_subtables(nbytes_subtables) {
+    nbytes_subtables.Resize(nsubtables, 0);
+
+    for (std::size_t i = 0; i + 1 < nsubtables; ++i) {
+      segments.at(i).second = SUBTABLE_MAX_SIZE;
+    }
+    if (nsubtables) {
+      // If `nelements` is an exact multiple of `SUBTABLE_MAX_SIZE` and not
+      // zero, we need this last size to be `SUBTABLE_MAX_SIZE`, not `0`. If
+      // `nelements` is zero, we won't be executing this statement.
+      segments.back().second = nelements % SUBTABLE_MAX_SIZE
+                                   ? nelements % SUBTABLE_MAX_SIZE
+                                   : SUBTABLE_MAX_SIZE;
+    }
+  }
+
+  //! Constructor.
+  //!
+  //! Construct a `Supertable` from a collection of parsed messages. This
+  //! constructor leaves `segments` uninitialized. This is because `Supertable`
+  //! doesn't know which field of `Message` is the subtable.
+  //!
+  //!\param nbytes_subtables Sizes in bytes of the subtables (field in
+  //! `pb::HuffmanHeader`).
+  //!\param window Window into buffer containing messages to be parsed.
+  Supertable(SubtableSizes &nbytes_subtables, BufferWindow &window)
+      : nsubtables(nbytes_subtables.size()), subtables(nsubtables),
+        segments(nsubtables), nbytes_subtables(nbytes_subtables) {
+    for (std::size_t i = 0; i < nsubtables; ++i) {
+      subtables.at(i) = read_message<Message>(window, nbytes_subtables.Get(i));
+    }
+  }
+
+  //! Calculate and store the sizes in bytes of the subtables.
+  //!
+  //! This function should be called once the subtables are populated.
+  void calculate_nbytes_subtables() {
+    for (std::size_t i = 0; i < nsubtables; ++i) {
+      nbytes_subtables.Set(i, subtables.at(i).ByteSize());
+    }
+  }
+
+  //! Calculate the total size in bytes of the subtables.
+  //!
+  //! This function assumes no changes have been made to the subtables since the
+  //! last call to `calculate_nbytes_subtables`.
+  std::size_t ByteSize() const {
+    return std::accumulate(nbytes_subtables.begin(), nbytes_subtables.end(),
+                           static_cast<std::size_t>(0));
+  }
+
+  void SerializeToArray(void *const p, const std::size_t n) const {
+    unsigned char *const p_ = reinterpret_cast<unsigned char *>(p);
+    std::size_t total = 0;
+    // Validate `n` before writing anything, so that an undersized buffer is
+    // rejected instead of being overrun.
+    if (ByteSize() != n) {
+      throw std::invalid_argument("serialization buffer size incorrect");
+    }
+    for (std::size_t i = 0; i < nsubtables; ++i) {
+      const google::protobuf::uint64 nbytes_subtable = nbytes_subtables.Get(i);
+      subtables.at(i).SerializeToArray(p_ + total, nbytes_subtable);
+      total += nbytes_subtable;
+    }
+  }
+
+  //! Number of subtables.
+  std::size_t nsubtables;
+
+  //! Subtables.
+  //!
+  //! It might be better to name this member 'messages.' Elsewhere we use
+  //! 'subtable' to refer to the fields of the messages containing the
+  //! supertable elements. Using that vocabulary, a `pb::FrequencySubtable`
+  //! would be a message while its `frequencies` field would be the subtable.
+  std::vector<Message> subtables;
+
+  //! Segments for a concatenated subtable chain.
+  //!
+  //! A `Chain<It>` can be constructed from this.
+  std::vector<Segment> segments;
+
+  //! Sizes in bytes of the subtables.
+  SubtableSizes &nbytes_subtables;
+};
+
+//! A logical frequency table split into one or more subtables of moderate size.
+struct FrequencySupertable
+    : Supertable<pb::FrequencySubtable, Frequencies::iterator> {
+  //! Constructor.
+  //!
+  //! Construct and populate a `FrequencySupertable` from a vector of symbol
+  //! frequencies.
+  //!
+  //!\param frequencies Symbol frequencies to store in the subtables.
+  //!\param nbytes_subtables Sizes in bytes of the subtables (field in
+  //! `pb::HuffmanHeader`). This field will be written to.
+  FrequencySupertable(const std::vector<std::size_t> &frequencies,
+                      SubtableSizes &nbytes_subtables)
+      : Supertable(std::count_if(frequencies.begin(), frequencies.end(),
+                                 [](const std::size_t frequency) -> bool {
+                                   return frequency;
+                                 }),
+                   nbytes_subtables) {
+    // `i` is the index of the subtable we're inserting into. (Technically
+    // we're inserting into the subtable's frequency map field rather than
+    // the subtable itself.) `j` is the number of entries we've inserted
+    // into subtable `i`. `k` is the index in the vector of frequencies
+    // passed to the constructor.
+    std::size_t k = 0;
+    for (std::size_t i = 0; i < nsubtables; ++i) {
+      Frequencies &frequencies_ = *subtables.at(i).mutable_frequencies();
+      Segment &segment = segments.at(i);
+      // How big `frequencies_` should be when we're done.
+      const std::size_t nfrequencies_ = segment.second;
+      for (std::size_t j = 0; j < nfrequencies_; ++k) {
+        const std::size_t frequency = frequencies.at(k);
+        if (frequency) {
+          frequencies_.insert({k, frequency});
+          ++j;
+        }
+      }
+      segment.first = frequencies_.begin();
+    }
+
+    calculate_nbytes_subtables();
+  }
+
+  //! Constructor.
+  //!
+  //! Construct a `FrequencySupertable` from a collection of parsed messages.
+  //!
+  //!\param nbytes_subtables Sizes in bytes of the subtables (field in
+  //! `pb::HuffmanHeader`).
+  //!\param window Window into buffer containing messages to be parsed.
+  FrequencySupertable(SubtableSizes &nbytes_subtables, BufferWindow &window)
+      : Supertable(nbytes_subtables, window) {
+    for (std::size_t i = 0; i < nsubtables; ++i) {
+      Segment &segment = segments.at(i);
+      Frequencies &frequencies = *subtables.at(i).mutable_frequencies();
+
+      segment.first = frequencies.begin();
+      segment.second = frequencies.size();
+    }
+  }
+};
+
+//! A logical 'missed' table split into one or more subtables of moderate size.
+struct MissedSupertable : Supertable<pb::MissedSubtable, Missed::iterator> {
+  //! Constructor.
+  //!
+  //! Construct an 'empty' `MissedSupertable`. It is expected that the caller
+  //! will subsequently write to the subtables using `Chain`.
+  //!
+  //!\param nmissed Number of missed symbols.
+  //!\param nbytes_subtables Sizes in bytes of the subtables (field in
+  //! `pb::HuffmanHeader`). This field will be written to.
+  MissedSupertable(const std::size_t nmissed, SubtableSizes &nbytes_subtables)
+      : Supertable(nmissed, nbytes_subtables) {
+    for (std::size_t i = 0; i < nsubtables; ++i) {
+      Missed &missed = *subtables.at(i).mutable_missed();
+      Segment &segment = segments.at(i);
+      // How big `missed` should be when we're done.
+      const std::size_t nmissed = segment.second;
+
+      missed.Resize(nmissed, 0);
+      segment.first = missed.begin();
+    }
+  }
+
+  //! Constructor.
+  //!
+  //! Construct a `MissedSupertable` from a collection of parsed messages.
+  //!
+  //!\param nbytes_subtables Sizes in bytes of the subtables (field in
+  //! `pb::HuffmanHeader`).
+  //!\param window Window into buffer containing messages to be parsed.
+  MissedSupertable(SubtableSizes &nbytes_subtables, BufferWindow &window)
+      : Supertable(nbytes_subtables, window) {
+    for (std::size_t i = 0; i < nsubtables; ++i) {
+      Segment &segment = segments.at(i);
+      Missed &missed = *subtables.at(i).mutable_missed();
+
+      segment.first = missed.begin();
+      segment.second = missed.size();
+    }
+  }
+};
+
+} // namespace
+
 template <typename Symbol>
 MemoryBuffer<unsigned char> huffman_encode(Symbol const *const begin,
                                            const std::size_t n) {
@@ -188,7 +404,7 @@ MemoryBuffer<unsigned char> huffman_encode(Symbol const *const begin,
   const std::size_t nbits =
       std::inner_product(code.frequencies.begin(), code.frequencies.end(),
                          lengths.begin(), static_cast<std::size_t>(0));
-  const std::size_t nbytes = (nbits + CHAR_BIT - 1) / CHAR_BIT;
+  const std::size_t nbytes_hit = (nbits + CHAR_BIT - 1) / CHAR_BIT;
 
   pb::HuffmanHeader header;
   header.set_index_mapping(pb::HuffmanHeader::INCLUSIVE_RANGE);
@@ -200,23 +416,18 @@ MemoryBuffer<unsigned char> huffman_encode(Symbol const *const begin,
   header.add_endpoints(code.endpoints.second);
   header.set_nbits(nbits);
 
-  Frequencies &frequencies = *header.mutable_frequencies();
-  {
-    std::size_t i = 0;
-    for (const std::size_t frequency : code.frequencies) {
-      if (frequency) {
-        frequencies.insert({i, frequency});
-      }
-      ++i;
-    }
-  }
+  FrequencySupertable frequency_supertable(
+      code.frequencies, *header.mutable_nbytes_frequency_subtables());
+  MissedSupertable missed_supertable(code.nmissed(),
+                                     *header.mutable_nbytes_missed_subtables());
 
-  Missed &missed_ = *header.mutable_missed();
-  missed_.Resize(code.nmissed(), 0);
-  Missed::iterator missed = missed_.begin();
+  Chain<Missed::iterator> chained_missed_supertable(missed_supertable.segments);
+  Chain<Missed::iterator>::iterator missed = chained_missed_supertable.begin();
+  // Now we're ready to populate the 'missed' subtables in the course of
+  // populating the 'hit' buffer.
 
   // Zero-initialize the bytes.
-  unsigned char *const hit_ = new unsigned char[nbytes]();
+  unsigned char *const hit_ = new unsigned char[nbytes_hit]();
   unsigned char *hit = hit_;
 
   unsigned char offset = 0;
@@ -249,8 +460,18 @@ MemoryBuffer<unsigned char> huffman_encode(Symbol const *const begin,
     }
   }
 
+  // We're done writing to the 'missed' subtables, so we can now calculate their
+  // serialized sizes. We need to do this before calling
+  // `missed_supertable.ByteSize`.
+  missed_supertable.calculate_nbytes_subtables();
+
   const std::uint_least64_t nheader = header.ByteSize();
-  MemoryBuffer<unsigned char> out(HEADER_SIZE_SIZE + nheader + nbytes);
+  const std::size_t nbytes_frequency_supertable =
+      frequency_supertable.ByteSize();
+  const std::size_t nbytes_missed_supertable = missed_supertable.ByteSize();
+  MemoryBuffer<unsigned char> out(HEADER_SIZE_SIZE + nheader +
+                                  nbytes_frequency_supertable +
+                                  nbytes_missed_supertable + nbytes_hit);
   {
     unsigned char *p = out.data.get();
     const std::array<unsigned char, HEADER_SIZE_SIZE> nheader_ =
@@ -261,8 +482,14 @@ MemoryBuffer<unsigned char> huffman_encode(Symbol const *const begin,
     header.SerializeToArray(p, nheader);
     p += nheader;
 
-    std::copy(hit_, hit_ + nbytes, p);
-    p += nbytes;
+    frequency_supertable.SerializeToArray(p, nbytes_frequency_supertable);
+    p += nbytes_frequency_supertable;
+
+    missed_supertable.SerializeToArray(p, nbytes_missed_supertable);
+    p += nbytes_missed_supertable;
+
+    std::copy(hit_, hit_ + nbytes_hit, p);
+    p += nbytes_hit;
   }
 
   delete[] hit_;
@@ -283,19 +510,24 @@ MemoryBuffer<Symbol> huffman_decode(const MemoryBuffer<unsigned char> &buffer) {
   if (endpoints_.size() != 2) {
     throw std::runtime_error("received an unexpected number of endpoints");
   }
-  const std::pair<std::size_t, std::size_t> endpoints(endpoints_.Get(0),
-                                                      endpoints_.Get(1));
+  const std::pair<Symbol, Symbol> endpoints(endpoints_.Get(0),
+                                            endpoints_.Get(1));
 
   if (header.codeword_mapping() != pb::HuffmanHeader::INDEX_FREQUENCY_PAIRS) {
     throw std::runtime_error("unrecognized Huffman codeword mapping");
   }
-  const Frequencies &frequencies_ = header.frequencies();
+  FrequencySupertable frequency_supertable(
+      *header.mutable_nbytes_frequency_subtables(), window);
+  Chain<Frequencies::iterator> chained_frequency_supertable(
+      frequency_supertable.segments);
 
   if (header.missed_encoding() != pb::HuffmanHeader::LITERAL) {
     throw std::runtime_error("unrecognized Huffman missed buffer encoding");
   }
-  const Missed &missed_ = header.missed();
-  Missed::const_iterator missed = missed_.cbegin();
+  MissedSupertable missed_supertable(*header.mutable_nbytes_missed_subtables(),
+                                     window);
+  Chain<Missed::iterator> chained_missed_supertable(missed_supertable.segments);
+  Chain<Missed::iterator>::iterator missed = chained_missed_supertable.begin();
 
   if (header.hit_encoding() != pb::HuffmanHeader::RUN_TOGETHER) {
     throw std::runtime_error("unrecognized Huffman hit buffer encoding");
@@ -308,8 +540,9 @@ MemoryBuffer<Symbol> huffman_decode(const MemoryBuffer<unsigned char> &buffer) {
                              "number of bytes in hit buffer");
   }
 
-  const HuffmanCode<Symbol> code(endpoints, frequencies_.begin(),
-                                 frequencies_.end());
+  const HuffmanCode<Symbol> code(endpoints,
+                                 chained_frequency_supertable.begin(),
+                                 chained_frequency_supertable.end());
   // TODO: Maybe add a member function for this.
   const std::size_t nout =
       std::accumulate(code.frequencies.begin(), code.frequencies.end(),
@@ -332,7 +565,7 @@ MemoryBuffer<Symbol> huffman_decode(const MemoryBuffer<unsigned char> &buffer) {
     *q++ = decoded.first ? decoded.second : *missed++;
   }
   assert(nbits_read == nbits);
-  assert(missed == missed_.cend());
+  assert(missed == chained_missed_supertable.end());
 
   return out;
 }
diff --git a/src/mgard.proto b/src/mgard.proto
index 2b80d7de0a..a8353f1dec 100644
--- a/src/mgard.proto
+++ b/src/mgard.proto
@@ -189,12 +189,24 @@ message HuffmanHeader {
 
   // Minimum and maximum symbols eligible for codewords.
   repeated sint64 endpoints = 5;
+  // Sizes in bytes of serialized `FrequencySubtable`s to follow.
+  repeated uint64 nbytes_frequency_subtables = 6;
+  // Sizes in bytes of serialized `MissedSubtable`s to follow.
+  repeated uint64 nbytes_missed_subtables = 7;
+  // Size in bits of the hit buffer to follow.
+  uint64 nbits = 8;
+}
+
+// One or more of these will follow a `HuffmanHeader`.
+message FrequencySubtable {
   // Index–frequency pairs for frequency table.
   map<uint64, uint64> frequencies = 6;
+}
+
+// Zero or more of these will follow the `FrequencySubtable`s after a `HuffmanHeader`.
+message MissedSubtable {
   // Encountered symbols that were not assigned codewords.
   repeated sint64 missed = 7;
-  // Size of the hit buffer in bits.
-  uint64 nbits = 8;
 }
 
 message Device {

From f887c7d61e7910accef8311067c0fc10c493bb7b Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Thu, 30 Jun 2022 11:15:17 -0400
Subject: [PATCH 54/58] Add comments motivating `Supertable`, `Chain` use.

---
 include/huffman.tpp | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/include/huffman.tpp b/include/huffman.tpp
index cf5706e9b1..64e506051b 100644
--- a/include/huffman.tpp
+++ b/include/huffman.tpp
@@ -237,6 +237,8 @@ template <typename Message, typename It> struct Supertable {
   //! Calculate and store the sizes in bytes of the subtables.
   //!
   //! This function should be called once the subtables are populated.
+  //! `nbytes_subtables` (a field in some `pb::HuffmanHeader`) will be modified.
+  //! Subsequent changes to the subtables will invalidate the sizes.
   void calculate_nbytes_subtables() {
     for (std::size_t i = 0; i < nsubtables; ++i) {
       nbytes_subtables.Set(i, subtables.at(i).ByteSize());
@@ -252,6 +254,10 @@ template <typename Message, typename It> struct Supertable {
                            static_cast<std::size_t>(0));
   }
 
+  //! Write the subtables out to a buffer.
+  //!
+  //!\param p Buffer to which to serialize the subtables.
+  //!\param n Expected number of bytes that will be written.
   void SerializeToArray(void *const p, const std::size_t n) const {
     unsigned char *const p_ = reinterpret_cast<unsigned char *>(p);
     std::size_t total = 0;
@@ -416,11 +422,28 @@ MemoryBuffer<unsigned char> huffman_encode(Symbol const *const begin,
   header.add_endpoints(code.endpoints.second);
   header.set_nbits(nbits);
 
+  // Originally, `pb::HuffmanHeader` had a field each for the frequency and
+  // 'missed' tables. Unfortunately, these tables can get very big. In
+  // particular, if the error tolerance is very low, the quantized coefficients
+  // will be very large, and many of them will be missed. This could result in
+  // the size of the 'missed' table exceeding the (default) limit imposed by
+  // `google::protobuf::CodedInputStream`. See <https://developers.google.com/
+  // protocol-buffers/docs/reference/csharp/class/google/protobuf/
+  // coded-input-stream#sizelimit>. As a workaround, we are splitting the
+  // 'missed' table (and, for good measure, the frequency table, too) into a
+  // sequence of subtables of moderate size.
+
+  // This `FrequencySupertable` creates and populates the frequency subtables.
   FrequencySupertable frequency_supertable(
       code.frequencies, *header.mutable_nbytes_frequency_subtables());
+  // This `MissedSupertable` creates but does not populate the 'missed'
+  // subtables. We'll populate the subtables below, as we encode the stream.
   MissedSupertable missed_supertable(code.nmissed(),
                                      *header.mutable_nbytes_missed_subtables());
 
+  // This `Chain` lets us treat the 'missed' subtables as a single logical
+  // table. It frees us from manually keeping track of when we need to advance
+  // from one subtable to the next.
   Chain<Missed::iterator> chained_missed_supertable(missed_supertable.segments);
   Chain<Missed::iterator>::iterator missed = chained_missed_supertable.begin();
   // Now we're ready to populate the 'missed' subtables in the course of
@@ -516,6 +539,8 @@ MemoryBuffer<Symbol> huffman_decode(const MemoryBuffer<unsigned char> &buffer) {
   if (header.codeword_mapping() != pb::HuffmanHeader::INDEX_FREQUENCY_PAIRS) {
     throw std::runtime_error("unrecognized Huffman codeword mapping");
   }
+  // See the comments in `huffman_encode` for an explanation of why we use these
+  // `Supertable`s and `Chain`s.
   FrequencySupertable frequency_supertable(
       *header.mutable_nbytes_frequency_subtables(), window);
   Chain<Frequencies::iterator> chained_frequency_supertable(

From 6e281a6646a19e87cdd4382c773cda3d8b14edbf Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Thu, 30 Jun 2022 11:45:33 -0400
Subject: [PATCH 55/58] Add member functions for common size computations.

---
 include/huffman.hpp | 12 +++++++++---
 include/huffman.tpp | 30 +++++++++++++++++-------------
 src/huffman.cpp     |  8 +-------
 3 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/include/huffman.hpp b/include/huffman.hpp
index 7233f6e6a7..a943256d45 100644
--- a/include/huffman.hpp
+++ b/include/huffman.hpp
@@ -166,11 +166,11 @@ template <typename Symbol> class HuffmanCode {
   HuffmanCode(const std::pair<Symbol, Symbol> &endpoints, const It begin,
               const It end);
 
-  //! Smallest and largest symbols (inclusive) to receive codewords.
+  //! Smallest and largest symbols (inclusive) eligible for codewords.
   std::pair<Symbol, Symbol> endpoints;
 
-  //! Number of symbols that will be assigned codewords (including one for the
-  //! 'missed' symbol).
+  //! Number of symbols eligible for codewords (including one for the 'missed'
+  //! symbol).
   std::size_t ncodewords;
 
   //! Frequencies of the symbols in the input stream.
@@ -179,10 +179,16 @@ template <typename Symbol> class HuffmanCode {
   //! Codewords associated to the symbols.
   std::vector<HuffmanCodeword> codewords;
 
+  //! Report the number of symbols in the stream.
+  std::size_t nsymbols() const;
+
   //! Report the number of out-of-range symbols encountered in the stream or
   //! given in the frequency table pairs.
   std::size_t nmissed() const;
 
+  //! Report the size in bits of the encoded stream.
+  std::size_t nbits_hit() const;
+
   //! Check whether a symbol is eligible for a codeword.
   bool out_of_range(const Symbol symbol) const;
 
diff --git a/include/huffman.tpp b/include/huffman.tpp
index 64e506051b..29239ac0fc 100644
--- a/include/huffman.tpp
+++ b/include/huffman.tpp
@@ -149,10 +149,23 @@ HuffmanCode<Symbol>::HuffmanCode(const std::pair<Symbol, Symbol> &endpoints,
   recursively_set_codewords(queue.top(), {});
 }
 
+template <typename Symbol> std::size_t HuffmanCode<Symbol>::nsymbols() const {
+  return std::accumulate(frequencies.begin(), frequencies.end(),
+                         static_cast<std::size_t>(0));
+}
+
 template <typename Symbol> std::size_t HuffmanCode<Symbol>::nmissed() const {
   return frequencies.at(0);
 }
 
+template <typename Symbol> std::size_t HuffmanCode<Symbol>::nbits_hit() const {
+  std::size_t nbits = 0;
+  for (std::size_t i = 0; i < ncodewords; ++i) {
+    nbits += frequencies.at(i) * codewords.at(i).length;
+  }
+  return nbits;
+}
+
 template <typename Symbol>
 bool HuffmanCode<Symbol>::out_of_range(const Symbol symbol) const {
   return symbol < endpoints.first or symbol > endpoints.second;
@@ -403,13 +416,7 @@ MemoryBuffer<unsigned char> huffman_encode(Symbol const *const begin,
                                            const std::size_t n) {
   const HuffmanCode<Symbol> code(begin, begin + n);
 
-  std::vector<std::size_t> lengths;
-  for (const HuffmanCodeword &codeword : code.codewords) {
-    lengths.push_back(codeword.length);
-  }
-  const std::size_t nbits =
-      std::inner_product(code.frequencies.begin(), code.frequencies.end(),
-                         lengths.begin(), static_cast<std::size_t>(0));
+  const std::size_t nbits = code.nbits_hit();
   const std::size_t nbytes_hit = (nbits + CHAR_BIT - 1) / CHAR_BIT;
 
   pb::HuffmanHeader header;
@@ -568,11 +575,8 @@ MemoryBuffer<Symbol> huffman_decode(const MemoryBuffer<unsigned char> &buffer) {
   const HuffmanCode<Symbol> code(endpoints,
                                  chained_frequency_supertable.begin(),
                                  chained_frequency_supertable.end());
-  // TODO: Maybe add a member function for this.
-  const std::size_t nout =
-      std::accumulate(code.frequencies.begin(), code.frequencies.end(),
-                      static_cast<std::size_t>(0));
-  MemoryBuffer<Symbol> out(nout);
+  const std::size_t nsymbols = code.nsymbols();
+  MemoryBuffer<Symbol> out(nsymbols);
   Symbol *q = out.data.get();
 
   const Bits bits(window.current, window.current + nbits / CHAR_BIT,
@@ -581,7 +585,7 @@ MemoryBuffer<Symbol> huffman_decode(const MemoryBuffer<unsigned char> &buffer) {
   const typename HuffmanCode<Symbol>::Node root = code.queue.top();
   assert(root);
   Bits::iterator b = bits.begin();
-  for (std::size_t i = 0; i < nout; ++i) {
+  for (std::size_t i = 0; i < nsymbols; ++i) {
     typename HuffmanCode<Symbol>::Node node;
     for (node = root; node->left;
          node = *b++ ? node->right : node->left, ++nbits_read)
diff --git a/src/huffman.cpp b/src/huffman.cpp
index a9f9fbca1e..2f771c3969 100644
--- a/src/huffman.cpp
+++ b/src/huffman.cpp
@@ -248,13 +248,7 @@ HuffmanEncodedStream huffman_encoding(long int const *const quantized_data,
   const HuffmanCode<Symbol> code(nql_endpoints, quantized_data,
                                  quantized_data + n);
 
-  std::vector<std::size_t> lengths;
-  for (const HuffmanCodeword &codeword : code.codewords) {
-    lengths.push_back(codeword.length);
-  }
-  const std::size_t nbits =
-      std::inner_product(code.frequencies.begin(), code.frequencies.end(),
-                         lengths.begin(), static_cast<std::size_t>(0));
+  const std::size_t nbits = code.nbits_hit();
   const std::size_t nnz =
       code.ncodewords -
       std::count(code.frequencies.begin(), code.frequencies.end(), 0);

From aeb89a97641f9c3580ddea882c4bb47dd2f892fc Mon Sep 17 00:00:00 2001
From: Jieyang Chen <cjy7117@gmail.com>
Date: Thu, 14 Jul 2022 16:33:56 -0400
Subject: [PATCH 56/58] Fix CPU lossless in MGARD-X

---
 .../mgard-x/CompressionHighLevel/Metadata.hpp |  20 +-
 include/mgard-x/Lossless/CPU.hpp              | 215 ++----------------
 include/utilities.tpp                         |   4 +
 3 files changed, 47 insertions(+), 192 deletions(-)

diff --git a/include/mgard-x/CompressionHighLevel/Metadata.hpp b/include/mgard-x/CompressionHighLevel/Metadata.hpp
index a0111629ce..38519d6f9e 100644
--- a/include/mgard-x/CompressionHighLevel/Metadata.hpp
+++ b/include/mgard-x/CompressionHighLevel/Metadata.hpp
@@ -469,7 +469,15 @@ template <typename DeviceType> struct Metadata {
       mgard::pb::Quantization &quantization = *header.mutable_quantization();
       quantization.set_method(mgard::pb::Quantization::COEFFICIENTWISE_LINEAR);
       quantization.set_bin_widths(mgard::pb::Quantization::PER_COEFFICIENT);
-      quantization.set_type(mgard::pb::Quantization::INT64_T);
+      if (std::is_same<QUANTIZED_INT, std::int8_t>::value) {
+        quantization.set_type(mgard::pb::Quantization::INT8_T);
+      } else if (std::is_same<QUANTIZED_INT, std::int16_t>::value) {
+        quantization.set_type(mgard::pb::Quantization::INT16_T);
+      } else if (std::is_same<QUANTIZED_INT, std::int32_t>::value) {
+        quantization.set_type(mgard::pb::Quantization::INT32_T);
+      } else if (std::is_same<QUANTIZED_INT, std::int64_t>::value) {
+        quantization.set_type(mgard::pb::Quantization::INT64_T);
+      }
       quantization.set_big_endian(big_endian<std::int64_t>());
       if (big_endian<std::int64_t>()) {
         etype = endiness_type::Big_Endian;
@@ -710,7 +718,15 @@ template <typename DeviceType> struct Metadata {
              mgard::pb::Quantization::COEFFICIENTWISE_LINEAR);
       assert(quantization.bin_widths() ==
              mgard::pb::Quantization::PER_COEFFICIENT);
-      assert(quantization.type() == mgard::pb::Quantization::INT64_T);
+      if (std::is_same<QUANTIZED_INT, std::int8_t>::value) {
+        assert(quantization.type() == mgard::pb::Quantization::INT8_T);
+      } else if (std::is_same<QUANTIZED_INT, std::int16_t>::value) {
+        assert(quantization.type() == mgard::pb::Quantization::INT16_T);
+      } else if (std::is_same<QUANTIZED_INT, std::int32_t>::value) {
+        assert(quantization.type() == mgard::pb::Quantization::INT32_T);
+      } else if (std::is_same<QUANTIZED_INT, std::int64_t>::value) {
+        assert(quantization.type() == mgard::pb::Quantization::INT64_T);
+      }
       assert(quantization.big_endian() == big_endian<std::int64_t>());
       if (big_endian<std::int64_t>()) {
         etype = endiness_type::Big_Endian;
diff --git a/include/mgard-x/Lossless/CPU.hpp b/include/mgard-x/Lossless/CPU.hpp
index 9171f87678..2583e155db 100644
--- a/include/mgard-x/Lossless/CPU.hpp
+++ b/include/mgard-x/Lossless/CPU.hpp
@@ -1,181 +1,33 @@
 #ifndef MGARD_X_CPU_LOSSLESS_TEMPLATE_HPP
 #define MGARD_X_CPU_LOSSLESS_TEMPLATE_HPP
 
-#include <zstd.h>
-
-/*! CHECK
- * Check that the condition holds. If it doesn't print a message and die.
- */
-#define CHECK(cond, ...)                                                       \
-  do {                                                                         \
-    if (!(cond)) {                                                             \
-      fprintf(stderr, "%s:%d CHECK(%s) failed: ", __FILE__, __LINE__, #cond);  \
-      fprintf(stderr, "" __VA_ARGS__);                                         \
-      fprintf(stderr, "\n");                                                   \
-      exit(1);                                                                 \
-    }                                                                          \
-  } while (0)
-
-/*! CHECK_ZSTD
- * Check the zstd error code and die if an error occurred after printing a
- * message.
- */
-/*! CHECK_ZSTD
- * Check the zstd error code and die if an error occurred after printing a
- * message.
- */
-#define CHECK_ZSTD(fn, ...)                                                    \
-  do {                                                                         \
-    size_t const err = (fn);                                                   \
-    CHECK(!ZSTD_isError(err), "%s", ZSTD_getErrorName(err));                   \
-  } while (0)
-
-namespace mgard {
-void huffman_encoding(long int *quantized_data, const std::size_t n,
-                      unsigned char **out_data_hit, size_t *out_data_hit_size,
-                      unsigned char **out_data_miss, size_t *out_data_miss_size,
-                      unsigned char **out_tree, size_t *out_tree_size);
-void huffman_decoding(long int *quantized_data,
-                      const std::size_t quantized_data_size,
-                      unsigned char *out_data_hit, size_t out_data_hit_size,
-                      unsigned char *out_data_miss, size_t out_data_miss_size,
-                      unsigned char *out_tree, size_t out_tree_size);
-} // namespace mgard
+#include "proto/mgard.pb.h"
+#include <lossless.hpp>
 
 namespace mgard_x {
 
-template <typename DeviceType>
-unsigned char *compress_memory_huffman(long int *const src,
-                                       const std::size_t srcLen,
-                                       std::size_t &outsize) {
-  unsigned char *out_data_hit = 0;
-  size_t out_data_hit_size;
-  unsigned char *out_data_miss = 0;
-  size_t out_data_miss_size;
-  unsigned char *out_tree = 0;
-  size_t out_tree_size;
-  ::mgard::huffman_encoding(src, srcLen, &out_data_hit, &out_data_hit_size,
-                            &out_data_miss, &out_data_miss_size, &out_tree,
-                            &out_tree_size);
-
-  const size_t total_size =
-      out_data_hit_size / 8 + 4 + out_data_miss_size + out_tree_size;
-  unsigned char *payload = (unsigned char *)malloc(total_size);
-  unsigned char *bufp = payload;
-
-  if (out_tree_size) {
-    std::memcpy(bufp, out_tree, out_tree_size);
-    bufp += out_tree_size;
+template <typename C> mgard::pb::Header setup_header() {
+  mgard::pb::Header header;
+  mgard::pb::Quantization &q = *header.mutable_quantization();
+  if (std::is_same<C, std::int8_t>::value) {
+    q.set_type(mgard::pb::Quantization::INT8_T);
+  } else if (std::is_same<C, std::int16_t>::value) {
+    q.set_type(mgard::pb::Quantization::INT16_T);
+  } else if (std::is_same<C, std::int32_t>::value) {
+    q.set_type(mgard::pb::Quantization::INT32_T);
+  } else if (std::is_same<C, std::int64_t>::value) {
+    q.set_type(mgard::pb::Quantization::INT64_T);
   }
-
-  std::memcpy(bufp, out_data_hit, out_data_hit_size / 8 + 4);
-  bufp += out_data_hit_size / 8 + 4;
-
-  if (out_data_miss_size) {
-    std::memcpy(bufp, out_data_miss, out_data_miss_size);
-    bufp += out_data_miss_size;
-  }
-
-  free(out_tree);
-  free(out_data_hit);
-  free(out_data_miss);
-
-  const size_t cBuffSize = ZSTD_compressBound(total_size);
-  unsigned char *const zstd_buffer = new unsigned char[cBuffSize];
-
-  const std::size_t cSize =
-      ZSTD_compress(zstd_buffer, cBuffSize, payload, total_size, 1);
-  CHECK_ZSTD(cSize);
-
-  free(payload);
-  payload = 0;
-
-  const std::size_t bufferLen = 3 * sizeof(size_t) + cSize;
-  unsigned char *const buffer = new unsigned char[bufferLen];
-  outsize = bufferLen;
-
-  bufp = buffer;
-  *(size_t *)bufp = out_tree_size;
-  bufp += sizeof(size_t);
-
-  *(size_t *)bufp = out_data_hit_size;
-  bufp += sizeof(size_t);
-
-  *(size_t *)bufp = out_data_miss_size;
-  bufp += sizeof(size_t);
-
-  {
-    unsigned char const *const p = zstd_buffer;
-    std::copy(p, p + cSize, bufp);
-  }
-
-  {
-    unsigned char *buf = buffer;
-    out_tree_size = *(size_t *)buf;
-    buf += sizeof(size_t);
-
-    out_data_hit_size = *(size_t *)buf;
-    buf += sizeof(size_t);
-
-    out_data_miss_size = *(size_t *)buf;
-    buf += sizeof(size_t);
-  }
-
-  return buffer;
-}
-
-template <typename DeviceType>
-void decompress_memory_huffman(unsigned char *const src,
-                               const std::size_t srcLen, long int *const dst,
-                               const std::size_t dstLen) {
-  unsigned char *out_data_hit = 0;
-  size_t out_data_hit_size;
-  unsigned char *out_data_miss = 0;
-  size_t out_data_miss_size;
-  unsigned char *out_tree = 0;
-  size_t out_tree_size;
-
-  unsigned char *buf = src;
-
-  out_tree_size = *(size_t *)buf;
-  buf += sizeof(size_t);
-
-  out_data_hit_size = *(size_t *)buf;
-  buf += sizeof(size_t);
-
-  out_data_miss_size = *(size_t *)buf;
-  buf += sizeof(size_t);
-
-  size_t total_huffman_size =
-      out_tree_size + out_data_hit_size / 8 + 4 + out_data_miss_size;
-  unsigned char *huffman_encoding_p =
-      (unsigned char *)malloc(total_huffman_size);
-
-  size_t const dSize = ZSTD_decompress(huffman_encoding_p, total_huffman_size,
-                                       buf, srcLen - 3 * sizeof(size_t));
-  CHECK_ZSTD(dSize);
-
-  /* When zstd knows the content size, it will error if it doesn't match. */
-  CHECK(total_huffman_size == dSize,
-        "Impossible because zstd will check this condition!");
-
-  out_tree = huffman_encoding_p;
-  out_data_hit = huffman_encoding_p + out_tree_size;
-  out_data_miss =
-      huffman_encoding_p + out_tree_size + out_data_hit_size / 8 + 4;
-
-  mgard::huffman_decoding(dst, dstLen, out_data_hit, out_data_hit_size,
-                          out_data_miss, out_data_miss_size, out_tree,
-                          out_tree_size);
-
-  free(huffman_encoding_p);
+  mgard::pb::Encoding &e = *header.mutable_encoding();
+  // MGARD-X requires Zstd, so we always use CPU_HUFFMAN_ZSTD
+  e.set_compressor(mgard::pb::Encoding::CPU_HUFFMAN_ZSTD);
+  e.set_serialization(mgard::pb::Encoding::RFMH);
+  return header;
 }
 
 template <typename C, typename DeviceType>
 Array<1, Byte, DeviceType> CPUCompress(SubArray<1, C, DeviceType> &input_data) {
 
-  // PrintSubarray("CPUCompress input", input_data);
-
   size_t input_count = input_data.getShape(0);
 
   C *in_data = NULL;
@@ -183,14 +35,10 @@ Array<1, Byte, DeviceType> CPUCompress(SubArray<1, C, DeviceType> &input_data) {
   MemoryManager<DeviceType>::Copy1D(in_data, input_data.data(), input_count, 0);
   DeviceRuntime<DeviceType>::SyncQueue(0);
 
-  std::vector<long int> qv(input_count);
-  for (size_t i = 0; i < input_count; i++) {
-    qv[i] = (long int)in_data[i];
-  }
-
-  std::size_t actual_out_size;
-  unsigned char *lossless_data = compress_memory_huffman<DeviceType>(
-      qv.data(), qv.size(), actual_out_size);
+  mgard::MemoryBuffer<unsigned char> lossless_data_buffer =
+      mgard::compress(setup_header<C>(), in_data, input_count * sizeof(C));
+  unsigned char *lossless_data = lossless_data_buffer.data.get();
+  std::size_t actual_out_size = lossless_data_buffer.size;
 
   uint8_t *out_data = NULL;
   MemoryManager<DeviceType>::MallocHost(out_data,
@@ -205,10 +53,6 @@ Array<1, Byte, DeviceType> CPUCompress(SubArray<1, C, DeviceType> &input_data) {
 
   MemoryManager<DeviceType>::FreeHost(out_data);
   MemoryManager<DeviceType>::FreeHost(in_data);
-  delete[] lossless_data;
-
-  // PrintSubarray("CPUCompress output", SubArray(output_data));
-
   return output_data;
 }
 
@@ -216,7 +60,6 @@ template <typename C, typename DeviceType>
 Array<1, C, DeviceType>
 CPUDecompress(SubArray<1, Byte, DeviceType> &input_data) {
 
-  // PrintSubarray("CPUDecompress input", input_data);
   size_t input_count = input_data.getShape(0);
   Byte *in_data = NULL;
   MemoryManager<DeviceType>::MallocHost(in_data, input_count, 0);
@@ -225,19 +68,13 @@ CPUDecompress(SubArray<1, Byte, DeviceType> &input_data) {
 
   uint32_t actual_out_count = 0;
   actual_out_count = *reinterpret_cast<const size_t *>(in_data);
-  // *oriData = (uint8_t*)malloc(outSize);
   C *out_data = NULL;
   MemoryManager<DeviceType>::MallocHost(out_data, actual_out_count, 0);
   DeviceRuntime<DeviceType>::SyncQueue(0);
 
-  long int *qv = new long int[actual_out_count];
-  size_t out_size = actual_out_count * sizeof(long int);
-  decompress_memory_huffman<DeviceType>(
-      in_data + sizeof(size_t), input_count - sizeof(size_t), qv, out_size);
-
-  for (size_t i = 0; i < actual_out_count; i++) {
-    out_data[i] = (C)qv[i];
-  }
+  mgard::decompress(setup_header<C>(), in_data + sizeof(size_t),
+                    input_count - sizeof(size_t), out_data,
+                    actual_out_count * sizeof(C));
 
   Array<1, C, DeviceType> output_data({(SIZE)actual_out_count});
   output_data.load(out_data);
@@ -245,8 +82,6 @@ CPUDecompress(SubArray<1, Byte, DeviceType> &input_data) {
   MemoryManager<DeviceType>::FreeHost(out_data);
   MemoryManager<DeviceType>::FreeHost(in_data);
 
-  // PrintSubarray("CPUDecompress output", SubArray(output_data));
-
   return output_data;
 }
 
diff --git a/include/utilities.tpp b/include/utilities.tpp
index 6260fa5af4..296e1f6f0c 100644
--- a/include/utilities.tpp
+++ b/include/utilities.tpp
@@ -201,6 +201,8 @@ bool operator!=(const RangeSlice<It> &a, const RangeSlice<It> &b) {
   return !operator==(a, b);
 }
 
+#ifndef __NVCC__
+
 template <typename T, std::size_t N>
 CartesianProduct<T, N>::CartesianProduct(const std::array<T, N> factors)
     : factors(factors) {
@@ -325,6 +327,8 @@ typename CartesianProduct<T, N>::iterator::reference
   return value;
 }
 
+#endif
+
 template <std::size_t N>
 void check_dimension_index_bounds(const std::size_t dimension) {
   if (dimension >= N) {

From eed1eb17c0ea5803571377daf28635bf25aaa4b5 Mon Sep 17 00:00:00 2001
From: Jieyang Chen <cjy7117@gmail.com>
Date: Mon, 18 Jul 2022 10:44:01 -0400
Subject: [PATCH 57/58] =?UTF-8?q?Put=20CartesianProduct=20and=20CartesianP?=
 =?UTF-8?q?roduct::iterator=20in=20#ifndef=20=5F=5FNVCC=5F=5F=20=E2=80=93?=
 =?UTF-8?q?=20#endif=20block?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/utilities.hpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/utilities.hpp b/include/utilities.hpp
index 43b005a0d4..f687277eaa 100644
--- a/include/utilities.hpp
+++ b/include/utilities.hpp
@@ -293,6 +293,8 @@ bool operator==(const RangeSlice<It> &a, const RangeSlice<It> &b);
 template <typename It>
 bool operator!=(const RangeSlice<It> &a, const RangeSlice<It> &b);
 
+#ifndef __NVCC__
+
 //! Mimic Python's `itertools.product`. Allow iteration over the Cartesian
 //! product of a collection of ranges.
 //!
@@ -411,6 +413,8 @@ template <typename T, std::size_t N> class CartesianProduct<T, N>::iterator {
   std::array<T_iterator, N> inner;
 };
 
+#endif
+
 //! Check that a dimension index is in bounds.
 //!
 //!\param dimension Dimension index.

From 4ef023d0affabc2023ab3c575af9c798055ac8d9 Mon Sep 17 00:00:00 2001
From: Ben Whitney <whitneybe@ornl.gov>
Date: Tue, 19 Jul 2022 12:32:51 -0400
Subject: [PATCH 58/58] Add quantization type function template.

---
 include/format.hpp                            |  5 ++
 .../mgard-x/CompressionHighLevel/Metadata.hpp | 53 +++++--------------
 src/format.cpp                                | 16 ++++++
 tests/src/test_format.cpp                     | 11 ++++
 4 files changed, 45 insertions(+), 40 deletions(-)

diff --git a/include/format.hpp b/include/format.hpp
index f3b166ffac..cbc7da334f 100644
--- a/include/format.hpp
+++ b/include/format.hpp
@@ -82,6 +82,11 @@ template <typename Int> bool big_endian();
 //!\return `Dataset::Type` corresponding to `Real`.
 template <typename Real> pb::Dataset::Type type_to_dataset_type();
 
+//! Return the `Quantization::Type` value corresponding to an integral type.
+//!
+//!\return `Quantization::Type` corresponding to `Int`.
+template <typename Int> pb::Quantization::Type type_to_quantization_type();
+
 //! Allocate a quantization buffer of the proper alignment and size.
 //!
 //!\param header Self-describing dataset header.
diff --git a/include/mgard-x/CompressionHighLevel/Metadata.hpp b/include/mgard-x/CompressionHighLevel/Metadata.hpp
index 38519d6f9e..3ba5bb1dd4 100644
--- a/include/mgard-x/CompressionHighLevel/Metadata.hpp
+++ b/include/mgard-x/CompressionHighLevel/Metadata.hpp
@@ -86,11 +86,8 @@ template <typename DeviceType> struct Metadata {
     std::cout << "Metadata size: " << metadata_size << "\n";
     std::cout << "Metadata crc32: " << metadata_crc32 << "\n";
     std::cout << "Endiness: ";
-    if (etype == endiness_type::Big_Endian) {
-      std::cout << "Big Endian\n";
-    } else {
-      std::cout << "Little Endian\n";
-    }
+    std::cout << (etype == endiness_type::Big_Endian ? "Big Endian\n"
+                                                     : "Little Endian\n");
     std::cout << "Data type: ";
     if (dtype == data_type::Float) {
       std::cout << "Float\n";
@@ -173,11 +170,8 @@ template <typename DeviceType> struct Metadata {
 
 private:
   SERIALIZED_TYPE *SerializeAll(uint32_t &total_size) {
-    if (big_endian<std::int64_t>()) {
-      etype = endiness_type::Big_Endian;
-    } else {
-      etype = endiness_type::Little_Endian;
-    }
+    etype = big_endian<QUANTIZED_INT>() ? endiness_type::Big_Endian
+                                        : endiness_type::Little_Endian;
     total_size = 0;
 
     // about MGARD software
@@ -469,21 +463,10 @@ template <typename DeviceType> struct Metadata {
       mgard::pb::Quantization &quantization = *header.mutable_quantization();
       quantization.set_method(mgard::pb::Quantization::COEFFICIENTWISE_LINEAR);
       quantization.set_bin_widths(mgard::pb::Quantization::PER_COEFFICIENT);
-      if (std::is_same<QUANTIZED_INT, std::int8_t>::value) {
-        quantization.set_type(mgard::pb::Quantization::INT8_T);
-      } else if (std::is_same<QUANTIZED_INT, std::int16_t>::value) {
-        quantization.set_type(mgard::pb::Quantization::INT16_T);
-      } else if (std::is_same<QUANTIZED_INT, std::int32_t>::value) {
-        quantization.set_type(mgard::pb::Quantization::INT32_T);
-      } else if (std::is_same<QUANTIZED_INT, std::int64_t>::value) {
-        quantization.set_type(mgard::pb::Quantization::INT64_T);
-      }
-      quantization.set_big_endian(big_endian<std::int64_t>());
-      if (big_endian<std::int64_t>()) {
-        etype = endiness_type::Big_Endian;
-      } else {
-        etype = endiness_type::Little_Endian;
-      }
+      quantization.set_type(mgard::type_to_quantization_type<QUANTIZED_INT>());
+      quantization.set_big_endian(big_endian<QUANTIZED_INT>());
+      etype = big_endian<QUANTIZED_INT>() ? endiness_type::Big_Endian
+                                          : endiness_type::Little_Endian;
     }
 
     { // Encoding
@@ -718,21 +701,11 @@ template <typename DeviceType> struct Metadata {
              mgard::pb::Quantization::COEFFICIENTWISE_LINEAR);
       assert(quantization.bin_widths() ==
              mgard::pb::Quantization::PER_COEFFICIENT);
-      if (std::is_same<QUANTIZED_INT, std::int8_t>::value) {
-        assert(quantization.type() == mgard::pb::Quantization::INT8_T);
-      } else if (std::is_same<QUANTIZED_INT, std::int16_t>::value) {
-        assert(quantization.type() == mgard::pb::Quantization::INT16_T);
-      } else if (std::is_same<QUANTIZED_INT, std::int32_t>::value) {
-        assert(quantization.type() == mgard::pb::Quantization::INT32_T);
-      } else if (std::is_same<QUANTIZED_INT, std::int64_t>::value) {
-        assert(quantization.type() == mgard::pb::Quantization::INT64_T);
-      }
-      assert(quantization.big_endian() == big_endian<std::int64_t>());
-      if (big_endian<std::int64_t>()) {
-        etype = endiness_type::Big_Endian;
-      } else {
-        etype = endiness_type::Little_Endian;
-      }
+      assert(quantization.type() ==
+             mgard::type_to_quantization_type<QUANTIZED_INT>());
+      assert(quantization.big_endian() == big_endian<QUANTIZED_INT>());
+      etype = big_endian<QUANTIZED_INT>() ? endiness_type::Big_Endian
+                                          : endiness_type::Little_Endian;
     }
 
     { // Encoding
diff --git a/src/format.cpp b/src/format.cpp
index e9cda8e756..6056c8d262 100644
--- a/src/format.cpp
+++ b/src/format.cpp
@@ -82,6 +82,22 @@ template <> pb::Dataset::Type type_to_dataset_type<double>() {
   return pb::Dataset::DOUBLE;
 }
 
+template <> pb::Quantization::Type type_to_quantization_type<std::int8_t>() {
+  return pb::Quantization::INT8_T;
+}
+
+template <> pb::Quantization::Type type_to_quantization_type<std::int16_t>() {
+  return pb::Quantization::INT16_T;
+}
+
+template <> pb::Quantization::Type type_to_quantization_type<std::int32_t>() {
+  return pb::Quantization::INT32_T;
+}
+
+template <> pb::Quantization::Type type_to_quantization_type<std::int64_t>() {
+  return pb::Quantization::INT64_T;
+}
+
 MemoryBuffer<unsigned char> quantization_buffer(const pb::Header &header,
                                                 const std::size_t ndof) {
   static_assert(CHAR_BIT == 8, "unexpected number of bits in a byte");
diff --git a/tests/src/test_format.cpp b/tests/src/test_format.cpp
index 47055e6ac1..48c0751c87 100644
--- a/tests/src/test_format.cpp
+++ b/tests/src/test_format.cpp
@@ -180,6 +180,17 @@ TEST_CASE("dataset types", "[format]") {
   REQUIRE(mgard::type_to_dataset_type<double>() == mgard::pb::Dataset::DOUBLE);
 }
 
+TEST_CASE("quantization types", "[format]") {
+  REQUIRE(mgard::type_to_quantization_type<std::int8_t>() ==
+          mgard::pb::Quantization::INT8_T);
+  REQUIRE(mgard::type_to_quantization_type<std::int16_t>() ==
+          mgard::pb::Quantization::INT16_T);
+  REQUIRE(mgard::type_to_quantization_type<std::int32_t>() ==
+          mgard::pb::Quantization::INT32_T);
+  REQUIRE(mgard::type_to_quantization_type<std::int64_t>() ==
+          mgard::pb::Quantization::INT64_T);
+}
+
 namespace {
 
 void test_quantization_buffer(const mgard::pb::Quantization::Type type,