From ddc734c6f16510306b524891c094ef02d57717fc Mon Sep 17 00:00:00 2001 From: Thomas Hall Date: Tue, 17 Mar 2026 21:38:47 -0400 Subject: [PATCH 1/2] feature: update mechanism.py to work with opendp noise mechanisms for aim and mst --- mechanisms/aim.py | 8 +-- mechanisms/cdp2adp.py | 1 - mechanisms/mechanism.py | 73 ++++++++++++++++--------- mechanisms/mst.py | 117 +++++++++++++++++++++++----------------- 4 files changed, 120 insertions(+), 79 deletions(-) diff --git a/mechanisms/aim.py b/mechanisms/aim.py index 6f5e4a6..e01c6be 100644 --- a/mechanisms/aim.py +++ b/mechanisms/aim.py @@ -75,8 +75,9 @@ def __init__( max_model_size=80, max_iters=1000, structural_zeros={}, + bounded=False, ): - super(AIM, self).__init__(epsilon, delta, prng) + super(AIM, self).__init__(epsilon, delta, bounded=bounded, prng=prng) self.rounds = rounds self.max_iters = max_iters self.max_model_size = max_model_size @@ -110,7 +111,7 @@ def run(self, data, workload, num_synth_rows=None, initial_cliques=None): oneway = [cl for cl in candidates if len(cl) == 1] - sigma = np.sqrt(rounds / (2 * 0.9 * self.rho)) + sigma = self.gaussian_noise_scale_zcdp(1.0, 0.9 * self.rho / rounds) epsilon = np.sqrt(8 * 0.1 * self.rho / rounds) measurements = [] @@ -134,7 +135,7 @@ def run(self, data, workload, num_synth_rows=None, initial_cliques=None): if self.rho - rho_used < 2 * (0.5 / sigma**2 + 1.0 / 8 * epsilon**2): # Just use up whatever remaining budget there is for one last round remaining = self.rho - rho_used - sigma = np.sqrt(1 / (2 * 0.9 * remaining)) + sigma = self.gaussian_noise_scale_zcdp(1.0, 0.9 * remaining) epsilon = np.sqrt(8 * 0.1 * remaining) terminate = True @@ -198,7 +199,6 @@ def default_params(): if __name__ == "__main__": - description = "" formatter = argparse.ArgumentDefaultsHelpFormatter parser = argparse.ArgumentParser(description=description, formatter_class=formatter) diff --git a/mechanisms/cdp2adp.py b/mechanisms/cdp2adp.py index e1bae47..8b99606 100644 --- 
a/mechanisms/cdp2adp.py +++ b/mechanisms/cdp2adp.py @@ -20,7 +20,6 @@ # - Thomas Steinke dgauss@thomas-steinke.net 2020 import math -import matplotlib.pyplot as plt # ********************************************************************* diff --git a/mechanisms/mechanism.py b/mechanisms/mechanism.py index 8cd841d..2ef1d9b 100644 --- a/mechanisms/mechanism.py +++ b/mechanisms/mechanism.py @@ -1,8 +1,9 @@ import numpy as np -#from autodp import privacy_calibrator from functools import partial from cdp2adp import cdp_rho -from scipy.special import softmax +import opendp.prelude as dp + +dp.enable_features("contrib", "floating-point") def pareto_efficient(costs): @@ -26,7 +27,7 @@ def generalized_em_scores(q, ds, t): class Mechanism: - def __init__(self, epsilon, delta, bounded, prng=np.random): + def __init__(self, epsilon, delta, bounded, prng=None): """ Base class for a mechanism. :param epsilon: privacy parameter @@ -38,7 +39,7 @@ def __init__(self, epsilon, delta, bounded, prng=np.random): self.delta = delta self.rho = 0 if delta == 0 else cdp_rho(epsilon, delta) self.bounded = bounded - self.prng = prng + self.prng = prng if prng is not None else np.random def run(self, dataset, workload): pass @@ -63,19 +64,20 @@ def generalized_exponential_mechanism( return keys[key] def permute_and_flip(self, qualities, epsilon, sensitivity=1.0): - """ Sample a candidate from the permute-and-flip mechanism """ - q = qualities - qualities.max() - p = np.exp(0.5 * epsilon / sensitivity * q) - for i in np.random.permutation(p.size): - if np.random.rand() <= p[i]: - return i + """Sample a candidate from the permute-and-flip mechanism""" + qualities = np.array(qualities) + scale = 2.0 * sensitivity / epsilon + input_space = ( + dp.vector_domain(dp.atom_domain(T=float, nan=False)), + dp.linf_distance(T=float), + ) + noisy_max = dp.m.make_noisy_max(*input_space, dp.max_divergence(), scale=scale) + return noisy_max(qualities.tolist()) def exponential_mechanism( self, qualities, 
epsilon, sensitivity=1.0, base_measure=None ): if isinstance(qualities, dict): - # import pandas as pd - # print(pd.Series(list(qualities.values()), list(qualities.keys())).sort_values().tail()) keys = list(qualities.keys()) qualities = np.array([qualities[key] for key in keys]) if base_measure is not None: @@ -85,31 +87,52 @@ def exponential_mechanism( keys = np.arange(qualities.size) """ Sample a candidate from the permute-and-flip mechanism """ - q = qualities - qualities.max() - if base_measure is None: - p = softmax(0.5 * epsilon / sensitivity * q) - else: - p = softmax(0.5 * epsilon / sensitivity * q + base_measure) + scale = 2.0 * sensitivity / epsilon + input_space = ( + dp.vector_domain(dp.atom_domain(T=float, nan=False)), + dp.linf_distance(T=float), + ) + noisy_max = dp.m.make_noisy_max(*input_space, dp.max_divergence(), scale=scale) + + if base_measure is not None: + qualities = qualities + base_measure - return keys[self.prng.choice(p.size, p=p)] + idx = noisy_max(qualities.tolist()) + return keys[idx] def gaussian_noise_scale(self, l2_sensitivity, epsilon, delta): - """ Return the Gaussian noise necessary to attain (epsilon, delta)-DP """ - raise ValueError('Temporarily Deprecated due to broken dependency') + """Return the Gaussian noise necessary to attain (epsilon, delta)-DP""" + if self.bounded: + l2_sensitivity *= 2.0 + return l2_sensitivity * np.sqrt(2 * np.log(1.25 / delta)) / epsilon + + def gaussian_noise_scale_zcdp(self, l2_sensitivity, rho): + """Return the Gaussian noise necessary to attain zCDP with rho""" + if self.bounded: + l2_sensitivity *= 2.0 + return l2_sensitivity / np.sqrt(2 * rho) def laplace_noise_scale(self, l1_sensitivity, epsilon): - """ Return the Laplace noise necessary to attain epsilon-DP """ + """Return the Laplace noise necessary to attain epsilon-DP""" if self.bounded: l1_sensitivity *= 2.0 return l1_sensitivity / epsilon def gaussian_noise(self, sigma, size): - """ Generate iid Gaussian noise of a given scale and size 
""" - return self.prng.normal(0, sigma, size) + """Generate iid Gaussian noise of a given scale and size""" + measure = dp.m.make_gaussian( + dp.atom_domain(T=float, nan=False), + dp.absolute_distance(T=float), + scale=sigma, + ) + return np.array([measure(0.0) for _ in range(size)]) def laplace_noise(self, b, size): - """ Generate iid Laplace noise of a given scale and size """ - return self.prng.laplace(0, b, size) + """Generate iid Laplace noise of a given scale and size""" + measure = dp.m.make_laplace( + dp.atom_domain(T=float, nan=False), dp.absolute_distance(T=float), scale=b + ) + return np.array([measure(0.0) for _ in range(size)]) def best_noise_distribution(self, l1_sensitivity, l2_sensitivity, epsilon, delta): """ Adaptively determine if Laplace or Gaussian noise will be better, and diff --git a/mechanisms/mst.py b/mechanisms/mst.py index 8f59a51..3a00831 100644 --- a/mechanisms/mst.py +++ b/mechanisms/mst.py @@ -4,6 +4,7 @@ from scipy.cluster.hierarchy import DisjointSet import networkx as nx import itertools +from mechanism import Mechanism from cdp2adp import cdp_rho from scipy.special import logsumexp import argparse @@ -19,18 +20,75 @@ and does not rely on public provisional data for measurement selection. 
+ESTIMATION_ITERATIONS = 10_000 +SELECT_ITERATIONS = 2500
ds.merge(*e) + + return list(T.edges) + + def MST(data, epsilon, delta): - rho = cdp_rho(epsilon, delta) - sigma = np.sqrt(3 / (2 * rho)) - cliques = [(col,) for col in data.domain] - log1 = measure(data, cliques, sigma) - data, log1, undo_compress_fn = compress_domain(data, log1) - cliques = select(data, rho / 3.0, log1) - log2 = measure(data, cliques, sigma) - est = estimation.mirror_descent(data.domain, log1+log2, iters=10000) - synth = est.synthetic_data() - return undo_compress_fn(synth) + mech = MSTMechanism(epsilon, delta) + return mech.run(data) def measure(data, cliques, sigma, weights=None): @@ -67,45 +125,6 @@ def compress_domain(data, measurements): return transform_data(data, supports), new_measurements, undo_compress_fn -def exponential_mechanism(q, eps, sensitivity, prng=np.random, monotonic=False): - coef = 1.0 if monotonic else 0.5 - scores = coef * eps / sensitivity * q - probas = np.exp(scores - logsumexp(scores)) - return prng.choice(q.size, p=probas) - - -def select(data, rho, measurement_log, cliques=[]): - - est = estimation.mirror_descent(data.domain, measurement_log, iters=2500) - - weights = {} - candidates = list(itertools.combinations(data.domain.attrs, 2)) - for a, b in candidates: - xhat = est.project([a, b]).datavector() - x = data.project([a, b]).datavector() - weights[a, b] = np.linalg.norm(x - xhat, 1) - - T = nx.Graph() - T.add_nodes_from(data.domain.attrs) - ds = DisjointSet(data.domain.attrs) - - for e in cliques: - T.add_edge(*e) - ds.merge(*e) - - r = len(list(nx.connected_components(T))) - epsilon = np.sqrt(8 * rho / (r - 1)) - for i in range(r - 1): - candidates = [e for e in candidates if not ds.connected(*e)] - wgts = np.array([weights[e] for e in candidates]) - idx = exponential_mechanism(wgts, epsilon, sensitivity=1.0) - e = candidates[idx] - T.add_edge(*e) - ds.merge(*e) - - return list(T.edges) - - def transform_data(data, supports): df = pd.DataFrame(data.to_dict(), columns=data.domain.attrs) newdom = {} From 
73bcd3ed4efffcf0b68de1edbba44a55409e8fe7 Mon Sep 17 00:00:00 2001 From: Thomas Hall Date: Tue, 17 Mar 2026 21:54:07 -0400 Subject: [PATCH 2/2] feature: remove old measure function --- mechanisms/mst.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/mechanisms/mst.py b/mechanisms/mst.py index 3a00831..22b39cf 100644 --- a/mechanisms/mst.py +++ b/mechanisms/mst.py @@ -91,18 +91,6 @@ def MST(data, epsilon, delta): return mech.run(data) -def measure(data, cliques, sigma, weights=None): - if weights is None: - weights = np.ones(len(cliques)) - weights = np.array(weights) / np.linalg.norm(weights) - measurements = [] - for proj, wgt in zip(cliques, weights): - x = data.project(proj).datavector() - y = x + np.random.normal(loc=0, scale=sigma / wgt, size=x.size) - measurements.append(LinearMeasurement(y, proj, sigma / wgt)) - return measurements - - def compress_domain(data, measurements): supports = {} new_measurements = []