From ddc734c6f16510306b524891c094ef02d57717fc Mon Sep 17 00:00:00 2001 From: Thomas Hall Date: Tue, 17 Mar 2026 21:38:47 -0400 Subject: [PATCH 1/2] feature: update mechanism.py to work with opendp noise mechanisms for aim and mst --- mechanisms/aim.py | 8 +-- mechanisms/cdp2adp.py | 1 - mechanisms/mechanism.py | 73 ++++++++++++++++--------- mechanisms/mst.py | 117 +++++++++++++++++++++++----------------- 4 files changed, 120 insertions(+), 79 deletions(-) diff --git a/mechanisms/aim.py b/mechanisms/aim.py index 6f5e4a6..e01c6be 100644 --- a/mechanisms/aim.py +++ b/mechanisms/aim.py @@ -75,8 +75,9 @@ def __init__( max_model_size=80, max_iters=1000, structural_zeros={}, + bounded=False, ): - super(AIM, self).__init__(epsilon, delta, prng) + super(AIM, self).__init__(epsilon, delta, bounded=bounded, prng=prng) self.rounds = rounds self.max_iters = max_iters self.max_model_size = max_model_size @@ -110,7 +111,7 @@ def run(self, data, workload, num_synth_rows=None, initial_cliques=None): oneway = [cl for cl in candidates if len(cl) == 1] - sigma = np.sqrt(rounds / (2 * 0.9 * self.rho)) + sigma = self.gaussian_noise_scale_zcdp(1.0, 0.9 * self.rho / rounds) epsilon = np.sqrt(8 * 0.1 * self.rho / rounds) measurements = [] @@ -134,7 +135,7 @@ def run(self, data, workload, num_synth_rows=None, initial_cliques=None): if self.rho - rho_used < 2 * (0.5 / sigma**2 + 1.0 / 8 * epsilon**2): # Just use up whatever remaining budget there is for one last round remaining = self.rho - rho_used - sigma = np.sqrt(1 / (2 * 0.9 * remaining)) + sigma = self.gaussian_noise_scale_zcdp(1.0, 0.9 * remaining) epsilon = np.sqrt(8 * 0.1 * remaining) terminate = True @@ -198,7 +199,6 @@ def default_params(): if __name__ == "__main__": - description = "" formatter = argparse.ArgumentDefaultsHelpFormatter parser = argparse.ArgumentParser(description=description, formatter_class=formatter) diff --git a/mechanisms/cdp2adp.py b/mechanisms/cdp2adp.py index e1bae47..8b99606 100644 --- 
a/mechanisms/cdp2adp.py +++ b/mechanisms/cdp2adp.py @@ -20,7 +20,6 @@ # - Thomas Steinke dgauss@thomas-steinke.net 2020 import math -import matplotlib.pyplot as plt # ********************************************************************* diff --git a/mechanisms/mechanism.py b/mechanisms/mechanism.py index 8cd841d..2ef1d9b 100644 --- a/mechanisms/mechanism.py +++ b/mechanisms/mechanism.py @@ -1,8 +1,9 @@ import numpy as np -#from autodp import privacy_calibrator from functools import partial from cdp2adp import cdp_rho -from scipy.special import softmax +import opendp.prelude as dp + +dp.enable_features("contrib", "floating-point") def pareto_efficient(costs): @@ -26,7 +27,7 @@ def generalized_em_scores(q, ds, t): class Mechanism: - def __init__(self, epsilon, delta, bounded, prng=np.random): + def __init__(self, epsilon, delta, bounded, prng=None): """ Base class for a mechanism. :param epsilon: privacy parameter @@ -38,7 +39,7 @@ def __init__(self, epsilon, delta, bounded, prng=np.random): self.delta = delta self.rho = 0 if delta == 0 else cdp_rho(epsilon, delta) self.bounded = bounded - self.prng = prng + self.prng = prng if prng is not None else np.random def run(self, dataset, workload): pass @@ -63,19 +64,20 @@ def generalized_exponential_mechanism( return keys[key] def permute_and_flip(self, qualities, epsilon, sensitivity=1.0): - """ Sample a candidate from the permute-and-flip mechanism """ - q = qualities - qualities.max() - p = np.exp(0.5 * epsilon / sensitivity * q) - for i in np.random.permutation(p.size): - if np.random.rand() <= p[i]: - return i + """Sample a candidate from the permute-and-flip mechanism""" + qualities = np.array(qualities) + scale = 2.0 * sensitivity / epsilon + input_space = ( + dp.vector_domain(dp.atom_domain(T=float, nan=False)), + dp.linf_distance(T=float), + ) + noisy_max = dp.m.make_noisy_max(*input_space, dp.max_divergence(), scale=scale) + return noisy_max(qualities.tolist()) def exponential_mechanism( self, qualities, 
epsilon, sensitivity=1.0, base_measure=None ): if isinstance(qualities, dict): - # import pandas as pd - # print(pd.Series(list(qualities.values()), list(qualities.keys())).sort_values().tail()) keys = list(qualities.keys()) qualities = np.array([qualities[key] for key in keys]) if base_measure is not None: @@ -85,31 +87,52 @@ def exponential_mechanism( keys = np.arange(qualities.size) """ Sample a candidate from the permute-and-flip mechanism """ - q = qualities - qualities.max() - if base_measure is None: - p = softmax(0.5 * epsilon / sensitivity * q) - else: - p = softmax(0.5 * epsilon / sensitivity * q + base_measure) + scale = 2.0 * sensitivity / epsilon + input_space = ( + dp.vector_domain(dp.atom_domain(T=float, nan=False)), + dp.linf_distance(T=float), + ) + noisy_max = dp.m.make_noisy_max(*input_space, dp.max_divergence(), scale=scale) + + if base_measure is not None: + qualities = qualities + base_measure - return keys[self.prng.choice(p.size, p=p)] + idx = noisy_max(qualities.tolist()) + return keys[idx] def gaussian_noise_scale(self, l2_sensitivity, epsilon, delta): - """ Return the Gaussian noise necessary to attain (epsilon, delta)-DP """ - raise ValueError('Temporarily Deprecated due to broken dependency') + """Return the Gaussian noise necessary to attain (epsilon, delta)-DP""" + if self.bounded: + l2_sensitivity *= 2.0 + return l2_sensitivity * np.sqrt(2 * np.log(1.25 / delta)) / epsilon + + def gaussian_noise_scale_zcdp(self, l2_sensitivity, rho): + """Return the Gaussian noise necessary to attain zCDP with rho""" + if self.bounded: + l2_sensitivity *= 2.0 + return l2_sensitivity / np.sqrt(2 * rho) def laplace_noise_scale(self, l1_sensitivity, epsilon): - """ Return the Laplace noise necessary to attain epsilon-DP """ + """Return the Laplace noise necessary to attain epsilon-DP""" if self.bounded: l1_sensitivity *= 2.0 return l1_sensitivity / epsilon def gaussian_noise(self, sigma, size): - """ Generate iid Gaussian noise of a given scale and size 
""" - return self.prng.normal(0, sigma, size) + """Generate iid Gaussian noise of a given scale and size""" + measure = dp.m.make_gaussian( + dp.atom_domain(T=float, nan=False), + dp.absolute_distance(T=float), + scale=sigma, + ) + return np.array([measure(0.0) for _ in range(size)]) def laplace_noise(self, b, size): - """ Generate iid Laplace noise of a given scale and size """ - return self.prng.laplace(0, b, size) + """Generate iid Laplace noise of a given scale and size""" + measure = dp.m.make_laplace( + dp.atom_domain(T=float, nan=False), dp.absolute_distance(T=float), scale=b + ) + return np.array([measure(0.0) for _ in range(size)]) def best_noise_distribution(self, l1_sensitivity, l2_sensitivity, epsilon, delta): """ Adaptively determine if Laplace or Gaussian noise will be better, and diff --git a/mechanisms/mst.py b/mechanisms/mst.py index 8f59a51..3a00831 100644 --- a/mechanisms/mst.py +++ b/mechanisms/mst.py @@ -4,6 +4,7 @@ from scipy.cluster.hierarchy import DisjointSet import networkx as nx import itertools +from mechanism import Mechanism from cdp2adp import cdp_rho from scipy.special import logsumexp import argparse @@ -19,18 +20,75 @@ and does not rely on public provisional data for measurement selection. 
+ESTIMATION_ITERATIONS = 10_000 +SELECT_ITERATIONS = 2500
ds.merge(*e) + + return list(T.edges) + + def MST(data, epsilon, delta): - rho = cdp_rho(epsilon, delta) - sigma = np.sqrt(3 / (2 * rho)) - cliques = [(col,) for col in data.domain] - log1 = measure(data, cliques, sigma) - data, log1, undo_compress_fn = compress_domain(data, log1) - cliques = select(data, rho / 3.0, log1) - log2 = measure(data, cliques, sigma) - est = estimation.mirror_descent(data.domain, log1+log2, iters=10000) - synth = est.synthetic_data() - return undo_compress_fn(synth) + mech = MSTMechanism(epsilon, delta) + return mech.run(data) def measure(data, cliques, sigma, weights=None): @@ -67,45 +125,6 @@ def compress_domain(data, measurements): return transform_data(data, supports), new_measurements, undo_compress_fn -def exponential_mechanism(q, eps, sensitivity, prng=np.random, monotonic=False): - coef = 1.0 if monotonic else 0.5 - scores = coef * eps / sensitivity * q - probas = np.exp(scores - logsumexp(scores)) - return prng.choice(q.size, p=probas) - - -def select(data, rho, measurement_log, cliques=[]): - - est = estimation.mirror_descent(data.domain, measurement_log, iters=2500) - - weights = {} - candidates = list(itertools.combinations(data.domain.attrs, 2)) - for a, b in candidates: - xhat = est.project([a, b]).datavector() - x = data.project([a, b]).datavector() - weights[a, b] = np.linalg.norm(x - xhat, 1) - - T = nx.Graph() - T.add_nodes_from(data.domain.attrs) - ds = DisjointSet(data.domain.attrs) - - for e in cliques: - T.add_edge(*e) - ds.merge(*e) - - r = len(list(nx.connected_components(T))) - epsilon = np.sqrt(8 * rho / (r - 1)) - for i in range(r - 1): - candidates = [e for e in candidates if not ds.connected(*e)] - wgts = np.array([weights[e] for e in candidates]) - idx = exponential_mechanism(wgts, epsilon, sensitivity=1.0) - e = candidates[idx] - T.add_edge(*e) - ds.merge(*e) - - return list(T.edges) - - def transform_data(data, supports): df = pd.DataFrame(data.to_dict(), columns=data.domain.attrs) newdom = {} From 
73bcd3ed4efffcf0b68de1edbba44a55409e8fe7 Mon Sep 17 00:00:00 2001 From: Thomas Hall Date: Tue, 17 Mar 2026 21:54:07 -0400 Subject: [PATCH 2/2] feature: remove old measure function --- mechanisms/mst.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/mechanisms/mst.py b/mechanisms/mst.py index 3a00831..22b39cf 100644 --- a/mechanisms/mst.py +++ b/mechanisms/mst.py @@ -91,18 +91,6 @@ def MST(data, epsilon, delta): return mech.run(data) -def measure(data, cliques, sigma, weights=None): - if weights is None: - weights = np.ones(len(cliques)) - weights = np.array(weights) / np.linalg.norm(weights) - measurements = [] - for proj, wgt in zip(cliques, weights): - x = data.project(proj).datavector() - y = x + np.random.normal(loc=0, scale=sigma / wgt, size=x.size) - measurements.append(LinearMeasurement(y, proj, sigma / wgt)) - return measurements - - def compress_domain(data, measurements): supports = {} new_measurements = []