Skip to content

Commit f613703

Browse files
authored
Merge pull request #12 from pmclSF/security/fix-critical-vulnerabilities
Fix critical/high security vulnerabilities + comprehensive test coverage
2 parents 2be8fb6 + 2dfa8bf commit f613703

8 files changed

Lines changed: 561 additions & 24 deletions

src/cli_train.py

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -70,7 +70,7 @@ def tune_hyperparameters(input_dir, output_dir, num_epochs=10):
7070
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
7171

7272
print("Best Hyperparameters:", best_hps.values)
73-
best_model.save(os.path.join(output_dir, 'best_model'))
73+
best_model.save_weights(os.path.join(output_dir, 'best_model.weights.h5'))
7474

7575
def main():
7676
parser = argparse.ArgumentParser(description="Train a point cloud compression model with hyperparameter tuning.")
@@ -94,7 +94,7 @@ def main():
9494
model.compile(optimizer='adam', loss='mean_squared_error')
9595
dataset = load_and_preprocess_data(args.input_dir, args.batch_size)
9696
model.fit(dataset, epochs=args.num_epochs)
97-
model.save(os.path.join(args.output_dir, 'trained_model'))
97+
model.save_weights(os.path.join(args.output_dir, 'trained_model.weights.h5'))
9898

9999
if __name__ == "__main__":
100100
main()

src/compress_octree.py

Lines changed: 36 additions & 8 deletions
Original file line number · Diff line number · Diff line change
@@ -181,19 +181,47 @@ def _save_debug_info(self, stage: str, data: Dict[str, Any]) -> None:
181181
os.makedirs(debug_dir, exist_ok=True)
182182

183183
for name, array in data.items():
184-
if isinstance(array, (np.ndarray, dict)):
184+
if isinstance(array, np.ndarray):
185185
np.save(os.path.join(debug_dir, f"{name}.npy"), array)
186186

187187
def save_compressed(self, grid: np.ndarray, metadata: Dict[str, Any], filename: str) -> None:
188188
"""Save compressed data with metadata."""
189-
os.makedirs(os.path.dirname(os.path.abspath(filename)), exist_ok=True)
190-
np.savez_compressed(filename, grid=grid, metadata=metadata)
189+
import json
190+
import math
191191

192-
if self.debug_output:
193-
debug_path = f"{filename}.debug.npz"
194-
np.savez_compressed(debug_path, **metadata)
192+
os.makedirs(os.path.dirname(os.path.abspath(filename)), exist_ok=True)
193+
# Save grid without pickle (bool array, no object dtype)
194+
np.savez_compressed(filename, grid=grid)
195+
# Save metadata as JSON sidecar (safe, no arbitrary code execution)
196+
meta_path = filename + '.meta.json'
197+
serializable = {}
198+
for k, v in metadata.items():
199+
if isinstance(v, np.ndarray):
200+
serializable[k] = v.tolist()
201+
elif isinstance(v, (np.floating, np.integer)):
202+
val = v.item()
203+
if isinstance(val, float) and (math.isnan(val) or math.isinf(val)):
204+
serializable[k] = None
205+
else:
206+
serializable[k] = val
207+
elif isinstance(v, float) and (math.isnan(v) or math.isinf(v)):
208+
serializable[k] = None
209+
else:
210+
serializable[k] = v
211+
with open(meta_path, 'w') as f:
212+
json.dump(serializable, f)
195213

196214
def load_compressed(self, filename: str) -> Tuple[np.ndarray, Dict[str, Any]]:
197215
"""Load compressed data with metadata."""
198-
data = np.load(filename, allow_pickle=True)
199-
return data['grid'], data['metadata'].item()
216+
import json
217+
218+
data = np.load(filename, allow_pickle=False)
219+
grid = data['grid']
220+
meta_path = filename + '.meta.json'
221+
with open(meta_path, 'r') as f:
222+
metadata = json.load(f)
223+
# Convert lists back to numpy arrays for known array fields
224+
for key in ('min_bounds', 'max_bounds', 'ranges', 'normal_grid'):
225+
if key in metadata:
226+
metadata[key] = np.array(metadata[key])
227+
return grid, metadata

src/evaluation_pipeline.py

Lines changed: 4 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -52,7 +52,10 @@ def _load_model(self) -> DeepCompressModel:
5252
# Load weights if checkpoint provided
5353
checkpoint_path = self.config.get('checkpoint_path')
5454
if checkpoint_path:
55-
model.load_weights(checkpoint_path)
55+
resolved = Path(checkpoint_path).resolve()
56+
if not resolved.exists():
57+
raise FileNotFoundError(f"Checkpoint not found: {resolved}")
58+
model.load_weights(str(resolved))
5659

5760
return model
5861

src/training_pipeline.py

Lines changed: 15 additions & 12 deletions
Original file line number · Diff line number · Diff line change
@@ -155,26 +155,29 @@ def save_checkpoint(self, name: str):
155155

156156
for opt_name, optimizer in self.optimizers.items():
157157
if optimizer.variables:
158-
opt_weights = [v.numpy() for v in optimizer.variables]
159-
np.save(
160-
str(checkpoint_path / f'{opt_name}_optimizer.npy'),
161-
np.array(opt_weights, dtype=object),
162-
allow_pickle=True,
163-
)
158+
opt_dir = checkpoint_path / f'{opt_name}_optimizer'
159+
opt_dir.mkdir(parents=True, exist_ok=True)
160+
for i, v in enumerate(optimizer.variables):
161+
np.save(str(opt_dir / f'{i}.npy'), v.numpy())
164162

165163
self.logger.info(f"Saved checkpoint: {name}")
166164

167165
def load_checkpoint(self, name: str):
168-
checkpoint_path = self.checkpoint_dir / name
166+
checkpoint_path = (self.checkpoint_dir / name).resolve()
167+
try:
168+
checkpoint_path.relative_to(self.checkpoint_dir.resolve())
169+
except ValueError:
170+
raise ValueError(f"Checkpoint path escapes checkpoint directory: {name}")
169171
self.model.load_weights(str(checkpoint_path / 'model.weights.h5'))
170172
self.entropy_model.load_weights(str(checkpoint_path / 'entropy.weights.h5'))
171173

172174
for opt_name, optimizer in self.optimizers.items():
173-
opt_path = checkpoint_path / f'{opt_name}_optimizer.npy'
174-
if opt_path.exists() and optimizer.variables:
175-
opt_weights = np.load(str(opt_path), allow_pickle=True)
176-
for var, w in zip(optimizer.variables, opt_weights):
177-
var.assign(w)
175+
opt_dir = checkpoint_path / f'{opt_name}_optimizer'
176+
if opt_dir.exists() and optimizer.variables:
177+
for i, var in enumerate(optimizer.variables):
178+
path = opt_dir / f'{i}.npy'
179+
if path.exists():
180+
var.assign(np.load(str(path), allow_pickle=False))
178181

179182
self.logger.info(f"Loaded checkpoint: {name}")
180183

tests/test_compress_octree.py

Lines changed: 220 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -116,6 +116,7 @@ def test_octree_partitioning(self):
116116
def test_save_and_load(self):
117117
"""Test saving and loading functionality."""
118118
save_path = Path(self.test_env['tmp_path']) / "test_compressed.npz"
119+
meta_path = Path(str(save_path) + '.meta.json')
119120

120121
# Compress and save
121122
grid, metadata = self.compressor.compress(
@@ -124,8 +125,9 @@ def test_save_and_load(self):
124125
)
125126
self.compressor.save_compressed(grid, metadata, str(save_path))
126127

127-
# Verify file exists
128+
# Verify both files exist
128129
self.assertTrue(save_path.exists())
130+
self.assertTrue(meta_path.exists())
129131

130132
# Load and verify
131133
loaded_grid, loaded_metadata = self.compressor.load_compressed(str(save_path))
@@ -137,6 +139,10 @@ def test_save_and_load(self):
137139
for key in ['min_bounds', 'max_bounds', 'ranges', 'has_normals']:
138140
self.assertIn(key, loaded_metadata)
139141

142+
# Check array fields are numpy arrays after load
143+
for key in ['min_bounds', 'max_bounds', 'ranges']:
144+
self.assertIsInstance(loaded_metadata[key], np.ndarray)
145+
140146
def test_error_handling(self):
141147
"""Test error handling."""
142148
# Test empty point cloud
@@ -156,5 +162,218 @@ def test_error_handling(self):
156162
with self.assertRaisesRegex(ValueError, "shape must match"):
157163
self.compressor.compress(self.point_cloud, normals=wrong_shape_normals)
158164

165+
# --- NaN / Inf / degenerate value tests ---
166+
167+
def test_save_load_metadata_with_nan_and_inf(self):
168+
"""NaN and Inf scalar values in metadata are converted to None."""
169+
save_path = Path(self.test_env['tmp_path']) / "special_values.npz"
170+
grid = np.zeros((64, 64, 64), dtype=bool)
171+
grid[0, 0, 0] = True
172+
metadata = {
173+
'min_bounds': np.array([0.0, 0.0, 0.0]),
174+
'max_bounds': np.array([1.0, 1.0, 1.0]),
175+
'ranges': np.array([1.0, 1.0, 1.0]),
176+
'has_normals': False,
177+
'nan_value': float('nan'),
178+
'inf_value': float('inf'),
179+
'neg_inf_value': float('-inf'),
180+
}
181+
self.compressor.save_compressed(grid, metadata, str(save_path))
182+
_, loaded = self.compressor.load_compressed(str(save_path))
183+
self.assertIsNone(loaded['nan_value'])
184+
self.assertIsNone(loaded['inf_value'])
185+
self.assertIsNone(loaded['neg_inf_value'])
186+
187+
def test_save_load_metadata_with_numpy_nan(self):
188+
"""NaN from np.floating scalar is also converted to None."""
189+
save_path = Path(self.test_env['tmp_path']) / "np_nan.npz"
190+
grid = np.zeros((64, 64, 64), dtype=bool)
191+
grid[0, 0, 0] = True
192+
metadata = {
193+
'min_bounds': np.array([0.0, 0.0, 0.0]),
194+
'max_bounds': np.array([1.0, 1.0, 1.0]),
195+
'ranges': np.array([1.0, 1.0, 1.0]),
196+
'has_normals': False,
197+
'compression_error': np.float64('nan'),
198+
}
199+
self.compressor.save_compressed(grid, metadata, str(save_path))
200+
_, loaded = self.compressor.load_compressed(str(save_path))
201+
self.assertIsNone(loaded['compression_error'])
202+
203+
def test_compress_all_points_same_voxel(self):
204+
"""All identical points compress to single occupied voxel."""
205+
same_points = np.full((100, 3), 5.0, dtype=np.float32)
206+
grid, metadata = self.compressor.compress(same_points, validate=False)
207+
self.assertEqual(np.sum(grid), 1)
208+
np.testing.assert_allclose(metadata['ranges'], [1e-6, 1e-6, 1e-6])
209+
210+
# --- Zero / empty / boundary tests ---
211+
212+
def test_save_load_empty_grid(self):
213+
"""All-False grid saves and loads correctly."""
214+
save_path = Path(self.test_env['tmp_path']) / "empty_grid.npz"
215+
grid = np.zeros((64, 64, 64), dtype=bool)
216+
metadata = {
217+
'min_bounds': np.array([0.0, 0.0, 0.0]),
218+
'max_bounds': np.array([1.0, 1.0, 1.0]),
219+
'ranges': np.array([1.0, 1.0, 1.0]),
220+
'has_normals': False,
221+
}
222+
self.compressor.save_compressed(grid, metadata, str(save_path))
223+
loaded_grid, loaded_metadata = self.compressor.load_compressed(str(save_path))
224+
self.assertEqual(np.sum(loaded_grid), 0)
225+
self.assertFalse(loaded_metadata['has_normals'])
226+
227+
def test_save_load_without_normals(self):
228+
"""Metadata without normal_grid round-trips correctly."""
229+
save_path = Path(self.test_env['tmp_path']) / "no_normals.npz"
230+
grid, metadata = self.compressor.compress(self.point_cloud, validate=False)
231+
self.assertFalse(metadata['has_normals'])
232+
self.assertNotIn('normal_grid', metadata)
233+
234+
self.compressor.save_compressed(grid, metadata, str(save_path))
235+
loaded_grid, loaded_metadata = self.compressor.load_compressed(str(save_path))
236+
np.testing.assert_array_equal(grid, loaded_grid)
237+
self.assertFalse(loaded_metadata['has_normals'])
238+
self.assertNotIn('normal_grid', loaded_metadata)
239+
240+
# --- Negative / error path tests ---
241+
242+
def test_load_compressed_missing_metadata_file(self):
243+
"""Missing .meta.json sidecar raises FileNotFoundError."""
244+
save_path = Path(self.test_env['tmp_path']) / "partial_write.npz"
245+
grid = np.zeros((64, 64, 64), dtype=bool)
246+
metadata = {
247+
'min_bounds': np.array([0.0, 0.0, 0.0]),
248+
'max_bounds': np.array([1.0, 1.0, 1.0]),
249+
'ranges': np.array([1.0, 1.0, 1.0]),
250+
'has_normals': False,
251+
}
252+
self.compressor.save_compressed(grid, metadata, str(save_path))
253+
254+
# Simulate partial write: delete the sidecar
255+
meta_path = Path(str(save_path) + '.meta.json')
256+
meta_path.unlink()
257+
258+
with self.assertRaises(FileNotFoundError):
259+
self.compressor.load_compressed(str(save_path))
260+
261+
def test_load_compressed_missing_grid_file(self):
262+
"""Missing .npz grid file raises error."""
263+
missing_path = Path(self.test_env['tmp_path']) / "nonexistent.npz"
264+
with self.assertRaises(FileNotFoundError):
265+
self.compressor.load_compressed(str(missing_path))
266+
267+
# --- Debug output security test ---
268+
269+
def test_debug_info_does_not_pickle_dicts(self):
270+
"""Debug output skips dict values, only saves numpy arrays."""
271+
self.compressor.compress(self.point_cloud, validate=False)
272+
273+
debug_dir = Path(self.test_env['tmp_path']) / 'debug' / 'grid_creation'
274+
self.assertTrue(debug_dir.exists())
275+
276+
# 'metadata' (a dict) should NOT be saved as .npy
277+
self.assertFalse((debug_dir / 'metadata.npy').exists())
278+
279+
# 'grid' and 'scaled_points' (arrays) SHOULD be saved
280+
self.assertTrue((debug_dir / 'grid.npy').exists())
281+
self.assertTrue((debug_dir / 'scaled_points.npy').exists())
282+
283+
# All saved .npy files must be loadable without pickle
284+
for npy_file in debug_dir.glob('*.npy'):
285+
np.load(str(npy_file), allow_pickle=False)
286+
287+
# --- Regression / format fidelity tests ---
288+
289+
def test_save_load_metadata_values_roundtrip(self):
290+
"""Numeric metadata values are preserved after JSON round-trip."""
291+
save_path = Path(self.test_env['tmp_path']) / "fidelity.npz"
292+
grid, metadata = self.compressor.compress(self.point_cloud)
293+
self.compressor.save_compressed(grid, metadata, str(save_path))
294+
_, loaded = self.compressor.load_compressed(str(save_path))
295+
296+
np.testing.assert_allclose(
297+
loaded['min_bounds'], metadata['min_bounds'], rtol=1e-6
298+
)
299+
np.testing.assert_allclose(
300+
loaded['max_bounds'], metadata['max_bounds'], rtol=1e-6
301+
)
302+
np.testing.assert_allclose(
303+
loaded['ranges'], metadata['ranges'], rtol=1e-6
304+
)
305+
self.assertAlmostEqual(
306+
loaded['compression_error'], metadata['compression_error'], places=6
307+
)
308+
309+
def test_save_load_numpy_scalar_metadata(self):
310+
"""np.float64 and np.int32 scalars survive type conversion."""
311+
save_path = Path(self.test_env['tmp_path']) / "scalar_types.npz"
312+
grid = np.zeros((64, 64, 64), dtype=bool)
313+
grid[0, 0, 0] = True
314+
metadata = {
315+
'min_bounds': np.array([0.0, 0.0, 0.0]),
316+
'max_bounds': np.array([1.0, 1.0, 1.0]),
317+
'ranges': np.array([1.0, 1.0, 1.0]),
318+
'has_normals': False,
319+
'float_scalar': np.float64(3.14),
320+
'int_scalar': np.int32(42),
321+
}
322+
self.compressor.save_compressed(grid, metadata, str(save_path))
323+
_, loaded = self.compressor.load_compressed(str(save_path))
324+
self.assertAlmostEqual(loaded['float_scalar'], 3.14, places=10)
325+
self.assertEqual(loaded['int_scalar'], 42)
326+
327+
def test_save_load_dtype_after_roundtrip(self):
328+
"""Documents that float32 arrays become float64 after JSON round-trip."""
329+
save_path = Path(self.test_env['tmp_path']) / "dtype_test.npz"
330+
grid, metadata = self.compressor.compress(self.point_cloud, validate=False)
331+
# Original is float32 from np.min on float32 input
332+
self.assertEqual(metadata['min_bounds'].dtype, np.float32)
333+
334+
self.compressor.save_compressed(grid, metadata, str(save_path))
335+
_, loaded = self.compressor.load_compressed(str(save_path))
336+
# After JSON round-trip, np.array() defaults to float64
337+
self.assertEqual(loaded['min_bounds'].dtype, np.float64)
338+
339+
def test_decompress_after_save_load_matches_direct(self):
340+
"""Decompress from loaded metadata produces same points as from original."""
341+
save_path = Path(self.test_env['tmp_path']) / "roundtrip_quality.npz"
342+
grid, metadata = self.compressor.compress(self.point_cloud, validate=False)
343+
344+
# Decompress directly from original metadata
345+
direct_points, _ = self.compressor.decompress(grid, metadata)
346+
347+
# Save, load, decompress
348+
self.compressor.save_compressed(grid, metadata, str(save_path))
349+
loaded_grid, loaded_metadata = self.compressor.load_compressed(str(save_path))
350+
loaded_points, _ = self.compressor.decompress(loaded_grid, loaded_metadata)
351+
352+
# Points should match despite dtype change (float32 vs float64)
353+
np.testing.assert_allclose(
354+
loaded_points, direct_points.astype(np.float64), rtol=1e-5
355+
)
356+
357+
# --- E2E test ---
358+
359+
@pytest.mark.e2e
360+
def test_compress_save_load_decompress_quality(self):
361+
"""Full pipeline: compress, save, load, decompress, verify quality."""
362+
save_path = Path(self.test_env['tmp_path']) / "e2e.npz"
363+
364+
grid, metadata = self.compressor.compress(self.point_cloud)
365+
original_error = metadata['compression_error']
366+
self.compressor.save_compressed(grid, metadata, str(save_path))
367+
368+
loaded_grid, loaded_metadata = self.compressor.load_compressed(str(save_path))
369+
decompressed, _ = self.compressor.decompress(loaded_grid, loaded_metadata)
370+
371+
# Decompressed point count should be reasonable
372+
self.assertGreater(len(decompressed), 0)
373+
# Reconstruction error should match original
374+
self.assertAlmostEqual(
375+
loaded_metadata['compression_error'], original_error, places=6
376+
)
377+
159378
if __name__ == "__main__":
160379
tf.test.main()

0 commit comments

Comments (0)