From da79b927ac3e82abdce318f89f67bdf0585a1b8f Mon Sep 17 00:00:00 2001 From: Oleg Savchenko Date: Fri, 22 May 2026 16:31:07 +0200 Subject: [PATCH 1/4] Add paths.buffer config key to redirect buffer snapshot storage Mirrors the existing paths.samples pattern: if paths.buffer is set in the config (or passed as a Hydra override), buffer snapshots are written to {paths.buffer}/snapshots/ instead of the default {run_dir}/buffer/snapshots/. Useful when run_dir is on a fast local filesystem but buffer snapshots (which can be large) should land on a separate scratch volume. Co-Authored-By: Claude Sonnet 4.6 --- docs/configuration.md | 8 +++++--- falcon/cli.py | 5 +++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 309c85e..7f93e33 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -32,15 +32,17 @@ Configure file paths: ```yaml paths: import: "." - graph: ${run_dir}/graph + graph: ${run_dir}/graph samples: ${run_dir}/samples + buffer: ${run_dir}/buffer # optional; redirect to a separate volume (e.g. scratch) ``` | Key | Type | Default | Description | |-----|------|---------|-------------| | `import` | str | `"."` | Path to import custom modules | -| `graph` | str | `${run_dir}/graph` | Trained models directory | +| `graph` | str | `${run_dir}/graph` | Trained model checkpoints directory | | `samples` | str | `${run_dir}/samples` | Output samples directory | +| `buffer` | str | `${run_dir}/buffer` | Buffer snapshots directory (`snapshots/` is appended); useful for routing large temporary simulation data to a separate scratch volume while keeping `run_dir` on persistent storage | ### `buffer` @@ -65,7 +67,7 @@ buffer: | `simulate_count` | int | `64` | Number of new samples generated per simulation round. For simulators taking >1s per sample, keep this small (4–16) to avoid long delays between buffer updates; for fast simulators, increase to reduce Ray overhead. | | `simulate_interval` | float | `1` | Seconds between simulation rounds | | `simulate_when_full` | bool | `true` | If `true`, simulation continues after `max_samples` is reached and old samples are replaced; if `false`, simulation stops once the buffer is full | -| `store_fraction` | float | `0.0` | Fraction of simulated samples written to `buffer/snapshots/` for inspection (0 = none, 1 = all) | +| `store_fraction` | float | `0.0` | Fraction of simulated samples written to `{paths.buffer}/snapshots/` for inspection (0 = none, 1 = all) | ### `graph` diff --git a/falcon/cli.py b/falcon/cli.py index b161301..b479999 100644 --- a/falcon/cli.py +++ b/falcon/cli.py @@ -668,9 +668,10 @@ def stop_check(): from omegaconf import OmegaConf as _OmegaConf from falcon.core.raystore import BufferConfig as _BufferConfig buffer_cfg = _OmegaConf.merge(_OmegaConf.structured(_BufferConfig), cfg.buffer) + buffer_base = cfg.paths.get("buffer", str(Path(cfg.run_dir) / "buffer")) dataset_manager = falcon.get_ray_dataset_manager( buffer_cfg, - snapshots_path=str(Path(cfg.run_dir) / "buffer" / "snapshots"), + snapshots_path=str(Path(buffer_base) / "snapshots"), log_config=logging_cfg, ) @@ -899,7 +900,7 @@ def parse_args(): elif arg.startswith("--refresh="): refresh = float(arg.split("=", 1)[1]) i += 1 - return mode, None, None, None, None, False, 16, address, refresh + return mode, None, None, None, None, False, 16, True, None, address, refresh sample_type = None if mode == "sample": From a7bd7a6d7e1cdcc18513947b6da33e76a7066017 Mon Sep 17 00:00:00 2001 From: Christoph Weniger Date: Sat, 6 Jun 2026 08:48:02 +0200 Subject: [PATCH 2/4] Update snapshot_every doc to reference {paths.buffer}/snapshots/ Co-Authored-By: Claude Sonnet 4.6 --- docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index 23520b3..51010f2 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -67,7 +67,7 @@ buffer: | `simulate_count` | int | `64` | Number of new samples generated per simulation round. For simulators taking >1s per sample, keep this small (4–16) to avoid long delays between buffer updates; for fast simulators, increase to reduce Ray overhead. | | `simulate_interval` | float | `1` | Seconds between simulation rounds | | `simulate_when_full` | bool | `true` | If `true`, simulation continues after `max_samples` is reached and old samples are replaced; if `false`, simulation stops once the buffer is full | -| `snapshot_every` | int | `0` | Save every Nth sample to `buffer/snapshots/` for inspection (0 = disabled, 1 = all, 10 = every 10th sample) | +| `snapshot_every` | int | `0` | Save every Nth sample to `{paths.buffer}/snapshots/` for inspection (0 = disabled, 1 = all, 10 = every 10th sample) | ### `graph` From de83597407fa06ce0daffa76df13cf21a342c6e0 Mon Sep 17 00:00:00 2001 From: Christoph Weniger Date: Sat, 6 Jun 2026 09:03:23 +0200 Subject: [PATCH 3/4] Add PathConfig, rename import->imports (list), rename model_path->import_dirs - PathConfig dataclass validates cfg.paths against a typed schema (analogous to BufferConfig for cfg.buffer) - imports: Optional[List[str]] replaces the bare import: str key; avoids Python keyword clash and supports multiple search dirs - DeployedGraph/NodeWrapper/MultiplexNodeWrapper accept import_dirs (list) instead of model_path (str) and iterate over all entries when extending sys.path - _resolve_paths() helper merges cfg.paths against PathConfig and returns a plain dict; used in launch_mode and sample_mode - PathConfig exported from falcon top-level alongside BufferConfig Co-Authored-By: Claude Sonnet 4.6 --- docs/configuration.md | 4 ++-- falcon/__init__.py | 2 ++ falcon/cli.py | 38 +++++++++++++++++++++++------------ falcon/core/deployed_graph.py | 25 +++++++++++------------ falcon/core/raystore.py | 12 ++++++++++- 5 files changed, 52 insertions(+), 29 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 51010f2..0db1fd0 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -31,7 +31,7 @@ Configure file paths: ```yaml paths: - import: "." + imports: ["."] graph: ${run_dir}/graph samples: ${run_dir}/samples buffer: ${run_dir}/buffer # optional; redirect to a separate volume (e.g. scratch) @@ -39,7 +39,7 @@ paths: | Key | Type | Default | Description | |-----|------|---------|-------------| -| `import` | str | `"."` | Path to import custom modules | +| `imports` | list[str] | `null` | Directories prepended to `sys.path` in Ray workers so custom modules (e.g. `model.Simulator`) can be imported | | `graph` | str | `${run_dir}/graph` | Trained model checkpoints directory | | `samples` | str | `${run_dir}/samples` | Output samples directory | | `buffer` | str | `${run_dir}/buffer` | Buffer snapshots directory (`snapshots/` is appended); useful for routing large temporary simulation data to a separate scratch volume while keeping `run_dir` on persistent storage | diff --git a/falcon/__init__.py b/falcon/__init__.py index e415c62..ac4eca4 100644 --- a/falcon/__init__.py +++ b/falcon/__init__.py @@ -9,6 +9,7 @@ "Node", "Graph", "CompositeNode", "DeployedGraph", "get_ray_dataset_manager", + "PathConfig", "BufferConfig", "LazyLoader", "Logger", "get_logger", "set_logger", "log", "debug", "info", "warning", "error", @@ -26,6 +27,7 @@ "CompositeNode": ".core.graph", "DeployedGraph": ".core.deployed_graph", "get_ray_dataset_manager": ".core.raystore", + "PathConfig": ".core.raystore", "BufferConfig": ".core.raystore", "LazyLoader": ".core.utils", "Logger": ".core.logger", diff --git a/falcon/cli.py b/falcon/cli.py index 096b327..44a2316 100644 --- a/falcon/cli.py +++ b/falcon/cli.py @@ -233,6 +233,16 @@ def load_config(config_name: str = "config.yml", run_dir: str = None, overrides: return cfg +def _resolve_paths(cfg): + """Merge cfg.paths against PathConfig and return a plain dict.""" + from omegaconf import OmegaConf + from falcon.core.raystore import PathConfig as _PathConfig + return OmegaConf.to_container( + OmegaConf.merge(OmegaConf.structured(_PathConfig), cfg.paths), + resolve=True, + ) + + class TeeOutput: """Write to both terminal and log file.""" def __init__(self, log_file, terminal): @@ -318,9 +328,9 @@ def _build_run_summary(status, output_dir, cfg, deployed_graph, start_time=None, lines.append("=" * 60) lines.append(f"falcon launch {status}") lines.append(f"Output: {output_dir}") - samples_path = cfg.paths.get("samples", f"{cfg.run_dir}/samples") - lines.append(f"Samples: {samples_path}") - graph_path = Path(cfg.paths.graph) + paths = _resolve_paths(cfg) + lines.append(f"Samples: {paths['samples'] or f'{cfg.run_dir}/samples'}") + graph_path = Path(paths['graph']) lines.append(f"Logs: {graph_path / 'driver' / 'output.log'} (driver)") try: node_names = list(cfg.graph.keys()) @@ -453,7 +463,7 @@ def _save_samples(samples, sample_cfg, sample_type, graph, cfg, info_fn=print): info_fn(f" {key}: {value.shape}") # Determine output directory (flat structure) - samples_dir = cfg.paths.get("samples", f"{cfg.run_dir}/samples") + samples_dir = _resolve_paths(cfg)["samples"] or f"{cfg.run_dir}/samples" output_dir = Path(samples_dir) / sample_type output_dir.mkdir(parents=True, exist_ok=True) @@ -505,6 +515,7 @@ def launch_mode(cfg, interactive: bool = False, log_lines: int = 16, auto_sample # Get output directory from config output_dir = Path(cfg.run_dir) + path_cfg = _resolve_paths(cfg) # Generate wandb group if not set - use run-dir folder name logging_cfg = OmegaConf.to_container(cfg.get("logging", {}), resolve=True) @@ -514,7 +525,7 @@ def launch_mode(cfg, interactive: bool = False, log_lines: int = 16, auto_sample logging_cfg.setdefault("wandb", {})["group"] = output_dir.name # Ensure local dir is set to graph path - logging_cfg.setdefault("local", {})["dir"] = str(cfg.paths.graph) + logging_cfg.setdefault("local", {})["dir"] = path_cfg["graph"] # Create driver logger and set as module-level logger # This enables falcon.info(), falcon.log() etc. for DeployedGraph and other components @@ -588,7 +599,7 @@ def launch_mode(cfg, interactive: bool = False, log_lines: int = 16, auto_sample # Start status polling thread for interactive mode status_thread = None - graph_path = Path(cfg.paths.graph) + graph_path = Path(path_cfg["graph"]) if display: # Set log directory so display can read node output.log files display.set_log_dir(str(graph_path)) @@ -660,7 +671,7 @@ def stop_check(): # 1) Deploy graph (pass logging config) deployed_graph = falcon.DeployedGraph( graph, - model_path=cfg.paths.get("import"), + import_dirs=path_cfg["imports"], log_config=logging_cfg, ) @@ -668,7 +679,7 @@ def stop_check(): from omegaconf import OmegaConf as _OmegaConf from falcon.core.raystore import BufferConfig as _BufferConfig buffer_cfg = _OmegaConf.merge(_OmegaConf.structured(_BufferConfig), cfg.buffer) - buffer_base = cfg.paths.get("buffer", str(Path(cfg.run_dir) / "buffer")) + buffer_base = path_cfg["buffer"] or str(Path(cfg.run_dir) / "buffer") dataset_manager = falcon.get_ray_dataset_manager( buffer_cfg, snapshots_path=str(Path(buffer_base) / "snapshots"), @@ -781,8 +792,9 @@ def sample_mode(cfg, sample_type: str) -> None: from falcon.core.logger import Logger, set_logger, info # Setup logging config + path_cfg = _resolve_paths(cfg) logging_cfg = OmegaConf.to_container(cfg.get("logging", {}), resolve=True) - logging_cfg.setdefault("local", {})["dir"] = str(cfg.paths.graph) + logging_cfg.setdefault("local", {})["dir"] = path_cfg["graph"] # Create driver logger and set as module-level logger driver_logger = Logger("driver", logging_cfg, capture_exceptions=True) @@ -824,7 +836,7 @@ def sample_mode(cfg, sample_type: str) -> None: # Deploy graph for sampling deployed_graph = falcon.DeployedGraph( graph, - model_path=cfg.paths.get("import"), + import_dirs=path_cfg["imports"], log_config=logging_cfg, ) @@ -833,15 +845,15 @@ def sample_mode(cfg, sample_type: str) -> None: sample_refs = deployed_graph.sample(num_samples) elif sample_type == "posterior": - deployed_graph.load(Path(cfg.paths.graph)) + deployed_graph.load(Path(path_cfg["graph"])) sample_refs = deployed_graph.sample_posterior(num_samples, observations) elif sample_type == "proposal": - deployed_graph.load(Path(cfg.paths.graph)) + deployed_graph.load(Path(path_cfg["graph"])) sample_refs = deployed_graph.sample_proposal(num_samples, observations) elif sample_type == "ppd": - deployed_graph.load(Path(cfg.paths.graph)) + deployed_graph.load(Path(path_cfg["graph"])) sample_refs = deployed_graph.sample_ppd(num_samples, observations) else: diff --git a/falcon/core/deployed_graph.py b/falcon/core/deployed_graph.py index d99cd5e..bd273cf 100644 --- a/falcon/core/deployed_graph.py +++ b/falcon/core/deployed_graph.py @@ -31,10 +31,10 @@ def _ray_options(actor_config): @ray.remote class MultiplexNodeWrapper: - def __init__(self, actor_config, node, graph, num_actors, model_path=None, log_config=None): + def __init__(self, actor_config, node, graph, num_actors, import_dirs=None, log_config=None): self.num_actors = num_actors self.wrapped_node_list = [ - NodeWrapper.options(**_ray_options(actor_config)).remote(node, graph, model_path, log_config) + NodeWrapper.options(**_ray_options(actor_config)).remote(node, graph, import_dirs, log_config) for _ in range(self.num_actors) ] @@ -120,7 +120,7 @@ def get_output_log_tail(self, num_lines: int = 50) -> list: # actors — sampling reads best_model which is independent of training state. @ray.remote class NodeWrapper: - def __init__(self, node, graph, model_path=None, log_config=None): + def __init__(self, node, graph, import_dirs=None, log_config=None): # Suppress Ray warning about blocking ray.get in async actor. # Ray emits this once per actor via a global flag. We set the flag # to True before any ray.get calls to prevent the warning. @@ -134,11 +134,10 @@ def __init__(self, node, graph, model_path=None, log_config=None): except (ImportError, AttributeError): pass # Ray internals changed, warning will appear - # Add model_path to sys.path if provided - if model_path: - model_path = Path(model_path).resolve() - if str(model_path) not in sys.path: - sys.path.insert(0, str(model_path)) + for p in (import_dirs or []): + resolved = str(Path(p).resolve()) + if resolved not in sys.path: + sys.path.insert(0, resolved) self.node = node self.name = node.name @@ -446,14 +445,14 @@ def shutdown(self): class DeployedGraph: - def __init__(self, graph, model_path=None, log_config=None): + def __init__(self, graph, import_dirs=None, log_config=None): """Initialize a DeployedGraph with the given conceptual graph of nodes. Note: This class uses falcon.info(), falcon.warning() etc. for logging. These functions use the module-level logger set by cli.py via set_logger(). """ self.graph = graph - self.model_path = model_path + self.import_dirs = import_dirs or [] self.log_config = log_config or {} self.wrapped_nodes_dict = {} self.monitor_bridge = None @@ -464,7 +463,7 @@ def __init__(self, graph, model_path=None, log_config=None): def _create_monitor_bridge(self): """Create the MonitorBridge actor for falcon monitor TUI.""" from falcon.core.monitor_bridge import MonitorBridge - run_dir = str(self.model_path) if self.model_path else "unknown" + run_dir = str(self.import_dirs[0]) if self.import_dirs else "unknown" try: # Name the actor so falcon monitor can discover it self.monitor_bridge = MonitorBridge.options( @@ -532,13 +531,13 @@ def deploy_nodes(self): node, self.graph, node.num_actors, - self.model_path, + self.import_dirs, self.log_config, ) else: self.wrapped_nodes_dict[node.name] = NodeWrapper.options( **_ray_options(node.actor_config) - ).remote(node, self.graph, self.model_path, self.log_config) + ).remote(node, self.graph, self.import_dirs, self.log_config) # Wait for all actors to initialize and register with monitor bridge for name, actor in self.wrapped_nodes_dict.items(): diff --git a/falcon/core/raystore.py b/falcon/core/raystore.py index 40b143a..b9434bf 100644 --- a/falcon/core/raystore.py +++ b/falcon/core/raystore.py @@ -4,13 +4,23 @@ from dataclasses import dataclass from enum import IntEnum from pathlib import Path -from typing import Optional +from typing import List, Optional import ray from omegaconf import MISSING from falcon.core.logger import Logger, set_logger, log, info, warning, error +@dataclass +class PathConfig: + """Configuration for file-system paths.""" + + graph: str = MISSING + samples: Optional[str] = None + buffer: Optional[str] = None + imports: Optional[List[str]] = None # directories prepended to sys.path in Ray workers + + @dataclass class BufferConfig: """Configuration for the rolling sample buffer.""" From 1785b5dcc938e9ea37a4cec53ebc74cf97d5736b Mon Sep 17 00:00:00 2001 From: Christoph Weniger Date: Sat, 6 Jun 2026 09:09:46 +0200 Subject: [PATCH 4/4] Rename import -> imports in example configs Co-Authored-By: Claude Sonnet 4.6 --- examples/01_minimal/config.yml | 2 +- examples/02_bimodal/config_amortized.yml | 2 +- examples/02_bimodal/config_regular.yml | 2 +- examples/02_bimodal/config_rounds_fill.yml | 2 +- examples/02_bimodal/config_rounds_renew.yml | 2 +- examples/03_composite/config.yml | 2 +- examples/04_gaussian/config.yml | 2 +- examples/05_linear_regression/config.yml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/01_minimal/config.yml b/examples/01_minimal/config.yml index a6c582d..8458eeb 100644 --- a/examples/01_minimal/config.yml +++ b/examples/01_minimal/config.yml @@ -44,7 +44,7 @@ logging: # Directory configuration # ----------------------------------------------------------------------------- paths: - import: "./src" # Local folder(s) with user-defined code (e.g. model.py) + imports: ["./src"] # Local folder(s) with user-defined code (e.g. model.py) graph: ${run_dir}/graph # Directory for serialized graph and trained networks samples: ${run_dir}/samples # Directory for generated samples (posterior, prior, etc.) diff --git a/examples/02_bimodal/config_amortized.yml b/examples/02_bimodal/config_amortized.yml index de4e0e3..45a1249 100644 --- a/examples/02_bimodal/config_amortized.yml +++ b/examples/02_bimodal/config_amortized.yml @@ -11,7 +11,7 @@ logging: # Directory configuration paths: - import: "./src" + imports: ["./src"] graph: ${run_dir}/graph samples: ${run_dir}/samples diff --git a/examples/02_bimodal/config_regular.yml b/examples/02_bimodal/config_regular.yml index 014f114..c37c7fa 100644 --- a/examples/02_bimodal/config_regular.yml +++ b/examples/02_bimodal/config_regular.yml @@ -11,7 +11,7 @@ logging: # Directory configuration paths: - import: "./src" + imports: ["./src"] graph: ${run_dir}/graph samples: ${run_dir}/samples diff --git a/examples/02_bimodal/config_rounds_fill.yml b/examples/02_bimodal/config_rounds_fill.yml index 7fd7b8a..f8c4794 100644 --- a/examples/02_bimodal/config_rounds_fill.yml +++ b/examples/02_bimodal/config_rounds_fill.yml @@ -11,7 +11,7 @@ logging: # Directory configuration paths: - import: "./src" + imports: ["./src"] graph: ${run_dir}/graph samples: ${run_dir}/samples diff --git a/examples/02_bimodal/config_rounds_renew.yml b/examples/02_bimodal/config_rounds_renew.yml index ef0d0f9..521f488 100644 --- a/examples/02_bimodal/config_rounds_renew.yml +++ b/examples/02_bimodal/config_rounds_renew.yml @@ -11,7 +11,7 @@ logging: # Directory configuration paths: - import: "./src" + imports: ["./src"] graph: ${run_dir}/graph samples: ${run_dir}/samples diff --git a/examples/03_composite/config.yml b/examples/03_composite/config.yml index 09b9def..ec1be6b 100644 --- a/examples/03_composite/config.yml +++ b/examples/03_composite/config.yml @@ -11,7 +11,7 @@ logging: # Directory configuration paths: - import: "./src" + imports: ["./src"] graph: ${run_dir}/graph samples: ${run_dir}/samples diff --git a/examples/04_gaussian/config.yml b/examples/04_gaussian/config.yml index 78ebaad..20b5c0a 100644 --- a/examples/04_gaussian/config.yml +++ b/examples/04_gaussian/config.yml @@ -37,7 +37,7 @@ logging: # Directory configuration # ----------------------------------------------------------------------------- paths: - import: "./src" + imports: ["./src"] graph: ${run_dir}/graph samples: ${run_dir}/samples diff --git a/examples/05_linear_regression/config.yml b/examples/05_linear_regression/config.yml index 12acaf0..b99ef63 100644 --- a/examples/05_linear_regression/config.yml +++ b/examples/05_linear_regression/config.yml @@ -38,7 +38,7 @@ logging: # Directory configuration # ----------------------------------------------------------------------------- paths: - import: "./src" + imports: ["./src"] graph: ${run_dir}/graph samples: ${run_dir}/samples