From 1b804e43d5064d6087c5e24e1695f440f51c2e73 Mon Sep 17 00:00:00 2001 From: Niklas Heim Date: Mon, 6 Apr 2026 16:27:23 +0000 Subject: [PATCH 01/17] Rename InputFileReference/OutputFileReference to InputPathReference/OutputPathReference Generalizes file references to accept any existing filesystem path (file or directory). Removes the is_file() constraint from input validation; existence check is preserved. Output validator is unchanged. --- demo/_showcase/ansys-qoi/qoi_dataset/tesseract_api.py | 6 +++--- .../_showcase/ansys-qoi/qoi_inference/tesseract_api.py | 8 ++++---- demo/_showcase/ansys-qoi/qoi_train/tesseract_api.py | 10 +++++----- docs/content/examples/building-blocks/filereference.md | 2 +- examples/filereference/tesseract_api.py | 8 ++++---- tesseract_core/runtime/experimental.py | 10 ++++------ 6 files changed, 21 insertions(+), 23 deletions(-) diff --git a/demo/_showcase/ansys-qoi/qoi_dataset/tesseract_api.py b/demo/_showcase/ansys-qoi/qoi_dataset/tesseract_api.py index 57788c2e..00e078f1 100644 --- a/demo/_showcase/ansys-qoi/qoi_dataset/tesseract_api.py +++ b/demo/_showcase/ansys-qoi/qoi_dataset/tesseract_api.py @@ -10,13 +10,13 @@ from torch.utils._pytree import tree_map from tesseract_core.runtime.config import get_config -from tesseract_core.runtime.experimental import InputFileReference, OutputFileReference +from tesseract_core.runtime.experimental import InputPathReference, OutputPathReference class InputSchema(BaseModel): """Input schema for QoI dataset generation.""" - config: InputFileReference = Field(description="Configuration file") + config: InputPathReference = Field(description="Configuration file") sim_folder: str = Field( description="Folder path containing Ansys Fluent simulations with CAD files and QoI reports", @@ -30,7 +30,7 @@ class InputSchema(BaseModel): class OutputSchema(BaseModel): """Output schema for QoI dataset generation.""" - data: list[OutputFileReference] = Field( + data: list[OutputPathReference] = Field( 
description="List of npz files containing point cloud data, simulation parameters and QoI (if available)", ) diff --git a/demo/_showcase/ansys-qoi/qoi_inference/tesseract_api.py b/demo/_showcase/ansys-qoi/qoi_inference/tesseract_api.py index 884da7fa..e086082d 100644 --- a/demo/_showcase/ansys-qoi/qoi_inference/tesseract_api.py +++ b/demo/_showcase/ansys-qoi/qoi_inference/tesseract_api.py @@ -16,21 +16,21 @@ from tesseract_core.runtime import Array, Float32 from tesseract_core.runtime.config import get_config -from tesseract_core.runtime.experimental import InputFileReference +from tesseract_core.runtime.experimental import InputPathReference class InputSchema(BaseModel): """Input schema for QoI model inference.""" - config: InputFileReference = Field(description="Configuration file") + config: InputPathReference = Field(description="Configuration file") data_folder: str = Field( description="Folder containing npz files with point cloud data and simulation parameters" ) - trained_model: InputFileReference = Field( + trained_model: InputPathReference = Field( description="Pickle file containing weights of trained model" ) - scaler: InputFileReference = Field( + scaler: InputPathReference = Field( description="Pickle file containing the scaling method for the dataset" ) diff --git a/demo/_showcase/ansys-qoi/qoi_train/tesseract_api.py b/demo/_showcase/ansys-qoi/qoi_train/tesseract_api.py index ea3b7714..7cf66ca2 100644 --- a/demo/_showcase/ansys-qoi/qoi_train/tesseract_api.py +++ b/demo/_showcase/ansys-qoi/qoi_train/tesseract_api.py @@ -13,13 +13,13 @@ from torch.utils._pytree import tree_map from tesseract_core.runtime.config import get_config -from tesseract_core.runtime.experimental import InputFileReference, OutputFileReference +from tesseract_core.runtime.experimental import InputPathReference, OutputPathReference class InputSchema(BaseModel): """Input schema for QoI model training.""" - config: InputFileReference = Field(description="Configuration file") + 
config: InputPathReference = Field(description="Configuration file") data_folder: str = Field( description="Folder containing npz files containing point cloud data information, " @@ -30,10 +30,10 @@ class InputSchema(BaseModel): class OutputSchema(BaseModel): """Output schema for QoI model training.""" - trained_models: list[OutputFileReference] = Field( + trained_models: list[OutputPathReference] = Field( description="Pickle file containing weights of trained model" ) - scalers: list[OutputFileReference] = Field( + scalers: list[OutputPathReference] = Field( description="Pickle file containing the scaling method for the dataset" ) @@ -44,7 +44,7 @@ def evaluate(inputs: Any) -> Any: from process.scaler import ScalingPipeline from process.train import train_hybrid_models - # Convert all inputs to Path objects (handles strings, InputFileReference, and Path) + # Convert all inputs to Path objects (handles strings, InputPathReference, and Path) config = get_config() input_base = Path(config.input_path) output_base = Path(config.output_path) diff --git a/docs/content/examples/building-blocks/filereference.md b/docs/content/examples/building-blocks/filereference.md index 6e39ecf8..8052d550 100644 --- a/docs/content/examples/building-blocks/filereference.md +++ b/docs/content/examples/building-blocks/filereference.md @@ -7,7 +7,7 @@ To be used for Tesseracts with large inputs and/or outputs. ## Example Tesseract (`examples/filereference`) -Using `InputFileReference` and `OutputFileReference` you can +Using `InputPathReference` and `OutputPathReference` you can include references to files in the `InputSchema` and `OutputSchema` of a Tesseract. The file reference schemas make sure that a file exists (either locally or in the Tesseract) and resolve paths correctly in both `tesseract-runtime` and `tesseract run` calls. 
diff --git a/examples/filereference/tesseract_api.py b/examples/filereference/tesseract_api.py index fef9584e..d513ef39 100644 --- a/examples/filereference/tesseract_api.py +++ b/examples/filereference/tesseract_api.py @@ -8,17 +8,17 @@ from tesseract_core.runtime.config import get_config from tesseract_core.runtime.experimental import ( - InputFileReference, - OutputFileReference, + InputPathReference, + OutputPathReference, ) class InputSchema(BaseModel): - data: list[InputFileReference] + data: list[InputPathReference] class OutputSchema(BaseModel): - data: list[OutputFileReference] + data: list[OutputPathReference] def apply(inputs: InputSchema) -> OutputSchema: diff --git a/tesseract_core/runtime/experimental.py b/tesseract_core/runtime/experimental.py index 4b7013a3..642f474f 100644 --- a/tesseract_core/runtime/experimental.py +++ b/tesseract_core/runtime/experimental.py @@ -226,8 +226,6 @@ def _resolve_input_path(path: Path) -> Path: ) if not tess_path.exists(): raise FileNotFoundError(f"Input path {tess_path} does not exist.") - if not tess_path.is_file(): - raise ValueError(f"Input path {tess_path} is not a file.") return tess_path @@ -241,8 +239,8 @@ def _strip_output_path(path: Path) -> Path: return path -InputFileReference = Annotated[Path, AfterValidator(_resolve_input_path)] -OutputFileReference = Annotated[Path, AfterValidator(_strip_output_path)] +InputPathReference = Annotated[Path, AfterValidator(_resolve_input_path)] +OutputPathReference = Annotated[Path, AfterValidator(_strip_output_path)] def require_file(file_path: PathLike) -> Path: @@ -346,9 +344,9 @@ def __get_pydantic_json_schema__( __all__ = [ - "InputFileReference", + "InputPathReference", "LazySequence", - "OutputFileReference", + "OutputPathReference", "PydanticLazySequenceAnnotation", "TesseractReference", "finite_difference_jacobian", From 7c7159e4b8067fad5ca183fb9f8f76aa186e710a Mon Sep 17 00:00:00 2001 From: nmheim Date: Tue, 7 Apr 2026 13:31:45 +0200 Subject: [PATCH 02/17] 
*DirectoryReference --- .../ansys-qoi/qoi_dataset/tesseract_api.py | 6 +- .../ansys-qoi/qoi_inference/tesseract_api.py | 8 +-- .../ansys-qoi/qoi_train/tesseract_api.py | 10 ++-- .../examples/building-blocks/filereference.md | 2 +- examples/directoryreference/inputs.json | 8 +++ examples/directoryreference/tesseract_api.py | 32 +++++++++++ .../directoryreference/tesseract_config.yaml | 9 +++ .../test_cases/test_apply.json | 24 ++++++++ .../testdata/sample_dir_0/data.json | 1 + .../testdata/sample_dir_1/data.json | 1 + examples/filereference/tesseract_api.py | 8 +-- tesseract_core/runtime/experimental.py | 56 +++++++++++++++++-- 12 files changed, 144 insertions(+), 21 deletions(-) create mode 100644 examples/directoryreference/inputs.json create mode 100644 examples/directoryreference/tesseract_api.py create mode 100644 examples/directoryreference/tesseract_config.yaml create mode 100644 examples/directoryreference/test_cases/test_apply.json create mode 100644 examples/directoryreference/test_cases/testdata/sample_dir_0/data.json create mode 100644 examples/directoryreference/test_cases/testdata/sample_dir_1/data.json diff --git a/demo/_showcase/ansys-qoi/qoi_dataset/tesseract_api.py b/demo/_showcase/ansys-qoi/qoi_dataset/tesseract_api.py index 00e078f1..57788c2e 100644 --- a/demo/_showcase/ansys-qoi/qoi_dataset/tesseract_api.py +++ b/demo/_showcase/ansys-qoi/qoi_dataset/tesseract_api.py @@ -10,13 +10,13 @@ from torch.utils._pytree import tree_map from tesseract_core.runtime.config import get_config -from tesseract_core.runtime.experimental import InputPathReference, OutputPathReference +from tesseract_core.runtime.experimental import InputFileReference, OutputFileReference class InputSchema(BaseModel): """Input schema for QoI dataset generation.""" - config: InputPathReference = Field(description="Configuration file") + config: InputFileReference = Field(description="Configuration file") sim_folder: str = Field( description="Folder path containing Ansys Fluent 
simulations with CAD files and QoI reports", @@ -30,7 +30,7 @@ class InputSchema(BaseModel): class OutputSchema(BaseModel): """Output schema for QoI dataset generation.""" - data: list[OutputPathReference] = Field( + data: list[OutputFileReference] = Field( description="List of npz files containing point cloud data, simulation parameters and QoI (if available)", ) diff --git a/demo/_showcase/ansys-qoi/qoi_inference/tesseract_api.py b/demo/_showcase/ansys-qoi/qoi_inference/tesseract_api.py index e086082d..884da7fa 100644 --- a/demo/_showcase/ansys-qoi/qoi_inference/tesseract_api.py +++ b/demo/_showcase/ansys-qoi/qoi_inference/tesseract_api.py @@ -16,21 +16,21 @@ from tesseract_core.runtime import Array, Float32 from tesseract_core.runtime.config import get_config -from tesseract_core.runtime.experimental import InputPathReference +from tesseract_core.runtime.experimental import InputFileReference class InputSchema(BaseModel): """Input schema for QoI model inference.""" - config: InputPathReference = Field(description="Configuration file") + config: InputFileReference = Field(description="Configuration file") data_folder: str = Field( description="Folder containing npz files with point cloud data and simulation parameters" ) - trained_model: InputPathReference = Field( + trained_model: InputFileReference = Field( description="Pickle file containing weights of trained model" ) - scaler: InputPathReference = Field( + scaler: InputFileReference = Field( description="Pickle file containing the scaling method for the dataset" ) diff --git a/demo/_showcase/ansys-qoi/qoi_train/tesseract_api.py b/demo/_showcase/ansys-qoi/qoi_train/tesseract_api.py index 7cf66ca2..ea3b7714 100644 --- a/demo/_showcase/ansys-qoi/qoi_train/tesseract_api.py +++ b/demo/_showcase/ansys-qoi/qoi_train/tesseract_api.py @@ -13,13 +13,13 @@ from torch.utils._pytree import tree_map from tesseract_core.runtime.config import get_config -from tesseract_core.runtime.experimental import InputPathReference, 
OutputPathReference +from tesseract_core.runtime.experimental import InputFileReference, OutputFileReference class InputSchema(BaseModel): """Input schema for QoI model training.""" - config: InputPathReference = Field(description="Configuration file") + config: InputFileReference = Field(description="Configuration file") data_folder: str = Field( description="Folder containing npz files containing point cloud data information, " @@ -30,10 +30,10 @@ class InputSchema(BaseModel): class OutputSchema(BaseModel): """Output schema for QoI model training.""" - trained_models: list[OutputPathReference] = Field( + trained_models: list[OutputFileReference] = Field( description="Pickle file containing weights of trained model" ) - scalers: list[OutputPathReference] = Field( + scalers: list[OutputFileReference] = Field( description="Pickle file containing the scaling method for the dataset" ) @@ -44,7 +44,7 @@ def evaluate(inputs: Any) -> Any: from process.scaler import ScalingPipeline from process.train import train_hybrid_models - # Convert all inputs to Path objects (handles strings, InputPathReference, and Path) + # Convert all inputs to Path objects (handles strings, InputFileReference, and Path) config = get_config() input_base = Path(config.input_path) output_base = Path(config.output_path) diff --git a/docs/content/examples/building-blocks/filereference.md b/docs/content/examples/building-blocks/filereference.md index 8052d550..6e39ecf8 100644 --- a/docs/content/examples/building-blocks/filereference.md +++ b/docs/content/examples/building-blocks/filereference.md @@ -7,7 +7,7 @@ To be used for Tesseracts with large inputs and/or outputs. ## Example Tesseract (`examples/filereference`) -Using `InputPathReference` and `OutputPathReference` you can +Using `InputFileReference` and `OutputFileReference` you can include references to files in the `InputSchema` and `OutputSchema` of a Tesseract. 
The file reference schemas make sure that a file exists (either locally or in the Tesseract) and resolve paths correctly in both `tesseract-runtime` and `tesseract run` calls. diff --git a/examples/directoryreference/inputs.json b/examples/directoryreference/inputs.json new file mode 100644 index 00000000..7afaba57 --- /dev/null +++ b/examples/directoryreference/inputs.json @@ -0,0 +1,8 @@ +{ + "inputs": { + "dirs": [ + "sample_dir_0", + "sample_dir_1" + ] + } +} diff --git a/examples/directoryreference/tesseract_api.py b/examples/directoryreference/tesseract_api.py new file mode 100644 index 00000000..9527a178 --- /dev/null +++ b/examples/directoryreference/tesseract_api.py @@ -0,0 +1,32 @@ +# Copyright 2025 Pasteur Labs. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +import shutil +from pathlib import Path + +from pydantic import BaseModel + +from tesseract_core.runtime.config import get_config +from tesseract_core.runtime.experimental import ( + InputDirectoryReference, + OutputDirectoryReference, +) + + +class InputSchema(BaseModel): + dirs: list[InputDirectoryReference] + + +class OutputSchema(BaseModel): + dirs: list[OutputDirectoryReference] + + +def apply(inputs: InputSchema) -> OutputSchema: + output_path = Path(get_config().output_path) + result = [] + for src in inputs.dirs: + # src is an absolute Path to the input directory + dest = output_path / src.name + shutil.copytree(src, dest) + result.append(dest) + return OutputSchema(dirs=result) diff --git a/examples/directoryreference/tesseract_config.yaml b/examples/directoryreference/tesseract_config.yaml new file mode 100644 index 00000000..5e258f2b --- /dev/null +++ b/examples/directoryreference/tesseract_config.yaml @@ -0,0 +1,9 @@ +name: directoryreference +version: "1.0.0" +description: | + Tesseract that copies input directories to the output directory. + Demonstrates InputDirectoryReference and OutputDirectoryReference. 
+ +build_config: + package_data: [] + custom_build_steps: [] diff --git a/examples/directoryreference/test_cases/test_apply.json b/examples/directoryreference/test_cases/test_apply.json new file mode 100644 index 00000000..f909cf4f --- /dev/null +++ b/examples/directoryreference/test_cases/test_apply.json @@ -0,0 +1,24 @@ +{ + "endpoint": "apply", + "expected_outputs": { + "dirs": [ + "sample_dir_0", + "sample_dir_1" + ] + }, + "expected_exception": null, + "expected_exception_regex": null, + "atol": 1e-8, + "rtol": 1e-5, + "cli_config": { + "input_path": "testdata/" + }, + "payload": { + "inputs": { + "dirs": [ + "sample_dir_0", + "sample_dir_1" + ] + } + } +} diff --git a/examples/directoryreference/test_cases/testdata/sample_dir_0/data.json b/examples/directoryreference/test_cases/testdata/sample_dir_0/data.json new file mode 100644 index 00000000..45e8a9bf --- /dev/null +++ b/examples/directoryreference/test_cases/testdata/sample_dir_0/data.json @@ -0,0 +1 @@ +{"value": 0} diff --git a/examples/directoryreference/test_cases/testdata/sample_dir_1/data.json b/examples/directoryreference/test_cases/testdata/sample_dir_1/data.json new file mode 100644 index 00000000..4f8c6a37 --- /dev/null +++ b/examples/directoryreference/test_cases/testdata/sample_dir_1/data.json @@ -0,0 +1 @@ +{"value": 1} diff --git a/examples/filereference/tesseract_api.py b/examples/filereference/tesseract_api.py index d513ef39..fef9584e 100644 --- a/examples/filereference/tesseract_api.py +++ b/examples/filereference/tesseract_api.py @@ -8,17 +8,17 @@ from tesseract_core.runtime.config import get_config from tesseract_core.runtime.experimental import ( - InputPathReference, - OutputPathReference, + InputFileReference, + OutputFileReference, ) class InputSchema(BaseModel): - data: list[InputPathReference] + data: list[InputFileReference] class OutputSchema(BaseModel): - data: list[OutputPathReference] + data: list[OutputFileReference] def apply(inputs: InputSchema) -> OutputSchema: diff --git 
a/tesseract_core/runtime/experimental.py b/tesseract_core/runtime/experimental.py index 642f474f..39a514ae 100644 --- a/tesseract_core/runtime/experimental.py +++ b/tesseract_core/runtime/experimental.py @@ -229,6 +229,20 @@ def _resolve_input_path(path: Path) -> Path: return tess_path +def _resolve_input_file(path: Path) -> Path: + tess_path = _resolve_input_path(path) + if not tess_path.is_file(): + raise ValueError(f"Input path {tess_path} is not a file.") + return tess_path + + +def _resolve_input_dir(path: Path) -> Path: + tess_path = _resolve_input_path(path) + if not tess_path.is_dir(): + raise ValueError(f"Input path {tess_path} is not a directory.") + return tess_path + + def _strip_output_path(path: Path) -> Path: from tesseract_core.runtime.config import get_config @@ -239,8 +253,40 @@ def _strip_output_path(path: Path) -> Path: return path -InputPathReference = Annotated[Path, AfterValidator(_resolve_input_path)] -OutputPathReference = Annotated[Path, AfterValidator(_strip_output_path)] +def _strip_output_file(path: Path) -> Path: + from tesseract_core.runtime.config import get_config + + output_path = get_config().output_path + if path.is_relative_to(output_path): + if not path.is_file(): + raise ValueError(f"Output path {path} is not a file.") + return path.relative_to(output_path) + else: + full_path = Path(output_path) / path + if not full_path.is_file(): + raise ValueError(f"Output path {full_path} is not a file.") + return path + + +def _strip_output_dir(path: Path) -> Path: + from tesseract_core.runtime.config import get_config + + output_path = get_config().output_path + if path.is_relative_to(output_path): + if not path.is_dir(): + raise ValueError(f"Output path {path} is not a directory.") + return path.relative_to(output_path) + else: + full_path = Path(output_path) / path + if not full_path.is_dir(): + raise ValueError(f"Output path {full_path} is not a directory.") + return path + + +InputFileReference = Annotated[Path, 
AfterValidator(_resolve_input_file)] +InputDirectoryReference = Annotated[Path, AfterValidator(_resolve_input_dir)] +OutputFileReference = Annotated[Path, AfterValidator(_strip_output_file)] +OutputDirectoryReference = Annotated[Path, AfterValidator(_strip_output_dir)] def require_file(file_path: PathLike) -> Path: @@ -344,9 +390,11 @@ def __get_pydantic_json_schema__( __all__ = [ - "InputPathReference", + "InputDirectoryReference", + "InputFileReference", "LazySequence", - "OutputPathReference", + "OutputDirectoryReference", + "OutputFileReference", "PydanticLazySequenceAnnotation", "TesseractReference", "finite_difference_jacobian", From 3de262f2bd122a0f8ec477e8dc40ef70753047ba Mon Sep 17 00:00:00 2001 From: nmheim Date: Tue, 7 Apr 2026 13:34:37 +0200 Subject: [PATCH 03/17] fix example test path --- examples/filereference/test_tesseract.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/examples/filereference/test_tesseract.py b/examples/filereference/test_tesseract.py index b8ca5af2..f7c6f6d6 100644 --- a/examples/filereference/test_tesseract.py +++ b/examples/filereference/test_tesseract.py @@ -5,7 +5,7 @@ from tesseract_core import Tesseract here = Path(__file__).parent.resolve() -input_path = Path("./testdata") +input_path = Path("./test_cases/testdata") output_path = Path("./output") # these are relative to input_path @@ -27,7 +27,9 @@ ) as tess: result = tess.apply({"data": data}) print(result) - assert all((output_path / p).exists() for p in result["data"]) + paths = [(output_path / p) for p in result["data"]] + assert len(paths) == len(data) + assert all(p.exists() for p in paths) with Tesseract.from_image( @@ -37,4 +39,6 @@ ) as tess: result = tess.apply({"data": data}) print(result) - assert all((output_path / p).exists() for p in result["data"]) + paths = [(output_path / p) for p in result["data"]] + assert len(paths) == len(data) + assert all(p.exists() for p in paths) From 956531bd89a9be3c0f6b9bcf0c3a7bd4d882f345 Mon Sep 
17 00:00:00 2001 From: nmheim Date: Tue, 7 Apr 2026 13:41:31 +0200 Subject: [PATCH 04/17] include directory reference in tests --- examples/directoryreference/.gitignore | 1 + examples/directoryreference/test_tesseract.py | 35 +++++++++++++++++++ tests/endtoend_tests/test_examples.py | 1 + 3 files changed, 37 insertions(+) create mode 100644 examples/directoryreference/.gitignore create mode 100644 examples/directoryreference/test_tesseract.py diff --git a/examples/directoryreference/.gitignore b/examples/directoryreference/.gitignore new file mode 100644 index 00000000..77320b33 --- /dev/null +++ b/examples/directoryreference/.gitignore @@ -0,0 +1 @@ +output/* diff --git a/examples/directoryreference/test_tesseract.py b/examples/directoryreference/test_tesseract.py new file mode 100644 index 00000000..8c936c9b --- /dev/null +++ b/examples/directoryreference/test_tesseract.py @@ -0,0 +1,35 @@ +from pathlib import Path + +from rich import print + +from tesseract_core import Tesseract + +input_path = Path("./test_cases/testdata") +output_path = Path("./output") + +# these are relative to input_path +dirs = [ + "sample_dir_0", + "sample_dir_1", +] + +with Tesseract.from_tesseract_api( + "tesseract_api.py", input_path=input_path, output_path=output_path +) as tess: + result = tess.apply({"dirs": dirs}) + print(result) + paths = [(output_path / p) for p in result["dirs"]] + assert len(paths) == len(dirs) + assert all(p.is_dir() for p in paths) + + +with Tesseract.from_image( + "directoryreference", + input_path=input_path, + output_path=output_path, +) as tess: + result = tess.apply({"dirs": dirs}) + print(result) + paths = [(output_path / p) for p in result["dirs"]] + assert len(paths) == len(dirs) + assert all(p.is_dir() for p in paths) diff --git a/tests/endtoend_tests/test_examples.py b/tests/endtoend_tests/test_examples.py index c8db19a4..9ee85cdb 100644 --- a/tests/endtoend_tests/test_examples.py +++ b/tests/endtoend_tests/test_examples.py @@ -103,6 +103,7 @@ 
class Config: "conda": Config(), "required_files": Config(input_path="input"), "filereference": Config(input_path="test_cases/testdata", output_path="output"), + "directoryreference": Config(input_path="test_cases/testdata", output_path="output"), "metrics": Config(test_with_random_inputs=True), "qp_solve": Config(), "tesseractreference": Config(), # Can't test requests standalone; needs target Tesseract. Covered in separate test. From ace231372a32cfc50f03c776611c9896e7338ae3 Mon Sep 17 00:00:00 2001 From: nmheim Date: Tue, 7 Apr 2026 13:43:50 +0200 Subject: [PATCH 05/17] fix example --- examples/directoryreference/tesseract_api.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/directoryreference/tesseract_api.py b/examples/directoryreference/tesseract_api.py index 9527a178..4016b46f 100644 --- a/examples/directoryreference/tesseract_api.py +++ b/examples/directoryreference/tesseract_api.py @@ -27,6 +27,8 @@ def apply(inputs: InputSchema) -> OutputSchema: for src in inputs.dirs: # src is an absolute Path to the input directory dest = output_path / src.name + if dest.exists(): + shutil.rmtree(dest) shutil.copytree(src, dest) result.append(dest) return OutputSchema(dirs=result) From a11ef264df7d9b5f807affdd40cf6a20ef41ece2 Mon Sep 17 00:00:00 2001 From: nmheim Date: Tue, 7 Apr 2026 13:48:14 +0200 Subject: [PATCH 06/17] lint --- examples/directoryreference/inputs.json | 8 -------- examples/directoryreference/test_cases/test_apply.json | 10 ++-------- .../test_cases/testdata/sample_dir_0/data.json | 2 +- .../test_cases/testdata/sample_dir_1/data.json | 2 +- tesseract_core/runtime/experimental.py | 10 ---------- tests/endtoend_tests/test_examples.py | 4 +++- 6 files changed, 7 insertions(+), 29 deletions(-) delete mode 100644 examples/directoryreference/inputs.json diff --git a/examples/directoryreference/inputs.json b/examples/directoryreference/inputs.json deleted file mode 100644 index 7afaba57..00000000 --- a/examples/directoryreference/inputs.json +++ 
/dev/null @@ -1,8 +0,0 @@ -{ - "inputs": { - "dirs": [ - "sample_dir_0", - "sample_dir_1" - ] - } -} diff --git a/examples/directoryreference/test_cases/test_apply.json b/examples/directoryreference/test_cases/test_apply.json index f909cf4f..69bfba33 100644 --- a/examples/directoryreference/test_cases/test_apply.json +++ b/examples/directoryreference/test_cases/test_apply.json @@ -1,10 +1,7 @@ { "endpoint": "apply", "expected_outputs": { - "dirs": [ - "sample_dir_0", - "sample_dir_1" - ] + "dirs": ["sample_dir_0", "sample_dir_1"] }, "expected_exception": null, "expected_exception_regex": null, @@ -15,10 +12,7 @@ }, "payload": { "inputs": { - "dirs": [ - "sample_dir_0", - "sample_dir_1" - ] + "dirs": ["sample_dir_0", "sample_dir_1"] } } } diff --git a/examples/directoryreference/test_cases/testdata/sample_dir_0/data.json b/examples/directoryreference/test_cases/testdata/sample_dir_0/data.json index 45e8a9bf..5eb84b0a 100644 --- a/examples/directoryreference/test_cases/testdata/sample_dir_0/data.json +++ b/examples/directoryreference/test_cases/testdata/sample_dir_0/data.json @@ -1 +1 @@ -{"value": 0} +{ "value": 0 } diff --git a/examples/directoryreference/test_cases/testdata/sample_dir_1/data.json b/examples/directoryreference/test_cases/testdata/sample_dir_1/data.json index 4f8c6a37..e1cbe3c1 100644 --- a/examples/directoryreference/test_cases/testdata/sample_dir_1/data.json +++ b/examples/directoryreference/test_cases/testdata/sample_dir_1/data.json @@ -1 +1 @@ -{"value": 1} +{ "value": 1 } diff --git a/tesseract_core/runtime/experimental.py b/tesseract_core/runtime/experimental.py index 39a514ae..475c67b8 100644 --- a/tesseract_core/runtime/experimental.py +++ b/tesseract_core/runtime/experimental.py @@ -243,16 +243,6 @@ def _resolve_input_dir(path: Path) -> Path: return tess_path -def _strip_output_path(path: Path) -> Path: - from tesseract_core.runtime.config import get_config - - output_path = get_config().output_path - if path.is_relative_to(output_path): - 
return path.relative_to(output_path) - else: - return path - - def _strip_output_file(path: Path) -> Path: from tesseract_core.runtime.config import get_config diff --git a/tests/endtoend_tests/test_examples.py b/tests/endtoend_tests/test_examples.py index 9ee85cdb..63184dfa 100644 --- a/tests/endtoend_tests/test_examples.py +++ b/tests/endtoend_tests/test_examples.py @@ -103,7 +103,9 @@ class Config: "conda": Config(), "required_files": Config(input_path="input"), "filereference": Config(input_path="test_cases/testdata", output_path="output"), - "directoryreference": Config(input_path="test_cases/testdata", output_path="output"), + "directoryreference": Config( + input_path="test_cases/testdata", output_path="output" + ), "metrics": Config(test_with_random_inputs=True), "qp_solve": Config(), "tesseractreference": Config(), # Can't test requests standalone; needs target Tesseract. Covered in separate test. From e63438c0fd0826017d7019946015e0ac0aced70a Mon Sep 17 00:00:00 2001 From: Niklas Heim Date: Tue, 7 Apr 2026 18:32:03 +0200 Subject: [PATCH 07/17] Get *PathReference back --- .../directoryreference/tesseract_config.yaml | 9 ---- .../test_cases/test_apply.json | 18 ------- .../testdata/sample_dir_0/data.json | 1 - .../testdata/sample_dir_1/data.json | 1 - examples/directoryreference/test_tesseract.py | 35 ------------- examples/filereference/.gitignore | 1 - examples/filereference/inputs.json | 16 ------ examples/filereference/tesseract_api.py | 34 ------------ examples/filereference/tesseract_config.yaml | 8 --- examples/filereference/test_tesseract.py | 44 ---------------- .../.gitignore | 0 .../generate_data.py | 0 .../tesseract_api.py | 25 +++++---- examples/pathreference/tesseract_config.yaml | 10 ++++ .../test_cases/test_apply.json | 6 ++- .../a2af4236-15c5-40ec-9014-b79f9cbad50c.bin | Bin .../test_cases/testdata/sample_0.json | 0 .../test_cases/testdata/sample_1.json | 0 .../test_cases/testdata/sample_2.json | 0 .../test_cases/testdata/sample_3.json | 0 
.../test_cases/testdata/sample_4.json | 0 .../test_cases/testdata/sample_5.json | 0 .../test_cases/testdata/sample_6.json | 0 .../test_cases/testdata/sample_7.json | 0 .../test_cases/testdata/sample_8.json | 0 .../test_cases/testdata/sample_9.json | 0 .../test_cases/testdata/sample_dir/data.json | 1 + examples/pathreference/test_tesseract.py | 37 +++++++++++++ tesseract_core/runtime/experimental.py | 49 +++++++----------- tests/endtoend_tests/test_examples.py | 5 +- 30 files changed, 87 insertions(+), 213 deletions(-) delete mode 100644 examples/directoryreference/tesseract_config.yaml delete mode 100644 examples/directoryreference/test_cases/test_apply.json delete mode 100644 examples/directoryreference/test_cases/testdata/sample_dir_0/data.json delete mode 100644 examples/directoryreference/test_cases/testdata/sample_dir_1/data.json delete mode 100644 examples/directoryreference/test_tesseract.py delete mode 100644 examples/filereference/.gitignore delete mode 100644 examples/filereference/inputs.json delete mode 100644 examples/filereference/tesseract_api.py delete mode 100644 examples/filereference/tesseract_config.yaml delete mode 100644 examples/filereference/test_tesseract.py rename examples/{directoryreference => pathreference}/.gitignore (100%) rename examples/{filereference => pathreference}/generate_data.py (100%) rename examples/{directoryreference => pathreference}/tesseract_api.py (50%) create mode 100644 examples/pathreference/tesseract_config.yaml rename examples/{filereference => pathreference}/test_cases/test_apply.json (89%) rename examples/{filereference => pathreference}/test_cases/testdata/a2af4236-15c5-40ec-9014-b79f9cbad50c.bin (100%) rename examples/{filereference => pathreference}/test_cases/testdata/sample_0.json (100%) rename examples/{filereference => pathreference}/test_cases/testdata/sample_1.json (100%) rename examples/{filereference => pathreference}/test_cases/testdata/sample_2.json (100%) rename examples/{filereference => 
pathreference}/test_cases/testdata/sample_3.json (100%) rename examples/{filereference => pathreference}/test_cases/testdata/sample_4.json (100%) rename examples/{filereference => pathreference}/test_cases/testdata/sample_5.json (100%) rename examples/{filereference => pathreference}/test_cases/testdata/sample_6.json (100%) rename examples/{filereference => pathreference}/test_cases/testdata/sample_7.json (100%) rename examples/{filereference => pathreference}/test_cases/testdata/sample_8.json (100%) rename examples/{filereference => pathreference}/test_cases/testdata/sample_9.json (100%) create mode 100644 examples/pathreference/test_cases/testdata/sample_dir/data.json create mode 100644 examples/pathreference/test_tesseract.py diff --git a/examples/directoryreference/tesseract_config.yaml b/examples/directoryreference/tesseract_config.yaml deleted file mode 100644 index 5e258f2b..00000000 --- a/examples/directoryreference/tesseract_config.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: directoryreference -version: "1.0.0" -description: | - Tesseract that copies input directories to the output directory. - Demonstrates InputDirectoryReference and OutputDirectoryReference. 
- -build_config: - package_data: [] - custom_build_steps: [] diff --git a/examples/directoryreference/test_cases/test_apply.json b/examples/directoryreference/test_cases/test_apply.json deleted file mode 100644 index 69bfba33..00000000 --- a/examples/directoryreference/test_cases/test_apply.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "endpoint": "apply", - "expected_outputs": { - "dirs": ["sample_dir_0", "sample_dir_1"] - }, - "expected_exception": null, - "expected_exception_regex": null, - "atol": 1e-8, - "rtol": 1e-5, - "cli_config": { - "input_path": "testdata/" - }, - "payload": { - "inputs": { - "dirs": ["sample_dir_0", "sample_dir_1"] - } - } -} diff --git a/examples/directoryreference/test_cases/testdata/sample_dir_0/data.json b/examples/directoryreference/test_cases/testdata/sample_dir_0/data.json deleted file mode 100644 index 5eb84b0a..00000000 --- a/examples/directoryreference/test_cases/testdata/sample_dir_0/data.json +++ /dev/null @@ -1 +0,0 @@ -{ "value": 0 } diff --git a/examples/directoryreference/test_cases/testdata/sample_dir_1/data.json b/examples/directoryreference/test_cases/testdata/sample_dir_1/data.json deleted file mode 100644 index e1cbe3c1..00000000 --- a/examples/directoryreference/test_cases/testdata/sample_dir_1/data.json +++ /dev/null @@ -1 +0,0 @@ -{ "value": 1 } diff --git a/examples/directoryreference/test_tesseract.py b/examples/directoryreference/test_tesseract.py deleted file mode 100644 index 8c936c9b..00000000 --- a/examples/directoryreference/test_tesseract.py +++ /dev/null @@ -1,35 +0,0 @@ -from pathlib import Path - -from rich import print - -from tesseract_core import Tesseract - -input_path = Path("./test_cases/testdata") -output_path = Path("./output") - -# these are relative to input_path -dirs = [ - "sample_dir_0", - "sample_dir_1", -] - -with Tesseract.from_tesseract_api( - "tesseract_api.py", input_path=input_path, output_path=output_path -) as tess: - result = tess.apply({"dirs": dirs}) - print(result) - paths = 
[(output_path / p) for p in result["dirs"]] - assert len(paths) == len(dirs) - assert all(p.is_dir() for p in paths) - - -with Tesseract.from_image( - "directoryreference", - input_path=input_path, - output_path=output_path, -) as tess: - result = tess.apply({"dirs": dirs}) - print(result) - paths = [(output_path / p) for p in result["dirs"]] - assert len(paths) == len(dirs) - assert all(p.is_dir() for p in paths) diff --git a/examples/filereference/.gitignore b/examples/filereference/.gitignore deleted file mode 100644 index 77320b33..00000000 --- a/examples/filereference/.gitignore +++ /dev/null @@ -1 +0,0 @@ -output/* diff --git a/examples/filereference/inputs.json b/examples/filereference/inputs.json deleted file mode 100644 index e2626cec..00000000 --- a/examples/filereference/inputs.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "inputs": { - "data": [ - "sample_0.json", - "sample_1.json", - "sample_2.json", - "sample_3.json", - "sample_4.json", - "sample_5.json", - "sample_6.json", - "sample_7.json", - "sample_8.json", - "sample_9.json" - ] - } -} diff --git a/examples/filereference/tesseract_api.py b/examples/filereference/tesseract_api.py deleted file mode 100644 index fef9584e..00000000 --- a/examples/filereference/tesseract_api.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright 2025 Pasteur Labs. All Rights Reserved. -# SPDX-License-Identifier: Apache-2.0 - -import shutil -from pathlib import Path - -from pydantic import BaseModel - -from tesseract_core.runtime.config import get_config -from tesseract_core.runtime.experimental import ( - InputFileReference, - OutputFileReference, -) - - -class InputSchema(BaseModel): - data: list[InputFileReference] - - -class OutputSchema(BaseModel): - data: list[OutputFileReference] - - -def apply(inputs: InputSchema) -> OutputSchema: - output_path = Path(get_config().output_path) - files = [] - for source in inputs.data: - # source is a pathlib.Path starting with /path/to/input_path/... 
- target = output_path / source.name - # target must be a pathlib.Path at /path/to/output_path - target = target.with_suffix(".copy") - shutil.copy(source, target) - files.append(target) - return OutputSchema(data=files) diff --git a/examples/filereference/tesseract_config.yaml b/examples/filereference/tesseract_config.yaml deleted file mode 100644 index 92219a45..00000000 --- a/examples/filereference/tesseract_config.yaml +++ /dev/null @@ -1,8 +0,0 @@ -name: filereference -version: "1.0.0" -description: | - Tesseract that loads data from a folder. - -build_config: - package_data: [] - custom_build_steps: [] diff --git a/examples/filereference/test_tesseract.py b/examples/filereference/test_tesseract.py deleted file mode 100644 index f7c6f6d6..00000000 --- a/examples/filereference/test_tesseract.py +++ /dev/null @@ -1,44 +0,0 @@ -from pathlib import Path - -from rich import print - -from tesseract_core import Tesseract - -here = Path(__file__).parent.resolve() -input_path = Path("./test_cases/testdata") -output_path = Path("./output") - -# these are relative to input_path -data = [ - "sample_0.json", - "sample_1.json", - "sample_2.json", - "sample_3.json", - "sample_4.json", - "sample_5.json", - "sample_6.json", - "sample_7.json", - "sample_8.json", - "sample_9.json", -] - -with Tesseract.from_tesseract_api( - "tesseract_api.py", input_path=input_path, output_path=output_path -) as tess: - result = tess.apply({"data": data}) - print(result) - paths = [(output_path / p) for p in result["data"]] - assert len(paths) == len(data) - assert all(p.exists() for p in paths) - - -with Tesseract.from_image( - "filereference", - input_path=input_path, - output_path=output_path, -) as tess: - result = tess.apply({"data": data}) - print(result) - paths = [(output_path / p) for p in result["data"]] - assert len(paths) == len(data) - assert all(p.exists() for p in paths) diff --git a/examples/directoryreference/.gitignore b/examples/pathreference/.gitignore similarity index 100% 
rename from examples/directoryreference/.gitignore rename to examples/pathreference/.gitignore diff --git a/examples/filereference/generate_data.py b/examples/pathreference/generate_data.py similarity index 100% rename from examples/filereference/generate_data.py rename to examples/pathreference/generate_data.py diff --git a/examples/directoryreference/tesseract_api.py b/examples/pathreference/tesseract_api.py similarity index 50% rename from examples/directoryreference/tesseract_api.py rename to examples/pathreference/tesseract_api.py index 4016b46f..e25bf717 100644 --- a/examples/directoryreference/tesseract_api.py +++ b/examples/pathreference/tesseract_api.py @@ -8,27 +8,30 @@ from tesseract_core.runtime.config import get_config from tesseract_core.runtime.experimental import ( - InputDirectoryReference, - OutputDirectoryReference, + InputPathReference, + OutputPathReference, ) class InputSchema(BaseModel): - dirs: list[InputDirectoryReference] + paths: list[InputPathReference] class OutputSchema(BaseModel): - dirs: list[OutputDirectoryReference] + paths: list[OutputPathReference] def apply(inputs: InputSchema) -> OutputSchema: output_path = Path(get_config().output_path) result = [] - for src in inputs.dirs: - # src is an absolute Path to the input directory - dest = output_path / src.name - if dest.exists(): - shutil.rmtree(dest) - shutil.copytree(src, dest) + for src in inputs.paths: + if src.is_dir(): + dest = output_path / src.name + if dest.exists(): + shutil.rmtree(dest) + shutil.copytree(src, dest) + else: + dest = output_path / src.with_suffix(".copy").name + shutil.copy(src, dest) result.append(dest) - return OutputSchema(dirs=result) + return OutputSchema(paths=result) diff --git a/examples/pathreference/tesseract_config.yaml b/examples/pathreference/tesseract_config.yaml new file mode 100644 index 00000000..ca1245aa --- /dev/null +++ b/examples/pathreference/tesseract_config.yaml @@ -0,0 +1,10 @@ +name: pathreference +version: "1.0.0" +description: | 
+ Tesseract that copies input files and directories to the output directory. + Demonstrates InputPathReference and OutputPathReference, which accept both + files and directories. + +build_config: + package_data: [] + custom_build_steps: [] diff --git a/examples/filereference/test_cases/test_apply.json b/examples/pathreference/test_cases/test_apply.json similarity index 89% rename from examples/filereference/test_cases/test_apply.json rename to examples/pathreference/test_cases/test_apply.json index 13208964..0ec87689 100644 --- a/examples/filereference/test_cases/test_apply.json +++ b/examples/pathreference/test_cases/test_apply.json @@ -11,7 +11,8 @@ "sample_6.copy", "sample_7.copy", "sample_8.copy", - "sample_9.copy" + "sample_9.copy", + "sample_dir" ] }, "expected_exception": null, @@ -33,7 +34,8 @@ "sample_6.json", "sample_7.json", "sample_8.json", - "sample_9.json" + "sample_9.json", + "sample_dir" ] } } diff --git a/examples/filereference/test_cases/testdata/a2af4236-15c5-40ec-9014-b79f9cbad50c.bin b/examples/pathreference/test_cases/testdata/a2af4236-15c5-40ec-9014-b79f9cbad50c.bin similarity index 100% rename from examples/filereference/test_cases/testdata/a2af4236-15c5-40ec-9014-b79f9cbad50c.bin rename to examples/pathreference/test_cases/testdata/a2af4236-15c5-40ec-9014-b79f9cbad50c.bin diff --git a/examples/filereference/test_cases/testdata/sample_0.json b/examples/pathreference/test_cases/testdata/sample_0.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_0.json rename to examples/pathreference/test_cases/testdata/sample_0.json diff --git a/examples/filereference/test_cases/testdata/sample_1.json b/examples/pathreference/test_cases/testdata/sample_1.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_1.json rename to examples/pathreference/test_cases/testdata/sample_1.json diff --git a/examples/filereference/test_cases/testdata/sample_2.json 
b/examples/pathreference/test_cases/testdata/sample_2.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_2.json rename to examples/pathreference/test_cases/testdata/sample_2.json diff --git a/examples/filereference/test_cases/testdata/sample_3.json b/examples/pathreference/test_cases/testdata/sample_3.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_3.json rename to examples/pathreference/test_cases/testdata/sample_3.json diff --git a/examples/filereference/test_cases/testdata/sample_4.json b/examples/pathreference/test_cases/testdata/sample_4.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_4.json rename to examples/pathreference/test_cases/testdata/sample_4.json diff --git a/examples/filereference/test_cases/testdata/sample_5.json b/examples/pathreference/test_cases/testdata/sample_5.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_5.json rename to examples/pathreference/test_cases/testdata/sample_5.json diff --git a/examples/filereference/test_cases/testdata/sample_6.json b/examples/pathreference/test_cases/testdata/sample_6.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_6.json rename to examples/pathreference/test_cases/testdata/sample_6.json diff --git a/examples/filereference/test_cases/testdata/sample_7.json b/examples/pathreference/test_cases/testdata/sample_7.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_7.json rename to examples/pathreference/test_cases/testdata/sample_7.json diff --git a/examples/filereference/test_cases/testdata/sample_8.json b/examples/pathreference/test_cases/testdata/sample_8.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_8.json rename to examples/pathreference/test_cases/testdata/sample_8.json diff --git 
a/examples/filereference/test_cases/testdata/sample_9.json b/examples/pathreference/test_cases/testdata/sample_9.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_9.json rename to examples/pathreference/test_cases/testdata/sample_9.json diff --git a/examples/pathreference/test_cases/testdata/sample_dir/data.json b/examples/pathreference/test_cases/testdata/sample_dir/data.json new file mode 100644 index 00000000..39adceba --- /dev/null +++ b/examples/pathreference/test_cases/testdata/sample_dir/data.json @@ -0,0 +1 @@ +{ "value": "world" } diff --git a/examples/pathreference/test_tesseract.py b/examples/pathreference/test_tesseract.py new file mode 100644 index 00000000..bf0dd10e --- /dev/null +++ b/examples/pathreference/test_tesseract.py @@ -0,0 +1,37 @@ +from pathlib import Path + +from rich import print + +from tesseract_core import Tesseract + +input_path = Path("./test_cases/testdata") +output_path = Path("./output") + +# mix of a file and a directory, both relative to input_path +paths = [ + "sample_0.json", + "sample_dir", +] + +expected = ["sample_file.copy", "sample_dir"] + +with Tesseract.from_tesseract_api( + "tesseract_api.py", input_path=input_path, output_path=output_path +) as tess: + result = tess.apply({"paths": paths}) + print(result) + out_paths = [(output_path / p) for p in result["paths"]] + assert len(out_paths) == len(paths) + assert all(p.exists() for p in out_paths) + + +with Tesseract.from_image( + "pathreference", + input_path=input_path, + output_path=output_path, +) as tess: + result = tess.apply({"paths": paths}) + print(result) + out_paths = [(output_path / p) for p in result["paths"]] + assert len(out_paths) == len(paths) + assert all(p.exists() for p in out_paths) diff --git a/tesseract_core/runtime/experimental.py b/tesseract_core/runtime/experimental.py index 475c67b8..e276358f 100644 --- a/tesseract_core/runtime/experimental.py +++ b/tesseract_core/runtime/experimental.py @@ -20,6 +20,7 @@ 
from pydantic.json_schema import JsonSchemaValue from pydantic_core import CoreSchema, SchemaSerializer, SchemaValidator, core_schema +from tesseract_core.runtime.config import get_config from tesseract_core.runtime.file_interactions import PathLike, parent_path from tesseract_core.runtime.gradient_endpoint_derivation import ( jacobian_from_jvp, @@ -236,47 +237,37 @@ def _resolve_input_file(path: Path) -> Path: return tess_path -def _resolve_input_dir(path: Path) -> Path: - tess_path = _resolve_input_path(path) - if not tess_path.is_dir(): - raise ValueError(f"Input path {tess_path} is not a directory.") - return tess_path - - -def _strip_output_file(path: Path) -> Path: +def _strip_output_path(path: Path) -> Path: from tesseract_core.runtime.config import get_config output_path = get_config().output_path if path.is_relative_to(output_path): - if not path.is_file(): - raise ValueError(f"Output path {path} is not a file.") return path.relative_to(output_path) else: - full_path = Path(output_path) / path - if not full_path.is_file(): - raise ValueError(f"Output path {full_path} is not a file.") return path -def _strip_output_dir(path: Path) -> Path: - from tesseract_core.runtime.config import get_config +def _strip_output_file(path: Path) -> Path: + stripped = _strip_output_path(path) + full_path = Path(get_config().output_path) / stripped + if not full_path.is_file(): + raise ValueError(f"Output path {full_path} is not a file.") + return stripped + + +def _strip_output_exists(path: Path) -> Path: + stripped = _strip_output_path(path) + full_path = Path(get_config().output_path) / stripped + if not full_path.exists(): + raise ValueError(f"Output path {full_path} does not exist.") + return stripped - output_path = get_config().output_path - if path.is_relative_to(output_path): - if not path.is_dir(): - raise ValueError(f"Output path {path} is not a directory.") - return path.relative_to(output_path) - else: - full_path = Path(output_path) / path - if not 
full_path.is_dir(): - raise ValueError(f"Output path {full_path} is not a directory.") - return path +InputPathReference = Annotated[Path, AfterValidator(_resolve_input_path)] +OutputPathReference = Annotated[Path, AfterValidator(_strip_output_exists)] InputFileReference = Annotated[Path, AfterValidator(_resolve_input_file)] -InputDirectoryReference = Annotated[Path, AfterValidator(_resolve_input_dir)] OutputFileReference = Annotated[Path, AfterValidator(_strip_output_file)] -OutputDirectoryReference = Annotated[Path, AfterValidator(_strip_output_dir)] def require_file(file_path: PathLike) -> Path: @@ -380,11 +371,11 @@ def __get_pydantic_json_schema__( __all__ = [ - "InputDirectoryReference", "InputFileReference", + "InputPathReference", "LazySequence", - "OutputDirectoryReference", "OutputFileReference", + "OutputPathReference", "PydanticLazySequenceAnnotation", "TesseractReference", "finite_difference_jacobian", diff --git a/tests/endtoend_tests/test_examples.py b/tests/endtoend_tests/test_examples.py index 63184dfa..df3351f0 100644 --- a/tests/endtoend_tests/test_examples.py +++ b/tests/endtoend_tests/test_examples.py @@ -102,10 +102,7 @@ class Config: "fortran_heat": Config(), "conda": Config(), "required_files": Config(input_path="input"), - "filereference": Config(input_path="test_cases/testdata", output_path="output"), - "directoryreference": Config( - input_path="test_cases/testdata", output_path="output" - ), + "pathreference": Config(input_path="test_cases/testdata", output_path="output"), "metrics": Config(test_with_random_inputs=True), "qp_solve": Config(), "tesseractreference": Config(), # Can't test requests standalone; needs target Tesseract. Covered in separate test. 
From 868a9d1c40071b79fb9cb9587398ad04bb90a3d5 Mon Sep 17 00:00:00 2001 From: Niklas Heim Date: Wed, 8 Apr 2026 15:50:06 +0200 Subject: [PATCH 08/17] automagical Path references --- examples/dataloader/test_tesseract.py | 57 +++++++++++++ examples/pathreference/README.md | 91 +++++++++++++++++++++ examples/pathreference/tesseract_api.py | 42 ++++++++-- examples/pathreference/test_tesseract.py | 21 ++++- tesseract_core/runtime/schema_generation.py | 7 +- 5 files changed, 203 insertions(+), 15 deletions(-) create mode 100644 examples/dataloader/test_tesseract.py create mode 100644 examples/pathreference/README.md diff --git a/examples/dataloader/test_tesseract.py b/examples/dataloader/test_tesseract.py new file mode 100644 index 00000000..5b773e3a --- /dev/null +++ b/examples/dataloader/test_tesseract.py @@ -0,0 +1,57 @@ +# Copyright 2025 Pasteur Labs. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path + +import numpy as np + +from tesseract_core import Tesseract + +here = Path(__file__).parent +testdata_dir = here / "testdata" +num_samples = len(list(testdata_dir.glob("sample_*.json"))) + +# Absolute glob pattern for local runs +data_glob = f"@{testdata_dir}/sample_*.json" + + +def check_apply_result(result): + assert len(result["data"]) == num_samples + data_sum = np.asarray(result["data_sum"]) + assert data_sum.shape == (3,) + # data_sum is the sum of each sample's column sums; must be positive + assert np.all(data_sum > 0) + + +def check_jacobian_result(result): + jac = np.asarray(result["data_sum"]["data.[0]"]) + # jacobian of data_sum w.r.t. 
data.[0] has shape (3, *data[0].shape) + assert jac.ndim == 3 + assert jac.shape[0] == 3 + + +with Tesseract.from_tesseract_api("tesseract_api.py") as tess: + result = tess.apply({"data": data_glob}) + check_apply_result(result) + + result = tess.jacobian( + inputs={"data": data_glob}, + jac_inputs=["data.[0]"], + jac_outputs=["data_sum"], + ) + check_jacobian_result(result) + + +with Tesseract.from_image( + "dataloader", + volumes=[f"{testdata_dir}:/mnt/testdata:ro"], +) as tess: + result = tess.apply({"data": "@/mnt/testdata/sample_*.json"}) + check_apply_result(result) + + result = tess.jacobian( + inputs={"data": "@/mnt/testdata/sample_*.json"}, + jac_inputs=["data.[0]"], + jac_outputs=["data_sum"], + ) + check_jacobian_result(result) diff --git a/examples/pathreference/README.md b/examples/pathreference/README.md new file mode 100644 index 00000000..a20c2101 --- /dev/null +++ b/examples/pathreference/README.md @@ -0,0 +1,91 @@ +# Path Reference Example + +A Tesseract that copies files and directories from `input_path` to `output_path`. +It demonstrates how to use `Path` in Tesseract schemas and how to compose custom +Pydantic validators on top of the built-in path-handling behaviour. + +## What `Path` does in a schema + +When you annotate a field with `Path`, the schema generation layer automatically +replaces it with `InputPathReference` on inputs and `OutputPathReference` on outputs. + +```python +class InputSchema(BaseModel): + paths: list[Path] # → list[InputPathReference] at runtime + +class OutputSchema(BaseModel): + paths: list[Path] # → list[OutputPathReference] at runtime +``` + +**`InputPathReference`** (inputs) + +- Accepts a *relative* path string from the caller. +- Resolves it to an absolute path under the configured `--input-path`. +- Rejects any path that would escape `input_path` (path traversal protection). +- Raises `FileNotFoundError` if the resolved path does not exist. 
+- Accepts both files **and** directories (use `InputFileReference` for files only). + +**`OutputPathReference`** (outputs) + +- Accepts the absolute path your `apply` function produces (e.g. `output_path / name`). +- Strips the `output_path` prefix, returning a *relative* path to the caller. +- Raises `ValueError` if the path does not exist inside `output_path`. +- Accepts both files **and** directories (use `OutputFileReference` for files only). + +So from the caller's perspective, both inputs and outputs are relative path strings; +the runtime handles all absolute-path resolution transparently. + +## Composing user-defined validators + +`AfterValidator`s placed on a `Path`-annotated field are preserved and run *after* +the built-in path resolution. The user validator therefore always receives an +already-resolved, validated absolute `Path`: + +```python +def has_bin_sidecar(path: Path) -> Path: + """Check that any binref JSON has its .bin sidecar present.""" + if path.is_file(): + name = bin_reference(path) + if name is not None: + bin = path.parent / name + assert bin.exists(), f"Expected .bin file for json {path} not found at {bin}" + return path + +class InputSchema(BaseModel): + paths: list[Annotated[Path, AfterValidator(has_bin_sidecar)]] +``` + +Execution order for each element of `paths`: + +1. Raw string (e.g. `"sample_8.json"`) is validated by `InputPathReference`: + resolves → `/abs/input_path/sample_8.json`, checks it exists. +2. The resolved `Path` is passed to `has_bin_sidecar`, which reads the JSON + and checks that the referenced `.bin` sidecar is present beside it. + +The same pattern applies to `OutputSchema`: the validator runs after +`OutputPathReference` has verified the output file exists and stripped the prefix. +This example uses it to confirm that `apply` also copied the sidecar `.bin` file. 
+ +## Test data + +The test dataset (`test_cases/testdata/`) contains: + +| File | Array encoding | +|------|---------------| +| `sample_0.json`, `sample_3.json`, `sample_6.json`, `sample_9.json` | `json` (inline) | +| `sample_1.json`, `sample_4.json`, `sample_7.json` | `base64` (inline) | +| `sample_2.json`, `sample_5.json`, `sample_8.json` | `binref` (references the shared `.bin` sidecar) | +| `sample_dir/` | directory containing `data.json` | + +`generate_data.py` re-creates this dataset using a fixed RNG seed. + +## Running + +```bash +# local (no Docker) +uv run python test_tesseract.py + +# build Docker image first, then re-run +uv run tesseract build . +uv run python test_tesseract.py +``` diff --git a/examples/pathreference/tesseract_api.py b/examples/pathreference/tesseract_api.py index e25bf717..3f982224 100644 --- a/examples/pathreference/tesseract_api.py +++ b/examples/pathreference/tesseract_api.py @@ -1,37 +1,61 @@ # Copyright 2025 Pasteur Labs. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 +import json import shutil from pathlib import Path +from typing import Annotated -from pydantic import BaseModel +from pydantic import AfterValidator, BaseModel from tesseract_core.runtime.config import get_config -from tesseract_core.runtime.experimental import ( - InputPathReference, - OutputPathReference, -) + + +def bin_reference(path: Path) -> str | None: + """Return the name of the .bin file if the json at 'path' references one, else None.""" + with open(path) as f: + contents = json.load(f) + if contents["data"]["encoding"] == "binref": + return contents["data"]["buffer"].split(":")[0] + return None + + +def has_bin_sidecar(path: Path) -> Path: + print(path) + """Pydantic validator to check for .bin file next to any json file that references one.""" + if path.is_file(): + name = bin_reference(path) + if name is not None: + bin = path.parent / name + assert bin.exists(), ( + f"Expected .bin file for json {json} not found at {bin}" + ) + return 
path class InputSchema(BaseModel): - paths: list[InputPathReference] + paths: list[Annotated[Path, AfterValidator(has_bin_sidecar)]] class OutputSchema(BaseModel): - paths: list[OutputPathReference] + paths: list[Annotated[Path, AfterValidator(has_bin_sidecar)]] def apply(inputs: InputSchema) -> OutputSchema: output_path = Path(get_config().output_path) result = [] + for src in inputs.paths: if src.is_dir(): + # copy any folder that is given dest = output_path / src.name - if dest.exists(): - shutil.rmtree(dest) shutil.copytree(src, dest) else: + # copy any file that is given, and if it references a .bin file, copy that too dest = output_path / src.with_suffix(".copy").name shutil.copy(src, dest) + bin = bin_reference(src) + if bin is not None: + shutil.copy(src.parent / bin, dest.parent / bin) result.append(dest) return OutputSchema(paths=result) diff --git a/examples/pathreference/test_tesseract.py b/examples/pathreference/test_tesseract.py index bf0dd10e..f6b1319e 100644 --- a/examples/pathreference/test_tesseract.py +++ b/examples/pathreference/test_tesseract.py @@ -1,37 +1,50 @@ +import shutil from pathlib import Path from rich import print from tesseract_core import Tesseract + +def _clean(): + # delete before copy + if output_path.exists(): + shutil.rmtree(output_path) + output_path.mkdir() + + input_path = Path("./test_cases/testdata") output_path = Path("./output") # mix of a file and a directory, both relative to input_path paths = [ "sample_0.json", + "sample_8.json", # contains .bin reference "sample_dir", ] expected = ["sample_file.copy", "sample_dir"] + +_clean() with Tesseract.from_tesseract_api( - "tesseract_api.py", input_path=input_path, output_path=output_path + "tesseract_api.py", input_path=input_path, output_path=output_path, stream_logs=True ) as tess: result = tess.apply({"paths": paths}) print(result) out_paths = [(output_path / p) for p in result["paths"]] assert len(out_paths) == len(paths) assert all(p.exists() for p in out_paths) + 
assert len(list(output_path.glob("*.bin"))) == 1 +_clean() with Tesseract.from_image( - "pathreference", - input_path=input_path, - output_path=output_path, + "pathreference", input_path=input_path, output_path=output_path, stream_logs=True ) as tess: result = tess.apply({"paths": paths}) print(result) out_paths = [(output_path / p) for p in result["paths"]] assert len(out_paths) == len(paths) assert all(p.exists() for p in out_paths) + assert len(list(output_path.glob("*.bin"))) == 1 diff --git a/tesseract_core/runtime/schema_generation.py b/tesseract_core/runtime/schema_generation.py index dad8d2e0..d8051773 100644 --- a/tesseract_core/runtime/schema_generation.py +++ b/tesseract_core/runtime/schema_generation.py @@ -5,6 +5,7 @@ import types from collections.abc import Callable, Iterable, Mapping, Sequence from copy import copy +from pathlib import Path from typing import ( Annotated, Any, @@ -255,6 +256,8 @@ def create_apply_schema( InputSchema: type[BaseModel], OutputSchema: type[BaseModel] ) -> tuple[type[BaseModel], type[BaseModel]]: """Create the input / output schemas for the /apply endpoint.""" + from .experimental import InputPathReference, OutputPathReference + # We add metadata to the input and output schemas to indicate which fields are differentiable, # what their paths are, and which expected shape / dtype they have. 
# This is used internally and by some official clients, but not advertised as part of the public API, @@ -268,13 +271,13 @@ def create_apply_schema( InputSchema = apply_function_to_model_tree( InputSchema, - lambda x, _: x, + lambda x, _: InputPathReference if x is Path else x, model_prefix="Apply_", default_model_config=dict(extra="forbid"), ) OutputSchema = apply_function_to_model_tree( OutputSchema, - lambda x, _: x, + lambda x, _: OutputPathReference if x is Path else x, model_prefix="Apply_", default_model_config=dict(extra="forbid"), ) From 63ec7197b0b06b360f5e9a3f3295d5a75535e308 Mon Sep 17 00:00:00 2001 From: Niklas Heim Date: Wed, 8 Apr 2026 16:02:25 +0200 Subject: [PATCH 09/17] remove stray test file --- examples/dataloader/test_tesseract.py | 57 --------------------------- 1 file changed, 57 deletions(-) delete mode 100644 examples/dataloader/test_tesseract.py diff --git a/examples/dataloader/test_tesseract.py b/examples/dataloader/test_tesseract.py deleted file mode 100644 index 5b773e3a..00000000 --- a/examples/dataloader/test_tesseract.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright 2025 Pasteur Labs. All Rights Reserved. -# SPDX-License-Identifier: Apache-2.0 - -from pathlib import Path - -import numpy as np - -from tesseract_core import Tesseract - -here = Path(__file__).parent -testdata_dir = here / "testdata" -num_samples = len(list(testdata_dir.glob("sample_*.json"))) - -# Absolute glob pattern for local runs -data_glob = f"@{testdata_dir}/sample_*.json" - - -def check_apply_result(result): - assert len(result["data"]) == num_samples - data_sum = np.asarray(result["data_sum"]) - assert data_sum.shape == (3,) - # data_sum is the sum of each sample's column sums; must be positive - assert np.all(data_sum > 0) - - -def check_jacobian_result(result): - jac = np.asarray(result["data_sum"]["data.[0]"]) - # jacobian of data_sum w.r.t. 
data.[0] has shape (3, *data[0].shape) - assert jac.ndim == 3 - assert jac.shape[0] == 3 - - -with Tesseract.from_tesseract_api("tesseract_api.py") as tess: - result = tess.apply({"data": data_glob}) - check_apply_result(result) - - result = tess.jacobian( - inputs={"data": data_glob}, - jac_inputs=["data.[0]"], - jac_outputs=["data_sum"], - ) - check_jacobian_result(result) - - -with Tesseract.from_image( - "dataloader", - volumes=[f"{testdata_dir}:/mnt/testdata:ro"], -) as tess: - result = tess.apply({"data": "@/mnt/testdata/sample_*.json"}) - check_apply_result(result) - - result = tess.jacobian( - inputs={"data": "@/mnt/testdata/sample_*.json"}, - jac_inputs=["data.[0]"], - jac_outputs=["data_sum"], - ) - check_jacobian_result(result) From 18f9962c3b736226bafe68cf06b035bf04df96ad Mon Sep 17 00:00:00 2001 From: Niklas Heim Date: Wed, 8 Apr 2026 16:17:13 +0200 Subject: [PATCH 10/17] fix tests --- examples/pathreference/README.md | 18 +++++++++--------- examples/pathreference/tesseract_api.py | 1 - .../pathreference/test_cases/test_apply.json | 4 ++-- examples/pathreference/test_tesseract.py | 9 +++------ 4 files changed, 14 insertions(+), 18 deletions(-) diff --git a/examples/pathreference/README.md b/examples/pathreference/README.md index a20c2101..7afd03f0 100644 --- a/examples/pathreference/README.md +++ b/examples/pathreference/README.md @@ -19,7 +19,7 @@ class OutputSchema(BaseModel): **`InputPathReference`** (inputs) -- Accepts a *relative* path string from the caller. +- Accepts a _relative_ path string from the caller. - Resolves it to an absolute path under the configured `--input-path`. - Rejects any path that would escape `input_path` (path traversal protection). - Raises `FileNotFoundError` if the resolved path does not exist. @@ -28,7 +28,7 @@ class OutputSchema(BaseModel): **`OutputPathReference`** (outputs) - Accepts the absolute path your `apply` function produces (e.g. `output_path / name`). 
-- Strips the `output_path` prefix, returning a *relative* path to the caller. +- Strips the `output_path` prefix, returning a _relative_ path to the caller. - Raises `ValueError` if the path does not exist inside `output_path`. - Accepts both files **and** directories (use `OutputFileReference` for files only). @@ -37,7 +37,7 @@ the runtime handles all absolute-path resolution transparently. ## Composing user-defined validators -`AfterValidator`s placed on a `Path`-annotated field are preserved and run *after* +`AfterValidator`s placed on a `Path`-annotated field are preserved and run _after_ the built-in path resolution. The user validator therefore always receives an already-resolved, validated absolute `Path`: @@ -70,12 +70,12 @@ This example uses it to confirm that `apply` also copied the sidecar `.bin` file The test dataset (`test_cases/testdata/`) contains: -| File | Array encoding | -|------|---------------| -| `sample_0.json`, `sample_3.json`, `sample_6.json`, `sample_9.json` | `json` (inline) | -| `sample_1.json`, `sample_4.json`, `sample_7.json` | `base64` (inline) | -| `sample_2.json`, `sample_5.json`, `sample_8.json` | `binref` (references the shared `.bin` sidecar) | -| `sample_dir/` | directory containing `data.json` | +| File | Array encoding | +| ------------------------------------------------------------------ | ----------------------------------------------- | +| `sample_0.json`, `sample_3.json`, `sample_6.json`, `sample_9.json` | `json` (inline) | +| `sample_1.json`, `sample_4.json`, `sample_7.json` | `base64` (inline) | +| `sample_2.json`, `sample_5.json`, `sample_8.json` | `binref` (references the shared `.bin` sidecar) | +| `sample_dir/` | directory containing `data.json` | `generate_data.py` re-creates this dataset using a fixed RNG seed. 
diff --git a/examples/pathreference/tesseract_api.py b/examples/pathreference/tesseract_api.py index 3f982224..2c9418aa 100644 --- a/examples/pathreference/tesseract_api.py +++ b/examples/pathreference/tesseract_api.py @@ -21,7 +21,6 @@ def bin_reference(path: Path) -> str | None: def has_bin_sidecar(path: Path) -> Path: - print(path) """Pydantic validator to check for .bin file next to any json file that references one.""" if path.is_file(): name = bin_reference(path) diff --git a/examples/pathreference/test_cases/test_apply.json b/examples/pathreference/test_cases/test_apply.json index 0ec87689..058c4e64 100644 --- a/examples/pathreference/test_cases/test_apply.json +++ b/examples/pathreference/test_cases/test_apply.json @@ -1,7 +1,7 @@ { "endpoint": "apply", "expected_outputs": { - "data": [ + "paths": [ "sample_0.copy", "sample_1.copy", "sample_2.copy", @@ -24,7 +24,7 @@ }, "payload": { "inputs": { - "data": [ + "paths": [ "sample_0.json", "sample_1.json", "sample_2.json", diff --git a/examples/pathreference/test_tesseract.py b/examples/pathreference/test_tesseract.py index f6b1319e..7d265937 100644 --- a/examples/pathreference/test_tesseract.py +++ b/examples/pathreference/test_tesseract.py @@ -6,7 +6,7 @@ from tesseract_core import Tesseract -def _clean(): +def clean(): # delete before copy if output_path.exists(): shutil.rmtree(output_path) @@ -23,10 +23,7 @@ def _clean(): "sample_dir", ] -expected = ["sample_file.copy", "sample_dir"] - - -_clean() +clean() with Tesseract.from_tesseract_api( "tesseract_api.py", input_path=input_path, output_path=output_path, stream_logs=True ) as tess: @@ -38,7 +35,7 @@ def _clean(): assert len(list(output_path.glob("*.bin"))) == 1 -_clean() +clean() with Tesseract.from_image( "pathreference", input_path=input_path, output_path=output_path, stream_logs=True ) as tess: From 4aeb833e6998bcfd30f5c8f065df63a4e8fb14b5 Mon Sep 17 00:00:00 2001 From: Niklas Heim Date: Wed, 8 Apr 2026 20:09:08 +0200 Subject: [PATCH 11/17] correctly 
order validators --- examples/pathreference/tesseract_api.py | 4 ++ tesseract_core/runtime/schema_generation.py | 43 +++++++++++++++++++-- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/examples/pathreference/tesseract_api.py b/examples/pathreference/tesseract_api.py index 2c9418aa..3d724192 100644 --- a/examples/pathreference/tesseract_api.py +++ b/examples/pathreference/tesseract_api.py @@ -29,6 +29,10 @@ def has_bin_sidecar(path: Path) -> Path: assert bin.exists(), ( f"Expected .bin file for json {json} not found at {bin}" ) + elif path.is_dir(): + return path + else: + raise ValueError(f"{path} does not exist.") return path diff --git a/tesseract_core/runtime/schema_generation.py b/tesseract_core/runtime/schema_generation.py index d8051773..6f235ae5 100644 --- a/tesseract_core/runtime/schema_generation.py +++ b/tesseract_core/runtime/schema_generation.py @@ -69,6 +69,7 @@ def apply_function_to_model_tree( func: Callable[[type, tuple], type], model_prefix: str = "", default_model_config: dict[str, Any] | None = None, + is_leaf: Callable[[Any], bool] | None = None, ) -> type[BaseModel]: """Apply a function to all leaves of a Pydantic model, recursing into containers + nested models. @@ -86,6 +87,11 @@ class MyModel(BaseModel): The path to the field "a" would be ["a"], and the path to the int type would be ["a", SEQ_INDEX_SENTINEL, DICT_INDEX_SENTINEL]. + + The optional ``is_leaf`` predicate, if provided, is checked first: when it returns + True for a node, ``func`` is called on that node immediately without further recursion. + This allows callers to treat compound types (e.g. ``Annotated[Path, ...]``) as atomic + leaves. 
""" if default_model_config is None: default_model_config = {} @@ -93,6 +99,10 @@ class MyModel(BaseModel): seen_models = set() def _recurse_over_model_tree(treeobj: Any, path: list[str]) -> Any: + # If the caller says this node is a leaf, apply func immediately + if is_leaf is not None and is_leaf(treeobj): + return func(treeobj, tuple(path)) + # Get the origin type of the annotation, e.g. List for List[int] origin_type = get_origin(treeobj) deprecated_types = ["List", "Dict", "Set", "FrozenSet", "Tuple"] @@ -256,7 +266,10 @@ def create_apply_schema( InputSchema: type[BaseModel], OutputSchema: type[BaseModel] ) -> tuple[type[BaseModel], type[BaseModel]]: """Create the input / output schemas for the /apply endpoint.""" - from .experimental import InputPathReference, OutputPathReference + from .experimental import ( + _resolve_input_path, + _strip_output_exists, + ) # We add metadata to the input and output schemas to indicate which fields are differentiable, # what their paths are, and which expected shape / dtype they have. 
@@ -269,17 +282,41 @@ def create_apply_schema( OutputSchema, filter_fn=is_differentiable ) + def input_path_reference(x: Any, _: tuple) -> Any: + if x is Path: + # Wrap with _resolve_input_path as the INNERMOST validator so that + # it runs before all user validators (if any) + return Annotated[Path, AfterValidator(_resolve_input_path)] + return x + InputSchema = apply_function_to_model_tree( InputSchema, - lambda x, _: InputPathReference if x is Path else x, + input_path_reference, model_prefix="Apply_", default_model_config=dict(extra="forbid"), ) + + def is_annotated_path(x: Any) -> bool: + def _core_type(ttype: Any) -> Any: + while _is_annotated(ttype): + ttype = ttype.__origin__ + return ttype + + return _is_annotated(x) and _core_type(x) is Path + + def output_path_reference(x: Any, _: Any) -> Any: + # x is either bare Path or Annotated[Path, *user_validators] + # Wrap with _strip_output_path as the OUTERMOST validator so user validators + # run first (on absolute paths) and stripping happens last. 
+ # return Annotated[x, AfterValidator(_strip_output_exists)] + return Annotated[x, AfterValidator(_strip_output_exists)] + OutputSchema = apply_function_to_model_tree( OutputSchema, - lambda x, _: OutputPathReference if x is Path else x, + output_path_reference, model_prefix="Apply_", default_model_config=dict(extra="forbid"), + is_leaf=is_annotated_path, ) class ApplyInputSchema(BaseModel): From 4701bc423b6099647b71a9a469c89e98e2f456ef Mon Sep 17 00:00:00 2001 From: Niklas Heim Date: Wed, 8 Apr 2026 20:26:35 +0200 Subject: [PATCH 12/17] update readme --- examples/pathreference/README.md | 63 ++++++++++----------- tesseract_core/runtime/experimental.py | 5 -- tesseract_core/runtime/schema_generation.py | 10 ++-- 3 files changed, 36 insertions(+), 42 deletions(-) diff --git a/examples/pathreference/README.md b/examples/pathreference/README.md index 7afd03f0..51d3512f 100644 --- a/examples/pathreference/README.md +++ b/examples/pathreference/README.md @@ -7,39 +7,30 @@ Pydantic validators on top of the built-in path-handling behaviour. ## What `Path` does in a schema When you annotate a field with `Path`, the schema generation layer automatically -replaces it with `InputPathReference` on inputs and `OutputPathReference` on outputs. +injects path-handling validators at runtime. -```python -class InputSchema(BaseModel): - paths: list[Path] # → list[InputPathReference] at runtime - -class OutputSchema(BaseModel): - paths: list[Path] # → list[OutputPathReference] at runtime -``` +**Input `Path` fields** -**`InputPathReference`** (inputs) +- Accept a _relative_ path string from the caller. +- Resolve it to an absolute path under the configured `--input-path`. +- Reject any path that would escape `input_path` (path traversal protection). +- Raise `FileNotFoundError` if the resolved path does not exist. +- Accept both files **and** directories (use `InputFileReference` for files only). -- Accepts a _relative_ path string from the caller. 
-- Resolves it to an absolute path under the configured `--input-path`. -- Rejects any path that would escape `input_path` (path traversal protection). -- Raises `FileNotFoundError` if the resolved path does not exist. -- Accepts both files **and** directories (use `InputFileReference` for files only). +**Output `Path` fields** -**`OutputPathReference`** (outputs) - -- Accepts the absolute path your `apply` function produces (e.g. `output_path / name`). -- Strips the `output_path` prefix, returning a _relative_ path to the caller. -- Raises `ValueError` if the path does not exist inside `output_path`. -- Accepts both files **and** directories (use `OutputFileReference` for files only). +- Accept the absolute path your `apply` function produces (e.g. `output_path / name`). +- Strip the `output_path` prefix, returning a _relative_ path to the caller. +- Raise `ValueError` if the path does not exist inside `output_path`. +- Accept both files **and** directories (use `OutputFileReference` for files only). So from the caller's perspective, both inputs and outputs are relative path strings; the runtime handles all absolute-path resolution transparently. ## Composing user-defined validators -`AfterValidator`s placed on a `Path`-annotated field are preserved and run _after_ -the built-in path resolution. 
The user validator therefore always receives an -already-resolved, validated absolute `Path`: +`AfterValidator`s placed on a `Path`-annotated field are preserved, and in both +cases the user validator receives an already-resolved **absolute** `Path`: ```python def has_bin_sidecar(path: Path) -> Path: @@ -48,23 +39,31 @@ def has_bin_sidecar(path: Path) -> Path: name = bin_reference(path) if name is not None: bin = path.parent / name - assert bin.exists(), f"Expected .bin file for json {json} not found at {bin}" + assert bin.exists(), f"Expected .bin file for json {path} not found at {bin}" return path class InputSchema(BaseModel): paths: list[Annotated[Path, AfterValidator(has_bin_sidecar)]] ``` -Execution order for each element of `paths`: +The built-in path validators run at different points depending on direction: + +**Input fields** — built-in validator runs **first**, user validators run after: + +1. Raw string (e.g. `"sample_8.json"`) is resolved to an absolute path and checked + for existence by the built-in input validator. +2. The resolved absolute `Path` is passed to `has_bin_sidecar`, which checks that + the referenced `.bin` sidecar is present beside it. + +**Output fields** — user validators run **first**, built-in validator runs after: -1. Raw string (e.g. `"sample_8.json"`) is validated by `InputPathReference`: - resolves → `/abs/input_path/sample_8.json`, checks it exists. -2. The resolved `Path` is passed to `next_to_binary_path`, which reads the JSON - and checks that the referenced `.bin` sidecar is present beside it. +1. The absolute `Path` returned by `apply` (e.g. `output_path / "sample_8.copy"`) + is passed to `has_bin_sidecar`, which checks the `.bin` sidecar was also copied. +2. The built-in output validator then confirms the path exists inside `output_path` + and strips the prefix, returning a relative path to the caller. 
-The same pattern applies to `OutputSchema`: the validator runs after -`OutputPathReference` has verified the output file exists and stripped the prefix. -This example uses it to confirm that `apply` also copied the sidecar `.bin` file. +This example uses output validators to confirm that `apply` copied the sidecar +`.bin` file alongside each JSON file. ## Test data diff --git a/tesseract_core/runtime/experimental.py b/tesseract_core/runtime/experimental.py index e276358f..1cfcb977 100644 --- a/tesseract_core/runtime/experimental.py +++ b/tesseract_core/runtime/experimental.py @@ -263,9 +263,6 @@ def _strip_output_exists(path: Path) -> Path: return stripped -InputPathReference = Annotated[Path, AfterValidator(_resolve_input_path)] -OutputPathReference = Annotated[Path, AfterValidator(_strip_output_exists)] - InputFileReference = Annotated[Path, AfterValidator(_resolve_input_file)] OutputFileReference = Annotated[Path, AfterValidator(_strip_output_file)] @@ -372,10 +369,8 @@ def __get_pydantic_json_schema__( __all__ = [ "InputFileReference", - "InputPathReference", "LazySequence", "OutputFileReference", - "OutputPathReference", "PydanticLazySequenceAnnotation", "TesseractReference", "finite_difference_jacobian", diff --git a/tesseract_core/runtime/schema_generation.py b/tesseract_core/runtime/schema_generation.py index 6f235ae5..61fa9aa6 100644 --- a/tesseract_core/runtime/schema_generation.py +++ b/tesseract_core/runtime/schema_generation.py @@ -266,7 +266,7 @@ def create_apply_schema( InputSchema: type[BaseModel], OutputSchema: type[BaseModel] ) -> tuple[type[BaseModel], type[BaseModel]]: """Create the input / output schemas for the /apply endpoint.""" - from .experimental import ( + from tesseract_core.runtime.experimental import ( _resolve_input_path, _strip_output_exists, ) @@ -282,7 +282,7 @@ def create_apply_schema( OutputSchema, filter_fn=is_differentiable ) - def input_path_reference(x: Any, _: tuple) -> Any: + def resolve_input_path(x: Any, _: tuple) -> 
Any: if x is Path: # Wrap with _resolve_input_path as the INNERMOST validator so that # it runs before all user validators (if any) @@ -291,7 +291,7 @@ def input_path_reference(x: Any, _: tuple) -> Any: InputSchema = apply_function_to_model_tree( InputSchema, - input_path_reference, + resolve_input_path, model_prefix="Apply_", default_model_config=dict(extra="forbid"), ) @@ -304,7 +304,7 @@ def _core_type(ttype: Any) -> Any: return _is_annotated(x) and _core_type(x) is Path - def output_path_reference(x: Any, _: Any) -> Any: + def strip_output_path(x: Any, _: Any) -> Any: # x is either bare Path or Annotated[Path, *user_validators] # Wrap with _strip_output_path as the OUTERMOST validator so user validators # run first (on absolute paths) and stripping happens last. @@ -313,7 +313,7 @@ def output_path_reference(x: Any, _: Any) -> Any: OutputSchema = apply_function_to_model_tree( OutputSchema, - output_path_reference, + strip_output_path, model_prefix="Apply_", default_model_config=dict(extra="forbid"), is_leaf=is_annotated_path, From a24d1565334c7c26418f107d14bdca072ad5bea9 Mon Sep 17 00:00:00 2001 From: Niklas Heim Date: Wed, 8 Apr 2026 20:32:58 +0200 Subject: [PATCH 13/17] refactor --- tesseract_core/runtime/experimental.py | 8 -- tesseract_core/runtime/schema_generation.py | 94 ++++++++++++++------- 2 files changed, 64 insertions(+), 38 deletions(-) diff --git a/tesseract_core/runtime/experimental.py b/tesseract_core/runtime/experimental.py index 1cfcb977..b5f1c536 100644 --- a/tesseract_core/runtime/experimental.py +++ b/tesseract_core/runtime/experimental.py @@ -255,14 +255,6 @@ def _strip_output_file(path: Path) -> Path: return stripped -def _strip_output_exists(path: Path) -> Path: - stripped = _strip_output_path(path) - full_path = Path(get_config().output_path) / stripped - if not full_path.exists(): - raise ValueError(f"Output path {full_path} does not exist.") - return stripped - - InputFileReference = Annotated[Path, AfterValidator(_resolve_input_file)] 
OutputFileReference = Annotated[Path, AfterValidator(_strip_output_file)] diff --git a/tesseract_core/runtime/schema_generation.py b/tesseract_core/runtime/schema_generation.py index 61fa9aa6..a4d7a0b4 100644 --- a/tesseract_core/runtime/schema_generation.py +++ b/tesseract_core/runtime/schema_generation.py @@ -43,6 +43,67 @@ SEQ_INDEX_SENTINEL = object() DICT_INDEX_SENTINEL = object() + +def _resolve_input_path(path: Path) -> Path: + from tesseract_core.runtime.config import get_config + + input_path = get_config().input_path + tess_path = (input_path / path).resolve() + if str(input_path) not in str(tess_path): + raise ValueError( + f"Invalid input file reference: {path}. " + f"Expected path to be relative to {input_path}, but got {tess_path}. " + "File references have to be relative to --input-path." + ) + if not tess_path.exists(): + raise FileNotFoundError(f"Input path {tess_path} does not exist.") + return tess_path + + +def _strip_output_path(path: Path) -> Path: + from tesseract_core.runtime.config import get_config + + output_path = get_config().output_path + if path.is_relative_to(output_path): + return path.relative_to(output_path) + else: + return path + + +def _strip_output_exists(path: Path) -> Path: + from tesseract_core.runtime.config import get_config + + stripped = _strip_output_path(path) + full_path = Path(get_config().output_path) / stripped + if not full_path.exists(): + raise ValueError(f"Output path {full_path} does not exist.") + return stripped + + +def _is_annotated_path(x: Any) -> bool: + def _core_type(ttype: Any) -> Any: + while _is_annotated(ttype): + ttype = ttype.__origin__ + return ttype + + return _is_annotated(x) and _core_type(x) is Path + + +def _inject_input_path_validator(x: Any, _: tuple) -> Any: + if x is Path: + # Wrap with _resolve_input_path as the INNERMOST validator so that + # it runs before all user validators (if any) + return Annotated[Path, AfterValidator(_resolve_input_path)] + return x + + +def 
_inject_output_path_validator(x: Any, _: Any) -> Any: + # x is either bare Path or Annotated[Path, *user_validators] + # Wrap with _strip_output_exists as the OUTERMOST validator so user validators + # run first (on absolute paths) and stripping happens last. + return Annotated[x, AfterValidator(_strip_output_exists)] + + T = TypeVar("T") # Python has funnily enough two union types now. See https://github.com/python/cpython/issues/105499 @@ -266,11 +327,6 @@ def create_apply_schema( InputSchema: type[BaseModel], OutputSchema: type[BaseModel] ) -> tuple[type[BaseModel], type[BaseModel]]: """Create the input / output schemas for the /apply endpoint.""" - from tesseract_core.runtime.experimental import ( - _resolve_input_path, - _strip_output_exists, - ) - # We add metadata to the input and output schemas to indicate which fields are differentiable, # what their paths are, and which expected shape / dtype they have. # This is used internally and by some official clients, but not advertised as part of the public API, @@ -282,41 +338,19 @@ def create_apply_schema( OutputSchema, filter_fn=is_differentiable ) - def resolve_input_path(x: Any, _: tuple) -> Any: - if x is Path: - # Wrap with _resolve_input_path as the INNERMOST validator so that - # it runs before all user validators (if any) - return Annotated[Path, AfterValidator(_resolve_input_path)] - return x - InputSchema = apply_function_to_model_tree( InputSchema, - resolve_input_path, + _inject_input_path_validator, model_prefix="Apply_", default_model_config=dict(extra="forbid"), ) - def is_annotated_path(x: Any) -> bool: - def _core_type(ttype: Any) -> Any: - while _is_annotated(ttype): - ttype = ttype.__origin__ - return ttype - - return _is_annotated(x) and _core_type(x) is Path - - def strip_output_path(x: Any, _: Any) -> Any: - # x is either bare Path or Annotated[Path, *user_validators] - # Wrap with _strip_output_path as the OUTERMOST validator so user validators - # run first (on absolute paths) and 
stripping happens last. - # return Annotated[x, AfterValidator(_strip_output_exists)] - return Annotated[x, AfterValidator(_strip_output_exists)] - OutputSchema = apply_function_to_model_tree( OutputSchema, - strip_output_path, + _inject_output_path_validator, model_prefix="Apply_", default_model_config=dict(extra="forbid"), - is_leaf=is_annotated_path, + is_leaf=_is_annotated_path, ) class ApplyInputSchema(BaseModel): From 882d19336e19fc7daeb6b8d7ba27a5a5ee690d3e Mon Sep 17 00:00:00 2001 From: Niklas Heim Date: Wed, 8 Apr 2026 20:37:38 +0200 Subject: [PATCH 14/17] clarify readme --- examples/pathreference/README.md | 53 +++++++++++++++++++------------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/examples/pathreference/README.md b/examples/pathreference/README.md index 51d3512f..5218da4c 100644 --- a/examples/pathreference/README.md +++ b/examples/pathreference/README.md @@ -9,23 +9,28 @@ Pydantic validators on top of the built-in path-handling behaviour. When you annotate a field with `Path`, the schema generation layer automatically injects path-handling validators at runtime. -**Input `Path` fields** +**Input `Path` fields** — caller sends a relative string, `apply` receives an absolute `Path`: -- Accept a _relative_ path string from the caller. -- Resolve it to an absolute path under the configured `--input-path`. -- Reject any path that would escape `input_path` (path traversal protection). -- Raise `FileNotFoundError` if the resolved path does not exist. -- Accept both files **and** directories (use `InputFileReference` for files only). +``` +caller sends → "sample_8.json" +built-in resolves → Path("/tesseract/input_data/sample_8.json") (checked: exists) +apply sees → Path("/tesseract/input_data/sample_8.json") +``` + +- Rejects any path that would escape `input_path` (path traversal protection). +- Raises `FileNotFoundError` if the resolved path does not exist. 
+- Accepts both files **and** directories (use `InputFileReference` for files only). -**Output `Path` fields** +**Output `Path` fields** — `apply` returns an absolute `Path`, caller receives a relative string: -- Accept the absolute path your `apply` function produces (e.g. `output_path / name`). -- Strip the `output_path` prefix, returning a _relative_ path to the caller. -- Raise `ValueError` if the path does not exist inside `output_path`. -- Accept both files **and** directories (use `OutputFileReference` for files only). +``` +apply returns → Path("/tesseract/output_data/sample_8.copy") +built-in strips → Path("sample_8.copy") (checked: exists) +caller receives → "sample_8.copy" +``` -So from the caller's perspective, both inputs and outputs are relative path strings; -the runtime handles all absolute-path resolution transparently. +- Raises `ValueError` if the path does not exist inside `output_path`. +- Accepts both files **and** directories (use `OutputFileReference` for files only). ## Composing user-defined validators @@ -40,6 +45,8 @@ def has_bin_sidecar(path: Path) -> Path: if name is not None: bin = path.parent / name assert bin.exists(), f"Expected .bin file for json {path} not found at {bin}" + else: + raise ValueError(f"{path} does not exist or is not a file.") return path class InputSchema(BaseModel): @@ -50,17 +57,21 @@ The built-in path validators run at different points depending on direction: **Input fields** — built-in validator runs **first**, user validators run after: -1. Raw string (e.g. `"sample_8.json"`) is resolved to an absolute path and checked - for existence by the built-in input validator. -2. The resolved absolute `Path` is passed to `has_bin_sidecar`, which checks that - the referenced `.bin` sidecar is present beside it. 
+``` +"sample_8.json" + → built-in → Path("/tesseract/input_data/sample_8.json") (resolved + existence check) + → has_bin_sidecar → Path("/tesseract/input_data/sample_8.json") (checks .bin sidecar present) + → apply receives → Path("/tesseract/input_data/sample_8.json") +``` **Output fields** — user validators run **first**, built-in validator runs after: -1. The absolute `Path` returned by `apply` (e.g. `output_path / "sample_8.copy"`) - is passed to `has_bin_sidecar`, which checks the `.bin` sidecar was also copied. -2. The built-in output validator then confirms the path exists inside `output_path` - and strips the prefix, returning a relative path to the caller. +``` +apply returns → Path("/tesseract/output_data/sample_8.copy") + → has_bin_sidecar → Path("/tesseract/output_data/sample_8.copy") (checks .bin sidecar was copied) + → built-in → Path("sample_8.copy") (existence check + prefix stripped) + → caller receives → "sample_8.copy" +``` This example uses output validators to confirm that `apply` copied the sidecar `.bin` file alongside each JSON file. 
From cd551cd97fdfc710f27b3ff82cf5d7ae670822ba Mon Sep 17 00:00:00 2001 From: Niklas Heim Date: Wed, 8 Apr 2026 23:51:06 +0200 Subject: [PATCH 15/17] fix injection; add some tests --- tesseract_core/runtime/schema_generation.py | 2 + tests/runtime_tests/test_schema_generation.py | 274 +++++++++++++++++- 2 files changed, 275 insertions(+), 1 deletion(-) diff --git a/tesseract_core/runtime/schema_generation.py b/tesseract_core/runtime/schema_generation.py index a4d7a0b4..07b906d7 100644 --- a/tesseract_core/runtime/schema_generation.py +++ b/tesseract_core/runtime/schema_generation.py @@ -98,6 +98,8 @@ def _inject_input_path_validator(x: Any, _: tuple) -> Any: def _inject_output_path_validator(x: Any, _: Any) -> Any: + if x is not Path and not _is_annotated_path(x): + return x # x is either bare Path or Annotated[Path, *user_validators] # Wrap with _strip_output_exists as the OUTERMOST validator so user validators # run first (on absolute paths) and stripping happens last. diff --git a/tests/runtime_tests/test_schema_generation.py b/tests/runtime_tests/test_schema_generation.py index 8fd04579..c75aaa11 100644 --- a/tests/runtime_tests/test_schema_generation.py +++ b/tests/runtime_tests/test_schema_generation.py @@ -4,15 +4,19 @@ import json from collections.abc import Iterable from copy import deepcopy +from pathlib import Path from typing import Annotated, Optional import numpy as np import pytest -from pydantic import BaseModel, ConfigDict, RootModel, ValidationError +from pydantic import AfterValidator, BaseModel, ConfigDict, RootModel, ValidationError from tesseract_core.runtime import Array, Differentiable, Float32, Float64, Int64, UInt8 from tesseract_core.runtime.experimental import LazySequence from tesseract_core.runtime.schema_generation import ( + _inject_output_path_validator, + _is_annotated_path, + _strip_output_exists, apply_function_to_model_tree, create_abstract_eval_schema, create_apply_schema, @@ -809,3 +813,271 @@ class Parent(BaseModel): with 
pytest.raises(ValidationError): ApplyParent.model_validate({"child": {"x": "foo"}, "extra": 1}) + + +# ============================================================================= +# Path resolution tests (automatic path handling in Input/OutputSchema) +# ============================================================================= + + +@pytest.fixture +def path_config(tmp_path): + """Temp input/output dirs with get_config patched to point at them.""" + from tesseract_core.runtime.config import update_config + + input_dir = tmp_path / "inputs" + output_dir = tmp_path / "outputs" + input_dir.mkdir() + output_dir.mkdir() + update_config(input_path=str(input_dir), output_path=str(output_dir)) + return input_dir, output_dir + + +# --- Input: basic resolution --- + + +def test_input_relative_path_resolved_to_absolute(path_config): + """Caller sends relative string → apply() receives absolute Path under input_path.""" + input_dir, _ = path_config + (input_dir / "data.txt").touch() + (input_dir / "mydir").mkdir() + + class InputSchema(BaseModel): + file: Path + folder: Path + + ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) + result = ApplyInput.model_validate( + {"inputs": {"file": "data.txt", "folder": "mydir"}} + ) + assert result.inputs.file == input_dir / "data.txt" + assert result.inputs.file.is_absolute() + + +def test_input_nonexistent_path_raises(path_config): + """FileNotFoundError when the referenced file does not exist. + + Note: Pydantic only wraps ValueError/AssertionError in ValidationError; + FileNotFoundError propagates directly. 
+ """ + + class InputSchema(BaseModel): + file: Path + + ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) + with pytest.raises(FileNotFoundError, match="does not exist"): + ApplyInput.model_validate({"inputs": {"file": "missing.txt"}}) + + +def test_input_path_traversal_rejected(path_config): + """Path traversal (../../etc/passwd) is rejected.""" + + class InputSchema(BaseModel): + file: Path + + ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) + with pytest.raises(ValidationError, match="relative to"): + ApplyInput.model_validate({"inputs": {"file": "../../etc/passwd"}}) + + +# --- Input: container and optional types --- + + +def test_input_list_of_paths_all_resolved(path_config): + """list[Path] — every entry is resolved to an absolute path.""" + input_dir, _ = path_config + (input_dir / "a.txt").touch() + (input_dir / "b.txt").touch() + + class InputSchema(BaseModel): + files: list[Path] + + ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) + result = ApplyInput.model_validate({"inputs": {"files": ["a.txt", "b.txt"]}}) + assert result.inputs.files == [input_dir / "a.txt", input_dir / "b.txt"] + + +def test_input_optional_path(path_config): + input_dir, _ = path_config + + class InputSchema(BaseModel): + file: Path | None = None + + # None works + ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) + result = ApplyInput.model_validate({"inputs": {"file": None}}) + assert result.inputs.file is None + + # Path as well + (input_dir / "data.txt").touch() + ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) + result = ApplyInput.model_validate({"inputs": {"file": "data.txt"}}) + assert result.inputs.file == input_dir / "data.txt" + + +# --- Input: user validators --- + + +def test_input_user_validator_receives_absolute_path(path_config): + """AfterValidator on an input Path field receives the already-resolved absolute path. 
+ + From the README: + "sample_8.json" + → built-in resolves → Path("/tesseract/input_data/sample_8.json") + → user validator → Path("/tesseract/input_data/sample_8.json") + """ + input_dir, _ = path_config + (input_dir / "data.txt").touch() + + seen: list[Path] = [] + + def record(path: Path) -> Path: + seen.append(path) + return path + + class InputSchema(BaseModel): + file: Annotated[Path, AfterValidator(record)] + + ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) + ApplyInput.model_validate({"inputs": {"file": "data.txt"}}) + + assert len(seen) == 1 + assert seen[0] == input_dir / "data.txt" + assert seen[0].is_absolute() + + +def test_input_nested_model_path_resolved(path_config): + """Path fields inside nested models are resolved.""" + input_dir, _ = path_config + (input_dir / "nested.txt").touch() + + class Inner(BaseModel): + file: Path + + class InputSchema(BaseModel): + inner: Inner + + ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) + result = ApplyInput.model_validate({"inputs": {"inner": {"file": "nested.txt"}}}) + assert result.inputs.inner.file == input_dir / "nested.txt" + + +# --- Output: basic stripping --- + + +def test_output_absolute_path_stripped_to_relative(path_config): + """apply() returns absolute Path → caller receives relative Path. 
+ + From the README: + apply returns → Path("/tesseract/output_data/sample_8.copy") + built-in strips → Path("sample_8.copy") + caller receives → "sample_8.copy" + """ + _, output_dir = path_config + (output_dir / "result.txt").touch() + + class OutputSchema(BaseModel): + result: Path + + _, ApplyOutput = create_apply_schema(OutputSchema, OutputSchema) + out = ApplyOutput.model_validate({"result": output_dir / "result.txt"}) + assert out.root.result == Path("result.txt") + assert not out.root.result.is_absolute() + + +def test_output_nonexistent_path_raises(path_config): + """ValueError when the output path does not exist.""" + _, output_dir = path_config + + class OutputSchema(BaseModel): + result: Path + + _, ApplyOutput = create_apply_schema(OutputSchema, OutputSchema) + with pytest.raises(ValidationError, match="does not exist"): + ApplyOutput.model_validate({"result": output_dir / "ghost.txt"}) + + +def test_output_list_of_paths_all_stripped(path_config): + """list[Path] — all output paths stripped to relative.""" + _, output_dir = path_config + (output_dir / "a.out").touch() + (output_dir / "b.out").touch() + + class OutputSchema(BaseModel): + files: list[Path] + + _, ApplyOutput = create_apply_schema(OutputSchema, OutputSchema) + out = ApplyOutput.model_validate( + {"files": [output_dir / "a.out", output_dir / "b.out"]} + ) + assert out.root.files == [Path("a.out"), Path("b.out")] + + +# --- Output: user validators --- + + +def test_output_user_validator_receives_absolute_path(path_config): + """AfterValidator on an output Path field receives the absolute path before stripping. 
+ + From the README: + apply returns → Path("/tesseract/output_data/sample_8.copy") + → user validator → Path("/tesseract/output_data/sample_8.copy") ← absolute + → built-in → Path("sample_8.copy") ← stripped + """ + _, output_dir = path_config + output_file = output_dir / "result.txt" + output_file.touch() + + seen: list[Path] = [] + + def record(path: Path) -> Path: + seen.append(path) + return path + + class OutputSchema(BaseModel): + result: Annotated[Path, AfterValidator(record)] + + _, ApplyOutput = create_apply_schema(OutputSchema, OutputSchema) + out = ApplyOutput.model_validate({"result": output_file}) + + assert len(seen) == 1 + assert seen[0] == output_file # absolute, not yet stripped + assert out.root.result == Path("result.txt") # final result is stripped + + +# --- Regression: non-Path fields must not receive path validators --- + + +def test_output_path_validator_only_on_path_fields(): + """Regression: _inject_output_path_validator must not apply to non-Path fields. + + Previously the validator was injected on every leaf type, causing + AttributeError when validating str/ndarray output fields. + """ + + class MixedOutput(BaseModel): + name: str + value: int + tagged_path: Annotated[Path, "some_meta"] + + Result = apply_function_to_model_tree( + MixedOutput, + _inject_output_path_validator, + is_leaf=_is_annotated_path, + ) + + # Non-Path fields must have no _strip_output_exists injected + for field_name in ("name", "value"): + for m in Result.model_fields[field_name].metadata: + if isinstance(m, AfterValidator): + assert m.func is not _strip_output_exists, ( + f"_strip_output_exists must not be injected on non-Path field '{field_name}'" + ) + + # Annotated[Path, ...] 
field must have the validator (stored in Pydantic field metadata) + path_validators = [ + m + for m in Result.model_fields["tagged_path"].metadata + if isinstance(m, AfterValidator) + ] + assert any(v.func is _strip_output_exists for v in path_validators) From eac89313da4b0f72d5d9d181b289b25f762306eb Mon Sep 17 00:00:00 2001 From: Niklas Heim Date: Thu, 9 Apr 2026 00:03:31 +0200 Subject: [PATCH 16/17] unslop tests --- tests/runtime_tests/test_schema_generation.py | 150 +++++++----------- 1 file changed, 53 insertions(+), 97 deletions(-) diff --git a/tests/runtime_tests/test_schema_generation.py b/tests/runtime_tests/test_schema_generation.py index c75aaa11..e27f462d 100644 --- a/tests/runtime_tests/test_schema_generation.py +++ b/tests/runtime_tests/test_schema_generation.py @@ -14,9 +14,6 @@ from tesseract_core.runtime import Array, Differentiable, Float32, Float64, Int64, UInt8 from tesseract_core.runtime.experimental import LazySequence from tesseract_core.runtime.schema_generation import ( - _inject_output_path_validator, - _is_annotated_path, - _strip_output_exists, apply_function_to_model_tree, create_abstract_eval_schema, create_apply_schema, @@ -821,26 +818,32 @@ class Parent(BaseModel): @pytest.fixture -def path_config(tmp_path): - """Temp input/output dirs with get_config patched to point at them.""" - from tesseract_core.runtime.config import update_config +def runtime_config(tmp_path): + """Fixture providing a real RuntimeConfig with temp input/output dirs. + + Patches get_config() so that path-resolution validators use these dirs. + Tests access paths via Path(runtime_config.input_path) / Path(runtime_config.output_path), + mirroring how apply() accesses them: Path(get_config().output_path). 
+ """ + from tesseract_core.runtime.config import get_config, update_config input_dir = tmp_path / "inputs" output_dir = tmp_path / "outputs" input_dir.mkdir() output_dir.mkdir() + update_config(input_path=str(input_dir), output_path=str(output_dir)) - return input_dir, output_dir + return get_config() # --- Input: basic resolution --- -def test_input_relative_path_resolved_to_absolute(path_config): +def test_input_relative_path_resolved_to_absolute(runtime_config): """Caller sends relative string → apply() receives absolute Path under input_path.""" - input_dir, _ = path_config - (input_dir / "data.txt").touch() - (input_dir / "mydir").mkdir() + input_path = Path(runtime_config.input_path) + (input_path / "data.txt").touch() + (input_path / "mydir").mkdir() class InputSchema(BaseModel): file: Path @@ -850,11 +853,11 @@ class InputSchema(BaseModel): result = ApplyInput.model_validate( {"inputs": {"file": "data.txt", "folder": "mydir"}} ) - assert result.inputs.file == input_dir / "data.txt" + assert result.inputs.file == input_path / "data.txt" assert result.inputs.file.is_absolute() -def test_input_nonexistent_path_raises(path_config): +def test_input_nonexistent_path_raises(runtime_config): """FileNotFoundError when the referenced file does not exist. 
Note: Pydantic only wraps ValueError/AssertionError in ValidationError; @@ -869,7 +872,7 @@ class InputSchema(BaseModel): ApplyInput.model_validate({"inputs": {"file": "missing.txt"}}) -def test_input_path_traversal_rejected(path_config): +def test_input_path_traversal_rejected(runtime_config): """Path traversal (../../etc/passwd) is rejected.""" class InputSchema(BaseModel): @@ -883,22 +886,25 @@ class InputSchema(BaseModel): # --- Input: container and optional types --- -def test_input_list_of_paths_all_resolved(path_config): +def test_input_list_of_paths_all_resolved(runtime_config): """list[Path] — every entry is resolved to an absolute path.""" - input_dir, _ = path_config - (input_dir / "a.txt").touch() - (input_dir / "b.txt").touch() + input_path = Path(runtime_config.input_path) + (input_path / "a.txt").touch() + (input_path / "b.txt").touch() class InputSchema(BaseModel): files: list[Path] + non_path_field: int ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) - result = ApplyInput.model_validate({"inputs": {"files": ["a.txt", "b.txt"]}}) - assert result.inputs.files == [input_dir / "a.txt", input_dir / "b.txt"] + result = ApplyInput.model_validate( + {"inputs": {"files": ["a.txt", "b.txt"], "non_path_field": 1}} + ) + assert result.inputs.files == [input_path / "a.txt", input_path / "b.txt"] -def test_input_optional_path(path_config): - input_dir, _ = path_config +def test_input_optional_path(runtime_config): + input_path = Path(runtime_config.input_path) class InputSchema(BaseModel): file: Path | None = None @@ -909,16 +915,16 @@ class InputSchema(BaseModel): assert result.inputs.file is None # Path as well - (input_dir / "data.txt").touch() + (input_path / "data.txt").touch() ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) result = ApplyInput.model_validate({"inputs": {"file": "data.txt"}}) - assert result.inputs.file == input_dir / "data.txt" + assert result.inputs.file == input_path / "data.txt" # --- Input: user validators 
--- -def test_input_user_validator_receives_absolute_path(path_config): +def test_input_user_validator_receives_absolute_path(runtime_config): """AfterValidator on an input Path field receives the already-resolved absolute path. From the README: @@ -926,8 +932,8 @@ def test_input_user_validator_receives_absolute_path(path_config): → built-in resolves → Path("/tesseract/input_data/sample_8.json") → user validator → Path("/tesseract/input_data/sample_8.json") """ - input_dir, _ = path_config - (input_dir / "data.txt").touch() + input_path = Path(runtime_config.input_path) + (input_path / "data.txt").touch() seen: list[Path] = [] @@ -942,30 +948,14 @@ class InputSchema(BaseModel): ApplyInput.model_validate({"inputs": {"file": "data.txt"}}) assert len(seen) == 1 - assert seen[0] == input_dir / "data.txt" + assert seen[0] == input_path / "data.txt" assert seen[0].is_absolute() -def test_input_nested_model_path_resolved(path_config): - """Path fields inside nested models are resolved.""" - input_dir, _ = path_config - (input_dir / "nested.txt").touch() - - class Inner(BaseModel): - file: Path - - class InputSchema(BaseModel): - inner: Inner - - ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) - result = ApplyInput.model_validate({"inputs": {"inner": {"file": "nested.txt"}}}) - assert result.inputs.inner.file == input_dir / "nested.txt" - - # --- Output: basic stripping --- -def test_output_absolute_path_stripped_to_relative(path_config): +def test_output_absolute_path_stripped_to_relative(runtime_config): """apply() returns absolute Path → caller receives relative Path. 
From the README: @@ -973,42 +963,46 @@ def test_output_absolute_path_stripped_to_relative(path_config): built-in strips → Path("sample_8.copy") caller receives → "sample_8.copy" """ - _, output_dir = path_config - (output_dir / "result.txt").touch() + output_path = Path(runtime_config.output_path) + (output_path / "result.txt").touch() class OutputSchema(BaseModel): result: Path _, ApplyOutput = create_apply_schema(OutputSchema, OutputSchema) - out = ApplyOutput.model_validate({"result": output_dir / "result.txt"}) + out = ApplyOutput.model_validate({"result": output_path / "result.txt"}) assert out.root.result == Path("result.txt") assert not out.root.result.is_absolute() -def test_output_nonexistent_path_raises(path_config): +def test_output_nonexistent_path_raises(runtime_config): """ValueError when the output path does not exist.""" - _, output_dir = path_config + output_path = Path(runtime_config.output_path) class OutputSchema(BaseModel): result: Path _, ApplyOutput = create_apply_schema(OutputSchema, OutputSchema) with pytest.raises(ValidationError, match="does not exist"): - ApplyOutput.model_validate({"result": output_dir / "ghost.txt"}) + ApplyOutput.model_validate({"result": output_path / "ghost.txt"}) -def test_output_list_of_paths_all_stripped(path_config): +def test_output_list_of_paths_all_stripped(runtime_config): """list[Path] — all output paths stripped to relative.""" - _, output_dir = path_config - (output_dir / "a.out").touch() - (output_dir / "b.out").touch() + output_path = Path(runtime_config.output_path) + (output_path / "a.out").touch() + (output_path / "b.out").touch() class OutputSchema(BaseModel): files: list[Path] + other_non_path_field: int _, ApplyOutput = create_apply_schema(OutputSchema, OutputSchema) out = ApplyOutput.model_validate( - {"files": [output_dir / "a.out", output_dir / "b.out"]} + { + "files": [output_path / "a.out", output_path / "b.out"], + "other_non_path_field": 1, + } ) assert out.root.files == [Path("a.out"), 
Path("b.out")] @@ -1016,7 +1010,7 @@ class OutputSchema(BaseModel): # --- Output: user validators --- -def test_output_user_validator_receives_absolute_path(path_config): +def test_output_user_validator_receives_absolute_path(runtime_config): """AfterValidator on an output Path field receives the absolute path before stripping. From the README: @@ -1024,8 +1018,8 @@ def test_output_user_validator_receives_absolute_path(path_config): → user validator → Path("/tesseract/output_data/sample_8.copy") ← absolute → built-in → Path("sample_8.copy") ← stripped """ - _, output_dir = path_config - output_file = output_dir / "result.txt" + output_path = Path(runtime_config.output_path) + output_file = output_path / "result.txt" output_file.touch() seen: list[Path] = [] @@ -1043,41 +1037,3 @@ class OutputSchema(BaseModel): assert len(seen) == 1 assert seen[0] == output_file # absolute, not yet stripped assert out.root.result == Path("result.txt") # final result is stripped - - -# --- Regression: non-Path fields must not receive path validators --- - - -def test_output_path_validator_only_on_path_fields(): - """Regression: _inject_output_path_validator must not apply to non-Path fields. - - Previously the validator was injected on every leaf type, causing - AttributeError when validating str/ndarray output fields. - """ - - class MixedOutput(BaseModel): - name: str - value: int - tagged_path: Annotated[Path, "some_meta"] - - Result = apply_function_to_model_tree( - MixedOutput, - _inject_output_path_validator, - is_leaf=_is_annotated_path, - ) - - # Non-Path fields must have no _strip_output_exists injected - for field_name in ("name", "value"): - for m in Result.model_fields[field_name].metadata: - if isinstance(m, AfterValidator): - assert m.func is not _strip_output_exists, ( - f"_strip_output_exists must not be injected on non-Path field '{field_name}'" - ) - - # Annotated[Path, ...] 
field must have the validator (stored in Pydantic field metadata) - path_validators = [ - m - for m in Result.model_fields["tagged_path"].metadata - if isinstance(m, AfterValidator) - ] - assert any(v.func is _strip_output_exists for v in path_validators) From 5de4b40f3136149b052ad0346095b78cc750676f Mon Sep 17 00:00:00 2001 From: Niklas Heim Date: Thu, 9 Apr 2026 00:11:29 +0200 Subject: [PATCH 17/17] merge two tests into one --- tests/runtime_tests/test_schema_generation.py | 42 +++++++++---------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/tests/runtime_tests/test_schema_generation.py b/tests/runtime_tests/test_schema_generation.py index e27f462d..3011bebe 100644 --- a/tests/runtime_tests/test_schema_generation.py +++ b/tests/runtime_tests/test_schema_generation.py @@ -822,18 +822,24 @@ def runtime_config(tmp_path): """Fixture providing a real RuntimeConfig with temp input/output dirs. Patches get_config() so that path-resolution validators use these dirs. - Tests access paths via Path(runtime_config.input_path) / Path(runtime_config.output_path), - mirroring how apply() accesses them: Path(get_config().output_path). + Restores the original config state on teardown. """ + import tesseract_core.runtime.config as _cfg_mod from tesseract_core.runtime.config import get_config, update_config + original_config = _cfg_mod._current_config + original_overrides = _cfg_mod._config_overrides.copy() + input_dir = tmp_path / "inputs" output_dir = tmp_path / "outputs" input_dir.mkdir() output_dir.mkdir() update_config(input_path=str(input_dir), output_path=str(output_dir)) - return get_config() + yield get_config() + + _cfg_mod._current_config = original_config + _cfg_mod._config_overrides = original_overrides # --- Input: basic resolution --- @@ -857,30 +863,20 @@ class InputSchema(BaseModel): assert result.inputs.file.is_absolute() -def test_input_nonexistent_path_raises(runtime_config): - """FileNotFoundError when the referenced file does not exist. 
- - Note: Pydantic only wraps ValueError/AssertionError in ValidationError; - FileNotFoundError propagates directly. - """ - - class InputSchema(BaseModel): - file: Path - - ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) - with pytest.raises(FileNotFoundError, match="does not exist"): - ApplyInput.model_validate({"inputs": {"file": "missing.txt"}}) - - -def test_input_path_traversal_rejected(runtime_config): - """Path traversal (../../etc/passwd) is rejected.""" - +@pytest.mark.parametrize( + "path,exc,match", + [ + ("missing.txt", FileNotFoundError, "does not exist"), + ("../../etc/passwd", ValidationError, "relative to"), + ], +) +def test_input_invalid_path_raises(runtime_config, path, exc, match): class InputSchema(BaseModel): file: Path ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) - with pytest.raises(ValidationError, match="relative to"): - ApplyInput.model_validate({"inputs": {"file": "../../etc/passwd"}}) + with pytest.raises(exc, match=match): + ApplyInput.model_validate({"inputs": {"file": path}}) # --- Input: container and optional types ---