diff --git a/examples/filereference/inputs.json b/examples/filereference/inputs.json deleted file mode 100644 index e2626cec..00000000 --- a/examples/filereference/inputs.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "inputs": { - "data": [ - "sample_0.json", - "sample_1.json", - "sample_2.json", - "sample_3.json", - "sample_4.json", - "sample_5.json", - "sample_6.json", - "sample_7.json", - "sample_8.json", - "sample_9.json" - ] - } -} diff --git a/examples/filereference/tesseract_api.py b/examples/filereference/tesseract_api.py deleted file mode 100644 index fef9584e..00000000 --- a/examples/filereference/tesseract_api.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright 2025 Pasteur Labs. All Rights Reserved. -# SPDX-License-Identifier: Apache-2.0 - -import shutil -from pathlib import Path - -from pydantic import BaseModel - -from tesseract_core.runtime.config import get_config -from tesseract_core.runtime.experimental import ( - InputFileReference, - OutputFileReference, -) - - -class InputSchema(BaseModel): - data: list[InputFileReference] - - -class OutputSchema(BaseModel): - data: list[OutputFileReference] - - -def apply(inputs: InputSchema) -> OutputSchema: - output_path = Path(get_config().output_path) - files = [] - for source in inputs.data: - # source is a pathlib.Path starting with /path/to/input_path/... - target = output_path / source.name - # target must be a pathlib.Path at /path/to/output_path - target = target.with_suffix(".copy") - shutil.copy(source, target) - files.append(target) - return OutputSchema(data=files) diff --git a/examples/filereference/tesseract_config.yaml b/examples/filereference/tesseract_config.yaml deleted file mode 100644 index 92219a45..00000000 --- a/examples/filereference/tesseract_config.yaml +++ /dev/null @@ -1,8 +0,0 @@ -name: filereference -version: "1.0.0" -description: | - Tesseract that loads data from a folder. 
- -build_config: - package_data: [] - custom_build_steps: [] diff --git a/examples/filereference/test_tesseract.py b/examples/filereference/test_tesseract.py deleted file mode 100644 index b8ca5af2..00000000 --- a/examples/filereference/test_tesseract.py +++ /dev/null @@ -1,40 +0,0 @@ -from pathlib import Path - -from rich import print - -from tesseract_core import Tesseract - -here = Path(__file__).parent.resolve() -input_path = Path("./testdata") -output_path = Path("./output") - -# these are relative to input_path -data = [ - "sample_0.json", - "sample_1.json", - "sample_2.json", - "sample_3.json", - "sample_4.json", - "sample_5.json", - "sample_6.json", - "sample_7.json", - "sample_8.json", - "sample_9.json", -] - -with Tesseract.from_tesseract_api( - "tesseract_api.py", input_path=input_path, output_path=output_path -) as tess: - result = tess.apply({"data": data}) - print(result) - assert all((output_path / p).exists() for p in result["data"]) - - -with Tesseract.from_image( - "filereference", - input_path=input_path, - output_path=output_path, -) as tess: - result = tess.apply({"data": data}) - print(result) - assert all((output_path / p).exists() for p in result["data"]) diff --git a/examples/filereference/.gitignore b/examples/pathreference/.gitignore similarity index 100% rename from examples/filereference/.gitignore rename to examples/pathreference/.gitignore diff --git a/examples/pathreference/README.md b/examples/pathreference/README.md new file mode 100644 index 00000000..5218da4c --- /dev/null +++ b/examples/pathreference/README.md @@ -0,0 +1,101 @@ +# Path Reference Example + +A Tesseract that copies files and directories from `input_path` to `output_path`. +It demonstrates how to use `Path` in Tesseract schemas and how to compose custom +Pydantic validators on top of the built-in path-handling behaviour. 
+ +## What `Path` does in a schema + +When you annotate a field with `Path`, the schema generation layer automatically +injects path-handling validators at runtime. + +**Input `Path` fields** — caller sends a relative string, `apply` receives an absolute `Path`: + +``` +caller sends → "sample_8.json" +built-in resolves → Path("/tesseract/input_data/sample_8.json") (checked: exists) +apply sees → Path("/tesseract/input_data/sample_8.json") +``` + +- Rejects any path that would escape `input_path` (path traversal protection). +- Raises `FileNotFoundError` if the resolved path does not exist. +- Accepts both files **and** directories (use `InputFileReference` for files only). + +**Output `Path` fields** — `apply` returns an absolute `Path`, caller receives a relative string: + +``` +apply returns → Path("/tesseract/output_data/sample_8.copy") +built-in strips → Path("sample_8.copy") (checked: exists) +caller receives → "sample_8.copy" +``` + +- Raises `ValueError` if the path does not exist inside `output_path`. +- Accepts both files **and** directories (use `OutputFileReference` for files only). 
+ +## Composing user-defined validators + +`AfterValidator`s placed on a `Path`-annotated field are preserved, and in both +cases the user validator receives an already-resolved **absolute** `Path`: + +```python +def has_bin_sidecar(path: Path) -> Path: + """Check that any binref JSON has its .bin sidecar present.""" + if path.is_file(): + name = bin_reference(path) + if name is not None: + bin = path.parent / name + assert bin.exists(), f"Expected .bin file for json {path} not found at {bin}" + else: + raise ValueError(f"{path} does not exist or is not a file.") + return path + +class InputSchema(BaseModel): + paths: list[Annotated[Path, AfterValidator(has_bin_sidecar)]] +``` + +The built-in path validators run at different points depending on direction: + +**Input fields** — built-in validator runs **first**, user validators run after: + +``` +"sample_8.json" + → built-in → Path("/tesseract/input_data/sample_8.json") (resolved + existence check) + → has_bin_sidecar → Path("/tesseract/input_data/sample_8.json") (checks .bin sidecar present) + → apply receives → Path("/tesseract/input_data/sample_8.json") +``` + +**Output fields** — user validators run **first**, built-in validator runs after: + +``` +apply returns → Path("/tesseract/output_data/sample_8.copy") + → has_bin_sidecar → Path("/tesseract/output_data/sample_8.copy") (checks .bin sidecar was copied) + → built-in → Path("sample_8.copy") (existence check + prefix stripped) + → caller receives → "sample_8.copy" +``` + +This example uses output validators to confirm that `apply` copied the sidecar +`.bin` file alongside each JSON file. 
+ +## Test data + +The test dataset (`test_cases/testdata/`) contains: + +| File | Array encoding | +| ------------------------------------------------------------------ | ----------------------------------------------- | +| `sample_0.json`, `sample_3.json`, `sample_6.json`, `sample_9.json` | `json` (inline) | +| `sample_1.json`, `sample_4.json`, `sample_7.json` | `base64` (inline) | +| `sample_2.json`, `sample_5.json`, `sample_8.json` | `binref` (references the shared `.bin` sidecar) | +| `sample_dir/` | directory containing `data.json` | + +`generate_data.py` re-creates this dataset using a fixed RNG seed. + +## Running + +```bash +# local (no Docker) +uv run python test_tesseract.py + +# build Docker image first, then re-run +uv run tesseract build . +uv run python test_tesseract.py +``` diff --git a/examples/filereference/generate_data.py b/examples/pathreference/generate_data.py similarity index 100% rename from examples/filereference/generate_data.py rename to examples/pathreference/generate_data.py diff --git a/examples/pathreference/tesseract_api.py b/examples/pathreference/tesseract_api.py new file mode 100644 index 00000000..3d724192 --- /dev/null +++ b/examples/pathreference/tesseract_api.py @@ -0,0 +1,64 @@ +# Copyright 2025 Pasteur Labs. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +import json +import shutil +from pathlib import Path +from typing import Annotated + +from pydantic import AfterValidator, BaseModel + +from tesseract_core.runtime.config import get_config + + +def bin_reference(path: Path) -> str | None: + """Return the name of the .bin file if the json at 'path' references one, else None.""" + with open(path) as f: + contents = json.load(f) + if contents["data"]["encoding"] == "binref": + return contents["data"]["buffer"].split(":")[0] + return None + + +def has_bin_sidecar(path: Path) -> Path: + """Pydantic validator to check for .bin file next to any json file that references one.""" + if path.is_file(): + name = bin_reference(path) + if name is not None: + bin = path.parent / name + assert bin.exists(), ( + f"Expected .bin file for json {path} not found at {bin}" + ) + elif path.is_dir(): + return path + else: + raise ValueError(f"{path} does not exist.") + return path + + +class InputSchema(BaseModel): + paths: list[Annotated[Path, AfterValidator(has_bin_sidecar)]] + + +class OutputSchema(BaseModel): + paths: list[Annotated[Path, AfterValidator(has_bin_sidecar)]] + + +def apply(inputs: InputSchema) -> OutputSchema: + output_path = Path(get_config().output_path) + result = [] + + for src in inputs.paths: + if src.is_dir(): + # copy any folder that is given + dest = output_path / src.name + shutil.copytree(src, dest) + else: + # copy any file that is given, and if it references a .bin file, copy that too + dest = output_path / src.with_suffix(".copy").name + shutil.copy(src, dest) + bin = bin_reference(src) + if bin is not None: + shutil.copy(src.parent / bin, dest.parent / bin) + result.append(dest) + return OutputSchema(paths=result) diff --git a/examples/pathreference/tesseract_config.yaml b/examples/pathreference/tesseract_config.yaml new file mode 100644 index 00000000..ca1245aa --- /dev/null +++ b/examples/pathreference/tesseract_config.yaml @@ -0,0 +1,10 @@ +name: pathreference 
+version: "1.0.0" +description: | + Tesseract that copies input files and directories to the output directory. + Demonstrates plain pathlib.Path schema fields, which accept both + files and directories. + +build_config: + package_data: [] + custom_build_steps: [] diff --git a/examples/filereference/test_cases/test_apply.json b/examples/pathreference/test_cases/test_apply.json similarity index 85% rename from examples/filereference/test_cases/test_apply.json rename to examples/pathreference/test_cases/test_apply.json index 13208964..058c4e64 100644 --- a/examples/filereference/test_cases/test_apply.json +++ b/examples/pathreference/test_cases/test_apply.json @@ -1,7 +1,7 @@ { "endpoint": "apply", "expected_outputs": { - "data": [ + "paths": [ "sample_0.copy", "sample_1.copy", "sample_2.copy", @@ -11,7 +11,8 @@ "sample_6.copy", "sample_7.copy", "sample_8.copy", - "sample_9.copy" + "sample_9.copy", + "sample_dir" ] }, "expected_exception": null, @@ -23,7 +24,7 @@ }, "payload": { "inputs": { - "data": [ + "paths": [ "sample_0.json", "sample_1.json", "sample_2.json", @@ -33,7 +34,8 @@ "sample_6.json", "sample_7.json", "sample_8.json", - "sample_9.json" + "sample_9.json", + "sample_dir" ] } } diff --git a/examples/filereference/test_cases/testdata/a2af4236-15c5-40ec-9014-b79f9cbad50c.bin b/examples/pathreference/test_cases/testdata/a2af4236-15c5-40ec-9014-b79f9cbad50c.bin similarity index 100% rename from examples/filereference/test_cases/testdata/a2af4236-15c5-40ec-9014-b79f9cbad50c.bin rename to examples/pathreference/test_cases/testdata/a2af4236-15c5-40ec-9014-b79f9cbad50c.bin diff --git a/examples/filereference/test_cases/testdata/sample_0.json b/examples/pathreference/test_cases/testdata/sample_0.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_0.json rename to examples/pathreference/test_cases/testdata/sample_0.json diff --git a/examples/filereference/test_cases/testdata/sample_1.json 
b/examples/pathreference/test_cases/testdata/sample_1.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_1.json rename to examples/pathreference/test_cases/testdata/sample_1.json diff --git a/examples/filereference/test_cases/testdata/sample_2.json b/examples/pathreference/test_cases/testdata/sample_2.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_2.json rename to examples/pathreference/test_cases/testdata/sample_2.json diff --git a/examples/filereference/test_cases/testdata/sample_3.json b/examples/pathreference/test_cases/testdata/sample_3.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_3.json rename to examples/pathreference/test_cases/testdata/sample_3.json diff --git a/examples/filereference/test_cases/testdata/sample_4.json b/examples/pathreference/test_cases/testdata/sample_4.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_4.json rename to examples/pathreference/test_cases/testdata/sample_4.json diff --git a/examples/filereference/test_cases/testdata/sample_5.json b/examples/pathreference/test_cases/testdata/sample_5.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_5.json rename to examples/pathreference/test_cases/testdata/sample_5.json diff --git a/examples/filereference/test_cases/testdata/sample_6.json b/examples/pathreference/test_cases/testdata/sample_6.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_6.json rename to examples/pathreference/test_cases/testdata/sample_6.json diff --git a/examples/filereference/test_cases/testdata/sample_7.json b/examples/pathreference/test_cases/testdata/sample_7.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_7.json rename to examples/pathreference/test_cases/testdata/sample_7.json diff --git 
a/examples/filereference/test_cases/testdata/sample_8.json b/examples/pathreference/test_cases/testdata/sample_8.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_8.json rename to examples/pathreference/test_cases/testdata/sample_8.json diff --git a/examples/filereference/test_cases/testdata/sample_9.json b/examples/pathreference/test_cases/testdata/sample_9.json similarity index 100% rename from examples/filereference/test_cases/testdata/sample_9.json rename to examples/pathreference/test_cases/testdata/sample_9.json diff --git a/examples/pathreference/test_cases/testdata/sample_dir/data.json b/examples/pathreference/test_cases/testdata/sample_dir/data.json new file mode 100644 index 00000000..39adceba --- /dev/null +++ b/examples/pathreference/test_cases/testdata/sample_dir/data.json @@ -0,0 +1 @@ +{ "value": "world" } diff --git a/examples/pathreference/test_tesseract.py b/examples/pathreference/test_tesseract.py new file mode 100644 index 00000000..7d265937 --- /dev/null +++ b/examples/pathreference/test_tesseract.py @@ -0,0 +1,47 @@ +import shutil +from pathlib import Path + +from rich import print + +from tesseract_core import Tesseract + + +def clean(): + # delete before copy + if output_path.exists(): + shutil.rmtree(output_path) + output_path.mkdir() + + +input_path = Path("./test_cases/testdata") +output_path = Path("./output") + +# mix of files and a directory, all relative to input_path +paths = [ + "sample_0.json", + "sample_8.json", # contains .bin reference + "sample_dir", +] + +clean() +with Tesseract.from_tesseract_api( + "tesseract_api.py", input_path=input_path, output_path=output_path, stream_logs=True +) as tess: + result = tess.apply({"paths": paths}) + print(result) + out_paths = [(output_path / p) for p in result["paths"]] + assert len(out_paths) == len(paths) + assert all(p.exists() for p in out_paths) + assert len(list(output_path.glob("*.bin"))) == 1 + + +clean() +with Tesseract.from_image( + 
"pathreference", input_path=input_path, output_path=output_path, stream_logs=True +) as tess: + result = tess.apply({"paths": paths}) + print(result) + out_paths = [(output_path / p) for p in result["paths"]] + assert len(out_paths) == len(paths) + assert all(p.exists() for p in out_paths) + assert len(list(output_path.glob("*.bin"))) == 1 diff --git a/tesseract_core/runtime/experimental.py b/tesseract_core/runtime/experimental.py index 4b7013a3..b5f1c536 100644 --- a/tesseract_core/runtime/experimental.py +++ b/tesseract_core/runtime/experimental.py @@ -20,6 +20,7 @@ from pydantic.json_schema import JsonSchemaValue from pydantic_core import CoreSchema, SchemaSerializer, SchemaValidator, core_schema +from tesseract_core.runtime.config import get_config from tesseract_core.runtime.file_interactions import PathLike, parent_path from tesseract_core.runtime.gradient_endpoint_derivation import ( jacobian_from_jvp, @@ -226,6 +227,11 @@ def _resolve_input_path(path: Path) -> Path: ) if not tess_path.exists(): raise FileNotFoundError(f"Input path {tess_path} does not exist.") + return tess_path + + +def _resolve_input_file(path: Path) -> Path: + tess_path = _resolve_input_path(path) if not tess_path.is_file(): raise ValueError(f"Input path {tess_path} is not a file.") return tess_path @@ -241,8 +247,16 @@ def _strip_output_path(path: Path) -> Path: return path -InputFileReference = Annotated[Path, AfterValidator(_resolve_input_path)] -OutputFileReference = Annotated[Path, AfterValidator(_strip_output_path)] +def _strip_output_file(path: Path) -> Path: + stripped = _strip_output_path(path) + full_path = Path(get_config().output_path) / stripped + if not full_path.is_file(): + raise ValueError(f"Output path {full_path} is not a file.") + return stripped + + +InputFileReference = Annotated[Path, AfterValidator(_resolve_input_file)] +OutputFileReference = Annotated[Path, AfterValidator(_strip_output_file)] def require_file(file_path: PathLike) -> Path: diff --git 
a/tesseract_core/runtime/schema_generation.py b/tesseract_core/runtime/schema_generation.py index dad8d2e0..07b906d7 100644 --- a/tesseract_core/runtime/schema_generation.py +++ b/tesseract_core/runtime/schema_generation.py @@ -5,6 +5,7 @@ import types from collections.abc import Callable, Iterable, Mapping, Sequence from copy import copy +from pathlib import Path from typing import ( Annotated, Any, @@ -42,6 +43,69 @@ SEQ_INDEX_SENTINEL = object() DICT_INDEX_SENTINEL = object() + +def _resolve_input_path(path: Path) -> Path: + from tesseract_core.runtime.config import get_config + + input_path = get_config().input_path + tess_path = (input_path / path).resolve() + if str(input_path) not in str(tess_path): + raise ValueError( + f"Invalid input file reference: {path}. " + f"Expected path to be relative to {input_path}, but got {tess_path}. " + "File references have to be relative to --input-path." + ) + if not tess_path.exists(): + raise FileNotFoundError(f"Input path {tess_path} does not exist.") + return tess_path + + +def _strip_output_path(path: Path) -> Path: + from tesseract_core.runtime.config import get_config + + output_path = get_config().output_path + if path.is_relative_to(output_path): + return path.relative_to(output_path) + else: + return path + + +def _strip_output_exists(path: Path) -> Path: + from tesseract_core.runtime.config import get_config + + stripped = _strip_output_path(path) + full_path = Path(get_config().output_path) / stripped + if not full_path.exists(): + raise ValueError(f"Output path {full_path} does not exist.") + return stripped + + +def _is_annotated_path(x: Any) -> bool: + def _core_type(ttype: Any) -> Any: + while _is_annotated(ttype): + ttype = ttype.__origin__ + return ttype + + return _is_annotated(x) and _core_type(x) is Path + + +def _inject_input_path_validator(x: Any, _: tuple) -> Any: + if x is Path: + # Wrap with _resolve_input_path as the INNERMOST validator so that + # it runs before all user validators (if any) + 
return Annotated[Path, AfterValidator(_resolve_input_path)] + return x + + +def _inject_output_path_validator(x: Any, _: Any) -> Any: + if x is not Path and not _is_annotated_path(x): + return x + # x is either bare Path or Annotated[Path, *user_validators] + # Wrap with _strip_output_exists as the OUTERMOST validator so user validators + # run first (on absolute paths) and stripping happens last. + return Annotated[x, AfterValidator(_strip_output_exists)] + + T = TypeVar("T") # Python has funnily enough two union types now. See https://github.com/python/cpython/issues/105499 @@ -68,6 +132,7 @@ def apply_function_to_model_tree( func: Callable[[type, tuple], type], model_prefix: str = "", default_model_config: dict[str, Any] | None = None, + is_leaf: Callable[[Any], bool] | None = None, ) -> type[BaseModel]: """Apply a function to all leaves of a Pydantic model, recursing into containers + nested models. @@ -85,6 +150,11 @@ class MyModel(BaseModel): The path to the field "a" would be ["a"], and the path to the int type would be ["a", SEQ_INDEX_SENTINEL, DICT_INDEX_SENTINEL]. + + The optional ``is_leaf`` predicate, if provided, is checked first: when it returns + True for a node, ``func`` is called on that node immediately without further recursion. + This allows callers to treat compound types (e.g. ``Annotated[Path, ...]``) as atomic + leaves. """ if default_model_config is None: default_model_config = {} @@ -92,6 +162,10 @@ class MyModel(BaseModel): seen_models = set() def _recurse_over_model_tree(treeobj: Any, path: list[str]) -> Any: + # If the caller says this node is a leaf, apply func immediately + if is_leaf is not None and is_leaf(treeobj): + return func(treeobj, tuple(path)) + # Get the origin type of the annotation, e.g. 
List for List[int] origin_type = get_origin(treeobj) deprecated_types = ["List", "Dict", "Set", "FrozenSet", "Tuple"] @@ -268,15 +342,17 @@ def create_apply_schema( InputSchema = apply_function_to_model_tree( InputSchema, - lambda x, _: x, + _inject_input_path_validator, model_prefix="Apply_", default_model_config=dict(extra="forbid"), ) + OutputSchema = apply_function_to_model_tree( OutputSchema, - lambda x, _: x, + _inject_output_path_validator, model_prefix="Apply_", default_model_config=dict(extra="forbid"), + is_leaf=_is_annotated_path, ) class ApplyInputSchema(BaseModel): diff --git a/tests/endtoend_tests/test_examples.py b/tests/endtoend_tests/test_examples.py index c8db19a4..df3351f0 100644 --- a/tests/endtoend_tests/test_examples.py +++ b/tests/endtoend_tests/test_examples.py @@ -102,7 +102,7 @@ class Config: "fortran_heat": Config(), "conda": Config(), "required_files": Config(input_path="input"), - "filereference": Config(input_path="test_cases/testdata", output_path="output"), + "pathreference": Config(input_path="test_cases/testdata", output_path="output"), "metrics": Config(test_with_random_inputs=True), "qp_solve": Config(), "tesseractreference": Config(), # Can't test requests standalone; needs target Tesseract. Covered in separate test. 
diff --git a/tests/runtime_tests/test_schema_generation.py b/tests/runtime_tests/test_schema_generation.py index 8fd04579..3011bebe 100644 --- a/tests/runtime_tests/test_schema_generation.py +++ b/tests/runtime_tests/test_schema_generation.py @@ -4,11 +4,12 @@ import json from collections.abc import Iterable from copy import deepcopy +from pathlib import Path from typing import Annotated, Optional import numpy as np import pytest -from pydantic import BaseModel, ConfigDict, RootModel, ValidationError +from pydantic import AfterValidator, BaseModel, ConfigDict, RootModel, ValidationError from tesseract_core.runtime import Array, Differentiable, Float32, Float64, Int64, UInt8 from tesseract_core.runtime.experimental import LazySequence @@ -809,3 +810,226 @@ class Parent(BaseModel): with pytest.raises(ValidationError): ApplyParent.model_validate({"child": {"x": "foo"}, "extra": 1}) + + +# ============================================================================= +# Path resolution tests (automatic path handling in Input/OutputSchema) +# ============================================================================= + + +@pytest.fixture +def runtime_config(tmp_path): + """Fixture providing a real RuntimeConfig with temp input/output dirs. + + Patches get_config() so that path-resolution validators use these dirs. + Restores the original config state on teardown. 
+ """ + import tesseract_core.runtime.config as _cfg_mod + from tesseract_core.runtime.config import get_config, update_config + + original_config = _cfg_mod._current_config + original_overrides = _cfg_mod._config_overrides.copy() + + input_dir = tmp_path / "inputs" + output_dir = tmp_path / "outputs" + input_dir.mkdir() + output_dir.mkdir() + + update_config(input_path=str(input_dir), output_path=str(output_dir)) + yield get_config() + + _cfg_mod._current_config = original_config + _cfg_mod._config_overrides = original_overrides + + +# --- Input: basic resolution --- + + +def test_input_relative_path_resolved_to_absolute(runtime_config): + """Caller sends relative string → apply() receives absolute Path under input_path.""" + input_path = Path(runtime_config.input_path) + (input_path / "data.txt").touch() + (input_path / "mydir").mkdir() + + class InputSchema(BaseModel): + file: Path + folder: Path + + ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) + result = ApplyInput.model_validate( + {"inputs": {"file": "data.txt", "folder": "mydir"}} + ) + assert result.inputs.file == input_path / "data.txt" + assert result.inputs.file.is_absolute() + + +@pytest.mark.parametrize( + "path,exc,match", + [ + ("missing.txt", FileNotFoundError, "does not exist"), + ("../../etc/passwd", ValidationError, "relative to"), + ], +) +def test_input_invalid_path_raises(runtime_config, path, exc, match): + class InputSchema(BaseModel): + file: Path + + ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) + with pytest.raises(exc, match=match): + ApplyInput.model_validate({"inputs": {"file": path}}) + + +# --- Input: container and optional types --- + + +def test_input_list_of_paths_all_resolved(runtime_config): + """list[Path] — every entry is resolved to an absolute path.""" + input_path = Path(runtime_config.input_path) + (input_path / "a.txt").touch() + (input_path / "b.txt").touch() + + class InputSchema(BaseModel): + files: list[Path] + non_path_field: int + + 
ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) + result = ApplyInput.model_validate( + {"inputs": {"files": ["a.txt", "b.txt"], "non_path_field": 1}} + ) + assert result.inputs.files == [input_path / "a.txt", input_path / "b.txt"] + + +def test_input_optional_path(runtime_config): + input_path = Path(runtime_config.input_path) + + class InputSchema(BaseModel): + file: Path | None = None + + # None works + ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) + result = ApplyInput.model_validate({"inputs": {"file": None}}) + assert result.inputs.file is None + + # Path as well + (input_path / "data.txt").touch() + ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) + result = ApplyInput.model_validate({"inputs": {"file": "data.txt"}}) + assert result.inputs.file == input_path / "data.txt" + + +# --- Input: user validators --- + + +def test_input_user_validator_receives_absolute_path(runtime_config): + """AfterValidator on an input Path field receives the already-resolved absolute path. + + From the README: + "sample_8.json" + → built-in resolves → Path("/tesseract/input_data/sample_8.json") + → user validator → Path("/tesseract/input_data/sample_8.json") + """ + input_path = Path(runtime_config.input_path) + (input_path / "data.txt").touch() + + seen: list[Path] = [] + + def record(path: Path) -> Path: + seen.append(path) + return path + + class InputSchema(BaseModel): + file: Annotated[Path, AfterValidator(record)] + + ApplyInput, _ = create_apply_schema(InputSchema, InputSchema) + ApplyInput.model_validate({"inputs": {"file": "data.txt"}}) + + assert len(seen) == 1 + assert seen[0] == input_path / "data.txt" + assert seen[0].is_absolute() + + +# --- Output: basic stripping --- + + +def test_output_absolute_path_stripped_to_relative(runtime_config): + """apply() returns absolute Path → caller receives relative Path. 
+ + From the README: + apply returns → Path("/tesseract/output_data/sample_8.copy") + built-in strips → Path("sample_8.copy") + caller receives → "sample_8.copy" + """ + output_path = Path(runtime_config.output_path) + (output_path / "result.txt").touch() + + class OutputSchema(BaseModel): + result: Path + + _, ApplyOutput = create_apply_schema(OutputSchema, OutputSchema) + out = ApplyOutput.model_validate({"result": output_path / "result.txt"}) + assert out.root.result == Path("result.txt") + assert not out.root.result.is_absolute() + + +def test_output_nonexistent_path_raises(runtime_config): + """ValueError when the output path does not exist.""" + output_path = Path(runtime_config.output_path) + + class OutputSchema(BaseModel): + result: Path + + _, ApplyOutput = create_apply_schema(OutputSchema, OutputSchema) + with pytest.raises(ValidationError, match="does not exist"): + ApplyOutput.model_validate({"result": output_path / "ghost.txt"}) + + +def test_output_list_of_paths_all_stripped(runtime_config): + """list[Path] — all output paths stripped to relative.""" + output_path = Path(runtime_config.output_path) + (output_path / "a.out").touch() + (output_path / "b.out").touch() + + class OutputSchema(BaseModel): + files: list[Path] + other_non_path_field: int + + _, ApplyOutput = create_apply_schema(OutputSchema, OutputSchema) + out = ApplyOutput.model_validate( + { + "files": [output_path / "a.out", output_path / "b.out"], + "other_non_path_field": 1, + } + ) + assert out.root.files == [Path("a.out"), Path("b.out")] + + +# --- Output: user validators --- + + +def test_output_user_validator_receives_absolute_path(runtime_config): + """AfterValidator on an output Path field receives the absolute path before stripping. 
+ + From the README: + apply returns → Path("/tesseract/output_data/sample_8.copy") + → user validator → Path("/tesseract/output_data/sample_8.copy") ← absolute + → built-in → Path("sample_8.copy") ← stripped + """ + output_path = Path(runtime_config.output_path) + output_file = output_path / "result.txt" + output_file.touch() + + seen: list[Path] = [] + + def record(path: Path) -> Path: + seen.append(path) + return path + + class OutputSchema(BaseModel): + result: Annotated[Path, AfterValidator(record)] + + _, ApplyOutput = create_apply_schema(OutputSchema, OutputSchema) + out = ApplyOutput.model_validate({"result": output_file}) + + assert len(seen) == 1 + assert seen[0] == output_file # absolute, not yet stripped + assert out.root.result == Path("result.txt") # final result is stripped