diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index da1c1d13..2b06d834 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,7 @@ jobs: runs-on: ${{ github.repository == 'stainless-sdks/mixedbread-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} if: (github.event_name == 'push' || github.event.pull_request.head.repo.fork) && (github.event_name != 'push' || github.event.head_commit.message != 'codegen metadata') steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install Rye run: | @@ -46,7 +46,7 @@ jobs: id-token: write runs-on: ${{ github.repository == 'stainless-sdks/mixedbread-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install Rye run: | @@ -67,7 +67,7 @@ jobs: github.repository == 'stainless-sdks/mixedbread-python' && !startsWith(github.ref, 'refs/heads/stl/') id: github-oidc - uses: actions/github-script@v8 + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 with: script: core.setOutput('github_token', await core.getIDToken()); @@ -87,7 +87,7 @@ jobs: runs-on: ${{ github.repository == 'stainless-sdks/mixedbread-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} if: github.event_name == 'push' || github.event.pull_request.head.repo.fork steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install Rye run: | diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index 4bed0df6..9343729a 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install Rye run: | diff --git 
a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml index 5f4defaf..133836a8 100644 --- a/.github/workflows/release-doctor.yml +++ b/.github/workflows/release-doctor.yml @@ -12,7 +12,7 @@ jobs: if: github.repository == 'mixedbread-ai/mixedbread-python' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next') steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Check release environment run: | diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 26b1ce24..2b2b4fa9 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.50.0" + ".": "0.51.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index a5e2a677..dbdec851 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 55 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/mixedbread%2Fmixedbread-ebd391dad1252eb00dd69ac50455b93bcdcd2cf0177d678e160e47f1d017287f.yml -openapi_spec_hash: 3bfd5f9eb34711238caef851aa81f5c0 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/mixedbread/mixedbread-b6c2a38a4d1a997676f8b4a63ec3c68ea0c7d72434543e7dcf8bdb5840f31d62.yml +openapi_spec_hash: 175d3caf13620efd764ff33d192b2d2a config_hash: 594a43c9cb8089f079bb9c5442646791 diff --git a/CHANGELOG.md b/CHANGELOG.md index 219fa545..658a35b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,29 @@ # Changelog +## 0.51.0 (2026-05-14) + +Full Changelog: [v0.50.0...v0.51.0](https://github.com/mixedbread-ai/mixedbread-python/compare/v0.50.0...v0.51.0) + +### Features + +* **api:** api update ([842efa8](https://github.com/mixedbread-ai/mixedbread-python/commit/842efa840415d1a6d8f754f9c1594655283858c9)) +* **api:** api update 
([6fe066e](https://github.com/mixedbread-ai/mixedbread-python/commit/6fe066e17443211a47a21f677344bf8c349e6fde)) +* **api:** api update ([eac4b8f](https://github.com/mixedbread-ai/mixedbread-python/commit/eac4b8f34275c9377765cfe62f677d5ca36cf061)) +* **api:** api update ([66aa7dc](https://github.com/mixedbread-ai/mixedbread-python/commit/66aa7dc394357766b869a3d067e4c130cdf482e8)) +* **internal/types:** support eagerly validating pydantic iterators ([3e9c5ac](https://github.com/mixedbread-ai/mixedbread-python/commit/3e9c5ac9ab56db4eddf8a6a2f219a8789c9c45ac)) +* support setting headers via env ([38de0ee](https://github.com/mixedbread-ai/mixedbread-python/commit/38de0ee2781d9e27d6aecafe89f5a5b5baaed75c)) + + +### Bug Fixes + +* **client:** add missing f-string prefix in file type error message ([fe74792](https://github.com/mixedbread-ai/mixedbread-python/commit/fe74792cd81bb58bd62e58c086d6d1ee6c233c25)) +* use correct field name format for multipart file arrays ([361fcd6](https://github.com/mixedbread-ai/mixedbread-python/commit/361fcd66f8422020375b076d4c70a83c2b82241d)) + + +### Chores + +* **internal:** reformat pyproject.toml ([8802c53](https://github.com/mixedbread-ai/mixedbread-python/commit/8802c53023895e9248f03b92477c9a17013ec7b7)) + ## 0.50.0 (2026-04-23) Full Changelog: [v0.49.0...v0.50.0](https://github.com/mixedbread-ai/mixedbread-python/compare/v0.49.0...v0.50.0) diff --git a/api.md b/api.md index e3f1e559..00011475 100644 --- a/api.md +++ b/api.md @@ -1,7 +1,7 @@ # Shared Types ```python -from mixedbread.types import SearchFilter, SearchFilterCondition, Usage +from mixedbread.types import SearchFilterCondition, Usage ``` # Mixedbread diff --git a/pyproject.toml b/pyproject.toml index 1dc4f151..51d00208 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "mixedbread" -version = "0.50.0" +version = "0.51.0" description = "The official Python library for the Mixedbread API" dynamic = ["readme"] license = "Apache-2.0" @@ -168,7 
+168,7 @@ show_error_codes = true # # We also exclude our `tests` as mypy doesn't always infer # types correctly and Pyright will still catch any type errors. -exclude = ['src/mixedbread/_files.py', '_dev/.*.py', 'tests/.*'] +exclude = ["src/mixedbread/_files.py", "_dev/.*.py", "tests/.*"] strict_equality = true implicit_reexport = true diff --git a/src/mixedbread/_client.py b/src/mixedbread/_client.py index 7391953a..c8d8e329 100644 --- a/src/mixedbread/_client.py +++ b/src/mixedbread/_client.py @@ -27,6 +27,7 @@ ) from ._utils import ( is_given, + is_mapping_t, maybe_transform, get_async_library, async_maybe_transform, @@ -150,6 +151,15 @@ def __init__( except KeyError as exc: raise ValueError(f"Unknown environment: {environment}") from exc + custom_headers_env = os.environ.get("MIXEDBREAD_CUSTOM_HEADERS") + if custom_headers_env is not None: + parsed: dict[str, str] = {} + for line in custom_headers_env.split("\n"): + colon = line.find(":") + if colon >= 0: + parsed[line[:colon].strip()] = line[colon + 1 :].strip() + default_headers = {**parsed, **(default_headers if is_mapping_t(default_headers) else {})} + super().__init__( version=__version__, base_url=base_url, @@ -547,6 +557,15 @@ def __init__( except KeyError as exc: raise ValueError(f"Unknown environment: {environment}") from exc + custom_headers_env = os.environ.get("MIXEDBREAD_CUSTOM_HEADERS") + if custom_headers_env is not None: + parsed: dict[str, str] = {} + for line in custom_headers_env.split("\n"): + colon = line.find(":") + if colon >= 0: + parsed[line[:colon].strip()] = line[colon + 1 :].strip() + default_headers = {**parsed, **(default_headers if is_mapping_t(default_headers) else {})} + super().__init__( version=__version__, base_url=base_url, diff --git a/src/mixedbread/_files.py b/src/mixedbread/_files.py index 4df9de45..f557c29b 100644 --- a/src/mixedbread/_files.py +++ b/src/mixedbread/_files.py @@ -99,7 +99,7 @@ async def async_to_httpx_files(files: RequestFiles | None) -> 
HttpxRequestFiles elif is_sequence_t(files): files = [(key, await _async_transform_file(file)) for key, file in files] else: - raise TypeError("Unexpected file type input {type(files)}, expected mapping or sequence") + raise TypeError(f"Unexpected file type input {type(files)}, expected mapping or sequence") return files diff --git a/src/mixedbread/_models.py b/src/mixedbread/_models.py index 29070e05..8c5ab260 100644 --- a/src/mixedbread/_models.py +++ b/src/mixedbread/_models.py @@ -25,7 +25,9 @@ ClassVar, Protocol, Required, + Annotated, ParamSpec, + TypeAlias, TypedDict, TypeGuard, final, @@ -79,7 +81,15 @@ from ._constants import RAW_RESPONSE_HEADER if TYPE_CHECKING: + from pydantic import GetCoreSchemaHandler, ValidatorFunctionWrapHandler + from pydantic_core import CoreSchema, core_schema from pydantic_core.core_schema import ModelField, ModelSchema, LiteralSchema, ModelFieldsSchema +else: + try: + from pydantic_core import CoreSchema, core_schema + except ImportError: + CoreSchema = None + core_schema = None __all__ = ["BaseModel", "GenericModel"] @@ -396,6 +406,76 @@ def model_dump_json( ) +class _EagerIterable(list[_T], Generic[_T]): + """ + Accepts any Iterable[T] input (including generators), consumes it + eagerly, and validates all items upfront. + + Validation preserves the original container type where possible + (e.g. a set[T] stays a set[T]). Serialization (model_dump / JSON) + always emits a list — round-tripping through model_dump() will not + restore the original container type. 
+ """ + + @classmethod + def __get_pydantic_core_schema__( + cls, + source_type: Any, + handler: GetCoreSchemaHandler, + ) -> CoreSchema: + (item_type,) = get_args(source_type) or (Any,) + item_schema: CoreSchema = handler.generate_schema(item_type) + list_of_items_schema: CoreSchema = core_schema.list_schema(item_schema) + + return core_schema.no_info_wrap_validator_function( + cls._validate, + list_of_items_schema, + serialization=core_schema.plain_serializer_function_ser_schema( + cls._serialize, + info_arg=False, + ), + ) + + @staticmethod + def _validate(v: Iterable[_T], handler: "ValidatorFunctionWrapHandler") -> Any: + original_type: type[Any] = type(v) + + # Normalize to list so list_schema can validate each item + if isinstance(v, list): + items: list[_T] = v + else: + try: + items = list(v) + except TypeError as e: + raise TypeError("Value is not iterable") from e + + # Validate items against the inner schema + validated: list[_T] = handler(items) + + # Reconstruct original container type + if original_type is list: + return validated + # str(list) produces the list's repr, not a string built from items, + # so skip reconstruction for str and its subclasses. 
+ if issubclass(original_type, str): + return validated + try: + return original_type(validated) + except (TypeError, ValueError): + # If the type cannot be reconstructed, just return the validated list + return validated + + @staticmethod + def _serialize(v: Iterable[_T]) -> list[_T]: + """Always serialize as a list so Pydantic's JSON encoder is happy.""" + if isinstance(v, list): + return v + return list(v) + + +EagerIterable: TypeAlias = Annotated[Iterable[_T], _EagerIterable] + + def _construct_field(value: object, field: FieldInfo, key: str) -> object: if value is None: return field_get_default(field) diff --git a/src/mixedbread/_qs.py b/src/mixedbread/_qs.py index de8c99bc..4127c19c 100644 --- a/src/mixedbread/_qs.py +++ b/src/mixedbread/_qs.py @@ -2,17 +2,13 @@ from typing import Any, List, Tuple, Union, Mapping, TypeVar from urllib.parse import parse_qs, urlencode -from typing_extensions import Literal, get_args +from typing_extensions import get_args -from ._types import NotGiven, not_given +from ._types import NotGiven, ArrayFormat, NestedFormat, not_given from ._utils import flatten _T = TypeVar("_T") - -ArrayFormat = Literal["comma", "repeat", "indices", "brackets"] -NestedFormat = Literal["dots", "brackets"] - PrimitiveData = Union[str, int, float, bool, None] # this should be Data = Union[PrimitiveData, "List[Data]", "Tuple[Data]", "Mapping[str, Data]"] # https://github.com/microsoft/pyright/issues/3555 diff --git a/src/mixedbread/_types.py b/src/mixedbread/_types.py index fa2086c9..39dc5bdd 100644 --- a/src/mixedbread/_types.py +++ b/src/mixedbread/_types.py @@ -47,6 +47,9 @@ ModelT = TypeVar("ModelT", bound=pydantic.BaseModel) _T = TypeVar("_T") +ArrayFormat = Literal["comma", "repeat", "indices", "brackets"] +NestedFormat = Literal["dots", "brackets"] + # Approximates httpx internal ProxiesTypes and RequestFiles types # while adding support for `PathLike` instances diff --git a/src/mixedbread/_utils/_utils.py b/src/mixedbread/_utils/_utils.py index 
771859f5..199cd231 100644 --- a/src/mixedbread/_utils/_utils.py +++ b/src/mixedbread/_utils/_utils.py @@ -17,11 +17,11 @@ ) from pathlib import Path from datetime import date, datetime -from typing_extensions import TypeGuard +from typing_extensions import TypeGuard, get_args import sniffio -from .._types import Omit, NotGiven, FileTypes, HeadersLike +from .._types import Omit, NotGiven, FileTypes, ArrayFormat, HeadersLike _T = TypeVar("_T") _TupleT = TypeVar("_TupleT", bound=Tuple[object, ...]) @@ -40,25 +40,45 @@ def extract_files( query: Mapping[str, object], *, paths: Sequence[Sequence[str]], + array_format: ArrayFormat = "brackets", ) -> list[tuple[str, FileTypes]]: """Recursively extract files from the given dictionary based on specified paths. A path may look like this ['foo', 'files', '<array>', 'data']. + ``array_format`` controls how ``<array>`` segments contribute to the emitted + field name. Supported values: ``"brackets"`` (``foo[]``), ``"repeat"`` and + ``"comma"`` (``foo``), ``"indices"`` (``foo[0]``, ``foo[1]``). + Note: this mutates the given dictionary. """ files: list[tuple[str, FileTypes]] = [] for path in paths: - files.extend(_extract_items(query, path, index=0, flattened_key=None)) + files.extend(_extract_items(query, path, index=0, flattened_key=None, array_format=array_format)) return files +def _array_suffix(array_format: ArrayFormat, array_index: int) -> str: + if array_format == "brackets": + return "[]" + if array_format == "indices": + return f"[{array_index}]" + if array_format == "repeat" or array_format == "comma": + # Both repeat the bare field name for each file part; there is no + # meaningful way to comma-join binary parts. 
+ return "" + raise NotImplementedError( + f"Unknown array_format value: {array_format}, choose from {', '.join(get_args(ArrayFormat))}" + ) + + def _extract_items( obj: object, path: Sequence[str], *, index: int, flattened_key: str | None, + array_format: ArrayFormat, ) -> list[tuple[str, FileTypes]]: try: key = path[index] @@ -75,9 +95,11 @@ def _extract_items( if is_list(obj): files: list[tuple[str, FileTypes]] = [] - for entry in obj: - assert_is_file_content(entry, key=flattened_key + "[]" if flattened_key else "") - files.append((flattened_key + "[]", cast(FileTypes, entry))) + for array_index, entry in enumerate(obj): + suffix = _array_suffix(array_format, array_index) + emitted_key = (flattened_key + suffix) if flattened_key else suffix + assert_is_file_content(entry, key=emitted_key) + files.append((emitted_key, cast(FileTypes, entry))) return files assert_is_file_content(obj, key=flattened_key) @@ -106,6 +128,7 @@ def _extract_items( path, index=index, flattened_key=flattened_key, + array_format=array_format, ) elif is_list(obj): if key != "": @@ -117,9 +140,12 @@ def _extract_items( item, path, index=index, - flattened_key=flattened_key + "[]" if flattened_key is not None else "[]", + flattened_key=( + (flattened_key if flattened_key is not None else "") + _array_suffix(array_format, array_index) + ), + array_format=array_format, ) - for item in obj + for array_index, item in enumerate(obj) ] ) diff --git a/src/mixedbread/_version.py b/src/mixedbread/_version.py index 84dd5576..b259f3be 100644 --- a/src/mixedbread/_version.py +++ b/src/mixedbread/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
__title__ = "mixedbread" -__version__ = "0.50.0" # x-release-please-version +__version__ = "0.51.0" # x-release-please-version diff --git a/src/mixedbread/resources/stores/stores.py b/src/mixedbread/resources/stores/stores.py index 20271dcc..c6923703 100644 --- a/src/mixedbread/resources/stores/stores.py +++ b/src/mixedbread/resources/stores/stores.py @@ -361,13 +361,16 @@ def delete( def metadata_facets( self, *, - query: Optional[str] | Omit = omit, store_identifiers: SequenceNotStr[str], top_k: int | Omit = omit, filters: Optional[store_metadata_facets_params.Filters] | Omit = omit, file_ids: Union[Iterable[object], SequenceNotStr[str], None] | Omit = omit, + query: Optional[str] | Omit = omit, search_options: StoreChunkSearchOptionsParam | Omit = omit, facets: Optional[SequenceNotStr[str]] | Omit = omit, + max_fields: int | Omit = omit, + max_values_per_field: int | Omit = omit, + max_files: int | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -379,9 +382,7 @@ def metadata_facets( Get metadata facets Args: - query: Search query text - - store_identifiers: IDs or names of stores to search + store_identifiers: IDs or names of stores top_k: Number of results to return @@ -389,10 +390,18 @@ def metadata_facets( file_ids: Optional list of file IDs to filter chunks by (inclusion filter) + query: Search query text + search_options: Search configuration options facets: Optional list of facets to return. Use dot for nested fields. + max_fields: Maximum number of distinct metadata fields (keys) to return. + + max_values_per_field: Maximum number of distinct values returned per field, ranked by count. + + max_files: Maximum number of store files scanned to compute facets. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -405,13 +414,16 @@ def metadata_facets( "/v1/stores/metadata-facets", body=maybe_transform( { - "query": query, "store_identifiers": store_identifiers, "top_k": top_k, "filters": filters, "file_ids": file_ids, + "query": query, "search_options": search_options, "facets": facets, + "max_fields": max_fields, + "max_values_per_field": max_values_per_field, + "max_files": max_files, }, store_metadata_facets_params.StoreMetadataFacetsParams, ), @@ -424,11 +436,11 @@ def metadata_facets( def question_answering( self, *, - query: str | Omit = omit, store_identifiers: SequenceNotStr[str], top_k: int | Omit = omit, filters: Optional[store_question_answering_params.Filters] | Omit = omit, file_ids: Union[Iterable[object], SequenceNotStr[str], None] | Omit = omit, + query: str | Omit = omit, search_options: StoreChunkSearchOptionsParam | Omit = omit, stream: bool | Omit = omit, instructions: Optional[str] | Omit = omit, @@ -440,15 +452,11 @@ def question_answering( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> StoreQuestionAnsweringResponse: - """Question answering + """ + Question answering Args: - query: Question to answer. - - If not provided, the question will be extracted from the - passed messages. - - store_identifiers: IDs or names of stores to search + store_identifiers: IDs or names of stores top_k: Number of results to return @@ -456,6 +464,9 @@ def question_answering( file_ids: Optional list of file IDs to filter chunks by (inclusion filter) + query: Question to answer. If not provided, the question will be extracted from the + passed messages. 
+ search_options: Search configuration options stream: Whether to stream the answer @@ -477,11 +488,11 @@ def question_answering( "/v1/stores/question-answering", body=maybe_transform( { - "query": query, "store_identifiers": store_identifiers, "top_k": top_k, "filters": filters, "file_ids": file_ids, + "query": query, "search_options": search_options, "stream": stream, "instructions": instructions, @@ -498,11 +509,11 @@ def question_answering( def search( self, *, - query: store_search_params.Query, store_identifiers: SequenceNotStr[str], top_k: int | Omit = omit, filters: Optional[store_search_params.Filters] | Omit = omit, file_ids: Union[Iterable[object], SequenceNotStr[str], None] | Omit = omit, + query: store_search_params.Query, search_options: StoreChunkSearchOptionsParam | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
@@ -535,9 +546,7 @@ def search( (404): If no vector stores are found to search Args: - query: Search query text - - store_identifiers: IDs or names of stores to search + store_identifiers: IDs or names of stores top_k: Number of results to return @@ -545,6 +554,8 @@ def search( file_ids: Optional list of file IDs to filter chunks by (inclusion filter) + query: Search query text + search_options: Search configuration options extra_headers: Send extra headers @@ -559,11 +570,11 @@ def search( "/v1/stores/search", body=maybe_transform( { - "query": query, "store_identifiers": store_identifiers, "top_k": top_k, "filters": filters, "file_ids": file_ids, + "query": query, "search_options": search_options, }, store_search_params.StoreSearchParams, @@ -890,13 +901,16 @@ async def delete( async def metadata_facets( self, *, - query: Optional[str] | Omit = omit, store_identifiers: SequenceNotStr[str], top_k: int | Omit = omit, filters: Optional[store_metadata_facets_params.Filters] | Omit = omit, file_ids: Union[Iterable[object], SequenceNotStr[str], None] | Omit = omit, + query: Optional[str] | Omit = omit, search_options: StoreChunkSearchOptionsParam | Omit = omit, facets: Optional[SequenceNotStr[str]] | Omit = omit, + max_fields: int | Omit = omit, + max_values_per_field: int | Omit = omit, + max_files: int | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, @@ -908,9 +922,7 @@ async def metadata_facets( Get metadata facets Args: - query: Search query text - - store_identifiers: IDs or names of stores to search + store_identifiers: IDs or names of stores top_k: Number of results to return @@ -918,10 +930,18 @@ async def metadata_facets( file_ids: Optional list of file IDs to filter chunks by (inclusion filter) + query: Search query text + search_options: Search configuration options facets: Optional list of facets to return. Use dot for nested fields. + max_fields: Maximum number of distinct metadata fields (keys) to return. + + max_values_per_field: Maximum number of distinct values returned per field, ranked by count. + + max_files: Maximum number of store files scanned to compute facets. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -934,13 +954,16 @@ async def metadata_facets( "/v1/stores/metadata-facets", body=await async_maybe_transform( { - "query": query, "store_identifiers": store_identifiers, "top_k": top_k, "filters": filters, "file_ids": file_ids, + "query": query, "search_options": search_options, "facets": facets, + "max_fields": max_fields, + "max_values_per_field": max_values_per_field, + "max_files": max_files, }, store_metadata_facets_params.StoreMetadataFacetsParams, ), @@ -953,11 +976,11 @@ async def metadata_facets( async def question_answering( self, *, - query: str | Omit = omit, store_identifiers: SequenceNotStr[str], top_k: int | Omit = omit, filters: Optional[store_question_answering_params.Filters] | Omit = omit, file_ids: Union[Iterable[object], SequenceNotStr[str], None] | Omit = omit, + query: str | Omit = omit, search_options: StoreChunkSearchOptionsParam | Omit = omit, stream: bool | Omit = omit, instructions: Optional[str] | Omit = omit, @@ -969,15 +992,11 @@ async def question_answering( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> 
StoreQuestionAnsweringResponse: - """Question answering + """ + Question answering Args: - query: Question to answer. - - If not provided, the question will be extracted from the - passed messages. - - store_identifiers: IDs or names of stores to search + store_identifiers: IDs or names of stores top_k: Number of results to return @@ -985,6 +1004,9 @@ async def question_answering( file_ids: Optional list of file IDs to filter chunks by (inclusion filter) + query: Question to answer. If not provided, the question will be extracted from the + passed messages. + search_options: Search configuration options stream: Whether to stream the answer @@ -1006,11 +1028,11 @@ async def question_answering( "/v1/stores/question-answering", body=await async_maybe_transform( { - "query": query, "store_identifiers": store_identifiers, "top_k": top_k, "filters": filters, "file_ids": file_ids, + "query": query, "search_options": search_options, "stream": stream, "instructions": instructions, @@ -1027,11 +1049,11 @@ async def question_answering( async def search( self, *, - query: store_search_params.Query, store_identifiers: SequenceNotStr[str], top_k: int | Omit = omit, filters: Optional[store_search_params.Filters] | Omit = omit, file_ids: Union[Iterable[object], SequenceNotStr[str], None] | Omit = omit, + query: store_search_params.Query, search_options: StoreChunkSearchOptionsParam | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
@@ -1064,9 +1086,7 @@ async def search( (404): If no vector stores are found to search Args: - query: Search query text - - store_identifiers: IDs or names of stores to search + store_identifiers: IDs or names of stores top_k: Number of results to return @@ -1074,6 +1094,8 @@ async def search( file_ids: Optional list of file IDs to filter chunks by (inclusion filter) + query: Search query text + search_options: Search configuration options extra_headers: Send extra headers @@ -1088,11 +1110,11 @@ async def search( "/v1/stores/search", body=await async_maybe_transform( { - "query": query, "store_identifiers": store_identifiers, "top_k": top_k, "filters": filters, "file_ids": file_ids, + "query": query, "search_options": search_options, }, store_search_params.StoreSearchParams, diff --git a/src/mixedbread/types/__init__.py b/src/mixedbread/types/__init__.py index 9f1966f9..403a7f1d 100644 --- a/src/mixedbread/types/__init__.py +++ b/src/mixedbread/types/__init__.py @@ -2,11 +2,9 @@ from __future__ import annotations -from . import shared -from .. import _compat from .scope import Scope as Scope from .store import Store as Store -from .shared import Usage as Usage, SearchFilter as SearchFilter, SearchFilterCondition as SearchFilterCondition +from .shared import Usage as Usage, SearchFilterCondition as SearchFilterCondition from .api_key import APIKey as APIKey from .audio_url import AudioURL as AudioURL from .embedding import Embedding as Embedding @@ -75,12 +73,3 @@ from .store_chunk_search_options_param import StoreChunkSearchOptionsParam as StoreChunkSearchOptionsParam from .markdown_chunk_generated_metadata import MarkdownChunkGeneratedMetadata as MarkdownChunkGeneratedMetadata from .store_question_answering_response import StoreQuestionAnsweringResponse as StoreQuestionAnsweringResponse - -# Rebuild cyclical models only after all modules are imported. 
-# This ensures that, when building the deferred (due to cyclical references) model schema, -# Pydantic can resolve the necessary references. -# See: https://github.com/pydantic/pydantic/issues/11250 for more context. -if _compat.PYDANTIC_V1: - shared.search_filter.SearchFilter.update_forward_refs() # type: ignore -else: - shared.search_filter.SearchFilter.model_rebuild(_parent_namespace_depth=0) diff --git a/src/mixedbread/types/agentic_search_config_param.py b/src/mixedbread/types/agentic_search_config_param.py index 8a01d8e4..3ffe1840 100644 --- a/src/mixedbread/types/agentic_search_config_param.py +++ b/src/mixedbread/types/agentic_search_config_param.py @@ -3,7 +3,7 @@ from __future__ import annotations from typing import Optional -from typing_extensions import TypedDict +from typing_extensions import Literal, TypedDict __all__ = ["AgenticSearchConfigParam"] @@ -17,6 +17,16 @@ class AgenticSearchConfigParam(TypedDict, total=False): queries_per_round: int """Maximum queries per round""" + strict_top_k: bool + """Whether the final retrieved chunk list must provide exactly top_k ranked chunks""" + + media_content: Literal["auto", "never", "always"] + """Controls when retrieved image content is provided to the agent. + + `auto` sends images only when no OCR text or summary is available, `never` + disables image content, and `always` sends image content when available. 
+ """ + instructions: Optional[str] """ Additional custom instructions (followed only when not in conflict with existing diff --git a/src/mixedbread/types/scored_audio_url_input_chunk.py b/src/mixedbread/types/scored_audio_url_input_chunk.py index e12b2d96..d5d29d8b 100644 --- a/src/mixedbread/types/scored_audio_url_input_chunk.py +++ b/src/mixedbread/types/scored_audio_url_input_chunk.py @@ -68,6 +68,9 @@ class ScoredAudioURLInputChunk(BaseModel): transcription: Optional[str] = None """speech recognition (sr) text of the audio""" + summary: Optional[str] = None + """summary of the audio""" + audio_url: Optional[AudioURL] = None """Model for audio URL validation.""" diff --git a/src/mixedbread/types/shared/__init__.py b/src/mixedbread/types/shared/__init__.py index 66d5dcf9..bdd92589 100644 --- a/src/mixedbread/types/shared/__init__.py +++ b/src/mixedbread/types/shared/__init__.py @@ -1,5 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from .usage import Usage as Usage -from .search_filter import SearchFilter as SearchFilter from .search_filter_condition import SearchFilterCondition as SearchFilterCondition diff --git a/src/mixedbread/types/shared/search_filter.py b/src/mixedbread/types/shared/search_filter.py deleted file mode 100644 index c502e6db..00000000 --- a/src/mixedbread/types/shared/search_filter.py +++ /dev/null @@ -1,40 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing import TYPE_CHECKING, List, Union, Optional -from typing_extensions import TypeAlias, TypeAliasType - -from ..._compat import PYDANTIC_V1 -from ..._models import BaseModel -from .search_filter_condition import SearchFilterCondition - -__all__ = ["SearchFilter", "All", "Any", "NoneType"] - -if TYPE_CHECKING or not PYDANTIC_V1: - All = TypeAliasType("All", Union["SearchFilter", SearchFilterCondition]) -else: - All: TypeAlias = Union["SearchFilter", SearchFilterCondition] - -if TYPE_CHECKING or not PYDANTIC_V1: - Any = TypeAliasType("Any", Union["SearchFilter", SearchFilterCondition]) -else: - Any: TypeAlias = Union["SearchFilter", SearchFilterCondition] - -if TYPE_CHECKING or not PYDANTIC_V1: - NoneType = TypeAliasType("NoneType", Union["SearchFilter", SearchFilterCondition]) -else: - NoneType: TypeAlias = Union["SearchFilter", SearchFilterCondition] - - -class SearchFilter(BaseModel): - """Represents a filter with AND, OR, and NOT conditions.""" - - all: Optional[List[All]] = None - """List of conditions or filters to be ANDed together""" - - any: Optional[List[Any]] = None - """List of conditions or filters to be ORed together""" - - none: Optional[List[NoneType]] = None - """List of conditions or filters to be NOTed""" diff --git a/src/mixedbread/types/shared_params/__init__.py b/src/mixedbread/types/shared_params/__init__.py index c91e740d..69d169c2 100644 --- a/src/mixedbread/types/shared_params/__init__.py +++ b/src/mixedbread/types/shared_params/__init__.py @@ -1,4 +1,3 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from .search_filter import SearchFilter as SearchFilter from .search_filter_condition import SearchFilterCondition as SearchFilterCondition diff --git a/src/mixedbread/types/shared_params/search_filter.py b/src/mixedbread/types/shared_params/search_filter.py deleted file mode 100644 index fd468f38..00000000 --- a/src/mixedbread/types/shared_params/search_filter.py +++ /dev/null @@ -1,39 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import TYPE_CHECKING, Union, Iterable, Optional -from typing_extensions import TypeAlias, TypedDict, TypeAliasType - -from ..._compat import PYDANTIC_V1 -from .search_filter_condition import SearchFilterCondition - -__all__ = ["SearchFilter", "All", "Any", "NoneType"] - -if TYPE_CHECKING or not PYDANTIC_V1: - All = TypeAliasType("All", Union["SearchFilter", SearchFilterCondition]) -else: - All: TypeAlias = Union["SearchFilter", SearchFilterCondition] - -if TYPE_CHECKING or not PYDANTIC_V1: - Any = TypeAliasType("Any", Union["SearchFilter", SearchFilterCondition]) -else: - Any: TypeAlias = Union["SearchFilter", SearchFilterCondition] - -if TYPE_CHECKING or not PYDANTIC_V1: - NoneType = TypeAliasType("NoneType", Union["SearchFilter", SearchFilterCondition]) -else: - NoneType: TypeAlias = Union["SearchFilter", SearchFilterCondition] - - -class SearchFilter(TypedDict, total=False): - """Represents a filter with AND, OR, and NOT conditions.""" - - all: Optional[Iterable[All]] - """List of conditions or filters to be ANDed together""" - - any: Optional[Iterable[Any]] - """List of conditions or filters to be ORed together""" - - none: Optional[Iterable[NoneType]] - """List of conditions or filters to be NOTed""" diff --git a/src/mixedbread/types/store_metadata_facets_params.py b/src/mixedbread/types/store_metadata_facets_params.py index 64aa124e..07b59651 100644 --- a/src/mixedbread/types/store_metadata_facets_params.py +++ 
b/src/mixedbread/types/store_metadata_facets_params.py @@ -9,15 +9,24 @@ from .store_chunk_search_options_param import StoreChunkSearchOptionsParam from .shared_params.search_filter_condition import SearchFilterCondition -__all__ = ["StoreMetadataFacetsParams", "Filters", "FiltersUnionMember2"] +__all__ = [ + "StoreMetadataFacetsParams", + "Filters", + "FiltersSearchFilterInput", + "FiltersSearchFilterInputAll", + "FiltersSearchFilterInputAny", + "FiltersSearchFilterInputNone", + "FiltersUnionMember2", + "FiltersUnionMember2SearchFilterInput", + "FiltersUnionMember2SearchFilterInputAll", + "FiltersUnionMember2SearchFilterInputAny", + "FiltersUnionMember2SearchFilterInputNone", +] class StoreMetadataFacetsParams(TypedDict, total=False): - query: Optional[str] - """Search query text""" - store_identifiers: Required[SequenceNotStr[str]] - """IDs or names of stores to search""" + """IDs or names of stores""" top_k: int """Number of results to return""" @@ -28,15 +37,65 @@ class StoreMetadataFacetsParams(TypedDict, total=False): file_ids: Union[Iterable[object], SequenceNotStr[str], None] """Optional list of file IDs to filter chunks by (inclusion filter)""" + query: Optional[str] + """Search query text""" + search_options: StoreChunkSearchOptionsParam """Search configuration options""" facets: Optional[SequenceNotStr[str]] """Optional list of facets to return. 
Use dot for nested fields.""" + max_fields: int + """Maximum number of distinct metadata fields (keys) to return.""" + + max_values_per_field: int + """Maximum number of distinct values returned per field, ranked by count.""" + + max_files: int + """Maximum number of store files scanned to compute facets.""" + + +FiltersSearchFilterInputAll: TypeAlias = Union[SearchFilterCondition, object] + +FiltersSearchFilterInputAny: TypeAlias = Union[SearchFilterCondition, object] + +FiltersSearchFilterInputNone: TypeAlias = Union[SearchFilterCondition, object] + + +class FiltersSearchFilterInput(TypedDict, total=False): + """Represents a filter with AND, OR, and NOT conditions.""" + + all: Optional[Iterable[FiltersSearchFilterInputAll]] + """List of conditions or filters to be ANDed together""" + + any: Optional[Iterable[FiltersSearchFilterInputAny]] + """List of conditions or filters to be ORed together""" + + none: Optional[Iterable[FiltersSearchFilterInputNone]] + """List of conditions or filters to be NOTed""" + + +FiltersUnionMember2SearchFilterInputAll: TypeAlias = Union[SearchFilterCondition, object] + +FiltersUnionMember2SearchFilterInputAny: TypeAlias = Union[SearchFilterCondition, object] + +FiltersUnionMember2SearchFilterInputNone: TypeAlias = Union[SearchFilterCondition, object] + + +class FiltersUnionMember2SearchFilterInput(TypedDict, total=False): + """Represents a filter with AND, OR, and NOT conditions.""" + + all: Optional[Iterable[FiltersUnionMember2SearchFilterInputAll]] + """List of conditions or filters to be ANDed together""" + + any: Optional[Iterable[FiltersUnionMember2SearchFilterInputAny]] + """List of conditions or filters to be ORed together""" + + none: Optional[Iterable[FiltersUnionMember2SearchFilterInputNone]] + """List of conditions or filters to be NOTed""" -FiltersUnionMember2: TypeAlias = Union["SearchFilter", SearchFilterCondition] -Filters: TypeAlias = Union["SearchFilter", SearchFilterCondition, Iterable[FiltersUnionMember2]] 
+FiltersUnionMember2: TypeAlias = Union[FiltersUnionMember2SearchFilterInput, SearchFilterCondition] -from .shared_params.search_filter import SearchFilter +Filters: TypeAlias = Union[FiltersSearchFilterInput, SearchFilterCondition, Iterable[FiltersUnionMember2]] diff --git a/src/mixedbread/types/store_question_answering_params.py b/src/mixedbread/types/store_question_answering_params.py index e2eaefb8..c8c7eab0 100644 --- a/src/mixedbread/types/store_question_answering_params.py +++ b/src/mixedbread/types/store_question_answering_params.py @@ -9,18 +9,25 @@ from .store_chunk_search_options_param import StoreChunkSearchOptionsParam from .shared_params.search_filter_condition import SearchFilterCondition -__all__ = ["StoreQuestionAnsweringParams", "Filters", "FiltersUnionMember2", "QaOptions"] +__all__ = [ + "StoreQuestionAnsweringParams", + "Filters", + "FiltersSearchFilterInput", + "FiltersSearchFilterInputAll", + "FiltersSearchFilterInputAny", + "FiltersSearchFilterInputNone", + "FiltersUnionMember2", + "FiltersUnionMember2SearchFilterInput", + "FiltersUnionMember2SearchFilterInputAll", + "FiltersUnionMember2SearchFilterInputAny", + "FiltersUnionMember2SearchFilterInputNone", + "QaOptions", +] class StoreQuestionAnsweringParams(TypedDict, total=False): - query: str - """Question to answer. - - If not provided, the question will be extracted from the passed messages. - """ - store_identifiers: Required[SequenceNotStr[str]] - """IDs or names of stores to search""" + """IDs or names of stores""" top_k: int """Number of results to return""" @@ -31,6 +38,12 @@ class StoreQuestionAnsweringParams(TypedDict, total=False): file_ids: Union[Iterable[object], SequenceNotStr[str], None] """Optional list of file IDs to filter chunks by (inclusion filter)""" + query: str + """Question to answer. + + If not provided, the question will be extracted from the passed messages. 
+ """ + search_options: StoreChunkSearchOptionsParam """Search configuration options""" @@ -47,9 +60,49 @@ class StoreQuestionAnsweringParams(TypedDict, total=False): """Question answering configuration options""" -FiltersUnionMember2: TypeAlias = Union["SearchFilter", SearchFilterCondition] +FiltersSearchFilterInputAll: TypeAlias = Union[SearchFilterCondition, object] + +FiltersSearchFilterInputAny: TypeAlias = Union[SearchFilterCondition, object] + +FiltersSearchFilterInputNone: TypeAlias = Union[SearchFilterCondition, object] + + +class FiltersSearchFilterInput(TypedDict, total=False): + """Represents a filter with AND, OR, and NOT conditions.""" -Filters: TypeAlias = Union["SearchFilter", SearchFilterCondition, Iterable[FiltersUnionMember2]] + all: Optional[Iterable[FiltersSearchFilterInputAll]] + """List of conditions or filters to be ANDed together""" + + any: Optional[Iterable[FiltersSearchFilterInputAny]] + """List of conditions or filters to be ORed together""" + + none: Optional[Iterable[FiltersSearchFilterInputNone]] + """List of conditions or filters to be NOTed""" + + +FiltersUnionMember2SearchFilterInputAll: TypeAlias = Union[SearchFilterCondition, object] + +FiltersUnionMember2SearchFilterInputAny: TypeAlias = Union[SearchFilterCondition, object] + +FiltersUnionMember2SearchFilterInputNone: TypeAlias = Union[SearchFilterCondition, object] + + +class FiltersUnionMember2SearchFilterInput(TypedDict, total=False): + """Represents a filter with AND, OR, and NOT conditions.""" + + all: Optional[Iterable[FiltersUnionMember2SearchFilterInputAll]] + """List of conditions or filters to be ANDed together""" + + any: Optional[Iterable[FiltersUnionMember2SearchFilterInputAny]] + """List of conditions or filters to be ORed together""" + + none: Optional[Iterable[FiltersUnionMember2SearchFilterInputNone]] + """List of conditions or filters to be NOTed""" + + +FiltersUnionMember2: TypeAlias = Union[FiltersUnionMember2SearchFilterInput, SearchFilterCondition] + 
+Filters: TypeAlias = Union[FiltersSearchFilterInput, SearchFilterCondition, Iterable[FiltersUnionMember2]] class QaOptions(TypedDict, total=False): @@ -60,6 +113,3 @@ class QaOptions(TypedDict, total=False): multimodal: bool """Whether to use multimodal context""" - - -from .shared_params.search_filter import SearchFilter diff --git a/src/mixedbread/types/store_search_params.py b/src/mixedbread/types/store_search_params.py index 7a526d7d..1bd8e6e2 100644 --- a/src/mixedbread/types/store_search_params.py +++ b/src/mixedbread/types/store_search_params.py @@ -11,15 +11,25 @@ from .extractions.image_url_input_param import ImageURLInputParam from .shared_params.search_filter_condition import SearchFilterCondition -__all__ = ["StoreSearchParams", "Query", "Filters", "FiltersUnionMember2"] +__all__ = [ + "StoreSearchParams", + "Filters", + "FiltersSearchFilterInput", + "FiltersSearchFilterInputAll", + "FiltersSearchFilterInputAny", + "FiltersSearchFilterInputNone", + "FiltersUnionMember2", + "FiltersUnionMember2SearchFilterInput", + "FiltersUnionMember2SearchFilterInputAll", + "FiltersUnionMember2SearchFilterInputAny", + "FiltersUnionMember2SearchFilterInputNone", + "Query", +] class StoreSearchParams(TypedDict, total=False): - query: Required[Query] - """Search query text""" - store_identifiers: Required[SequenceNotStr[str]] - """IDs or names of stores to search""" + """IDs or names of stores""" top_k: int """Number of results to return""" @@ -30,14 +40,55 @@ class StoreSearchParams(TypedDict, total=False): file_ids: Union[Iterable[object], SequenceNotStr[str], None] """Optional list of file IDs to filter chunks by (inclusion filter)""" + query: Required[Query] + """Search query text""" + search_options: StoreChunkSearchOptionsParam """Search configuration options""" -Query: TypeAlias = Union[str, ImageURLInputParam, TextInputParam] +FiltersSearchFilterInputAll: TypeAlias = Union[SearchFilterCondition, object] + +FiltersSearchFilterInputAny: TypeAlias = 
Union[SearchFilterCondition, object] + +FiltersSearchFilterInputNone: TypeAlias = Union[SearchFilterCondition, object] + + +class FiltersSearchFilterInput(TypedDict, total=False): + """Represents a filter with AND, OR, and NOT conditions.""" + + all: Optional[Iterable[FiltersSearchFilterInputAll]] + """List of conditions or filters to be ANDed together""" + + any: Optional[Iterable[FiltersSearchFilterInputAny]] + """List of conditions or filters to be ORed together""" -FiltersUnionMember2: TypeAlias = Union["SearchFilter", SearchFilterCondition] + none: Optional[Iterable[FiltersSearchFilterInputNone]] + """List of conditions or filters to be NOTed""" -Filters: TypeAlias = Union["SearchFilter", SearchFilterCondition, Iterable[FiltersUnionMember2]] -from .shared_params.search_filter import SearchFilter +FiltersUnionMember2SearchFilterInputAll: TypeAlias = Union[SearchFilterCondition, object] + +FiltersUnionMember2SearchFilterInputAny: TypeAlias = Union[SearchFilterCondition, object] + +FiltersUnionMember2SearchFilterInputNone: TypeAlias = Union[SearchFilterCondition, object] + + +class FiltersUnionMember2SearchFilterInput(TypedDict, total=False): + """Represents a filter with AND, OR, and NOT conditions.""" + + all: Optional[Iterable[FiltersUnionMember2SearchFilterInputAll]] + """List of conditions or filters to be ANDed together""" + + any: Optional[Iterable[FiltersUnionMember2SearchFilterInputAny]] + """List of conditions or filters to be ORed together""" + + none: Optional[Iterable[FiltersUnionMember2SearchFilterInputNone]] + """List of conditions or filters to be NOTed""" + + +FiltersUnionMember2: TypeAlias = Union[FiltersUnionMember2SearchFilterInput, SearchFilterCondition] + +Filters: TypeAlias = Union[FiltersSearchFilterInput, SearchFilterCondition, Iterable[FiltersUnionMember2]] + +Query: TypeAlias = Union[str, ImageURLInputParam, TextInputParam] diff --git a/src/mixedbread/types/store_search_response.py b/src/mixedbread/types/store_search_response.py index 
6ab757c5..ad42424f 100644 --- a/src/mixedbread/types/store_search_response.py +++ b/src/mixedbread/types/store_search_response.py @@ -1,8 +1,11 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List, Union, Optional +import builtins +from typing import TYPE_CHECKING, Dict, List, Union, Optional from typing_extensions import Literal, Annotated, TypeAlias +from pydantic import Field as FieldInfo + from .._utils import PropertyInfo from .._models import BaseModel from .scored_text_input_chunk import ScoredTextInputChunk @@ -24,3 +27,15 @@ class StoreSearchResponse(BaseModel): data: List[Data] """The list of scored store file chunks""" + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, builtins.object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> builtins.object: ... 
+ else: + __pydantic_extra__: Dict[str, builtins.object] diff --git a/src/mixedbread/types/stores/audio_url_input_chunk.py b/src/mixedbread/types/stores/audio_url_input_chunk.py index 79cdd3c3..830a577b 100644 --- a/src/mixedbread/types/stores/audio_url_input_chunk.py +++ b/src/mixedbread/types/stores/audio_url_input_chunk.py @@ -50,6 +50,9 @@ class AudioURLInputChunk(BaseModel): transcription: Optional[str] = None """speech recognition (sr) text of the audio""" + summary: Optional[str] = None + """summary of the audio""" + audio_url: Optional[AudioURL] = None """Model for audio URL validation.""" diff --git a/src/mixedbread/types/stores/file_list_params.py b/src/mixedbread/types/stores/file_list_params.py index 2089f9cf..8209f8bb 100644 --- a/src/mixedbread/types/stores/file_list_params.py +++ b/src/mixedbread/types/stores/file_list_params.py @@ -8,7 +8,19 @@ from .store_file_status import StoreFileStatus from ..shared_params.search_filter_condition import SearchFilterCondition -__all__ = ["FileListParams", "MetadataFilter", "MetadataFilterUnionMember2"] +__all__ = [ + "FileListParams", + "MetadataFilter", + "MetadataFilterSearchFilterInput", + "MetadataFilterSearchFilterInputAll", + "MetadataFilterSearchFilterInputAny", + "MetadataFilterSearchFilterInputNone", + "MetadataFilterUnionMember2", + "MetadataFilterUnionMember2SearchFilterInput", + "MetadataFilterUnionMember2SearchFilterInputAll", + "MetadataFilterUnionMember2SearchFilterInputAny", + "MetadataFilterUnionMember2SearchFilterInputNone", +] class FileListParams(TypedDict, total=False): @@ -40,8 +52,48 @@ class FileListParams(TypedDict, total=False): """Search query for fuzzy matching over name and external_id fields""" -MetadataFilterUnionMember2: TypeAlias = Union["SearchFilter", SearchFilterCondition] +MetadataFilterSearchFilterInputAll: TypeAlias = Union[SearchFilterCondition, object] -MetadataFilter: TypeAlias = Union["SearchFilter", SearchFilterCondition, Iterable[MetadataFilterUnionMember2]] 
+MetadataFilterSearchFilterInputAny: TypeAlias = Union[SearchFilterCondition, object] -from ..shared_params.search_filter import SearchFilter +MetadataFilterSearchFilterInputNone: TypeAlias = Union[SearchFilterCondition, object] + + +class MetadataFilterSearchFilterInput(TypedDict, total=False): + """Represents a filter with AND, OR, and NOT conditions.""" + + all: Optional[Iterable[MetadataFilterSearchFilterInputAll]] + """List of conditions or filters to be ANDed together""" + + any: Optional[Iterable[MetadataFilterSearchFilterInputAny]] + """List of conditions or filters to be ORed together""" + + none: Optional[Iterable[MetadataFilterSearchFilterInputNone]] + """List of conditions or filters to be NOTed""" + + +MetadataFilterUnionMember2SearchFilterInputAll: TypeAlias = Union[SearchFilterCondition, object] + +MetadataFilterUnionMember2SearchFilterInputAny: TypeAlias = Union[SearchFilterCondition, object] + +MetadataFilterUnionMember2SearchFilterInputNone: TypeAlias = Union[SearchFilterCondition, object] + + +class MetadataFilterUnionMember2SearchFilterInput(TypedDict, total=False): + """Represents a filter with AND, OR, and NOT conditions.""" + + all: Optional[Iterable[MetadataFilterUnionMember2SearchFilterInputAll]] + """List of conditions or filters to be ANDed together""" + + any: Optional[Iterable[MetadataFilterUnionMember2SearchFilterInputAny]] + """List of conditions or filters to be ORed together""" + + none: Optional[Iterable[MetadataFilterUnionMember2SearchFilterInputNone]] + """List of conditions or filters to be NOTed""" + + +MetadataFilterUnionMember2: TypeAlias = Union[MetadataFilterUnionMember2SearchFilterInput, SearchFilterCondition] + +MetadataFilter: TypeAlias = Union[ + MetadataFilterSearchFilterInput, SearchFilterCondition, Iterable[MetadataFilterUnionMember2] +] diff --git a/tests/api_resources/stores/test_files.py b/tests/api_resources/stores/test_files.py index 7baab6c8..83700520 100644 --- a/tests/api_resources/stores/test_files.py +++ 
b/tests/api_resources/stores/test_files.py @@ -207,9 +207,42 @@ def test_method_list_with_all_params(self, client: Mixedbread) -> None: include_total=False, statuses=["pending"], metadata_filter={ - "all": [{}, {}], - "any": [{}, {}], - "none": [{}, {}], + "all": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], + "any": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], + "none": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], }, q="x", ) @@ -486,9 +519,42 @@ async def test_method_list_with_all_params(self, async_client: AsyncMixedbread) include_total=False, statuses=["pending"], metadata_filter={ - "all": [{}, {}], - "any": [{}, {}], - "none": [{}, {}], + "all": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], + "any": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], + "none": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], }, q="x", ) diff --git a/tests/api_resources/test_stores.py b/tests/api_resources/test_stores.py index 39988178..4a9335cf 100644 --- a/tests/api_resources/test_stores.py +++ b/tests/api_resources/test_stores.py @@ -245,15 +245,48 @@ def test_method_metadata_facets(self, client: Mixedbread) -> None: @parametrize def test_method_metadata_facets_with_all_params(self, client: Mixedbread) -> None: store = client.stores.metadata_facets( - query="how to configure SSL", store_identifiers=["string"], top_k=1, filters={ - "all": [{}, {}], - "any": [{}, {}], - "none": [{}, {}], + "all": [ + { + "key": "price", + "value": "100", + 
"operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], + "any": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], + "none": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], }, file_ids=["123e4567-e89b-12d3-a456-426614174000", "123e4567-e89b-12d3-a456-426614174001"], + query="how to configure SSL", search_options={ "score_threshold": 0, "rewrite_query": True, @@ -263,6 +296,9 @@ def test_method_metadata_facets_with_all_params(self, client: Mixedbread) -> Non "apply_search_rules": True, }, facets=["string"], + max_fields=1, + max_values_per_field=1, + max_files=1, ) assert_matches_type(StoreMetadataFacetsResponse, store, path=["response"]) @@ -300,15 +336,48 @@ def test_method_question_answering(self, client: Mixedbread) -> None: @parametrize def test_method_question_answering_with_all_params(self, client: Mixedbread) -> None: store = client.stores.question_answering( - query="x", store_identifiers=["string"], top_k=1, filters={ - "all": [{}, {}], - "any": [{}, {}], - "none": [{}, {}], + "all": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], + "any": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], + "none": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], }, file_ids=["123e4567-e89b-12d3-a456-426614174000", "123e4567-e89b-12d3-a456-426614174001"], + query="x", search_options={ "score_threshold": 0, "rewrite_query": True, @@ -353,23 +422,56 @@ def test_streaming_response_question_answering(self, client: Mixedbread) -> None @parametrize def test_method_search(self, client: Mixedbread) -> None: store 
= client.stores.search( - query="how to configure SSL", store_identifiers=["string"], + query="how to configure SSL", ) assert_matches_type(StoreSearchResponse, store, path=["response"]) @parametrize def test_method_search_with_all_params(self, client: Mixedbread) -> None: store = client.stores.search( - query="how to configure SSL", store_identifiers=["string"], top_k=1, filters={ - "all": [{}, {}], - "any": [{}, {}], - "none": [{}, {}], + "all": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], + "any": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], + "none": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], }, file_ids=["123e4567-e89b-12d3-a456-426614174000", "123e4567-e89b-12d3-a456-426614174001"], + query="how to configure SSL", search_options={ "score_threshold": 0, "rewrite_query": True, @@ -384,8 +486,8 @@ def test_method_search_with_all_params(self, client: Mixedbread) -> None: @parametrize def test_raw_response_search(self, client: Mixedbread) -> None: response = client.stores.with_raw_response.search( - query="how to configure SSL", store_identifiers=["string"], + query="how to configure SSL", ) assert response.is_closed is True @@ -396,8 +498,8 @@ def test_raw_response_search(self, client: Mixedbread) -> None: @parametrize def test_streaming_response_search(self, client: Mixedbread) -> None: with client.stores.with_streaming_response.search( - query="how to configure SSL", store_identifiers=["string"], + query="how to configure SSL", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -634,15 +736,48 @@ async def test_method_metadata_facets(self, async_client: AsyncMixedbread) -> No @parametrize async def 
test_method_metadata_facets_with_all_params(self, async_client: AsyncMixedbread) -> None: store = await async_client.stores.metadata_facets( - query="how to configure SSL", store_identifiers=["string"], top_k=1, filters={ - "all": [{}, {}], - "any": [{}, {}], - "none": [{}, {}], + "all": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], + "any": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], + "none": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], }, file_ids=["123e4567-e89b-12d3-a456-426614174000", "123e4567-e89b-12d3-a456-426614174001"], + query="how to configure SSL", search_options={ "score_threshold": 0, "rewrite_query": True, @@ -652,6 +787,9 @@ async def test_method_metadata_facets_with_all_params(self, async_client: AsyncM "apply_search_rules": True, }, facets=["string"], + max_fields=1, + max_values_per_field=1, + max_files=1, ) assert_matches_type(StoreMetadataFacetsResponse, store, path=["response"]) @@ -689,15 +827,48 @@ async def test_method_question_answering(self, async_client: AsyncMixedbread) -> @parametrize async def test_method_question_answering_with_all_params(self, async_client: AsyncMixedbread) -> None: store = await async_client.stores.question_answering( - query="x", store_identifiers=["string"], top_k=1, filters={ - "all": [{}, {}], - "any": [{}, {}], - "none": [{}, {}], + "all": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], + "any": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], + "none": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + 
"operator": "eq", + }, + ], }, file_ids=["123e4567-e89b-12d3-a456-426614174000", "123e4567-e89b-12d3-a456-426614174001"], + query="x", search_options={ "score_threshold": 0, "rewrite_query": True, @@ -742,23 +913,56 @@ async def test_streaming_response_question_answering(self, async_client: AsyncMi @parametrize async def test_method_search(self, async_client: AsyncMixedbread) -> None: store = await async_client.stores.search( - query="how to configure SSL", store_identifiers=["string"], + query="how to configure SSL", ) assert_matches_type(StoreSearchResponse, store, path=["response"]) @parametrize async def test_method_search_with_all_params(self, async_client: AsyncMixedbread) -> None: store = await async_client.stores.search( - query="how to configure SSL", store_identifiers=["string"], top_k=1, filters={ - "all": [{}, {}], - "any": [{}, {}], - "none": [{}, {}], + "all": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], + "any": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], + "none": [ + { + "key": "price", + "value": "100", + "operator": "gt", + }, + { + "key": "color", + "value": "red", + "operator": "eq", + }, + ], }, file_ids=["123e4567-e89b-12d3-a456-426614174000", "123e4567-e89b-12d3-a456-426614174001"], + query="how to configure SSL", search_options={ "score_threshold": 0, "rewrite_query": True, @@ -773,8 +977,8 @@ async def test_method_search_with_all_params(self, async_client: AsyncMixedbread @parametrize async def test_raw_response_search(self, async_client: AsyncMixedbread) -> None: response = await async_client.stores.with_raw_response.search( - query="how to configure SSL", store_identifiers=["string"], + query="how to configure SSL", ) assert response.is_closed is True @@ -785,8 +989,8 @@ async def test_raw_response_search(self, async_client: AsyncMixedbread) -> None: 
@parametrize async def test_streaming_response_search(self, async_client: AsyncMixedbread) -> None: async with async_client.stores.with_streaming_response.search( - query="how to configure SSL", store_identifiers=["string"], + query="how to configure SSL", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/test_extract_files.py b/tests/test_extract_files.py index 4a252515..06c03a7c 100644 --- a/tests/test_extract_files.py +++ b/tests/test_extract_files.py @@ -4,7 +4,7 @@ import pytest -from mixedbread._types import FileTypes +from mixedbread._types import FileTypes, ArrayFormat from mixedbread._utils import extract_files @@ -37,10 +37,7 @@ def test_multiple_files() -> None: def test_top_level_file_array() -> None: query = {"files": [b"file one", b"file two"], "title": "hello"} - assert extract_files(query, paths=[["files", ""]]) == [ - ("files[]", b"file one"), - ("files[]", b"file two"), - ] + assert extract_files(query, paths=[["files", ""]]) == [("files[]", b"file one"), ("files[]", b"file two")] assert query == {"title": "hello"} @@ -71,3 +68,24 @@ def test_ignores_incorrect_paths( expected: list[tuple[str, FileTypes]], ) -> None: assert extract_files(query, paths=paths) == expected + + +@pytest.mark.parametrize( + "array_format,expected_top_level,expected_nested", + [ + ("brackets", [("files[]", b"a"), ("files[]", b"b")], [("items[][file]", b"a"), ("items[][file]", b"b")]), + ("repeat", [("files", b"a"), ("files", b"b")], [("items[file]", b"a"), ("items[file]", b"b")]), + ("comma", [("files", b"a"), ("files", b"b")], [("items[file]", b"a"), ("items[file]", b"b")]), + ("indices", [("files[0]", b"a"), ("files[1]", b"b")], [("items[0][file]", b"a"), ("items[1][file]", b"b")]), + ], +) +def test_array_format_controls_file_field_names( + array_format: ArrayFormat, + expected_top_level: list[tuple[str, FileTypes]], + expected_nested: list[tuple[str, FileTypes]], +) -> None: + 
top_level = {"files": [b"a", b"b"]} + assert extract_files(top_level, paths=[["files", ""]], array_format=array_format) == expected_top_level + + nested = {"items": [{"file": b"a"}, {"file": b"b"}]} + assert extract_files(nested, paths=[["items", "", "file"]], array_format=array_format) == expected_nested diff --git a/tests/test_files.py b/tests/test_files.py index 8e51c40f..71c9c289 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -131,7 +131,7 @@ def test_extract_files_does_not_mutate_original_nested_array_path(self) -> None: copied = deepcopy_with_paths(original, [["items", "", "file"]]) extracted = extract_files(copied, paths=[["items", "", "file"]]) - assert extracted == [("items[][file]", file1), ("items[][file]", file2)] + assert [entry for _, entry in extracted] == [file1, file2] assert original == { "items": [ {"file": file1, "extra": 1}, diff --git a/tests/test_models.py b/tests/test_models.py index a8c78064..cc9f2963 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,7 +1,8 @@ import json -from typing import TYPE_CHECKING, Any, Dict, List, Union, Optional, cast +from typing import TYPE_CHECKING, Any, Dict, List, Union, Iterable, Optional, cast from datetime import datetime, timezone -from typing_extensions import Literal, Annotated, TypeAliasType +from collections import deque +from typing_extensions import Literal, Annotated, TypedDict, TypeAliasType import pytest import pydantic @@ -9,7 +10,7 @@ from mixedbread._utils import PropertyInfo from mixedbread._compat import PYDANTIC_V1, parse_obj, model_dump, model_json -from mixedbread._models import DISCRIMINATOR_CACHE, BaseModel, construct_type +from mixedbread._models import DISCRIMINATOR_CACHE, BaseModel, EagerIterable, construct_type class BasicModel(BaseModel): @@ -961,3 +962,56 @@ def __getattr__(self, attr: str) -> Item: ... assert model.a.prop == 1 assert isinstance(model.a, Item) assert model.other == "foo" + + +# NOTE: Workaround for Pydantic Iterable behavior. 
+# Iterable fields are replaced with a ValidatorIterator and may be consumed +# during serialization, which can cause subsequent dumps to return empty data. +# See: https://github.com/pydantic/pydantic/issues/9541 +@pytest.mark.parametrize( + "data, expected_validated", + [ + ([1, 2, 3], [1, 2, 3]), + ((1, 2, 3), (1, 2, 3)), + (set([1, 2, 3]), set([1, 2, 3])), + (iter([1, 2, 3]), [1, 2, 3]), + ([], []), + ((x for x in [1, 2, 3]), [1, 2, 3]), + (map(lambda x: x, [1, 2, 3]), [1, 2, 3]), + (frozenset([1, 2, 3]), frozenset([1, 2, 3])), + (deque([1, 2, 3]), deque([1, 2, 3])), + ], + ids=["list", "tuple", "set", "iterator", "empty", "generator", "map", "frozenset", "deque"], +) +@pytest.mark.skipif(PYDANTIC_V1, reason="this is only supported in pydantic v2") +def test_iterable_construction(data: Iterable[int], expected_validated: Iterable[int]) -> None: + class TypeWithIterable(TypedDict): + items: EagerIterable[int] + + class Model(BaseModel): + data: TypeWithIterable + + m = Model.model_validate({"data": {"items": data}}) + assert m.data["items"] == expected_validated + + # Verify repeated dumps don't lose data (the original bug) + assert m.model_dump()["data"]["items"] == list(expected_validated) + assert m.model_dump()["data"]["items"] == list(expected_validated) + + +@pytest.mark.skipif(PYDANTIC_V1, reason="this is only supported in pydantic v2") +def test_iterable_construction_str_falls_back_to_list() -> None: + # str is iterable (over chars), but str(list_of_chars) produces the list's repr + # rather than reconstructing a string from items. We special-case str to fall + # back to list instead of attempting reconstruction. 
+ class TypeWithIterable(TypedDict): + items: EagerIterable[str] + + class Model(BaseModel): + data: TypeWithIterable + + m = Model.model_validate({"data": {"items": "hello"}}) + + # falls back to list of chars rather than calling str(["h", "e", "l", "l", "o"]) + assert m.data["items"] == ["h", "e", "l", "l", "o"] + assert m.model_dump()["data"]["items"] == ["h", "e", "l", "l", "o"]