diff --git a/UPSERT_MATCH_EXAMPLE.md b/UPSERT_MATCH_EXAMPLE.md new file mode 100644 index 0000000..154f286 --- /dev/null +++ b/UPSERT_MATCH_EXAMPLE.md @@ -0,0 +1,48 @@ +# Upsert Match Parameter Example + +The `match` parameter in the `upsert` method allows you to control whether the operation should only create new records or only update existing ones. + +**Note:** The `match` parameter is only supported for **individual mode**, not batch mode. + +## Usage + +### Standard Upsert (Default Behavior) +```python +# Creates new records or updates existing ones +entity.upsert(data, mode="individual") +``` + +### Prevent Create (Only Update) +```python +# Only updates existing records, will fail if record doesn't exist +# Uses If-Match: * header +# Only works with mode="individual" +entity.upsert(data, mode="individual", match="prevent_create") +``` + +### Prevent Update (Only Create) +```python +# Only creates new records, will fail if record already exists +# Uses If-None-Match: * header +# Only works with mode="individual" +entity.upsert(data, mode="individual", match="prevent_update") +``` + +## Batch Mode +The `match` parameter is **not supported** for batch mode operations. Attempting to use it will raise a `DataverseError`: + +```python +# This will raise an error +entity.upsert(data, mode="batch", match="prevent_create") # Error! 
+``` + +For batch operations, use standard upsert behavior without the `match` parameter: + +```python +# Standard batch upsert (create or update) +entity.upsert(data, mode="batch") +``` + +## Reference +For more details on the underlying Dataverse Web API behavior, see: +https://learn.microsoft.com/en-us/power-apps/developer/data-platform/use-upsert-insert-update-record diff --git a/dataverse_api/_api.py b/dataverse_api/_api.py index 17f32c1..8ec7687 100644 --- a/dataverse_api/_api.py +++ b/dataverse_api/_api.py @@ -98,7 +98,10 @@ def _api_call( try: resp.raise_for_status() except requests.HTTPError: - error_msg = resp.json()["error"]["message"].splitlines()[0] + try: + error_msg = resp.json()["error"]["message"].splitlines()[0] + except (ValueError, KeyError, IndexError): + error_msg = resp.text raise DataverseAPIError(message=f"{method} request failed: {error_msg}", response=resp) from None return resp diff --git a/dataverse_api/entity.py b/dataverse_api/entity.py index ca2cde3..9b0b175 100644 --- a/dataverse_api/entity.py +++ b/dataverse_api/entity.py @@ -699,17 +699,29 @@ def delete_columns( raise DataverseModeError(mode, "individual", "batch") def __upsert_singles( - self, data: Collection[Mapping[str, Any]], keys: Iterable[str], is_primary_id: bool, threading: bool + self, + data: Collection[Mapping[str, Any]], + keys: Iterable[str], + is_primary_id: bool, + threading: bool, + match: Literal["prevent_create", "prevent_update"] | None = None, ) -> list[requests.Response]: """ Upsert row by individual requests. 
""" check_altkey_support(keys=keys, data=data) + headers: dict[str, str] | None = None + if match == "prevent_create": + headers = {"If-Match": "*"} + elif match == "prevent_update": + headers = {"If-None-Match": "*"} + calls = [ APICommand( method=RequestMethod.PATCH, url=f"{self.entity_set_name}({key})", json=payload, + headers=headers, ) for key, payload in transform_upsert_data(data=data, keys=keys, is_primary_id=is_primary_id) ] @@ -725,6 +737,7 @@ def upsert( mode: Literal["individual"] = "individual", altkey_name: str | None = None, threading: bool = False, + match: Literal["prevent_create", "prevent_update"] | None = None, ) -> list[requests.Response]: ... @overload @@ -746,6 +759,7 @@ def upsert( altkey_name: str | None = None, threading: bool = False, batch_size: int | None = None, + match: Literal["prevent_create", "prevent_update"] | None = None, ) -> list[requests.Response]: """ Upsert data into Entity. @@ -764,7 +778,16 @@ def upsert( batch_size : int Optional override if batch mode is specified, useful for tuning workloads if 429s or timeouts occur. + match : Literal["prevent_create", "prevent_update"] | None + Controls upsert behavior using If-Match / If-None-Match headers. + Only supported for individual mode, not batch mode. + - None (default): Standard upsert behavior (create or update) + - "prevent_create": Only update existing records (If-Match: *) + - "prevent_update": Only create new records (If-None-Match: *) """ + if match is not None and mode == "batch": + raise DataverseError("The 'match' parameter is only supported for individual mode, not batch mode.") + if altkey_name is not None: try: key_columns = self.alternate_keys[altkey_name] @@ -782,7 +805,9 @@ def upsert( if mode == "individual": logging.debug("%d rows to upsert. 
Using individual upserts.", len(data)) - return self.__upsert_singles(data=data, keys=key_columns, is_primary_id=is_primary_id, threading=threading) + return self.__upsert_singles( + data=data, keys=key_columns, is_primary_id=is_primary_id, threading=threading, match=match + ) if mode == "batch": logging.debug("%d rows to upsert. Using batch upserts.", len(data)) diff --git a/dataverse_api/utils/batching.py b/dataverse_api/utils/batching.py index 6bde96b..57c3eef 100644 --- a/dataverse_api/utils/batching.py +++ b/dataverse_api/utils/batching.py @@ -79,7 +79,6 @@ def __post_init__(self) -> None: self.content_type += "; type=entry" if self.headers: - print("Extra!") self.extra_header = "\n".join([f"{k}: {v}" for k, v in self.headers.items()]) self.url = encode_altkeys(self.url) diff --git a/tests/test_entity.py b/tests/test_entity.py index 66de9e3..bbbff4b 100644 --- a/tests/test_entity.py +++ b/tests/test_entity.py @@ -716,6 +716,67 @@ def test_entity_upsert_bad_altkey(entity: DataverseEntity): entity.upsert([{"data": 1}], altkey_name="foo") +def test_entity_upsert_individual_prevent_create( + entity: DataverseEntity, + primary_id: str, + mocked_responses: responses.RequestsMock, +): + """Test upsert with prevent_create (If-Match: *) - only update existing records.""" + # Setup + data = [{primary_id: str(uuid4()), "test_val": random.randint(1, 10)} for _ in range(4)] + + for row in data: + id = row[primary_id] + payload = {k: v for k, v in row.items() if k != primary_id} + + mocked_responses.patch( + url=f"{entity._endpoint}{entity.entity_set_name}({id})", + match=[json_params_matcher(payload), header_matcher({"If-Match": "*"})], + status=204, + ) + + resp = entity.upsert(data, mode="individual", match="prevent_create") + + for row in resp: + assert row.status_code == 204 + + +def test_entity_upsert_individual_prevent_update( + entity: DataverseEntity, + primary_id: str, + mocked_responses: responses.RequestsMock, +): + """Test upsert with prevent_update 
(If-None-Match: *) - only create new records.""" + # Setup + data = [{primary_id: str(uuid4()), "test_val": random.randint(1, 10)} for _ in range(4)] + + for row in data: + id = row[primary_id] + payload = {k: v for k, v in row.items() if k != primary_id} + + mocked_responses.patch( + url=f"{entity._endpoint}{entity.entity_set_name}({id})", + match=[json_params_matcher(payload), header_matcher({"If-None-Match": "*"})], + status=204, + ) + + resp = entity.upsert(data, mode="individual", match="prevent_update") + + for row in resp: + assert row.status_code == 204 + + +def test_entity_upsert_batch_match_not_supported(entity: DataverseEntity, primary_id: str): + """Test that using match parameter with batch mode raises an error.""" + data = [{primary_id: str(uuid4()), "test_val": 1}] + + with pytest.raises(DataverseError, match=r".*match.*only supported for individual mode.*"): + entity.upsert(data=data, mode="batch", match="prevent_create") # type: ignore + + with pytest.raises(DataverseError, match=r".*match.*only supported for individual mode.*"): + entity.upsert(data=data, mode="batch", match="prevent_update") # type: ignore + + def test_entity_upsert_pandas_dataframe( entity: DataverseEntity, mocked_responses: responses.RequestsMock, primary_id: str ):