From e6003ad402b4b01aced9b6e1e821b485f54b110b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 3 Feb 2026 09:44:40 +0000 Subject: [PATCH 1/7] Initial plan From 493f222d98d25269a5c3a49e38120bc38ba67274 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 3 Feb 2026 09:51:49 +0000 Subject: [PATCH 2/7] Add match parameter to upsert method with prevent_create and prevent_update modes Co-authored-by: MarcusRisanger <69350948+MarcusRisanger@users.noreply.github.com> --- dataverse_api/entity.py | 27 +++++++++- dataverse_api/utils/batching.py | 15 +++++- tests/test_entity.py | 94 +++++++++++++++++++++++++++++++++ 3 files changed, 133 insertions(+), 3 deletions(-) diff --git a/dataverse_api/entity.py b/dataverse_api/entity.py index ca2cde3..4ebb5ef 100644 --- a/dataverse_api/entity.py +++ b/dataverse_api/entity.py @@ -699,17 +699,29 @@ def delete_columns( raise DataverseModeError(mode, "individual", "batch") def __upsert_singles( - self, data: Collection[Mapping[str, Any]], keys: Iterable[str], is_primary_id: bool, threading: bool + self, + data: Collection[Mapping[str, Any]], + keys: Iterable[str], + is_primary_id: bool, + threading: bool, + match: Literal["prevent_create", "prevent_update"] | None = None, ) -> list[requests.Response]: """ Upsert row by individual requests. """ check_altkey_support(keys=keys, data=data) + headers: dict[str, str] | None = None + if match == "prevent_create": + headers = {"If-Match": "*"} + elif match == "prevent_update": + headers = {"If-None-Match": "*"} + calls = [ APICommand( method=RequestMethod.PATCH, url=f"{self.entity_set_name}({key})", json=payload, + headers=headers, ) for key, payload in transform_upsert_data(data=data, keys=keys, is_primary_id=is_primary_id) ] @@ -725,6 +737,7 @@ def upsert( mode: Literal["individual"] = "individual", altkey_name: str | None = None, threading: bool = False, + match: Literal["prevent_create", "prevent_update"] | None = None, ) -> list[requests.Response]: ... @overload @@ -736,6 +749,7 @@ def upsert( altkey_name: str | None = None, threading: bool = False, batch_size: int | None = None, + match: Literal["prevent_create", "prevent_update"] | None = None, ) -> list[requests.Response]: ... def upsert( @@ -746,6 +760,7 @@ def upsert( altkey_name: str | None = None, threading: bool = False, batch_size: int | None = None, + match: Literal["prevent_create", "prevent_update"] | None = None, ) -> list[requests.Response]: """ Upsert data into Entity. @@ -764,6 +779,11 @@ def upsert( batch_size : int Optional override if batch mode is specified, useful for tuning workloads if 429s or timeouts occur. + match : Literal["prevent_create", "prevent_update"] | None + Controls upsert behavior using If-Match headers: + - None (default): Standard upsert behavior (create or update) + - "prevent_create": Only update existing records (If-Match: *) + - "prevent_update": Only create new records (If-None-Match: *) """ if altkey_name is not None: try: @@ -782,7 +802,9 @@ def upsert( if mode == "individual": logging.debug("%d rows to upsert. Using individual upserts.", len(data)) - return self.__upsert_singles(data=data, keys=key_columns, is_primary_id=is_primary_id, threading=threading) + return self.__upsert_singles( + data=data, keys=key_columns, is_primary_id=is_primary_id, threading=threading, match=match + ) if mode == "batch": logging.debug("%d rows to upsert. Using batch upserts.", len(data)) @@ -791,6 +813,7 @@ def upsert( data=data, keys=key_columns, is_primary_id=is_primary_id, + match=match, ) return self._batch_api_call( batch_commands=batch_commands, diff --git a/dataverse_api/utils/batching.py b/dataverse_api/utils/batching.py index 6bde96b..dbe5ffe 100644 --- a/dataverse_api/utils/batching.py +++ b/dataverse_api/utils/batching.py @@ -4,7 +4,7 @@ from dataclasses import dataclass, field from enum import StrEnum from textwrap import dedent -from typing import Any, Collection, Generator, Mapping, MutableMapping, TypeVar +from typing import Any, Collection, Generator, Literal, Mapping, MutableMapping, TypeVar from urllib.parse import urljoin from dataverse_api.errors import DataverseError @@ -209,6 +209,7 @@ def transform_to_batch_for_upsert( data: Collection[MutableMapping[str, Any]], keys: Iterable[str], is_primary_id: bool = False, + match: Literal["prevent_create", "prevent_update"] | None = None, ) -> list[BatchCommand]: """ Transform data payload to upsert batch data. @@ -223,8 +224,19 @@ def transform_to_batch_for_upsert( The keys used to identify unique rows in the dataset. is_id : bool Whether the supplied singular key is the Entity primary ID attribute. + match : Literal["prevent_create", "prevent_update"] | None + Controls upsert behavior using If-Match headers: + - None (default): Standard upsert behavior (create or update) + - "prevent_create": Only update existing records (If-Match: *) + - "prevent_update": Only create new records (If-None-Match: *) """ check_altkey_support(keys=keys, data=data) + headers: dict[str, str] | None = None + if match == "prevent_create": + headers = {"If-Match": "*"} + elif match == "prevent_update": + headers = {"If-None-Match": "*"} + commands = [] for keys, payload in transform_upsert_data(data, keys, is_primary_id): commands.append( @@ -232,6 +244,7 @@ def transform_to_batch_for_upsert( url=f"{url}({keys})", method=RequestMethod.PATCH, data=payload, + headers=headers, ) ) diff --git a/tests/test_entity.py b/tests/test_entity.py index 66de9e3..e052eaa 100644 --- a/tests/test_entity.py +++ b/tests/test_entity.py @@ -716,6 +716,100 @@ def test_entity_upsert_bad_altkey(entity: DataverseEntity): entity.upsert([{"data": 1}], altkey_name="foo") +def test_entity_upsert_individual_prevent_create( + entity: DataverseEntity, + primary_id: str, + mocked_responses: responses.RequestsMock, +): + """Test upsert with prevent_create (If-Match: *) - only update existing records.""" + # Setup + data = [{primary_id: str(uuid4()), "test_val": random.randint(1, 10)} for _ in range(4)] + + for row in data: + id = row[primary_id] + payload = {k: v for k, v in row.items() if k != primary_id} + + mocked_responses.patch( + url=f"{entity._endpoint}{entity.entity_set_name}({id})", + match=[json_params_matcher(payload), header_matcher({"If-Match": "*"})], + status=204, + ) + + resp = entity.upsert(data, mode="individual", match="prevent_create") + + for row in resp: + assert row.status_code == 204 + + +def test_entity_upsert_individual_prevent_update( + entity: DataverseEntity, + primary_id: str, + mocked_responses: responses.RequestsMock, +): + """Test upsert with prevent_update (If-None-Match: *) - only create new records.""" + # Setup + data = [{primary_id: str(uuid4()), "test_val": random.randint(1, 10)} for _ in range(4)] + + for row in data: + id = row[primary_id] + payload = {k: v for k, v in row.items() if k != primary_id} + + mocked_responses.patch( + url=f"{entity._endpoint}{entity.entity_set_name}({id})", + match=[json_params_matcher(payload), header_matcher({"If-None-Match": "*"})], + status=204, + ) + + resp = entity.upsert(data, mode="individual", match="prevent_update") + + for row in resp: + assert row.status_code == 204 + + +def test_entity_upsert_batch_prevent_create( + entity: DataverseEntity, + primary_id: str, + mocked_responses: responses.RequestsMock, +): + """Test batch upsert with prevent_create (If-Match: *) - only update existing records.""" + # Setup + data = [{primary_id: str(uuid4()), "test_val": random.randint(1, 10)} for _ in range(4)] + + mocked_responses.post(url=f"{entity._endpoint}$batch") + + resp = entity.upsert(data=data, mode="batch", match="prevent_create") + + assert isinstance(resp[0].request.body, str) # type checking + elements = resp[0].request.body.split("--batch")[1:-1] + + for out, expected in zip(elements, data): + assert f"{entity.entity_set_name}({expected.pop(primary_id)})" in out + assert "If-Match: *" in out + assert serialize_json(expected) in out + + +def test_entity_upsert_batch_prevent_update( + entity: DataverseEntity, + primary_id: str, + mocked_responses: responses.RequestsMock, +): + """Test batch upsert with prevent_update (If-None-Match: *) - only create new records.""" + # Setup + data = [{primary_id: str(uuid4()), "test_val": random.randint(1, 10)} for _ in range(4)] + + mocked_responses.post(url=f"{entity._endpoint}$batch") + + resp = entity.upsert(data=data, mode="batch", match="prevent_update") + + assert isinstance(resp[0].request.body, str) # type checking + elements = resp[0].request.body.split("--batch")[1:-1] + + for out, expected in zip(elements, data): + assert f"{entity.entity_set_name}({expected.pop(primary_id)})" in out + assert "If-None-Match: *" in out + assert serialize_json(expected) in out + + def test_entity_upsert_pandas_dataframe( entity: DataverseEntity, mocked_responses: responses.RequestsMock, primary_id: str ): From 1491c170050e35af9168725bd16e6d75b7979522 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 3 Feb 2026 09:53:56 +0000 Subject: [PATCH 3/7] Add documentation example for upsert match parameter Co-authored-by: MarcusRisanger <69350948+MarcusRisanger@users.noreply.github.com> --- UPSERT_MATCH_EXAMPLE.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 UPSERT_MATCH_EXAMPLE.md diff --git a/UPSERT_MATCH_EXAMPLE.md b/UPSERT_MATCH_EXAMPLE.md new file mode 100644 index 0000000..ef41dea --- /dev/null +++ b/UPSERT_MATCH_EXAMPLE.md @@ -0,0 +1,40 @@ +# Upsert Match Parameter Example + +The `match` parameter in the `upsert` method allows you to control whether the operation should only create new records or only update existing ones. + +## Usage + +### Standard Upsert (Default Behavior) +```python +# Creates new records or updates existing ones +entity.upsert(data, mode="individual") +``` + +### Prevent Create (Only Update) +```python +# Only updates existing records, will fail if record doesn't exist +# Uses If-Match: * header +entity.upsert(data, mode="individual", match="prevent_create") +``` + +### Prevent Update (Only Create) +```python +# Only creates new records, will fail if record already exists +# Uses If-None-Match: * header +entity.upsert(data, mode="individual", match="prevent_update") +``` + +## Batch Mode Support +The `match` parameter works with both individual and batch modes: + +```python +# Batch mode with prevent_create +entity.upsert(data, mode="batch", match="prevent_create") + +# Batch mode with prevent_update +entity.upsert(data, mode="batch", match="prevent_update") +``` + +## Reference +For more details on the underlying Dataverse Web API behavior, see: +https://learn.microsoft.com/en-us/power-apps/developer/data-platform/use-upsert-insert-update-record From 01732f2197754a195129296a21f6609de52c5e67 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 3 Feb 2026 13:05:49 +0000 Subject: [PATCH 4/7] Restrict match parameter to individual mode only, remove from batch mode Co-authored-by: MarcusRisanger <69350948+MarcusRisanger@users.noreply.github.com> --- UPSERT_MATCH_EXAMPLE.md | 20 +++++++++---- dataverse_api/entity.py | 8 ++++-- dataverse_api/utils/batching.py | 15 +--------- tests/test_entity.py | 51 ++++++--------------------------- 4 files changed, 29 insertions(+), 65 deletions(-) diff --git a/UPSERT_MATCH_EXAMPLE.md b/UPSERT_MATCH_EXAMPLE.md index ef41dea..154f286 100644 --- a/UPSERT_MATCH_EXAMPLE.md +++ b/UPSERT_MATCH_EXAMPLE.md @@ -2,6 +2,8 @@ The `match` parameter in the `upsert` method allows you to control whether the operation should only create new records or only update existing ones. +**Note:** The `match` parameter is only supported for **individual mode**, not batch mode. + ## Usage ### Standard Upsert (Default Behavior) @@ -14,6 +16,7 @@ entity.upsert(data, mode="individual") ```python # Only updates existing records, will fail if record doesn't exist # Uses If-Match: * header +# Only works with mode="individual" entity.upsert(data, mode="individual", match="prevent_create") ``` @@ -21,18 +24,23 @@ entity.upsert(data, mode="individual", match="prevent_create") ```python # Only creates new records, will fail if record already exists # Uses If-None-Match: * header +# Only works with mode="individual" entity.upsert(data, mode="individual", match="prevent_update") ``` -## Batch Mode Support -The `match` parameter works with both individual and batch modes: +## Batch Mode +The `match` parameter is **not supported** for batch mode operations. Attempting to use it will raise a `DataverseError`: ```python -# Batch mode with prevent_create -entity.upsert(data, mode="batch", match="prevent_create") +# This will raise an error +entity.upsert(data, mode="batch", match="prevent_create") # Error! +``` -# Batch mode with prevent_update -entity.upsert(data, mode="batch", match="prevent_update") +For batch operations, use standard upsert behavior without the `match` parameter: + +```python +# Standard batch upsert (create or update) +entity.upsert(data, mode="batch") ``` ## Reference diff --git a/dataverse_api/entity.py b/dataverse_api/entity.py index 4ebb5ef..9b0b175 100644 --- a/dataverse_api/entity.py +++ b/dataverse_api/entity.py @@ -749,7 +749,6 @@ def upsert( altkey_name: str | None = None, threading: bool = False, batch_size: int | None = None, - match: Literal["prevent_create", "prevent_update"] | None = None, ) -> list[requests.Response]: ... def upsert( @@ -780,11 +779,15 @@ def upsert( Optional override if batch mode is specified, useful for tuning workloads if 429s or timeouts occur. match : Literal["prevent_create", "prevent_update"] | None - Controls upsert behavior using If-Match headers: + Controls upsert behavior using If-Match headers. + Only supported for individual mode, not batch mode. - None (default): Standard upsert behavior (create or update) - "prevent_create": Only update existing records (If-Match: *) - "prevent_update": Only create new records (If-None-Match: *) """ + if match is not None and mode == "batch": + raise DataverseError("The 'match' parameter is only supported for individual mode, not batch mode.") + if altkey_name is not None: try: key_columns = self.alternate_keys[altkey_name] @@ -813,7 +816,6 @@ def upsert( data=data, keys=key_columns, is_primary_id=is_primary_id, - match=match, ) return self._batch_api_call( batch_commands=batch_commands, diff --git a/dataverse_api/utils/batching.py b/dataverse_api/utils/batching.py index dbe5ffe..6bde96b 100644 --- a/dataverse_api/utils/batching.py +++ b/dataverse_api/utils/batching.py @@ -4,7 +4,7 @@ from dataclasses import dataclass, field from enum import StrEnum from textwrap import dedent -from typing import Any, Collection, Generator, Literal, Mapping, MutableMapping, TypeVar +from typing import Any, Collection, Generator, Mapping, MutableMapping, TypeVar from urllib.parse import urljoin from dataverse_api.errors import DataverseError @@ -209,7 +209,6 @@ def transform_to_batch_for_upsert( data: Collection[MutableMapping[str, Any]], keys: Iterable[str], is_primary_id: bool = False, - match: Literal["prevent_create", "prevent_update"] | None = None, ) -> list[BatchCommand]: """ Transform data payload to upsert batch data. @@ -224,19 +223,8 @@ def transform_to_batch_for_upsert( The keys used to identify unique rows in the dataset. is_id : bool Whether the supplied singular key is the Entity primary ID attribute. - match : Literal["prevent_create", "prevent_update"] | None - Controls upsert behavior using If-Match headers: - - None (default): Standard upsert behavior (create or update) - - "prevent_create": Only update existing records (If-Match: *) - - "prevent_update": Only create new records (If-None-Match: *) """ check_altkey_support(keys=keys, data=data) - headers: dict[str, str] | None = None - if match == "prevent_create": - headers = {"If-Match": "*"} - elif match == "prevent_update": - headers = {"If-None-Match": "*"} - commands = [] for keys, payload in transform_upsert_data(data, keys, is_primary_id): commands.append( @@ -244,7 +232,6 @@ def transform_to_batch_for_upsert( url=f"{url}({keys})", method=RequestMethod.PATCH, data=payload, - headers=headers, ) ) diff --git a/tests/test_entity.py b/tests/test_entity.py index e052eaa..4a5c4df 100644 --- a/tests/test_entity.py +++ b/tests/test_entity.py @@ -766,48 +766,15 @@ def test_entity_upsert_individual_prevent_update( assert row.status_code == 204 -def test_entity_upsert_batch_prevent_create( - entity: DataverseEntity, - primary_id: str, - mocked_responses: responses.RequestsMock, -): - """Test batch upsert with prevent_create (If-Match: *) - only update existing records.""" - # Setup - data = [{primary_id: str(uuid4()), "test_val": random.randint(1, 10)} for _ in range(4)] - - mocked_responses.post(url=f"{entity._endpoint}$batch") - - resp = entity.upsert(data=data, mode="batch", match="prevent_create") - - assert isinstance(resp[0].request.body, str) # type checking - elements = resp[0].request.body.split("--batch")[1:-1] - - for out, expected in zip(elements, data): - assert f"{entity.entity_set_name}({expected.pop(primary_id)})" in out - assert "If-Match: *" in out - assert serialize_json(expected) in out - - -def test_entity_upsert_batch_prevent_update( - entity: DataverseEntity, - primary_id: str, - mocked_responses: responses.RequestsMock, -): - """Test batch upsert with prevent_update (If-None-Match: *) - only create new records.""" - # Setup - data = [{primary_id: str(uuid4()), "test_val": random.randint(1, 10)} for _ in range(4)] - - mocked_responses.post(url=f"{entity._endpoint}$batch") - - resp = entity.upsert(data=data, mode="batch", match="prevent_update") - - assert isinstance(resp[0].request.body, str) # type checking - elements = resp[0].request.body.split("--batch")[1:-1] - - for out, expected in zip(elements, data): - assert f"{entity.entity_set_name}({expected.pop(primary_id)})" in out - assert "If-None-Match: *" in out - assert serialize_json(expected) in out +def test_entity_upsert_batch_match_not_supported(entity: DataverseEntity, primary_id: str): + """Test that using match parameter with batch mode raises an error.""" + data = [{primary_id: str(uuid4()), "test_val": 1}] + + with pytest.raises(DataverseError, match=r".*match.*only supported for individual mode.*"): + entity.upsert(data=data, mode="batch", match="prevent_create") # type: ignore + + with pytest.raises(DataverseError, match=r".*match.*only supported for individual mode.*"): + entity.upsert(data=data, mode="batch", match="prevent_update") # type: ignore def test_entity_upsert_pandas_dataframe( From 6a040af01fb933adc5a25723afbf20df3da770ce Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 3 Feb 2026 13:06:34 +0000 Subject: [PATCH 5/7] Fix trailing whitespace in test --- tests/test_entity.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_entity.py b/tests/test_entity.py index 4a5c4df..bbbff4b 100644 --- a/tests/test_entity.py +++ b/tests/test_entity.py @@ -769,10 +769,10 @@ def test_entity_upsert_individual_prevent_update( def test_entity_upsert_batch_match_not_supported(entity: DataverseEntity, primary_id: str): """Test that using match parameter with batch mode raises an error.""" data = [{primary_id: str(uuid4()), "test_val": 1}] - + with pytest.raises(DataverseError, match=r".*match.*only supported for individual mode.*"): entity.upsert(data=data, mode="batch", match="prevent_create") # type: ignore - + with pytest.raises(DataverseError, match=r".*match.*only supported for individual mode.*"): entity.upsert(data=data, mode="batch", match="prevent_update") # type: ignore From 22f31b5bc8562c31103f625db64ad961a73ecb16 Mon Sep 17 00:00:00 2001 From: Marcus Risanger <69350948+MarcusRisanger@users.noreply.github.com> Date: Tue, 3 Feb 2026 14:33:58 +0100 Subject: [PATCH 6/7] Better error handling --- dataverse_api/_api.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dataverse_api/_api.py b/dataverse_api/_api.py index 17f32c1..8ec7687 100644 --- a/dataverse_api/_api.py +++ b/dataverse_api/_api.py @@ -98,7 +98,10 @@ def _api_call( try: resp.raise_for_status() except requests.HTTPError: - error_msg = resp.json()["error"]["message"].splitlines()[0] + try: + error_msg = resp.json()["error"]["message"].splitlines()[0] + except (ValueError, KeyError, IndexError): + error_msg = resp.text raise DataverseAPIError(message=f"{method} request failed: {error_msg}", response=resp) from None return resp From 4d4d3ac36c59856490c2b5439fc81db50070680b Mon Sep 17 00:00:00 2001 From: Marcus Risanger <69350948+MarcusRisanger@users.noreply.github.com> Date: Tue, 3 Feb 2026 14:34:12 +0100 Subject: [PATCH 7/7] Remove old print statement --- dataverse_api/utils/batching.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dataverse_api/utils/batching.py b/dataverse_api/utils/batching.py index 6bde96b..57c3eef 100644 --- a/dataverse_api/utils/batching.py +++ b/dataverse_api/utils/batching.py @@ -79,7 +79,6 @@ def __post_init__(self) -> None: self.content_type += "; type=entry" if self.headers: - print("Extra!") self.extra_header = "\n".join([f"{k}: {v}" for k, v in self.headers.items()]) self.url = encode_altkeys(self.url)