diff --git a/discogs_client/client.py b/discogs_client/client.py index ff10332..8657395 100644 --- a/discogs_client/client.py +++ b/discogs_client/client.py @@ -19,6 +19,7 @@ def __init__(self, user_agent, consumer_key=None, consumer_secret=None, token=No self.user_agent = user_agent self.verbose = False self._fetcher = RequestsFetcher() + self._trust_per_page = True # Default: True if consumer_key and consumer_secret: self.set_consumer_key(consumer_key, consumer_secret) @@ -219,3 +220,13 @@ def set_timeout(self, """ self._fetcher.connect_timeout = connect self._fetcher.read_timeout = read + + @property + def trust_per_page(self) -> bool: + return self._trust_per_page + + @trust_per_page.setter + def trust_per_page(self, value: bool) -> None: + if not isinstance(value, bool): + raise ValueError("trust_per_page must be a bool") + self._trust_per_page = value \ No newline at end of file diff --git a/discogs_client/models.py b/discogs_client/models.py index 437ce31..efc1284 100644 --- a/discogs_client/models.py +++ b/discogs_client/models.py @@ -354,18 +354,42 @@ def _transform(self, item): return item def __getitem__(self, index): - page_index = index // self.per_page + 1 - offset = index % self.per_page + """Retrieve an item by its index. - try: - page = self.page(page_index) - except HTTPError as e: - if e.status_code == 404: - raise IndexError(e.msg) - else: + By default, uses the API's ``per_page`` value to calculate the page + containing the item directly. If the API returns fewer items per page + than reported, this may yield incorrect results — set + ``client.trust_per_page = False`` to fall back to a sequential page + walk at the cost of performance. + """ + if self.client._trust_per_page: + page_index = index // self.per_page + 1 + offset = index % self.per_page + + try: + page = self.page(page_index) + except HTTPError as e: + if e.status_code == 404: + raise IndexError(e.msg) from e + raise + + return page[offset] + + # Fallback to sequential page loading if we're not trusting the per_page parameter + current = 0 + page_index = 1 + while True: + try: + page = self.page(page_index) + except HTTPError as e: + if e.status_code == 404: + raise IndexError(e.msg) from e raise + if current + len(page) > index: + return page[index - current] + current += len(page) + page_index += 1 - return page[offset] def __len__(self): return self.count diff --git a/discogs_client/tests/test_core.py b/discogs_client/tests/test_core.py index fc1834b..882882e 100644 --- a/discogs_client/tests/test_core.py +++ b/discogs_client/tests/test_core.py @@ -111,6 +111,46 @@ def test_pagination(self): results.per_page = 10 self.assertTrue(results._num_pages is None) + def test_pagination_index_without_trust_per_page_and_short_pages(self): + """Without trust_per_page, sequential page walking handles under-filled pages correctly.""" + client = Client('ua') + client.trust_per_page = False # opt into sequential walk + client._base_url = '' + client._trust_per_page = False + client._fetcher = MemoryFetcher({ + '/artists/1': ( + b'{"id": 1, "name": "Badger", "releases_url": "/artists/1/releases"}', + 200, + ), + '/artists/1/releases?page=1&per_page=50': ( + b'{"pagination": {"per_page": 50, "pages": 3, "items": 5}, ' + b'"releases": [' + b'{"id": 101, "type": "release", "title": "A"},' + b'{"id": 102, "type": "release", "title": "B"}' + b']}', + 200, + ), + '/artists/1/releases?page=2&per_page=50': ( + b'{"pagination": {"per_page": 50, "pages": 3, "items": 5}, ' + b'"releases": [{"id": 103, "type": "release", "title": "C"}]}', + 200, + ), + '/artists/1/releases?page=3&per_page=50': ( + b'{"pagination": {"per_page": 50, "pages": 3, "items": 5}, ' + b'"releases": [' + b'{"id": 104, "type": "release", "title": "D"},' + b'{"id": 105, "type": "release", "title": "E"}' + b']}', + 200, + ), + }) + + results = client.artist(1).releases + self.assertEqual(results[0].id, 101) # page 1 + self.assertEqual(results[2].id, 103) # page 2, cross-page + self.assertEqual(results[4].id, 105) # page 3, last item + self.assertRaises(IndexError, lambda: results[5]) + def test_timeout_defaults_to_none(self): # Need to create client without LoggingDelegator here # self.d would throw AttributeError trying to access timeout properties on LoggingDelegator diff --git a/docs/source/optional_configuration.md b/docs/source/optional_configuration.md index c006e47..89f8604 100644 --- a/docs/source/optional_configuration.md +++ b/docs/source/optional_configuration.md @@ -30,3 +30,25 @@ client.set_timeout( ``` _Timeouts support integer and float values, you can also set either value to `None` to disable timeout for connect or read separately_ + +## Trust Discogs per page value + +When accessing paginated results by index (e.g. `results[42]`), +python3-discogs-client uses the `per_page` value from the API response to +calculate which page contains the item directly. This is fast, but assumes the +API always returns exactly `per_page` items per page. + +If the API returns fewer items per page than the reported `per_page` value, +this calculation can be off. In such cases, disable this behaviour to fall +back to a sequential page walk: + +```python +>>> import discogs_client +>>> d = discogs_client.Client('ExampleApplication/0.1') +>>> d.trust_per_page = False +``` + +:::{attention} +The sequential fallback is slower for large result sets, as it must fetch pages +one by one until it reaches the requested index. +:::