From 7c592fe2d4862c4fe9bdd5f94c32f6e669e24d43 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Tue, 7 Apr 2026 17:47:30 +0200 Subject: [PATCH 1/6] Draft tests for fewer pages than per_page fix --- discogs_client/tests/test_core.py | 151 ++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/discogs_client/tests/test_core.py b/discogs_client/tests/test_core.py index fc1834b..d4fa66d 100644 --- a/discogs_client/tests/test_core.py +++ b/discogs_client/tests/test_core.py @@ -111,6 +111,157 @@ def test_pagination(self): results.per_page = 10 self.assertTrue(results._num_pages is None) + def test_pagination_with_short_page(self): + """Indexing walks actual page sizes when the API under-fills a page.""" + client = Client('ua') + client._base_url = '' + client._fetcher = MemoryFetcher({ + '/artists/1': ( + b'{"id": 1, "name": "Badger", "releases_url": "/artists/1/releases"}', + 200, + ), + '/artists/1/releases?page=1&per_page=50': ( + b'{"pagination": {"per_page": 50, "pages": 2, "items": 2}, ' + b'"releases": [{"id": 101, "type": "release", "title": "First"}]}', + 200, + ), + '/artists/1/releases?page=2&per_page=50': ( + b'{"pagination": {"per_page": 50, "pages": 2, "items": 2}, ' + b'"releases": [{"id": 102, "type": "release", "title": "Second"}]}', + 200, + ), + }) + + results = client.artist(1).releases + + self.assertEqual(results.pages, 2) + self.assertEqual(len(results.page(1)), 1) + self.assertEqual(results[0].id, 101) + self.assertEqual(results[1].id, 102) + self.assertRaises(IndexError, lambda: results[2]) + + def test_pagination_with_varying_page_sizes(self): + """Indexing works correctly with pages of different actual sizes.""" + client = Client('ua') + client._base_url = '' + client._fetcher = MemoryFetcher({ + '/artists/1': ( + b'{"id": 1, "name": "Badger", "releases_url": "/artists/1/releases"}', + 200, + ), + '/artists/1/releases?page=1&per_page=50': ( + b'{"pagination": {"per_page": 50, "pages": 3, "items": 8}, ' + b'"releases": [' + b'{"id": 101, "type": "release", "title": "First"},' + b'{"id": 102, "type": "release", "title": "Second"},' + b'{"id": 103, "type": "release", "title": "Third"}' + b']}', + 200, + ), + '/artists/1/releases?page=2&per_page=50': ( + b'{"pagination": {"per_page": 50, "pages": 3, "items": 8}, ' + b'"releases": [' + b'{"id": 104, "type": "release", "title": "Fourth"},' + b'{"id": 105, "type": "release", "title": "Fifth"}' + b']}', + 200, + ), + '/artists/1/releases?page=3&per_page=50': ( + b'{"pagination": {"per_page": 50, "pages": 3, "items": 8}, ' + b'"releases": [' + b'{"id": 106, "type": "release", "title": "Sixth"},' + b'{"id": 107, "type": "release", "title": "Seventh"},' + b'{"id": 108, "type": "release", "title": "Eighth"}' + b']}', + 200, + ), + }) + + results = client.artist(1).releases + + # Verify we can access items across all pages + self.assertEqual(results[0].id, 101) + self.assertEqual(results[1].id, 102) + self.assertEqual(results[2].id, 103) + self.assertEqual(results[3].id, 104) # First item on page 2 + self.assertEqual(results[4].id, 105) # Last item on page 2 + self.assertEqual(results[5].id, 106) # First item on page 3 + self.assertEqual(results[7].id, 108) # Last item + + # Verify out-of-bounds access raises IndexError + self.assertRaises(IndexError, lambda: results[8]) + + def test_pagination_with_short_page_sequential_traversal(self): + """Verify sequential page walking works when all pages are short.""" + client = Client('ua') + client._base_url = '' + client._fetcher = MemoryFetcher({ + '/artists/1': ( + b'{"id": 1, "name": "Badger", "releases_url": "/artists/1/releases"}', + 200, + ), + '/artists/1/releases?page=1&per_page=50': ( + b'{"pagination": {"per_page": 50, "pages": 4, "items": 4}, ' + b'"releases": [{"id": 201, "type": "release", "title": "A"}]}', + 200, + ), + '/artists/1/releases?page=2&per_page=50': ( + b'{"pagination": {"per_page": 50, "pages": 4, "items": 4}, ' + b'"releases": [{"id": 202, "type": "release", "title": "B"}]}', + 200, + ), + '/artists/1/releases?page=3&per_page=50': ( + b'{"pagination": {"per_page": 50, "pages": 4, "items": 4}, ' + b'"releases": [{"id": 203, "type": "release", "title": "C"}]}', + 200, + ), + '/artists/1/releases?page=4&per_page=50': ( + b'{"pagination": {"per_page": 50, "pages": 4, "items": 4}, ' + b'"releases": [{"id": 204, "type": "release", "title": "D"}]}', + 200, + ), + }) + + results = client.artist(1).releases + + # With per_page=50 but only 1 item per actual page, + # math-based indexing would fail. Sequential walking should work. + self.assertEqual(results[0].id, 201) + self.assertEqual(results[1].id, 202) + self.assertEqual(results[2].id, 203) + self.assertEqual(results[3].id, 204) + self.assertRaises(IndexError, lambda: results[4]) + + def test_pagination_404_exception_chaining(self): + """Verify that HTTPError 404 is properly chained when index is out of bounds.""" + client = Client('ua') + client._base_url = '' + client._fetcher = MemoryFetcher({ + '/artists/1': ( + b'{"id": 1, "name": "Badger", "releases_url": "/artists/1/releases"}', + 200, + ), + '/artists/1/releases?page=1&per_page=50': ( + b'{"pagination": {"per_page": 50, "pages": 1, "items": 2}, ' + b'"releases": [' + b'{"id": 301, "type": "release", "title": "Only"},' + b'{"id": 302, "type": "release", "title": "Two"}' + b']}', + 200, + ), + }) + + results = client.artist(1).releases + + # Access valid items + self.assertEqual(results[0].id, 301) + self.assertEqual(results[1].id, 302) + + # Accessing beyond bounds should raise IndexError + # (which is chained from HTTPError 404) + with self.assertRaises(IndexError): + results[100] + def test_timeout_defaults_to_none(self): # Need to create client without LoggingDelegator here # self.d would throw AttributeError trying to access timeout properties on LoggingDelegator From 55f17727ec3fabf928c11d3dfd868c023aa49e20 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Thu, 30 Apr 2026 06:50:14 +0200 Subject: [PATCH 2/6] Reduce tests to a minimum --- discogs_client/tests/test_core.py | 142 ++++-------------------------- 1 file changed, 15 insertions(+), 127 deletions(-) diff --git a/discogs_client/tests/test_core.py b/discogs_client/tests/test_core.py index d4fa66d..0030d00 100644 --- a/discogs_client/tests/test_core.py +++ b/discogs_client/tests/test_core.py @@ -111,156 +111,44 @@ def test_pagination(self): results.per_page = 10 self.assertTrue(results._num_pages is None) - def test_pagination_with_short_page(self): - """Indexing walks actual page sizes when the API under-fills a page.""" + def test_pagination_index_without_trust_per_page_and_short_pages(self): + """Without trust_per_page, sequential page walking handles under-filled pages correctly.""" client = Client('ua') client._base_url = '' + client._trust_per_page = False client._fetcher = MemoryFetcher({ '/artists/1': ( b'{"id": 1, "name": "Badger", "releases_url": "/artists/1/releases"}', 200, ), '/artists/1/releases?page=1&per_page=50': ( - b'{"pagination": {"per_page": 50, "pages": 2, "items": 2}, ' - b'"releases": [{"id": 101, "type": "release", "title": "First"}]}', - 200, - ), - '/artists/1/releases?page=2&per_page=50': ( - b'{"pagination": {"per_page": 50, "pages": 2, "items": 2}, ' - b'"releases": [{"id": 102, "type": "release", "title": "Second"}]}', - 200, - ), - }) - - results = client.artist(1).releases - - self.assertEqual(results.pages, 2) - self.assertEqual(len(results.page(1)), 1) - self.assertEqual(results[0].id, 101) - self.assertEqual(results[1].id, 102) - self.assertRaises(IndexError, lambda: results[2]) - - def test_pagination_with_varying_page_sizes(self): - """Indexing works correctly with pages of different actual sizes.""" - client = Client('ua') - client._base_url = '' - client._fetcher = MemoryFetcher({ - '/artists/1': ( - b'{"id": 1, "name": "Badger", "releases_url": "/artists/1/releases"}', - 200, - ), - '/artists/1/releases?page=1&per_page=50': ( - b'{"pagination": {"per_page": 50, "pages": 3, "items": 8}, ' - b'"releases": [' - b'{"id": 101, "type": "release", "title": "First"},' - b'{"id": 102, "type": "release", "title": "Second"},' - b'{"id": 103, "type": "release", "title": "Third"}' - b']}', - 200, - ), - '/artists/1/releases?page=2&per_page=50': ( - b'{"pagination": {"per_page": 50, "pages": 3, "items": 8}, ' + b'{"pagination": {"per_page": 50, "pages": 3, "items": 5}, ' b'"releases": [' - b'{"id": 104, "type": "release", "title": "Fourth"},' - b'{"id": 105, "type": "release", "title": "Fifth"}' + b'{"id": 101, "type": "release", "title": "A"},' + b'{"id": 102, "type": "release", "title": "B"}' b']}', 200, ), - '/artists/1/releases?page=3&per_page=50': ( - b'{"pagination": {"per_page": 50, "pages": 3, "items": 8}, ' - b'"releases": [' - b'{"id": 106, "type": "release", "title": "Sixth"},' - b'{"id": 107, "type": "release", "title": "Seventh"},' - b'{"id": 108, "type": "release", "title": "Eighth"}' - b']}', - 200, - ), - }) - - results = client.artist(1).releases - - # Verify we can access items across all pages - self.assertEqual(results[0].id, 101) - self.assertEqual(results[1].id, 102) - self.assertEqual(results[2].id, 103) - self.assertEqual(results[3].id, 104) # First item on page 2 - self.assertEqual(results[4].id, 105) # Last item on page 2 - self.assertEqual(results[5].id, 106) # First item on page 3 - self.assertEqual(results[7].id, 108) # Last item - - # Verify out-of-bounds access raises IndexError - self.assertRaises(IndexError, lambda: results[8]) - - def test_pagination_with_short_page_sequential_traversal(self): - """Verify sequential page walking works when all pages are short.""" - client = Client('ua') - client._base_url = '' - client._fetcher = MemoryFetcher({ - '/artists/1': ( - b'{"id": 1, "name": "Badger", "releases_url": "/artists/1/releases"}', - 200, - ), - '/artists/1/releases?page=1&per_page=50': ( - b'{"pagination": {"per_page": 50, "pages": 4, "items": 4}, ' - b'"releases": [{"id": 201, "type": "release", "title": "A"}]}', - 200, - ), '/artists/1/releases?page=2&per_page=50': ( - b'{"pagination": {"per_page": 50, "pages": 4, "items": 4}, ' - b'"releases": [{"id": 202, "type": "release", "title": "B"}]}', + b'{"pagination": {"per_page": 50, "pages": 3, "items": 5}, ' + b'"releases": [{"id": 103, "type": "release", "title": "C"}]}', 200, ), '/artists/1/releases?page=3&per_page=50': ( - b'{"pagination": {"per_page": 50, "pages": 4, "items": 4}, ' - b'"releases": [{"id": 203, "type": "release", "title": "C"}]}', - 200, - ), - '/artists/1/releases?page=4&per_page=50': ( - b'{"pagination": {"per_page": 50, "pages": 4, "items": 4}, ' - b'"releases": [{"id": 204, "type": "release", "title": "D"}]}', - 200, - ), - }) - - results = client.artist(1).releases - - # With per_page=50 but only 1 item per actual page, - # math-based indexing would fail. Sequential walking should work. - self.assertEqual(results[0].id, 201) - self.assertEqual(results[1].id, 202) - self.assertEqual(results[2].id, 203) - self.assertEqual(results[3].id, 204) - self.assertRaises(IndexError, lambda: results[4]) - - def test_pagination_404_exception_chaining(self): - """Verify that HTTPError 404 is properly chained when index is out of bounds.""" - client = Client('ua') - client._base_url = '' - client._fetcher = MemoryFetcher({ - '/artists/1': ( - b'{"id": 1, "name": "Badger", "releases_url": "/artists/1/releases"}', - 200, - ), - '/artists/1/releases?page=1&per_page=50': ( - b'{"pagination": {"per_page": 50, "pages": 1, "items": 2}, ' + b'{"pagination": {"per_page": 50, "pages": 3, "items": 5}, ' b'"releases": [' - b'{"id": 301, "type": "release", "title": "Only"},' - b'{"id": 302, "type": "release", "title": "Two"}' + b'{"id": 104, "type": "release", "title": "D"},' + b'{"id": 105, "type": "release", "title": "E"}' b']}', 200, ), }) results = client.artist(1).releases - - # Access valid items - self.assertEqual(results[0].id, 301) - self.assertEqual(results[1].id, 302) - - # Accessing beyond bounds should raise IndexError - # (which is chained from HTTPError 404) - with self.assertRaises(IndexError): - results[100] + self.assertEqual(results[0].id, 101) # page 1 + self.assertEqual(results[2].id, 103) # page 2, cross-page + self.assertEqual(results[4].id, 105) # page 3, last item + self.assertRaises(IndexError, lambda: results[5]) def test_timeout_defaults_to_none(self): # Need to create client without LoggingDelegator here From 2ff7ef57f5e2707dd6028abcc4feaa72e45fe8de Mon Sep 17 00:00:00 2001 From: George Flores Date: Sun, 22 Mar 2026 16:18:07 -0700 Subject: [PATCH 3/6] fix: walk actual page sizes in __getitem__ to handle inconsistent Discogs pagination metadata --- discogs_client/models.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/discogs_client/models.py b/discogs_client/models.py index 437ce31..a19a06c 100644 --- a/discogs_client/models.py +++ b/discogs_client/models.py @@ -354,18 +354,26 @@ def _transform(self, item): return item def __getitem__(self, index): - page_index = index // self.per_page + 1 - offset = index % self.per_page - - try: - page = self.page(page_index) - except HTTPError as e: - if e.status_code == 404: - raise IndexError(e.msg) - else: - raise - - return page[offset] + # Rather than trusting per_page metadata for offset math, we walk + # through actual page sizes sequentially. This handles cases where + # Discogs returns fewer items than per_page claims, which causes + # index-out-of-range errors when using the standard calculation: + # page_index = index // self.per_page + 1 + # offset = index % self.per_page + current = 0 + page_index = 1 + while True: + try: + page = self.page(page_index) + except HTTPError as e: + if e.status_code == 404: + raise IndexError(e.msg) + else: + raise + if current + len(page) > index: + return page[index - current] + current += len(page) + page_index += 1 def __len__(self): return self.count From d3cbaeb07bfe80c00a624755a6410f95fa2f3b0a Mon Sep 17 00:00:00 2001 From: George Flores Date: Tue, 7 Apr 2026 16:17:06 -0700 Subject: [PATCH 4/6] fix: add trust_per_page config option to toggle sequential page walk fix: add trust_per_page config, update __getitem__ and tests accordingly fix: add trust_per_page config, update __getitem__ and tests accordingly# --- discogs_client/client.py | 11 +++++++++ discogs_client/models.py | 40 +++++++++++++++++++++---------- discogs_client/tests/test_core.py | 1 + 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/discogs_client/client.py b/discogs_client/client.py index ff10332..8657395 100644 --- a/discogs_client/client.py +++ b/discogs_client/client.py @@ -19,6 +19,7 @@ def __init__(self, user_agent, consumer_key=None, consumer_secret=None, token=No self.user_agent = user_agent self.verbose = False self._fetcher = RequestsFetcher() + self._trust_per_page = True # Default: True if consumer_key and consumer_secret: self.set_consumer_key(consumer_key, consumer_secret) @@ -219,3 +220,13 @@ def set_timeout(self, """ self._fetcher.connect_timeout = connect self._fetcher.read_timeout = read + + @property + def trust_per_page(self) -> bool: + return self._trust_per_page + + @trust_per_page.setter + def trust_per_page(self, value: bool) -> None: + if not isinstance(value, bool): + raise ValueError("trust_per_page must be a bool") + self._trust_per_page = value \ No newline at end of file diff --git a/discogs_client/models.py b/discogs_client/models.py index a19a06c..04fcc40 100644 --- a/discogs_client/models.py +++ b/discogs_client/models.py @@ -354,15 +354,12 @@ def _transform(self, item): return item def __getitem__(self, index): - # Rather than trusting per_page metadata for offset math, we walk - # through actual page sizes sequentially. This handles cases where - # Discogs returns fewer items than per_page claims, which causes - # index-out-of-range errors when using the standard calculation: - # page_index = index // self.per_page + 1 - # offset = index % self.per_page - current = 0 - page_index = 1 - while True: + if self.client._trust_per_page: + # Directly jump to index using offset math. + # Default + page_index = index // self.per_page + 1 + offset = index % self.per_page + try: page = self.page(page_index) except HTTPError as e: @@ -370,10 +367,27 @@ def __getitem__(self, index): raise IndexError(e.msg) else: raise - if current + len(page) > index: - return page[index - current] - current += len(page) - page_index += 1 + + return page[offset] + + else: + # Sequentially walks through each page until index or end of array reached. + # Opt-in + current = 0 + page_index = 1 + while True: + try: + page = self.page(page_index) + except HTTPError as e: + if e.status_code == 404: + raise IndexError(e.msg) + else: + raise + if current + len(page) > index: + return page[index - current] + current += len(page) + page_index += 1 + def __len__(self): return self.count diff --git a/discogs_client/tests/test_core.py b/discogs_client/tests/test_core.py index 0030d00..882882e 100644 --- a/discogs_client/tests/test_core.py +++ b/discogs_client/tests/test_core.py @@ -114,6 +114,7 @@ def test_pagination(self): def test_pagination_index_without_trust_per_page_and_short_pages(self): """Without trust_per_page, sequential page walking handles under-filled pages correctly.""" client = Client('ua') + client.trust_per_page = False # opt into sequential walk client._base_url = '' client._trust_per_page = False client._fetcher = MemoryFetcher({ From d0d4fcc9253ea33649bdb8f105858efc7b81671a Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Tue, 28 Apr 2026 07:19:31 +0200 Subject: [PATCH 5/6] Refactor BasePaginatedResponse getitem slightly and add an exhaustive docstring. --- discogs_client/models.py | 46 +++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/discogs_client/models.py b/discogs_client/models.py index 04fcc40..efc1284 100644 --- a/discogs_client/models.py +++ b/discogs_client/models.py @@ -354,9 +354,15 @@ def _transform(self, item): return item def __getitem__(self, index): + """Retrieve an item by its index. + + By default, uses the API's ``per_page`` value to calculate the page + containing the item directly. If the API returns fewer items per page + than reported, this may yield incorrect results — set + ``client.trust_per_page = False`` to fall back to a sequential page + walk at the cost of performance. + """ if self.client._trust_per_page: - # Directly jump to index using offset math. - # Default page_index = index // self.per_page + 1 offset = index % self.per_page @@ -364,29 +370,25 @@ def __getitem__(self, index): page = self.page(page_index) except HTTPError as e: if e.status_code == 404: - raise IndexError(e.msg) - else: - raise + raise IndexError(e.msg) from e + raise return page[offset] - else: - # Sequentially walks through each page until index or end of array reached. - # Opt-in - current = 0 - page_index = 1 - while True: - try: - page = self.page(page_index) - except HTTPError as e: - if e.status_code == 404: - raise IndexError(e.msg) - else: - raise - if current + len(page) > index: - return page[index - current] - current += len(page) - page_index += 1 + # Fallback to sequential page loading if we're not trusting the per_page parameter + current = 0 + page_index = 1 + while True: + try: + page = self.page(page_index) + except HTTPError as e: + if e.status_code == 404: + raise IndexError(e.msg) from e + raise + if current + len(page) > index: + return page[index - current] + current += len(page) + page_index += 1 def __len__(self): From 924d127ad6c4b56829e9fb96b0acae0a4af66633 Mon Sep 17 00:00:00 2001 From: J0J0 Todos Date: Tue, 28 Apr 2026 07:12:56 +0200 Subject: [PATCH 6/6] Add trust_per_page config docs chapter --- docs/source/optional_configuration.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/docs/source/optional_configuration.md b/docs/source/optional_configuration.md index c006e47..89f8604 100644 --- a/docs/source/optional_configuration.md +++ b/docs/source/optional_configuration.md @@ -30,3 +30,25 @@ client.set_timeout( ``` _Timeouts support integer and float values, you can also set either value to `None` to disable timeout for connect or read separately_ + +## Trust Discogs per page value + +When accessing paginated results by index (e.g. `results[42]`), +python3-discogs-client uses the `per_page` value from the API response to +calculate which page contains the item directly. This is fast, but assumes the +API always returns exactly `per_page` items per page. + +If the API returns fewer items per page than the reported `per_page` value, +this calculation can be off. In such cases, disable this behaviour to fall +back to a sequential page walk: + +```python +>>> import discogs_client +>>> d = discogs_client.Client('ExampleApplication/0.1') +>>> d.trust_per_page = False +``` + +:::{attention} +The sequential fallback is slower for large result sets, as it must fetch pages +one by one until it reaches the requested index. +:::