diff --git a/src/crawlee/storage_clients/_file_system/_request_queue_client.py b/src/crawlee/storage_clients/_file_system/_request_queue_client.py index 6808c4b88e..6d5421ea98 100644 --- a/src/crawlee/storage_clients/_file_system/_request_queue_client.py +++ b/src/crawlee/storage_clients/_file_system/_request_queue_client.py @@ -452,8 +452,6 @@ async def get_request(self, unique_key: str) -> Request | None: logger.warning(f'Request with unique key "{unique_key}" not found in the queue.') return None - state = self._state.current_value - state.in_progress_requests.add(request.unique_key) await self._update_metadata(update_accessed_at=True) return request diff --git a/tests/unit/storage_clients/_file_system/test_fs_rq_client.py b/tests/unit/storage_clients/_file_system/test_fs_rq_client.py index ddb5e22331..a18d4813a4 100644 --- a/tests/unit/storage_clients/_file_system/test_fs_rq_client.py +++ b/tests/unit/storage_clients/_file_system/test_fs_rq_client.py @@ -173,3 +173,17 @@ async def test_data_persistence_across_reopens() -> None: assert {request1.url, request2.url} == {'https://example.com/1', 'https://example.com/2'} await reopened_client.drop() + + +async def test_get_request_does_not_mark_in_progress(rq_client: FileSystemRequestQueueClient) -> None: + """Test that get_request does not block a request from being fetched.""" + request = Request.from_url('https://example.com/blocked') + await rq_client.add_batch_of_requests([request]) + + fetched = await rq_client.get_request(request.unique_key) + assert fetched is not None + assert fetched.unique_key == request.unique_key + + next_request = await rq_client.fetch_next_request() + assert next_request is not None + assert next_request.unique_key == request.unique_key