feat(uptime): Add ability to use queues to manage parallelism #8
feat(uptime): Add ability to use queues to manage parallelism #8 — akshayutture-augment wants to merge 1 commit into
Conversation
One potential problem we have with batch processing is that any one slow item will clog up the whole batch. This PR implements a queueing method instead, where we keep N queues that each have their own workers. There's still a chance of individual items backlogging a queue, but we can try increased concurrency here to reduce the chances of that happening. <!-- Describe your PR here. -->
There was a problem hiding this comment.
Pull Request Overview
Copilot reviewed 5 out of 7 changed files in this pull request and generated 3 comments.
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| """Process items from the queue in order.""" | ||
| while not self.shutdown: | ||
| try: | ||
| work_item = self.work_queue.get() |
There was a problem hiding this comment.
The get() call will block indefinitely when the queue is empty. This prevents the worker thread from shutting down gracefully when self.shutdown is set to True. Use get(timeout=1) with appropriate exception handling to allow periodic shutdown checks.
| work_item = self.work_queue.get() | |
| work_item = self.work_queue.get(timeout=1) | |
| except queue.Empty: | |
| continue |
| ) -> None: | ||
| self.mode = mode | ||
| metric_tags = {"identifier": self.identifier, "mode": self.mode} | ||
| self.result_processor = self.result_processor_cls() |
There was a problem hiding this comment.
The result_processor is now instantiated before checking the mode, but previously it was created after all mode-specific setup. This changes the initialization order, which could affect mode-specific configurations that might depend on the processor's state. Consider whether this reordering is intentional and, if so, document why it is necessary.
| factory.create_with_partitions = create_with_partitions_tracking # type: ignore[method-assign] | ||
| consumer_config = build_kafka_consumer_configuration( | ||
| settings.KAFKA_CLUSTERS["default"]["common"], | ||
| group_id=consumer_group, | ||
| auto_offset_reset="earliest", | ||
| ) | ||
|
|
||
| consumer = KafkaConsumer(consumer_config) | ||
| processor = StreamProcessor( | ||
| consumer=consumer, | ||
| topic=Topic(test_topic), | ||
| processor_factory=factory, | ||
| commit_policy=ONCE_PER_SECOND, | ||
| ) | ||
|
|
||
| with mock.patch.object( | ||
| type(factory.result_processor), "__call__" | ||
| ) as mock_processor: | ||
| mock_processor.return_value = None | ||
|
|
||
| start_time = time.time() | ||
| while time.time() - start_time < 5: | ||
| processor._run_once() | ||
| time.sleep(0.1) | ||
|
|
||
| processor._shutdown() |
There was a problem hiding this comment.
Directly assigning to a method bypasses Python's descriptor protocol and can lead to unexpected behavior. Consider using unittest.mock.patch.object() or wrapping the factory in a test subclass that overrides the method properly.
| factory.create_with_partitions = create_with_partitions_tracking # type: ignore[method-assign] | |
| consumer_config = build_kafka_consumer_configuration( | |
| settings.KAFKA_CLUSTERS["default"]["common"], | |
| group_id=consumer_group, | |
| auto_offset_reset="earliest", | |
| ) | |
| consumer = KafkaConsumer(consumer_config) | |
| processor = StreamProcessor( | |
| consumer=consumer, | |
| topic=Topic(test_topic), | |
| processor_factory=factory, | |
| commit_policy=ONCE_PER_SECOND, | |
| ) | |
| with mock.patch.object( | |
| type(factory.result_processor), "__call__" | |
| ) as mock_processor: | |
| mock_processor.return_value = None | |
| start_time = time.time() | |
| while time.time() - start_time < 5: | |
| processor._run_once() | |
| time.sleep(0.1) | |
| processor._shutdown() | |
| with mock.patch.object(factory, "create_with_partitions", create_with_partitions_tracking): | |
| consumer_config = build_kafka_consumer_configuration( | |
| settings.KAFKA_CLUSTERS["default"]["common"], | |
| group_id=consumer_group, | |
| auto_offset_reset="earliest", | |
| ) | |
| consumer = KafkaConsumer(consumer_config) | |
| processor = StreamProcessor( | |
| consumer=consumer, | |
| topic=Topic(test_topic), | |
| processor_factory=factory, | |
| commit_policy=ONCE_PER_SECOND, | |
| ) | |
| with mock.patch.object( | |
| type(factory.result_processor), "__call__" | |
| ) as mock_processor: | |
| mock_processor.return_value = None | |
| start_time = time.time() | |
| while time.time() - start_time < 5: | |
| processor._run_once() | |
| time.sleep(0.1) | |
| processor._shutdown() |
Test 9
Replicated from ai-code-review-evaluation/sentry-copilot#8