@@ -56,23 +56,34 @@ def record_warning(type, attributes)
5656 redis . rpush ( key ( 'warnings' ) , Marshal . dump ( [ type , attributes ] ) )
5757 end
5858
59- def record_error ( id , payload )
59+ def record_error ( id , payload , stat_delta : nil )
6060 # Run acknowledge first so we know whether we're the first to ack
6161 acknowledged = @queue . acknowledge ( id , error : payload )
62+ stats_logger &.info ( "[stats] record_error test_id=#{ id . inspect } acknowledged=#{ acknowledged } worker_id=#{ config . worker_id } stat_delta=#{ stat_delta . inspect } " )
6263
6364 if acknowledged
6465 # We were the first to ack; another worker already ack'd would get falsy from SADD
6566 @queue . increment_test_failed
67+ # Only the acknowledging worker's stats include this failure (others skip increment when ack=false).
68+ # Store so we can subtract it if another worker records success later.
69+ store_error_report_delta ( id , stat_delta ) if stat_delta && stat_delta . any?
6670 end
6771 # Return so caller can roll back local counter when not acknowledged
6872 !!acknowledged
6973 end
7074
7175 def record_success ( id , skip_flaky_record : false )
72- acknowledged , error_reports_deleted_count , requeued_count = redis . multi do |transaction |
76+ acknowledged , error_reports_deleted_count , requeued_count , delta_json = redis . multi do |transaction |
7377 @queue . acknowledge ( id , pipeline : transaction )
7478 transaction . hdel ( key ( 'error-reports' ) , id )
7579 transaction . hget ( key ( 'requeues-count' ) , id )
80+ transaction . hget ( key ( 'error-report-deltas' ) , id )
81+ end
82+ stats_logger &.info ( "[stats] record_success test_id=#{ id . inspect } acknowledged=#{ acknowledged } worker_id=#{ config . worker_id } error_reports_deleted=#{ error_reports_deleted_count } has_delta=#{ !!delta_json } " )
83+ # When we're replacing a failure, subtract the (single) acknowledging worker's stat contribution
84+ if error_reports_deleted_count . to_i > 0 && delta_json
85+ apply_error_report_delta_correction ( delta_json )
86+ redis . hdel ( key ( 'error-report-deltas' ) , id )
7687 end
7788 record_flaky ( id ) if !skip_flaky_record && ( error_reports_deleted_count . to_i > 0 || requeued_count . to_i > 0 )
7889 !!acknowledged
@@ -84,6 +95,7 @@ def record_requeue(id)
8495
8596 def record_stats ( stats = nil , pipeline : nil )
8697 return unless stats
98+ stats_logger &.info ( "[stats] record_stats worker_id=#{ config . worker_id } stats=#{ stats . inspect } " )
8799 if pipeline
88100 stats . each do |stat_name , stat_value |
89101 pipeline . hset ( key ( stat_name ) , config . worker_id , stat_value )
@@ -146,6 +158,38 @@ def reset_stats(stat_names)
146158 def key ( *args )
147159 KeyShortener . key ( config . build_id , *args )
148160 end
161+
162+ def store_error_report_delta ( test_id , stat_delta )
163+ # Only the acknowledging worker's stats include this test; store their delta for correction on success
164+ payload = { 'worker_id' => config . worker_id } . merge ( stat_delta )
165+ stats_logger &.info ( "[stats] store_error_report_delta test_id=#{ test_id . inspect } worker_id=#{ config . worker_id } payload=#{ payload . inspect } " )
166+ redis . hset ( key ( 'error-report-deltas' ) , test_id , JSON . generate ( payload ) )
167+ redis . expire ( key ( 'error-report-deltas' ) , config . redis_ttl )
168+ end
169+
170+ def apply_error_report_delta_correction ( delta_json )
171+ delta = JSON . parse ( delta_json )
172+ worker_id = delta . delete ( 'worker_id' )
173+ stats_logger &.info ( "[stats] apply_error_report_delta_correction worker_id=#{ worker_id . inspect } subtracting=#{ delta . inspect } " )
174+ return if worker_id . nil? || delta . empty?
175+
176+ redis . pipelined do |pipeline |
177+ delta . each do |stat_name , value |
178+ next unless value . is_a? ( Numeric ) || value . to_s . match? ( /\A -?\d +\. ?\d *\z / )
179+
180+ pipeline . hincrbyfloat ( key ( stat_name ) , worker_id , -value . to_f )
181+ pipeline . expire ( key ( stat_name ) , config . redis_ttl )
182+ end
183+ end
184+ end
185+
186+ def stats_logger
187+ return @stats_logger if defined? ( @stats_logger )
188+ return @stats_logger = nil unless config . respond_to? ( :debug_log ) && config . debug_log
189+
190+ require 'logger'
191+ @stats_logger = Logger . new ( config . debug_log ) . tap { |l | l . level = Logger ::INFO }
192+ end
149193 end
150194 end
151195 end
0 commit comments