diff --git a/CHANGELOG.markdown b/CHANGELOG.markdown index 7bd8c177..7360ef63 100644 --- a/CHANGELOG.markdown +++ b/CHANGELOG.markdown @@ -1,5 +1,7 @@ # Pending +- Set timeouts on the reporting HTTP connection so a slow or unreachable Scout host can no longer block the reporting thread indefinitely (previously fell back to Net::HTTP's 60s default). Configurable via `connect_timeout` / `read_timeout` (both default 5s). + # 6.2.0 - Fix compatibility with `http >= 6.0.0` (#613) diff --git a/lib/scout_apm/config.rb b/lib/scout_apm/config.rb index b9450b5f..9d05c0c2 100644 --- a/lib/scout_apm/config.rb +++ b/lib/scout_apm/config.rb @@ -12,6 +12,7 @@ # application_root - override the detected directory of the application # collect_remote_ip - automatically capture user's IP into a Trace's Context # compress_payload - true/false to enable gzipping of payload +# connect_timeout - seconds to wait when opening a connection to the reporting host before giving up. Default: 5 # data_file - override the default temporary storage location. Must be a location in a writable directory # dev_trace - true or false. Enables always-on tracing in development environmen only # direct_host - override the default "direct" host. The direct_host bypasses the ingestion pipeline and goes directly to the webserver, and is primarily used for features under development. @@ -26,6 +27,7 @@ # name - override the name reported to APM. This is the name that shows in the Web UI # profile - turn on/off scoutprof (only applicable in Gem versions including scoutprof) # proxy - an http proxy +# read_timeout - seconds to wait for a response from the reporting host before giving up. Default: 5 # report_format - 'json' or 'marshal'. Marshal is legacy and will be removed. # scm_subdirectory - if the app root lives in source management in a subdirectory. E.g. #{SCM_ROOT}/src # uri_reporting - 'path' or 'full_path' default is 'full_path', which reports URL params as well as the path. @@ -71,6 +73,7 @@ class Config 'collect_remote_ip', 'compress_payload', 'config_file', + 'connect_timeout', 'data_file', 'database_metric_limit', 'database_metric_report_limit', @@ -97,6 +100,7 @@ class Config 'name', 'profile', 'proxy', + 'read_timeout', 'record_queue_time', 'job_params_capture', 'job_filtered_params', @@ -244,6 +248,8 @@ def coerce(val) 'monitor' => BooleanCoercion.new, 'collect_remote_ip' => BooleanCoercion.new, 'compress_payload' => BooleanCoercion.new, + 'connect_timeout' => IntegerCoercion.new, + 'read_timeout' => IntegerCoercion.new, 'database_metric_limit' => IntegerCoercion.new, 'database_metric_report_limit' => IntegerCoercion.new, 'external_service_metric_limit' => IntegerCoercion.new, @@ -362,6 +368,7 @@ def logger class ConfigDefaults DEFAULTS = { 'compress_payload' => true, + 'connect_timeout' => 5, # seconds; Net::HTTP open_timeout for reporting 'detailed_middleware' => false, 'dev_trace' => false, 'direct_host' => 'https://apm.scoutapp.com', @@ -374,6 +381,7 @@ class ConfigDefaults 'log_level' => 'info', 'max_traces' => 10, 'profile' => true, # for scoutprof + 'read_timeout' => 5, # seconds; Net::HTTP read_timeout for reporting 'report_format' => 'json', 'scm_subdirectory' => '', 'uri_reporting' => 'full_path', diff --git a/lib/scout_apm/reporter.rb b/lib/scout_apm/reporter.rb index 356c91a7..0b2f0593 100644 --- a/lib/scout_apm/reporter.rb +++ b/lib/scout_apm/reporter.rb @@ -124,6 +124,16 @@ def http(url) proxy_uri.port, proxy_uri.user, proxy_uri.password).new(url.host, url.port) + + # Bound how long a single reporting attempt can block. Without these, + # Net::HTTP falls back to a 60s default and a slow or unreachable host + # leaves the reporting thread stuck inside a native call (DNS, TLS, + # socket read). That is especially dangerous in a preloaded/forking app + # server, where a thread blocked there at fork() time can leave the + # forked worker holding a lock that no surviving thread will release. + http.open_timeout = config.value("connect_timeout") + http.read_timeout = config.value("read_timeout") + if url.is_a?(URI::HTTPS) http.use_ssl = true http.ca_file = config.value("ssl_cert_file") diff --git a/test/unit/reporter_test.rb b/test/unit/reporter_test.rb new file mode 100644 index 00000000..d669ac16 --- /dev/null +++ b/test/unit/reporter_test.rb @@ -0,0 +1,58 @@ +require 'test_helper' + +require 'scout_apm/reporter' + +class ReporterTest < Minitest::Test + def setup + @context = ScoutApm::AgentContext.new + end + + # Build a Config with the given overrides taking precedence, but still + # backed by the real defaults & coercions. + def config_with(overrides = {}) + overlays = [ + FakeConfigOverlay.new(overrides), + ScoutApm::Config::ConfigDefaults.new, + ScoutApm::Config::ConfigNull.new, + ] + ScoutApm::Config.new(@context, overlays) + end + + def reporter_for(overrides = {}) + @context.config = config_with(overrides) + ScoutApm::Reporter.new(@context, :checkin) + end + + def https_uri + URI.parse("https://checkin.scoutapp.com/apps/checkin.scout") + end + + def test_sets_default_timeouts_on_http_connection + http = reporter_for.send(:http, https_uri) + + assert_equal 5, http.open_timeout + assert_equal 5, http.read_timeout + end + + def test_honors_configured_timeouts + http = reporter_for('connect_timeout' => 2, 'read_timeout' => 3).send(:http, https_uri) + + assert_equal 2, http.open_timeout + assert_equal 3, http.read_timeout + end + + def test_coerces_string_timeouts_from_env + http = reporter_for('connect_timeout' => "7", 'read_timeout' => "8").send(:http, https_uri) + + assert_equal 7, http.open_timeout + assert_equal 8, http.read_timeout + end + + def test_sets_timeouts_for_plain_http_endpoints_too + http = reporter_for.send(:http, URI.parse("http://checkin.scoutapp.com/apps/checkin.scout")) + + refute http.use_ssl? + assert_equal 5, http.open_timeout + assert_equal 5, http.read_timeout + end +end