diff --git a/README.md b/README.md index b62c6cc..d1f4b3d 100644 --- a/README.md +++ b/README.md @@ -70,13 +70,23 @@ You must have a [`cache_store`](https://guides.rubyonrails.org/caching_with_rail ## Usage -You can configure the HTTP `timeout` and `expires_in` cache parameters inside of your `rails` config: +You can configure the HTTP timeouts and caching parameters inside of your `rails` config: ```ruby -config.cloudflare.expires_in = 12.hours # default value -config.cloudflare.timeout = 5.seconds # default value +config.cloudflare.expires_in = 12.hours # how long a successful ip list is cached +config.cloudflare.timeout = 5.seconds # HTTP read timeout for the fetch +config.cloudflare.open_timeout = 5.seconds # HTTP open (TCP connect) timeout for the fetch +config.cloudflare.error_expires_in = 1.minute # how long the fallback is cached after a failed fetch before retrying +config.cloudflare.race_condition_ttl = 10.seconds # see ActiveSupport::Cache::Store#fetch ``` +When a fetch from Cloudflare fails the gem logs the error and falls back to a +built-in list of Cloudflare ranges. That fallback is cached for +`error_expires_in` (rather than re-requested on every call) so a blocked or slow +upstream can't issue an outbound request — and block a request thread up to the +configured timeouts — on every incoming request. Once `error_expires_in` lapses +the next call retries the network, so a transient outage recovers on its own. + ## Blocking non-Cloudflare traffic You can use the `#cloudflare?` method from this gem to block all non-Cloudflare traffic to your application. Here's an example of doing this with [`Rack::Attack`](https://github.com/rack/rack-attack): diff --git a/lib/cloudflare_rails/importer.rb b/lib/cloudflare_rails/importer.rb index 1e9ef46..6b4a14a 100644 --- a/lib/cloudflare_rails/importer.rb +++ b/lib/cloudflare_rails/importer.rb @@ -35,34 +35,105 @@ def ips_v4 end def fetch(url) - uri = URI("#{BASE_URL}#{url}") - - resp = Net::HTTP.start(uri.host, - uri.port, - use_ssl: true, - read_timeout: Rails.application.config.cloudflare.timeout) do |http| - req = Net::HTTP::Get.new(uri) - - http.request(req) - end - + resp = http_get(URI("#{BASE_URL}#{url}")) raise ResponseError, resp unless resp.is_a?(Net::HTTPSuccess) - resp.body.split("\n").reject(&:blank?).map { |ip| IPAddr.new ip } + parse_ips(resp) end + # Returns a cache entry hash: { ok: Boolean, ips: Array }. + # + # On success the real list is cached for `expires_in` (long). + # On failure the fallback list is cached for `error_expires_in` (short) so a + # blocked/slow upstream does not trigger a fresh outbound request on *every* + # call (each of which could block a request thread up to the configured + # timeouts). Once the short ttl lapses the next call retries the network, so + # a transient outage self-heals without a process restart. def fetch_with_cache(type) - Rails.cache.fetch("cloudflare-rails:#{type}", expires_in: Rails.application.config.cloudflare.expires_in) do - send type + config = Rails.application.config.cloudflare + entry = Rails.cache.fetch(cache_key(type), + expires_in: config.expires_in, + # collapse the thundering herd that would otherwise hit + # cloudflare.com when a cached entry expires under load. + race_condition_ttl: config.race_condition_ttl) do + { ok: true, ips: send(type) } end + + normalize_entry(entry) + rescue StandardError => e + Rails.logger.error "cloudflare-rails: error fetching ip addresses from Cloudflare (#{e}), falling back to defaults" + cache_fallback(type) end def cloudflare_ips(refresh: false) @ips = nil if refresh - @ips ||= (Importer.fetch_with_cache(:ips_v4) + Importer.fetch_with_cache(:ips_v6)).freeze - rescue StandardError => e - Rails.logger.error "cloudflare-rails: error fetching ip addresses from Cloudflare (#{e}), falling back to defaults" - CloudflareRails::FallbackIps::IPS_V4 + CloudflareRails::FallbackIps::IPS_V6 + return @ips if @ips + + v4 = fetch_with_cache(:ips_v4) + v6 = fetch_with_cache(:ips_v6) + ips = (v4[:ips] + v6[:ips]).freeze + + # Only memoize a fully successful fetch. While serving the (negatively + # cached) fallback we deliberately leave @ips unset so the next call + # re-enters fetch_with_cache and can recover once the short error ttl + # lapses — otherwise a single early failure would pin the fallback for the + # entire life of the process. + @ips = ips if v4[:ok] && v6[:ok] + ips + end + + private + + def http_get(uri) + Net::HTTP.start(uri.host, + uri.port, + use_ssl: true, + # without an explicit open_timeout Net::HTTP waits up to its default + # (60s in most rubies) to establish the TCP connection. if egress to + # cloudflare.com is blackholed this blocks a request thread for the + # full duration, so cap it explicitly. + open_timeout: Rails.application.config.cloudflare.open_timeout, + read_timeout: Rails.application.config.cloudflare.timeout) do |http| + http.request(Net::HTTP::Get.new(uri)) + end + end + + def parse_ips(resp) + ips = resp.body.split("\n").reject(&:blank?).map { |ip| IPAddr.new ip } + + # an empty list is never a legitimate response and must not be cached as a + # success — treat it like a failed fetch so we fall back instead. + raise ResponseError, resp if ips.empty? + + ips + end + + def cache_fallback(type) + config = Rails.application.config.cloudflare + fallback = { ok: false, ips: fallback_ips_for(type) } + Rails.cache.write(cache_key(type), fallback, expires_in: config.error_expires_in) + fallback + end + + def cache_key(type) + "cloudflare-rails:#{type}" + end + + def fallback_ips_for(type) + case type + when :ips_v4 then CloudflareRails::FallbackIps::IPS_V4 + when :ips_v6 then CloudflareRails::FallbackIps::IPS_V6 + else [] + end + end + + # Tolerate legacy cache entries written by older versions of this gem, which + # stored a bare Array instead of the { ok:, ips: } hash. This keeps + # things working across a rolling deploy. + def normalize_entry(entry) + return entry if entry.is_a?(Hash) + + { ok: true, ips: Array(entry) } end end end diff --git a/lib/cloudflare_rails/railtie.rb b/lib/cloudflare_rails/railtie.rb index 3f18b17..d059108 100644 --- a/lib/cloudflare_rails/railtie.rb +++ b/lib/cloudflare_rails/railtie.rb @@ -7,7 +7,16 @@ class Railtie < Rails::Railtie # setup defaults before we configure our app. DEFAULTS = { expires_in: 12.hours, - timeout: 5.seconds + timeout: 5.seconds, + # cap the TCP connect time so a blocked/blackholed egress to cloudflare.com + # can't hold a request thread for Net::HTTP's (much larger) default. + open_timeout: 5.seconds, + # how long the fallback list is cached after a failed fetch before we retry + # the network. short, so a transient outage self-heals quickly. + error_expires_in: 1.minute, + # serve the previous value to concurrent callers while one refreshes an + # expired entry, collapsing the thundering herd against cloudflare.com. + race_condition_ttl: 10.seconds }.freeze config.before_configuration do |app| diff --git a/spec/cloudflare/rails_spec.rb b/spec/cloudflare/rails_spec.rb index d0b0afc..e17711a 100644 --- a/spec/cloudflare/rails_spec.rb +++ b/spec/cloudflare/rails_spec.rb @@ -16,6 +16,8 @@ config.load_defaults Rails.gem_version.version.to_f config.eager_load = false config.active_support.deprecation = :stderr + # a real (in-process) store so the caching specs are deterministic. + config.cache_store = :memory_store config.middleware.use Rack::Attack if ENV['RACK_ATTACK'] end end @@ -80,8 +82,11 @@ let(:ips_v4_status) { 404 } let(:ips_v6_status) { 404 } + # each address family is fetched independently now, so a failure is + # logged once per list (v4 + v6) rather than short-circuiting after + # the first one. it "doesn't break, logs the error, and returns the fallback values" do - expect_any_instance_of(Logger).to receive(:error).once.and_call_original + expect_any_instance_of(Logger).to receive(:error).twice.and_call_original rails_app.initialize! expect(subject) .to eq(CloudflareRails::FallbackIps::IPS_V4 + CloudflareRails::FallbackIps::IPS_V6) @@ -93,12 +98,62 @@ let(:ips_v6_body) { "\r\n\r\n\r\n" } it "doesn't break but still logs the error" do - expect_any_instance_of(Logger).to receive(:error).once.and_call_original + expect_any_instance_of(Logger).to receive(:error).twice.and_call_original rails_app.initialize! expect(subject) .to eq(CloudflareRails::FallbackIps::IPS_V4 + CloudflareRails::FallbackIps::IPS_V6) end end + + describe 'caching behaviour' do + before { rails_app.initialize! } + + context 'with a successful fetch' do + it 'memoizes the result and does not hit cloudflare again on the next call' do + CloudflareRails::Importer.cloudflare_ips(refresh: true) + CloudflareRails::Importer.cloudflare_ips + + expect(a_request(:get, 'https://www.cloudflare.com/ips-v4/')).to have_been_made.once + expect(a_request(:get, 'https://www.cloudflare.com/ips-v6/')).to have_been_made.once + end + end + + context 'with a failed fetch' do + let(:ips_v4_status) { 404 } + let(:ips_v6_status) { 404 } + + # the bug this guards against: previously a blocked/failing upstream was + # never cached, so *every* request issued a fresh (and potentially + # blocking) outbound call to cloudflare.com. + it 'caches the fallback and does not hammer cloudflare on subsequent calls' do + CloudflareRails::Importer.cloudflare_ips(refresh: true) + 3.times { CloudflareRails::Importer.cloudflare_ips } + + expect(a_request(:get, 'https://www.cloudflare.com/ips-v4/')).to have_been_made.once + expect(a_request(:get, 'https://www.cloudflare.com/ips-v6/')).to have_been_made.once + end + end + + context 'when a failed fetch later recovers' do + let(:ips_v4_status) { 404 } + let(:ips_v6_status) { 404 } + + before do + CloudflareRails::Importer.cloudflare_ips(refresh: true) # fails -> negatively cached + stub_request(:get, 'https://www.cloudflare.com/ips-v4/').to_return(status: 200, body: ips_v4_body) + stub_request(:get, 'https://www.cloudflare.com/ips-v6/').to_return(status: 200, body: ips_v6_body) + end + + # because the fallback is not memoized, the next call after the short + # error ttl retries the network and picks up the recovered upstream. + it 'retries the network once the error ttl lapses' do + travel(Rails.application.config.cloudflare.error_expires_in + 1.second) do + expect(CloudflareRails::Importer.cloudflare_ips) + .to eq((ips_v4_body + ips_v6_body).split("\n").map { |ip| IPAddr.new ip }) + end + end + end + end end describe 'Rack::Request' do diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 3eb85ca..23cc83f 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -29,4 +29,5 @@ RSpec.configure do |config| config.infer_base_class_for_anonymous_controllers = false + config.include ActiveSupport::Testing::TimeHelpers end