From 670694f1a62efb3b754764b7ef13847dce7dd351 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Tue, 13 Jan 2026 22:36:54 +0100 Subject: [PATCH 01/25] perf: optimize event preloading for DOIs using EventsPreloader service --- .../concerns/preloaded_event_relation.rb | 84 +++++++++++++++++++ app/models/datacite_doi.rb | 11 +-- app/models/doi.rb | 84 +++++++++++++++++++ app/models/other_doi.rb | 11 +-- app/services/events_preloader.rb | 58 +++++++++++++ 5 files changed, 232 insertions(+), 16 deletions(-) create mode 100644 app/models/concerns/preloaded_event_relation.rb create mode 100644 app/services/events_preloader.rb diff --git a/app/models/concerns/preloaded_event_relation.rb b/app/models/concerns/preloaded_event_relation.rb new file mode 100644 index 000000000..b4936fa03 --- /dev/null +++ b/app/models/concerns/preloaded_event_relation.rb @@ -0,0 +1,84 @@ +# frozen_string_literal: true + +# Wrapper class to make preloaded event arrays compatible with ActiveRecord::Relation API +# This allows existing code that calls methods like `pluck`, `map`, `select` to work +# with in-memory arrays without modification. +class PreloadedEventRelation + include Enumerable + + def initialize(events) + @events = Array(events) + end + + # Delegate Enumerable methods to the underlying array + def each(&block) + @events.each(&block) + end + + # Implement pluck to match ActiveRecord::Relation#pluck behavior + # Supports single or multiple column names + def pluck(*column_names) + if column_names.length == 1 + column_name = column_names.first + @events.map { |event| event.public_send(column_name) } + else + @events.map { |event| column_names.map { |col| event.public_send(col) } } + end + end + + # Delegate map to the underlying array + def map(&block) + @events.map(&block) + end + + # Delegate select to the underlying array + def select(&block) + PreloadedEventRelation.new(@events.select(&block)) + end + + # Delegate other common Enumerable methods + def compact + PreloadedEventRelation.new(@events.compact) + end + + def uniq + PreloadedEventRelation.new(@events.uniq) + end + + def sort_by(&block) + PreloadedEventRelation.new(@events.sort_by(&block)) + end + + def group_by(&block) + @events.group_by(&block) + end + + def inject(initial = nil, &block) + @events.inject(initial, &block) + end + + def length + @events.length + end + + def empty? + @events.empty? + end + + def present? + @events.present? + end + + def blank? + @events.blank? + end + + # Allow direct access to the underlying array + def to_a + @events + end + + def to_ary + @events + end +end diff --git a/app/models/datacite_doi.rb b/app/models/datacite_doi.rb index 1e67ec85e..409be4253 100644 --- a/app/models/datacite_doi.rb +++ b/app/models/datacite_doi.rb @@ -146,17 +146,12 @@ def self.import_in_bulk(ids, options = {}) selected_dois = DataciteDoi.where(id: ids, type: "DataciteDoi").includes( :client, :media, - :view_events, - :download_events, - :citation_events, - :reference_events, - :part_events, - :part_of_events, - :version_events, - :version_of_events, :metadata ) selected_dois.find_in_batches(batch_size: batch_size) do |dois| + # Preload all events for this batch in a single query + EventsPreloader.new(dois).preload! + bulk_body = dois.map do |doi| { index: { diff --git a/app/models/doi.rb b/app/models/doi.rb index 3b87366f0..67be4db54 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -2,6 +2,7 @@ require "maremma" require "benchmark" +require_relative "concerns/preloaded_event_relation" class Doi < ApplicationRecord self.ignored_columns += [:publisher] @@ -76,6 +77,7 @@ class Doi < ApplicationRecord alias_attribute :state, :aasm_state attr_accessor :current_user + attr_accessor :preloaded_events attribute :regenerate, :boolean, default: false attribute :only_validate, :boolean, default: false @@ -100,6 +102,88 @@ class Doi < ApplicationRecord # has_many :source_events, class_name: "Event", primary_key: :doi, foreign_key: :source_doi, dependent: :destroy # has_many :target_events, class_name: "Event", primary_key: :doi, foreign_key: :target_doi, dependent: :destroy + # Override association methods to use preloaded_events when available + # Check for !nil instead of present? to handle empty arrays (preloaded but no events) + def view_events + if !preloaded_events.nil? + PreloadedEventRelation.new( + preloaded_events.select { |e| e.target_relation_type_id == "views" } + ) + else + association(:view_events).scope + end + end + + def download_events + if !preloaded_events.nil? + PreloadedEventRelation.new( + preloaded_events.select { |e| e.target_relation_type_id == "downloads" } + ) + else + association(:download_events).scope + end + end + + def reference_events + if !preloaded_events.nil? + PreloadedEventRelation.new( + preloaded_events.select { |e| e.source_relation_type_id == "references" } + ) + else + association(:reference_events).scope + end + end + + def citation_events + if !preloaded_events.nil? + PreloadedEventRelation.new( + preloaded_events.select { |e| e.target_relation_type_id == "citations" } + ) + else + association(:citation_events).scope + end + end + + def part_events + if !preloaded_events.nil? + PreloadedEventRelation.new( + preloaded_events.select { |e| e.source_relation_type_id == "parts" } + ) + else + association(:part_events).scope + end + end + + def part_of_events + if !preloaded_events.nil? + PreloadedEventRelation.new( + preloaded_events.select { |e| e.target_relation_type_id == "part_of" } + ) + else + association(:part_of_events).scope + end + end + + def version_events + if !preloaded_events.nil? + PreloadedEventRelation.new( + preloaded_events.select { |e| e.source_relation_type_id == "versions" } + ) + else + association(:version_events).scope + end + end + + def version_of_events + if !preloaded_events.nil? + PreloadedEventRelation.new( + preloaded_events.select { |e| e.target_relation_type_id == "version_of" } + ) + else + association(:version_of_events).scope + end + end + has_many :references, class_name: "Doi", through: :reference_events, source: :doi_for_target has_many :citations, class_name: "Doi", through: :citation_events, source: :doi_for_source has_many :parts, class_name: "Doi", through: :part_events, source: :doi_for_target diff --git a/app/models/other_doi.rb b/app/models/other_doi.rb index 666ca6a80..71555ae9c 100644 --- a/app/models/other_doi.rb +++ b/app/models/other_doi.rb @@ -150,17 +150,12 @@ def self.import_in_bulk(ids, options = {}) selected_dois = OtherDoi.where(id: ids, type: "OtherDoi").includes( :client, :media, - :view_events, - :download_events, - :citation_events, - :reference_events, - :part_events, - :part_of_events, - :version_events, - :version_of_events, :metadata ) selected_dois.find_in_batches(batch_size: batch_size) do |dois| + # Preload all events for this batch in a single query + EventsPreloader.new(dois).preload! + bulk_body = dois.map do |doi| { index: { diff --git a/app/services/events_preloader.rb b/app/services/events_preloader.rb new file mode 100644 index 000000000..9a7aff209 --- /dev/null +++ b/app/services/events_preloader.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +# Service class to preload events for a batch of DOIs in a single query +# This dramatically reduces database queries from N*M (DOIs * Relationship Types) to 1-2 queries total +class EventsPreloader + # Maximum number of DOIs to query at once to avoid database parameter limits + CHUNK_SIZE = 1000 + + def initialize(dois) + @dois = Array(dois) + @doi_map = {} + @dois.each do |doi| + @doi_map[doi.doi.upcase] = doi + # Initialize preloaded_events array if not already set + doi.preloaded_events ||= [] + end + end + + # Preload all events for the batch of DOIs + def preload! + return if @dois.empty? + + doi_identifiers = @dois.map { |doi| doi.doi.upcase }.uniq + return if doi_identifiers.empty? + + # Fetch events in chunks to avoid database parameter limits + all_events = [] + doi_identifiers.each_slice(CHUNK_SIZE) do |chunk| + events = Event.where( + "source_doi IN (?) OR target_doi IN (?)", + chunk, chunk + ).to_a + all_events.concat(events) + end + + # Group events by DOI and assign to each Doi object + all_events.each do |event| + # Add event to source DOI's preloaded_events if it matches + if event.source_doi.present? + source_doi_obj = @doi_map[event.source_doi.upcase] + source_doi_obj.preloaded_events << event if source_doi_obj + end + + # Add event to target DOI's preloaded_events if it matches + if event.target_doi.present? + target_doi_obj = @doi_map[event.target_doi.upcase] + target_doi_obj.preloaded_events << event if target_doi_obj + end + end + + # Ensure all DOIs have an array (even if empty) + @dois.each do |doi| + doi.preloaded_events ||= [] + end + + self + end +end From d0d6ed31bd9a718c3115ad194c6304d500a8ba19 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Tue, 13 Jan 2026 22:37:28 +0100 Subject: [PATCH 02/25] perf: preload DOI events in show action to optimize serialization --- app/controllers/datacite_dois_controller.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/app/controllers/datacite_dois_controller.rb b/app/controllers/datacite_dois_controller.rb index 625fc0c33..499109a32 100644 --- a/app/controllers/datacite_dois_controller.rb +++ b/app/controllers/datacite_dois_controller.rb @@ -457,6 +457,10 @@ def show fail ActiveRecord::RecordNotFound end + # Preload events if we are going to show details + # This optimizes the serializer which accesses part_events, citation_events, etc. + EventsPreloader.new([doi]).preload! if params[:detail] != false + options = {} options[:include] = @include options[:is_collection] = false From 3ebf15f62dec7c61810c0513f6245f5bc0ac538d Mon Sep 17 00:00:00 2001 From: jrhoads Date: Tue, 13 Jan 2026 22:38:00 +0100 Subject: [PATCH 03/25] test: add unit tests for PreloadedEventRelation and EventsPreloader --- spec/models/preloaded_event_relation_spec.rb | 146 +++++++++++++++++ spec/services/events_preloader_spec.rb | 160 +++++++++++++++++++ 2 files changed, 306 insertions(+) create mode 100644 spec/models/preloaded_event_relation_spec.rb create mode 100644 spec/services/events_preloader_spec.rb diff --git a/spec/models/preloaded_event_relation_spec.rb b/spec/models/preloaded_event_relation_spec.rb new file mode 100644 index 000000000..703c08efb --- /dev/null +++ b/spec/models/preloaded_event_relation_spec.rb @@ -0,0 +1,146 @@ +# frozen_string_literal: true + +require "rails_helper" + +describe PreloadedEventRelation do + let(:event1) { double("Event", id: 1, target_doi: "10.1234/TEST1", source_doi: "10.1234/TEST2", total: 10) } + let(:event2) { double("Event", id: 2, target_doi: "10.1234/TEST2", source_doi: "10.1234/TEST3", total: 20) } + let(:event3) { double("Event", id: 3, target_doi: "10.1234/TEST1", source_doi: nil, total: 30) } + let(:events) { [event1, event2, event3] } + let(:relation) { PreloadedEventRelation.new(events) } + + describe "#pluck" do + it "plucks a single column" do + expect(relation.pluck(:id)).to eq([1, 2, 3]) + expect(relation.pluck(:total)).to eq([10, 20, 30]) + end + + it "plucks multiple columns" do + result = relation.pluck(:id, :total) + expect(result).to eq([[1, 10], [2, 20], [3, 30]]) + end + + it "handles nil values" do + expect(relation.pluck(:source_doi)).to eq(["10.1234/TEST2", "10.1234/TEST3", nil]) + end + end + + describe "#map" do + it "maps over events" do + result = relation.map { |e| e.id * 2 } + expect(result).to eq([2, 4, 6]) + end + end + + describe "#select" do + it "filters events and returns PreloadedEventRelation" do + result = relation.select { |e| e.total > 15 } + expect(result).to be_a(PreloadedEventRelation) + expect(result.to_a).to eq([event2, event3]) + end + end + + describe "#compact" do + let(:events_with_nils) { [event1, nil, event2] } + let(:relation_with_nils) { PreloadedEventRelation.new(events_with_nils) } + + it "removes nil values" do + result = relation_with_nils.compact + expect(result).to be_a(PreloadedEventRelation) + expect(result.to_a).to eq([event1, event2]) + end + end + + describe "#uniq" do + let(:duplicate_events) { [event1, event1, event2] } + let(:relation_with_dups) { PreloadedEventRelation.new(duplicate_events) } + + it "removes duplicate events" do + result = relation_with_dups.uniq + expect(result).to be_a(PreloadedEventRelation) + expect(result.to_a).to eq([event1, event2]) + end + end + + describe "#sort_by" do + it "sorts events" do + result = relation.sort_by(&:total) + expect(result).to be_a(PreloadedEventRelation) + expect(result.to_a.map(&:total)).to eq([10, 20, 30]) + end + end + + describe "#group_by" do + it "groups events" do + result = relation.group_by { |e| e.target_doi } + expect(result.keys).to include("10.1234/TEST1", "10.1234/TEST2") + expect(result["10.1234/TEST1"].length).to eq(2) + end + end + + describe "#inject" do + it "reduces events" do + result = relation.inject(0) { |sum, e| sum + e.total } + expect(result).to eq(60) + end + end + + describe "#length" do + it "returns the number of events" do + expect(relation.length).to eq(3) + end + end + + describe "#empty?" do + it "returns false when events exist" do + expect(relation.empty?).to be false + end + + it "returns true when no events" do + empty_relation = PreloadedEventRelation.new([]) + expect(empty_relation.empty?).to be true + end + end + + describe "#present?" do + it "returns true when events exist" do + expect(relation.present?).to be true + end + + it "returns false when no events" do + empty_relation = PreloadedEventRelation.new([]) + expect(empty_relation.present?).to be false + end + end + + describe "#blank?" do + it "returns false when events exist" do + expect(relation.blank?).to be false + end + + it "returns true when no events" do + empty_relation = PreloadedEventRelation.new([]) + expect(empty_relation.blank?).to be true + end + end + + describe "#to_a" do + it "returns the underlying array" do + expect(relation.to_a).to eq(events) + end + end + + describe "Enumerable methods" do + it "implements each" do + collected = [] + relation.each { |e| collected << e.id } + expect(collected).to eq([1, 2, 3]) + end + + it "works with Enumerable methods" do + expect(relation.first).to eq(event1) + expect(relation.last).to eq(event3) + expect(relation.count).to eq(3) + end + end +end diff --git a/spec/services/events_preloader_spec.rb b/spec/services/events_preloader_spec.rb new file mode 100644 index 000000000..ef5ede735 --- /dev/null +++ b/spec/services/events_preloader_spec.rb @@ -0,0 +1,160 @@ +# frozen_string_literal: true + +require "rails_helper" + +describe EventsPreloader do + let(:client) { create(:client) } + let(:doi1) { create(:doi, client: client, doi: "10.1234/TEST1", aasm_state: "findable") } + let(:doi2) { create(:doi, client: client, doi: "10.1234/TEST2", aasm_state: "findable") } + let(:doi3) { create(:doi, client: client, doi: "10.1234/TEST3", aasm_state: "findable") } + + describe "#initialize" do + it "initializes preloaded_events for each DOI" do + dois = [doi1, doi2] + preloader = EventsPreloader.new(dois) + + expect(doi1.preloaded_events).to eq([]) + expect(doi2.preloaded_events).to eq([]) + end + + it "handles empty array" do + preloader = EventsPreloader.new([]) + expect { preloader.preload! }.not_to raise_error + end + end + + describe "#preload!" do + context "with source and target events" do + let!(:reference_event) do + create(:event_for_crossref, { + subj_id: "https://doi.org/#{doi1.doi}", + obj_id: "https://doi.org/#{doi2.doi}", + relation_type_id: "references", + }) + end + let!(:citation_event) do + create(:event_for_datacite_crossref, { + subj_id: "https://doi.org/#{doi3.doi}", + obj_id: "https://doi.org/#{doi1.doi}", + relation_type_id: "is-referenced-by", + }) + end + let!(:part_event) do + create(:event_for_datacite_parts, { + subj_id: "https://doi.org/#{doi1.doi}", + obj_id: "https://doi.org/#{doi3.doi}", + relation_type_id: "has-part", + }) + end + + it "loads all events for the DOIs" do + dois = [doi1, doi2, doi3] + EventsPreloader.new(dois).preload! + + # doi1 should have reference_event (as source) and citation_event (as target) + expect(doi1.preloaded_events.length).to eq(2) + expect(doi1.preloaded_events).to include(reference_event, citation_event) + + # doi2 should have reference_event (as target) + expect(doi2.preloaded_events.length).to eq(1) + expect(doi2.preloaded_events).to include(reference_event) + + # doi3 should have citation_event (as source) and part_event (as target) + expect(doi3.preloaded_events.length).to eq(2) + expect(doi3.preloaded_events).to include(citation_event, part_event) + end + + it "makes only one database query" do + dois = [doi1, doi2, doi3] + preloader = EventsPreloader.new(dois) + + expect { + preloader.preload! + }.not_to exceed_query_limit(1) + end + end + + context "with no events" do + it "sets empty arrays for all DOIs" do + dois = [doi1, doi2] + EventsPreloader.new(dois).preload! + + expect(doi1.preloaded_events).to eq([]) + expect(doi2.preloaded_events).to eq([]) + end + end + + context "with large batch" do + it "chunks large DOI lists" do + # Create more than CHUNK_SIZE DOIs + large_batch = create_list(:doi, EventsPreloader::CHUNK_SIZE + 100, client: client, aasm_state: "findable") + + # Create events for some of them + create(:event_for_crossref, { + subj_id: "https://doi.org/#{large_batch.first.doi}", + obj_id: "https://doi.org/#{large_batch.last.doi}", + relation_type_id: "references", + }) + + expect { + EventsPreloader.new(large_batch).preload! + }.not_to raise_error + + expect(large_batch.first.preloaded_events).not_to be_nil + end + end + + context "with case-insensitive DOI matching" do + let!(:event) do + create(:event_for_crossref, { + subj_id: "https://doi.org/#{doi1.doi.downcase}", + obj_id: "https://doi.org/#{doi2.doi.downcase}", + relation_type_id: "references", + }) + end + + it "matches DOIs regardless of case" do + dois = [doi1, doi2] + EventsPreloader.new(dois).preload! + + expect(doi1.preloaded_events).to include(event) + expect(doi2.preloaded_events).to include(event) + end + end + end + + describe "integration with Doi model" do + let!(:reference_event) do + create(:event_for_crossref, { + subj_id: "https://doi.org/#{doi1.doi}", + obj_id: "https://doi.org/#{doi2.doi}", + relation_type_id: "references", + }) + end + let!(:citation_event) do + create(:event_for_datacite_crossref, { + subj_id: "https://doi.org/#{doi3.doi}", + obj_id: "https://doi.org/#{doi1.doi}", + relation_type_id: "is-referenced-by", + }) + end + + it "allows Doi methods to use preloaded events" do + dois = [doi1, doi2, doi3] + EventsPreloader.new(dois).preload! + + # These should use preloaded_events instead of querying the database + expect(doi1.reference_events.to_a).to include(reference_event) + expect(doi1.citation_events.to_a).to include(citation_event) + expect(doi1.reference_count).to eq(1) + expect(doi1.citation_count).to eq(1) + end + + it "maintains backward compatibility when preloaded_events is nil" do + # When preloaded_events is nil, should fall back to database queries + expect(doi1.preloaded_events).to be_nil + expect(doi1.reference_events.to_a).to include(reference_event) + expect(doi1.citation_events.to_a).to include(citation_event) + end + end +end From ef1dd09e7886fd0f6e50276cc5e004778ee6322a Mon Sep 17 00:00:00 2001 From: jrhoads Date: Tue, 13 Jan 2026 22:38:25 +0100 Subject: [PATCH 04/25] test: enhance N+1 testing and event preloading coverage in Doi model specs --- spec/models/doi_related_spec.rb | 99 +++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 4 deletions(-) diff --git a/spec/models/doi_related_spec.rb b/spec/models/doi_related_spec.rb index 0e59658c4..0f5866fff 100644 --- a/spec/models/doi_related_spec.rb +++ b/spec/models/doi_related_spec.rb @@ -23,15 +23,43 @@ describe "N+1 safety" do describe ".import_in_bulk" do - let(:ids) { [1, 2, 3] } + let(:client) { create(:client) } + let!(:doi1) { create(:doi, client: client, type: "DataciteDoi", aasm_state: "findable") } + let!(:doi2) { create(:doi, client: client, type: "DataciteDoi", aasm_state: "findable") } + let!(:doi3) { create(:doi, client: client, type: "DataciteDoi", aasm_state: "findable") } + let(:ids) { [doi1.id, doi2.id, doi3.id] } - it "should make only one db call" do + it "should use EventsPreloader to reduce queries" do allow(DataciteDoi).to receive(:upload_to_elasticsearch) - # Test the maximum number of queries made by the method + # With EventsPreloader, we should make minimal queries + # 1 query for DOIs, 1 query for events (via EventsPreloader) expect { DataciteDoi.import_in_bulk(ids) - }.not_to exceed_query_limit(1) + }.not_to exceed_query_limit(5) # Allow some overhead for associations + end + + it "should preload events for all DOIs in batch" do + # Create events for the DOIs + create(:event_for_crossref, { + subj_id: "https://doi.org/#{doi1.doi}", + obj_id: "https://doi.org/#{doi2.doi}", + relation_type_id: "references", + }) + create(:event_for_datacite_crossref, { + subj_id: "https://doi.org/#{doi3.doi}", + obj_id: "https://doi.org/#{doi1.doi}", + relation_type_id: "is-referenced-by", + }) + + allow(DataciteDoi).to receive(:upload_to_elasticsearch) + + DataciteDoi.import_in_bulk(ids) + + # Verify that events were preloaded (check via a fresh query) + fresh_doi1 = DataciteDoi.find(doi1.id) + expect(fresh_doi1.reference_events.count).to eq(1) + expect(fresh_doi1.citation_events.count).to eq(1) end end @@ -308,4 +336,67 @@ expect(doi.other_relation_count).to eq(3) end end + + describe "backward compatibility with preloaded_events" do + let(:client) { create(:client) } + let(:doi) { create(:doi, client: client, aasm_state: "findable") } + let(:target_doi) { create(:doi, client: client, aasm_state: "findable") } + let!(:reference_event) do + create(:event_for_crossref, { + subj_id: "https://doi.org/#{doi.doi}", + obj_id: "https://doi.org/#{target_doi.doi}", + relation_type_id: "references", + }) + end + let!(:citation_event) do + create(:event_for_datacite_crossref, { + subj_id: "https://doi.org/#{target_doi.doi}", + obj_id: "https://doi.org/#{doi.doi}", + relation_type_id: "is-referenced-by", + }) + end + + it "works the same when preloaded_events is nil (fallback to database)" do + expect(doi.preloaded_events).to be_nil + expect(doi.reference_events.count).to eq(1) + expect(doi.citation_events.count).to eq(1) + expect(doi.reference_count).to eq(1) + expect(doi.citation_count).to eq(1) + end + + it "works the same when preloaded_events is set (uses in-memory data)" do + EventsPreloader.new([doi]).preload! + + expect(doi.preloaded_events).not_to be_nil + expect(doi.reference_events.count).to eq(1) + expect(doi.citation_events.count).to eq(1) + expect(doi.reference_count).to eq(1) + expect(doi.citation_count).to eq(1) + expect(doi.reference_ids).to include(target_doi.doi.downcase) + end + + it "returns same results whether preloaded or not" do + # Get results without preloading + reference_ids_without_preload = doi.reference_ids + citation_ids_without_preload = doi.citation_ids + reference_count_without_preload = doi.reference_count + citation_count_without_preload = doi.citation_count + + # Reload and preload + doi.reload + EventsPreloader.new([doi]).preload! + + # Get results with preloading + reference_ids_with_preload = doi.reference_ids + citation_ids_with_preload = doi.citation_ids + reference_count_with_preload = doi.reference_count + citation_count_with_preload = doi.citation_count + + # Should be the same + expect(reference_ids_with_preload).to eq(reference_ids_without_preload) + expect(citation_ids_with_preload).to eq(citation_ids_without_preload) + expect(reference_count_with_preload).to eq(reference_count_without_preload) + expect(citation_count_with_preload).to eq(citation_count_without_preload) + end + end end From f98c9395575b865949d97cf3b324270f84aed090 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Tue, 13 Jan 2026 22:40:24 +0100 Subject: [PATCH 05/25] Appease Rubocop --- app/models/datacite_doi.rb | 2 +- app/models/other_doi.rb | 2 +- spec/services/events_preloader_spec.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/models/datacite_doi.rb b/app/models/datacite_doi.rb index 409be4253..81ed784f2 100644 --- a/app/models/datacite_doi.rb +++ b/app/models/datacite_doi.rb @@ -151,7 +151,7 @@ def self.import_in_bulk(ids, options = {}) selected_dois.find_in_batches(batch_size: batch_size) do |dois| # Preload all events for this batch in a single query EventsPreloader.new(dois).preload! - + bulk_body = dois.map do |doi| { index: { diff --git a/app/models/other_doi.rb b/app/models/other_doi.rb index 71555ae9c..c2cca25df 100644 --- a/app/models/other_doi.rb +++ b/app/models/other_doi.rb @@ -155,7 +155,7 @@ def self.import_in_bulk(ids, options = {}) selected_dois.find_in_batches(batch_size: batch_size) do |dois| # Preload all events for this batch in a single query EventsPreloader.new(dois).preload! - + bulk_body = dois.map do |doi| { index: { diff --git a/spec/services/events_preloader_spec.rb b/spec/services/events_preloader_spec.rb index ef5ede735..eb97d5e4e 100644 --- a/spec/services/events_preloader_spec.rb +++ b/spec/services/events_preloader_spec.rb @@ -11,7 +11,7 @@ describe "#initialize" do it "initializes preloaded_events for each DOI" do dois = [doi1, doi2] - preloader = EventsPreloader.new(dois) + EventsPreloader.new(dois) expect(doi1.preloaded_events).to eq([]) expect(doi2.preloaded_events).to eq([]) From b74d788eed25ce57f37310c4a229fd11af3c7773 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Wed, 14 Jan 2026 09:29:35 +0100 Subject: [PATCH 06/25] fix: filter preloaded events by DOI and update event tests --- app/models/doi.rb | 17 ++++++------- spec/models/doi_related_spec.rb | 33 +++++++++++++------------- spec/services/events_preloader_spec.rb | 18 ++++++++------ 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/app/models/doi.rb b/app/models/doi.rb index 67be4db54..280ac571e 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -104,10 +104,11 @@ class Doi < ApplicationRecord # Override association methods to use preloaded_events when available # Check for !nil instead of present? to handle empty arrays (preloaded but no events) + # Also filter by source_doi/target_doi to match association behavior def view_events if !preloaded_events.nil? PreloadedEventRelation.new( - preloaded_events.select { |e| e.target_relation_type_id == "views" } + preloaded_events.select { |e| e.target_relation_type_id == "views" && e.target_doi&.upcase == doi.upcase } ) else association(:view_events).scope @@ -117,7 +118,7 @@ def view_events def download_events if !preloaded_events.nil? PreloadedEventRelation.new( - preloaded_events.select { |e| e.target_relation_type_id == "downloads" } + preloaded_events.select { |e| e.target_relation_type_id == "downloads" && e.target_doi&.upcase == doi.upcase } ) else association(:download_events).scope @@ -127,7 +128,7 @@ def download_events def reference_events if !preloaded_events.nil? PreloadedEventRelation.new( - preloaded_events.select { |e| e.source_relation_type_id == "references" } + preloaded_events.select { |e| e.source_relation_type_id == "references" && e.source_doi&.upcase == doi.upcase } ) else association(:reference_events).scope @@ -137,7 +138,7 @@ def reference_events def citation_events if !preloaded_events.nil? PreloadedEventRelation.new( - preloaded_events.select { |e| e.target_relation_type_id == "citations" } + preloaded_events.select { |e| e.target_relation_type_id == "citations" && e.target_doi&.upcase == doi.upcase } ) else association(:citation_events).scope @@ -147,7 +148,7 @@ def citation_events def part_events if !preloaded_events.nil? PreloadedEventRelation.new( - preloaded_events.select { |e| e.source_relation_type_id == "parts" } + preloaded_events.select { |e| e.source_relation_type_id == "parts" && e.source_doi&.upcase == doi.upcase } ) else association(:part_events).scope @@ -157,7 +158,7 @@ def part_events def part_of_events if !preloaded_events.nil? PreloadedEventRelation.new( - preloaded_events.select { |e| e.target_relation_type_id == "part_of" } + preloaded_events.select { |e| e.target_relation_type_id == "part_of" && e.target_doi&.upcase == doi.upcase } ) else association(:part_of_events).scope @@ -167,7 +168,7 @@ def part_of_events def version_events if !preloaded_events.nil? PreloadedEventRelation.new( - preloaded_events.select { |e| e.source_relation_type_id == "versions" } + preloaded_events.select { |e| e.source_relation_type_id == "versions" && e.source_doi&.upcase == doi.upcase } ) else association(:version_events).scope @@ -177,7 +178,7 @@ def version_events def version_of_events if !preloaded_events.nil? PreloadedEventRelation.new( - preloaded_events.select { |e| e.target_relation_type_id == "version_of" } + preloaded_events.select { |e| e.target_relation_type_id == "version_of" && e.target_doi&.upcase == doi.upcase } ) else association(:version_of_events).scope diff --git a/spec/models/doi_related_spec.rb b/spec/models/doi_related_spec.rb index 0f5866fff..2ab636cf9 100644 --- a/spec/models/doi_related_spec.rb +++ b/spec/models/doi_related_spec.rb @@ -33,22 +33,24 @@ allow(DataciteDoi).to receive(:upload_to_elasticsearch) # With EventsPreloader, we should make minimal queries - # 1 query for DOIs, 1 query for events (via EventsPreloader) + # 1 query for DOIs, 1 query for events (via EventsPreloader), plus associations expect { DataciteDoi.import_in_bulk(ids) - }.not_to exceed_query_limit(5) # Allow some overhead for associations + }.not_to exceed_query_limit(6) # Allow some overhead for associations (client, media, metadata, allocator) end it "should preload events for all DOIs in batch" do # Create events for the DOIs - create(:event_for_crossref, { + reference_event = create(:event_for_crossref, { subj_id: "https://doi.org/#{doi1.doi}", obj_id: "https://doi.org/#{doi2.doi}", relation_type_id: "references", }) - create(:event_for_datacite_crossref, { - subj_id: "https://doi.org/#{doi3.doi}", - obj_id: "https://doi.org/#{doi1.doi}", + # For citation_events, the DOI must be the target (target_doi) + # For "is-referenced-by", target_doi = subj_id, so doi1 needs to be subj_id + citation_event = create(:event_for_datacite_crossref, { + subj_id: "https://doi.org/#{doi1.doi}", + obj_id: "https://doi.org/#{doi3.doi}", relation_type_id: "is-referenced-by", }) @@ -72,18 +74,14 @@ dois = DataciteDoi.where(id: doi.id).includes( :client, :media, - :view_events, - :download_events, - :citation_events, - :reference_events, - :part_events, - :part_of_events, - :version_events, - :version_of_events, :metadata ) + # Preload events to avoid N+1 queries + EventsPreloader.new(dois.to_a).preload! + # Test the maximum number of queries made by the method + # With EventsPreloader, we should have fewer queries expect { dois.first.as_indexed_json }.not_to exceed_query_limit(13) @@ -341,6 +339,7 @@ let(:client) { create(:client) } let(:doi) { create(:doi, client: client, aasm_state: "findable") } let(:target_doi) { create(:doi, client: client, aasm_state: "findable") } + let(:source_doi) { create(:doi, client: client, aasm_state: "findable") } let!(:reference_event) do create(:event_for_crossref, { subj_id: "https://doi.org/#{doi.doi}", @@ -348,10 +347,12 @@ relation_type_id: "references", }) end + # For citation_events, the DOI must be the target (target_doi) + # For "is-referenced-by", target_doi = subj_id, so doi needs to be subj_id let!(:citation_event) do create(:event_for_datacite_crossref, { - subj_id: "https://doi.org/#{target_doi.doi}", - obj_id: "https://doi.org/#{doi.doi}", + subj_id: "https://doi.org/#{doi.doi}", + obj_id: "https://doi.org/#{source_doi.doi}", relation_type_id: "is-referenced-by", }) end diff --git a/spec/services/events_preloader_spec.rb b/spec/services/events_preloader_spec.rb index eb97d5e4e..59cf600e5 100644 --- a/spec/services/events_preloader_spec.rb +++ b/spec/services/events_preloader_spec.rb @@ -32,10 +32,12 @@ relation_type_id: "references", }) end + # For citation_events, the DOI must be the target (target_doi) + # For "is-referenced-by", target_doi = subj_id, so doi1 needs to be subj_id let!(:citation_event) do create(:event_for_datacite_crossref, { - subj_id: "https://doi.org/#{doi3.doi}", - obj_id: "https://doi.org/#{doi1.doi}", + subj_id: "https://doi.org/#{doi1.doi}", + obj_id: "https://doi.org/#{doi3.doi}", relation_type_id: "is-referenced-by", }) end @@ -51,9 +53,9 @@ dois = [doi1, doi2, doi3] EventsPreloader.new(dois).preload! - # doi1 should have reference_event (as source) and citation_event (as target) - expect(doi1.preloaded_events.length).to eq(2) - expect(doi1.preloaded_events).to include(reference_event, citation_event) + # doi1 should have reference_event (as source), citation_event (as target), and part_event (as source) + expect(doi1.preloaded_events.length).to eq(3) + expect(doi1.preloaded_events).to include(reference_event, citation_event, part_event) # doi2 should have reference_event (as target) expect(doi2.preloaded_events.length).to eq(1) @@ -131,10 +133,12 @@ relation_type_id: "references", }) end + # For citation_events, the DOI must be the target (target_doi) + # For "is-referenced-by", target_doi = subj_id, so doi1 needs to be subj_id let!(:citation_event) do create(:event_for_datacite_crossref, { - subj_id: "https://doi.org/#{doi3.doi}", - obj_id: "https://doi.org/#{doi1.doi}", + subj_id: "https://doi.org/#{doi1.doi}", + obj_id: "https://doi.org/#{doi3.doi}", relation_type_id: "is-referenced-by", }) end From 25eebba813d4cb183312e6b4b45fe1c19a6186d9 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Wed, 14 Jan 2026 09:33:11 +0100 Subject: [PATCH 07/25] Appease Rubocop --- spec/models/doi_related_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/models/doi_related_spec.rb b/spec/models/doi_related_spec.rb index 2ab636cf9..fdaa31e38 100644 --- a/spec/models/doi_related_spec.rb +++ b/spec/models/doi_related_spec.rb @@ -41,14 +41,14 @@ it "should preload events for all DOIs in batch" do # Create events for the DOIs - reference_event = create(:event_for_crossref, { + create(:event_for_crossref, { subj_id: "https://doi.org/#{doi1.doi}", obj_id: "https://doi.org/#{doi2.doi}", relation_type_id: "references", }) # For citation_events, the DOI must be the target (target_doi) # For "is-referenced-by", target_doi = subj_id, so doi1 needs to be subj_id - citation_event = create(:event_for_datacite_crossref, { + create(:event_for_datacite_crossref, { subj_id: "https://doi.org/#{doi1.doi}", obj_id: "https://doi.org/#{doi3.doi}", relation_type_id: "is-referenced-by", From 24a6019147851cd57a60db950fd083ffa7505390 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Wed, 14 Jan 2026 09:46:02 +0100 Subject: [PATCH 08/25] refactor: delegate first, last, and count methods to events array --- app/models/concerns/preloaded_event_relation.rb | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/app/models/concerns/preloaded_event_relation.rb b/app/models/concerns/preloaded_event_relation.rb index b4936fa03..d57068166 100644 --- a/app/models/concerns/preloaded_event_relation.rb +++ b/app/models/concerns/preloaded_event_relation.rb @@ -15,6 +15,19 @@ def each(&block) @events.each(&block) end + # Delegate common Enumerable methods explicitly + def first + @events.first + end + + def last + @events.last + end + + def count + @events.count + end + # Implement pluck to match ActiveRecord::Relation#pluck behavior # Supports single or multiple column names def pluck(*column_names) From 89882065fdb8a6ee244cb8c1eb6e8b9cd67e401e Mon Sep 17 00:00:00 2001 From: jrhoads Date: Wed, 14 Jan 2026 15:02:24 +0100 Subject: [PATCH 09/25] feat: add provider_id filtering to ReferenceRepository and update tests --- app/models/reference_repository.rb | 5 +++++ spec/requests/repositories_spec.rb | 2 ++ 2 files changed, 7 insertions(+) diff --git a/app/models/reference_repository.rb b/app/models/reference_repository.rb index 089100fbc..85c91181c 100644 --- a/app/models/reference_repository.rb +++ b/app/models/reference_repository.rb @@ -355,6 +355,11 @@ def filter(options) "repository_type": options[:repository_type].split(",") } } end + if options[:provider_id].present? + retval << { term: { + "provider_id": { value: options[:provider_id], case_insensitive: true } + } } + end if options[:is_open] == "true" retval << { term: { "data_access.type": "open" diff --git a/spec/requests/repositories_spec.rb b/spec/requests/repositories_spec.rb index cd5037bf5..2feb7e1f2 100644 --- a/spec/requests/repositories_spec.rb +++ b/spec/requests/repositories_spec.rb @@ -16,6 +16,8 @@ def reset_indices clear_index(DataciteDoi) clear_index(Client) clear_index(Provider) + clear_index(ReferenceRepository) + import_index(ReferenceRepository) import_index(Provider) import_index(Client) import_index(DataciteDoi) From df399da8e2f096c5651b2c6d533374b7a0f25439 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Thu, 26 Feb 2026 12:12:32 +0100 Subject: [PATCH 10/25] refactor: update PreloadedEventRelation#inject to accept variable arguments --- app/models/concerns/preloaded_event_relation.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/models/concerns/preloaded_event_relation.rb b/app/models/concerns/preloaded_event_relation.rb index d57068166..2c41d28ac 100644 --- a/app/models/concerns/preloaded_event_relation.rb +++ b/app/models/concerns/preloaded_event_relation.rb @@ -66,8 +66,8 @@ def group_by(&block) @events.group_by(&block) end - def inject(initial = nil, &block) - @events.inject(initial, &block) + def inject(*args, &block) + @events.inject(*args, &block) end def length From 6b36137cb56708f635cedaa13c7c86fd737cb092 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Thu, 26 Feb 2026 12:13:01 +0100 Subject: [PATCH 11/25] test: refactor and expand test structure for EventsPreloader specs --- spec/services/events_preloader_spec.rb | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/spec/services/events_preloader_spec.rb b/spec/services/events_preloader_spec.rb index 59cf600e5..1e12106e7 100644 --- a/spec/services/events_preloader_spec.rb +++ b/spec/services/events_preloader_spec.rb @@ -17,12 +17,27 @@ expect(doi2.preloaded_events).to eq([]) end - it "handles empty array" do - preloader = EventsPreloader.new([]) - expect { preloader.preload! }.not_to raise_error + describe "#initialize" do + it "initializes preloaded_events for each DOI" do + dois = [doi1, doi2] + EventsPreloader.new(dois) + + expect(doi1.preloaded_events).to eq([]) + expect(doi2.preloaded_events).to eq([]) end end + describe "#preload!" do + context "with empty array" do + it "does not raise an error" do + preloader = EventsPreloader.new([]) + expect { preloader.preload! }.not_to raise_error + end + end + + context "with source and target events" do + end + describe "#preload!" do context "with source and target events" do let!(:reference_event) do From c280e5db5115cc4a29671add501192134fc710d5 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Thu, 26 Feb 2026 14:25:38 +0100 Subject: [PATCH 12/25] test: remove redundant code blocks and empty context in EventsPreloader spec --- spec/services/events_preloader_spec.rb | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/spec/services/events_preloader_spec.rb b/spec/services/events_preloader_spec.rb index 1e12106e7..d92e1d262 100644 --- a/spec/services/events_preloader_spec.rb +++ b/spec/services/events_preloader_spec.rb @@ -8,15 +8,6 @@ let(:doi2) { create(:doi, client: client, doi: "10.1234/TEST2", aasm_state: "findable") } let(:doi3) { create(:doi, client: client, doi: "10.1234/TEST3", aasm_state: "findable") } - describe "#initialize" do - it "initializes preloaded_events for each DOI" do - dois = [doi1, doi2] - EventsPreloader.new(dois) - - expect(doi1.preloaded_events).to eq([]) - expect(doi2.preloaded_events).to eq([]) - end - describe "#initialize" do it "initializes preloaded_events for each DOI" do dois = [doi1, doi2] @@ -35,10 +26,6 @@ end end - context "with source and target events" do - end - - describe "#preload!" do context "with source and target events" do let!(:reference_event) do create(:event_for_crossref, { From aa5a5e80966edc4ea1ae7865c1a558744bed87d7 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Fri, 13 Mar 2026 14:08:12 +0100 Subject: [PATCH 13/25] refactor: move and simplify EventsPreloader logic in show action --- app/controllers/datacite_dois_controller.rb | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/app/controllers/datacite_dois_controller.rb b/app/controllers/datacite_dois_controller.rb index 499109a32..b2a61aa90 100644 --- a/app/controllers/datacite_dois_controller.rb +++ b/app/controllers/datacite_dois_controller.rb @@ -457,10 +457,6 @@ def show fail ActiveRecord::RecordNotFound end - # Preload events if we are going to show details - # This optimizes the serializer which accesses part_events, citation_events, etc. - EventsPreloader.new([doi]).preload! if params[:detail] != false - options = {} options[:include] = @include options[:is_collection] = false @@ -472,6 +468,9 @@ def show publisher: params[:publisher], include_other_registration_agencies: params[:include_other_registration_agencies], } + # Preload events since we are going to show details + # This optimizes the serializer which accesses part_events, citation_events, etc. + EventsPreloader.new([doi]).preload! if show_enriched return handle_show_enriched_doi(doi, options) From 3f4c605f8899226c5181afb9bffc6b9d4fd6e7db Mon Sep 17 00:00:00 2001 From: jrhoads Date: Fri, 13 Mar 2026 14:08:54 +0100 Subject: [PATCH 14/25] fix: handle blank DOIs, order events by date, and prevent duplicate associations --- app/services/events_preloader.rb | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/app/services/events_preloader.rb b/app/services/events_preloader.rb index 9a7aff209..ec9c1c733 100644 --- a/app/services/events_preloader.rb +++ b/app/services/events_preloader.rb @@ -10,6 +10,7 @@ def initialize(dois) @dois = Array(dois) @doi_map = {} @dois.each do |doi| + next if doi.doi.blank? @doi_map[doi.doi.upcase] = doi # Initialize preloaded_events array if not already set doi.preloaded_events ||= [] @@ -20,7 +21,7 @@ def initialize(dois) def preload! return if @dois.empty? - doi_identifiers = @dois.map { |doi| doi.doi.upcase }.uniq + doi_identifiers = @dois.filter_map{ |doi| doi.doi&.upcase }.uniq return if doi_identifiers.empty? # Fetch events in chunks to avoid database parameter limits @@ -29,7 +30,7 @@ def preload! events = Event.where( "source_doi IN (?) OR target_doi IN (?)", chunk, chunk - ).to_a + ).order(updated_at: :desc).to_a all_events.concat(events) end @@ -44,7 +45,9 @@ def preload! # Add event to target DOI's preloaded_events if it matches if event.target_doi.present? target_doi_obj = @doi_map[event.target_doi.upcase] - target_doi_obj.preloaded_events << event if target_doi_obj + if target_doi_obj && target_doi_obj != source_doi_obj + target_doi_obj.preloaded_events << event + end end end From 0fdba2e4b5e5bcd5ad61ec8f1d7e5b5a3cd7df13 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Fri, 13 Mar 2026 14:25:21 +0100 Subject: [PATCH 15/25] refactor: extract filtered_preloaded_events helper to dry up event methods --- app/models/doi.rb | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/app/models/doi.rb b/app/models/doi.rb index 280ac571e..f41783982 100644 --- a/app/models/doi.rb +++ b/app/models/doi.rb @@ -107,9 +107,7 @@ class Doi < ApplicationRecord # Also filter by source_doi/target_doi to match association behavior def view_events if !preloaded_events.nil? - PreloadedEventRelation.new( - preloaded_events.select { |e| e.target_relation_type_id == "views" && e.target_doi&.upcase == doi.upcase } - ) + filtered_preloaded_events(:target_relation_type_id, "views", :target_doi) else association(:view_events).scope end @@ -117,9 +115,7 @@ def view_events def download_events if !preloaded_events.nil? - PreloadedEventRelation.new( - preloaded_events.select { |e| e.target_relation_type_id == "downloads" && e.target_doi&.upcase == doi.upcase } - ) + filtered_preloaded_events(:target_relation_type_id, "downloads", :target_doi) else association(:download_events).scope end @@ -127,9 +123,7 @@ def download_events def reference_events if !preloaded_events.nil? - PreloadedEventRelation.new( - preloaded_events.select { |e| e.source_relation_type_id == "references" && e.source_doi&.upcase == doi.upcase } - ) + filtered_preloaded_events(:source_relation_type_id, "references", :source_doi) else association(:reference_events).scope end @@ -137,9 +131,7 @@ def reference_events def citation_events if !preloaded_events.nil? - PreloadedEventRelation.new( - preloaded_events.select { |e| e.target_relation_type_id == "citations" && e.target_doi&.upcase == doi.upcase } - ) + filtered_preloaded_events(:target_relation_type_id, "citations", :target_doi) else association(:citation_events).scope end @@ -147,9 +139,7 @@ def citation_events def part_events if !preloaded_events.nil? - PreloadedEventRelation.new( - preloaded_events.select { |e| e.source_relation_type_id == "parts" && e.source_doi&.upcase == doi.upcase } - ) + filtered_preloaded_events(:source_relation_type_id, "parts", :source_doi) else association(:part_events).scope end @@ -157,9 +147,7 @@ def part_events def part_of_events if !preloaded_events.nil? - PreloadedEventRelation.new( - preloaded_events.select { |e| e.target_relation_type_id == "part_of" && e.target_doi&.upcase == doi.upcase } - ) + filtered_preloaded_events(:target_relation_type_id, "part_of", :target_doi) else association(:part_of_events).scope end @@ -167,9 +155,7 @@ def part_of_events def version_events if !preloaded_events.nil? - PreloadedEventRelation.new( - preloaded_events.select { |e| e.source_relation_type_id == "versions" && e.source_doi&.upcase == doi.upcase } - ) + filtered_preloaded_events(:source_relation_type_id, "versions", :source_doi) else association(:version_events).scope end @@ -177,9 +163,7 @@ def version_events def version_of_events if !preloaded_events.nil? - PreloadedEventRelation.new( - preloaded_events.select { |e| e.target_relation_type_id == "version_of" && e.target_doi&.upcase == doi.upcase } - ) + filtered_preloaded_events(:target_relation_type_id, "version_of", :target_doi) else association(:version_of_events).scope end @@ -2912,6 +2896,15 @@ def handle_resource_type(types) end private + def filtered_preloaded_events(relation_type_key, relation_type_value, doi_key) + PreloadedEventRelation.new( + preloaded_events.select do |e| + e.public_send(relation_type_key) == relation_type_value && + e.public_send(doi_key)&.upcase == doi.upcase + end + ) + end + def update_publisher_from_hash symbolized_publisher_hash = publisher_before_type_cast.symbolize_keys if !symbolized_publisher_hash.values.all?(nil) From 17e3ccf1a98f0115dbcdde900c4020d4a7f59337 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Fri, 13 Mar 2026 14:33:20 +0100 Subject: [PATCH 16/25] Appease rubocop --- app/services/events_preloader.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/events_preloader.rb b/app/services/events_preloader.rb index ec9c1c733..b16f3c15b 100644 --- a/app/services/events_preloader.rb +++ b/app/services/events_preloader.rb @@ -21,7 +21,7 @@ def initialize(dois) def preload! return if @dois.empty? - doi_identifiers = @dois.filter_map{ |doi| doi.doi&.upcase }.uniq + doi_identifiers = @dois.filter_map { |doi| doi.doi&.upcase }.uniq return if doi_identifiers.empty? # Fetch events in chunks to avoid database parameter limits From 7890125b540e1c9ff2bcfbea3257ca6067141d7e Mon Sep 17 00:00:00 2001 From: jrhoads Date: Fri, 13 Mar 2026 15:40:14 +0100 Subject: [PATCH 17/25] fix: ensure unique events in EventsPreloader after concatenation --- app/services/events_preloader.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/services/events_preloader.rb b/app/services/events_preloader.rb index b16f3c15b..313f8b723 100644 --- a/app/services/events_preloader.rb +++ b/app/services/events_preloader.rb @@ -33,6 +33,7 @@ def preload! ).order(updated_at: :desc).to_a all_events.concat(events) end + all_events.uniq! # Group events by DOI and assign to each Doi object all_events.each do |event| From 19049fd8b2d32984471b9064744c308315c31740 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Fri, 13 Mar 2026 15:55:00 +0100 Subject: [PATCH 18/25] test: update DOI preloading assertions to verify against uploaded batch in import_in_bulk spec --- spec/models/doi_related_spec.rb | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/spec/models/doi_related_spec.rb b/spec/models/doi_related_spec.rb index fdaa31e38..51418766b 100644 --- a/spec/models/doi_related_spec.rb +++ b/spec/models/doi_related_spec.rb @@ -54,14 +54,19 @@ relation_type_id: "is-referenced-by", }) - allow(DataciteDoi).to receive(:upload_to_elasticsearch) + # Capture the DOIs passed to upload_to_elasticsearch to verify preloading + uploaded_dois = nil + allow(DataciteDoi).to receive(:upload_to_elasticsearch) do |dois| + uploaded_dois = dois + end DataciteDoi.import_in_bulk(ids) - # Verify that events were preloaded (check via a fresh query) - fresh_doi1 = DataciteDoi.find(doi1.id) - expect(fresh_doi1.reference_events.count).to eq(1) - expect(fresh_doi1.citation_events.count).to eq(1) + # Verify that events were preloaded on the batch + doi1_uploaded = uploaded_dois.find { |d| d.id == doi1.id } + expect(doi1_uploaded.preloaded_events).not_to be_nil + expect(doi1_uploaded.reference_events.count).to eq(1) + expect(doi1_uploaded.citation_events.count).to eq(1) end end From fd0f4a156d5d7ea093b2d7df4fcbb6ea96930eb4 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Fri, 13 Mar 2026 15:55:19 +0100 Subject: [PATCH 19/25] test: update PreloadedEventRelation#sort_by spec to use descending order --- spec/models/preloaded_event_relation_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/models/preloaded_event_relation_spec.rb b/spec/models/preloaded_event_relation_spec.rb index 703c08efb..8a998f212 100644 --- a/spec/models/preloaded_event_relation_spec.rb +++ b/spec/models/preloaded_event_relation_spec.rb @@ -64,9 +64,9 @@ describe "#sort_by" do it "sorts events" do - result = relation.sort_by(&:total) + result = relation.sort_by { |e| -e.total } expect(result).to be_a(PreloadedEventRelation) - expect(result.to_a.map(&:total)).to eq([10, 20, 30]) + expect(result.to_a.map(&:total)).to eq([30, 20, 10]) end end From f779c32e4f313d131fafb6325d303aff9610ec30 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Mon, 16 Mar 2026 09:46:32 +0100 Subject: [PATCH 20/25] test: use match_array for reference and citation id comparisons --- spec/models/doi_related_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/models/doi_related_spec.rb b/spec/models/doi_related_spec.rb index 51418766b..969cc657a 100644 --- a/spec/models/doi_related_spec.rb +++ b/spec/models/doi_related_spec.rb @@ -399,8 +399,8 @@ citation_count_with_preload = doi.citation_count # Should be the same - expect(reference_ids_with_preload).to eq(reference_ids_without_preload) - expect(citation_ids_with_preload).to eq(citation_ids_without_preload) + expect(reference_ids_with_preload).to match_array(reference_ids_without_preload) + expect(citation_ids_with_preload).to match_array(citation_ids_without_preload) expect(reference_count_with_preload).to eq(reference_count_without_preload) expect(citation_count_with_preload).to eq(citation_count_without_preload) end From 9cdf9890f592045a6898ca148c36e926fc397587 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Mon, 16 Mar 2026 11:43:54 +0100 Subject: [PATCH 21/25] test: update DOI preloading tests to use EventsPreloader and verify query limits --- spec/models/doi_related_spec.rb | 57 +++++++++++++++++---------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/spec/models/doi_related_spec.rb b/spec/models/doi_related_spec.rb index 969cc657a..8e8b7f88f 100644 --- a/spec/models/doi_related_spec.rb +++ b/spec/models/doi_related_spec.rb @@ -39,34 +39,22 @@ }.not_to exceed_query_limit(6) # Allow some overhead for associations (client, media, metadata, allocator) end - it "should preload events for all DOIs in batch" do - # Create events for the DOIs - create(:event_for_crossref, { - subj_id: "https://doi.org/#{doi1.doi}", - obj_id: "https://doi.org/#{doi2.doi}", - relation_type_id: "references", - }) - # For citation_events, the DOI must be the target (target_doi) - # For "is-referenced-by", target_doi = subj_id, so doi1 needs to be subj_id - create(:event_for_datacite_crossref, { - subj_id: "https://doi.org/#{doi1.doi}", - obj_id: "https://doi.org/#{doi3.doi}", - relation_type_id: "is-referenced-by", - }) - - # Capture the DOIs passed to upload_to_elasticsearch to verify preloading - uploaded_dois = nil - allow(DataciteDoi).to receive(:upload_to_elasticsearch) do |dois| - uploaded_dois = dois - end - + it "uses EventsPreloader to preload events for each batch" do + # Arrange + allow(DataciteDoi).to receive(:upload_to_elasticsearch) # don’t actually hit ES + # Spy on EventsPreloader + preloader_double = instance_double(EventsPreloader, preload!: true) + allow(EventsPreloader).to receive(:new).and_return(preloader_double) + # Act DataciteDoi.import_in_bulk(ids) - - # Verify that events were preloaded on the batch - doi1_uploaded = uploaded_dois.find { |d| d.id == doi1.id } - expect(doi1_uploaded.preloaded_events).not_to be_nil - expect(doi1_uploaded.reference_events.count).to eq(1) - expect(doi1_uploaded.citation_events.count).to eq(1) + # Assert + # Ensure we created a preloader at least once with an array of DOIs + expect(EventsPreloader).to have_received(:new) do |dois_arg| + expect(dois_arg).to all(be_a(DataciteDoi)) + expect(dois_arg.map(&:id)).to match_array(ids) # or a subset if multiple batches + end + # And that preload! was actually invoked on that preloader + expect(preloader_double).to have_received(:preload!).at_least(:once) end end @@ -74,6 +62,21 @@ let(:client) { create(:client) } let(:doi) { create(:doi, client: client, aasm_state: "findable") } + it "uses preloaded_events-backed relations for event metrics" do + allow(DataciteDoi).to receive(:upload_to_elasticsearch) + # Build the relation with includes, as in production + dois = DataciteDoi.where(id: doi.id).includes(:client, :media, :metadata) + # Preload events explicitly + preloader = EventsPreloader.new(dois.to_a) + preloader.preload! + # Sanity: events are preloaded on the instance we’ll index + expect(dois.first.preloaded_events).not_to be_nil + # When we call as_indexed_json, the overridden *_events methods should + # hit the preloaded array, not issue extra queries for events. + expect { + dois.first.as_indexed_json + }.not_to exceed_query_limit(0) # if all associations were pre-included + end it "should make few db call" do allow(DataciteDoi).to receive(:upload_to_elasticsearch) dois = DataciteDoi.where(id: doi.id).includes( From 74061b2eddd2c6e2b1448a90993365b2d65e8423 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Mon, 16 Mar 2026 12:53:42 +0100 Subject: [PATCH 22/25] test: include provider in preloaded relations and fix typo in spec --- spec/models/doi_related_spec.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/spec/models/doi_related_spec.rb b/spec/models/doi_related_spec.rb index 8e8b7f88f..8d3ebb8db 100644 --- a/spec/models/doi_related_spec.rb +++ b/spec/models/doi_related_spec.rb @@ -65,7 +65,7 @@ it "uses preloaded_events-backed relations for event metrics" do allow(DataciteDoi).to receive(:upload_to_elasticsearch) # Build the relation with includes, as in production - dois = DataciteDoi.where(id: doi.id).includes(:client, :media, :metadata) + dois = DataciteDoi.where(id: doi.id).includes({client: :provider}, :media, :metadata) # Preload events explicitly preloader = EventsPreloader.new(dois.to_a) preloader.preload! @@ -77,7 +77,8 @@ dois.first.as_indexed_json }.not_to exceed_query_limit(0) # if all associations were pre-included end - it "should make few db call" do + + it "should make few db calls" do allow(DataciteDoi).to receive(:upload_to_elasticsearch) dois = DataciteDoi.where(id: doi.id).includes( :client, From 40b17ed803b99c49c7f7358157c494c7f2fbb097 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Mon, 16 Mar 2026 12:58:15 +0100 Subject: [PATCH 23/25] perf: eager load providers for DOIs to prevent N+1 queries --- app/models/datacite_doi.rb | 2 +- app/models/other_doi.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/models/datacite_doi.rb b/app/models/datacite_doi.rb index 81ed784f2..7ea7e7dc9 100644 --- a/app/models/datacite_doi.rb +++ b/app/models/datacite_doi.rb @@ -144,7 +144,7 @@ def self.import_in_bulk(ids, options = {}) # get database records from array of database ids selected_dois = DataciteDoi.where(id: ids, type: "DataciteDoi").includes( - :client, + {client: :provider}, :media, :metadata ) diff --git a/app/models/other_doi.rb b/app/models/other_doi.rb index c2cca25df..04501894c 100644 --- a/app/models/other_doi.rb +++ b/app/models/other_doi.rb @@ -148,7 +148,7 @@ def self.import_in_bulk(ids, options = {}) # get database records from array of database ids selected_dois = OtherDoi.where(id: ids, type: "OtherDoi").includes( - :client, + {client: :provider}, :media, :metadata ) From 05ade79d4fe91d74f4c2ca34cf2c2b2429b7b850 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Mon, 16 Mar 2026 13:02:06 +0100 Subject: [PATCH 24/25] Appease Rubocop --- app/models/datacite_doi.rb | 2 +- app/models/other_doi.rb | 2 +- spec/models/doi_related_spec.rb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/models/datacite_doi.rb b/app/models/datacite_doi.rb index 7ea7e7dc9..566f81335 100644 --- a/app/models/datacite_doi.rb +++ b/app/models/datacite_doi.rb @@ -144,7 +144,7 @@ def self.import_in_bulk(ids, options = {}) # get database records from array of database ids selected_dois = DataciteDoi.where(id: ids, type: "DataciteDoi").includes( - {client: :provider}, + { client: :provider }, :media, :metadata ) diff --git a/app/models/other_doi.rb b/app/models/other_doi.rb index 04501894c..400f11d24 100644 --- a/app/models/other_doi.rb +++ b/app/models/other_doi.rb @@ -148,7 +148,7 @@ def self.import_in_bulk(ids, options = {}) # get database records from array of database ids selected_dois = OtherDoi.where(id: ids, type: "OtherDoi").includes( - {client: :provider}, + { client: :provider }, :media, :metadata ) diff --git a/spec/models/doi_related_spec.rb b/spec/models/doi_related_spec.rb index 8d3ebb8db..5368100b7 100644 --- a/spec/models/doi_related_spec.rb +++ b/spec/models/doi_related_spec.rb @@ -65,7 +65,7 @@ it "uses preloaded_events-backed relations for event metrics" do allow(DataciteDoi).to receive(:upload_to_elasticsearch) # Build the relation with includes, as in production - dois = DataciteDoi.where(id: doi.id).includes({client: :provider}, :media, :metadata) + dois = DataciteDoi.where(id: doi.id).includes({ client: :provider }, :media, :metadata) # Preload events explicitly preloader = EventsPreloader.new(dois.to_a) preloader.preload! From fbdfbdf10a34758195bb3535050a98f79f831ccb Mon Sep 17 00:00:00 2001 From: jrhoads Date: Mon, 4 May 2026 14:32:15 +0200 Subject: [PATCH 25/25] test: Add nested includes for client provider --- spec/models/doi_related_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/models/doi_related_spec.rb b/spec/models/doi_related_spec.rb index 5368100b7..b3450649d 100644 --- a/spec/models/doi_related_spec.rb +++ b/spec/models/doi_related_spec.rb @@ -81,7 +81,7 @@ it "should make few db calls" do allow(DataciteDoi).to receive(:upload_to_elasticsearch) dois = DataciteDoi.where(id: doi.id).includes( - :client, + { client: :provider }, :media, :metadata )