From 51549eb654e9ac96002f3761ba5adf031d7b585c Mon Sep 17 00:00:00 2001 From: Justin Coyne Date: Fri, 29 May 2026 09:35:45 -0500 Subject: [PATCH 01/11] Add Elasticsearch/OpenSearch support --- .github/matrix.json | 8 + .github/workflows/test.yml | 5 + README.md | 39 +++ blacklight.gemspec | 1 + compose.yaml | 20 ++ lib/blacklight.rb | 32 +++ lib/blacklight/configuration.rb | 27 ++- lib/blacklight/elastic_search.rb | 14 ++ lib/blacklight/elastic_search/document.rb | 30 +++ .../elastic_search/facet_paginator.rb | 11 + lib/blacklight/elastic_search/repository.rb | 227 ++++++++++++++++++ lib/blacklight/elastic_search/request.rb | 55 +++++ lib/blacklight/elastic_search/response.rb | 160 ++++++++++++ .../elastic_search/search_builder_behavior.rb | 185 ++++++++++++++ lib/blacklight/solr/repository.rb | 20 ++ lib/generators/blacklight/models_generator.rb | 14 +- .../templates/catalog_controller.rb | 5 + .../config/blacklight.elasticsearch.yml | 13 + .../blacklight/templates/search_builder.rb | 4 +- .../blacklight/templates/solr_document.rb | 6 +- .../blacklight/test_support_generator.rb | 7 +- lib/railties/blacklight.rake | 17 +- spec/controllers/catalog_controller_spec.rb | 2 +- spec/features/advanced_search_spec.rb | 4 +- spec/features/did_you_mean_spec.rb | 3 +- spec/features/facets_spec.rb | 3 +- spec/lib/blacklight_spec.rb | 44 ++++ .../elastic_search/document_spec.rb | 51 ++++ .../elastic_search/repository_spec.rb | 160 ++++++++++++ .../blacklight/elastic_search/request_spec.rb | 45 ++++ .../elastic_search/response_spec.rb | 96 ++++++++ .../search_builder_behavior_spec.rb | 122 ++++++++++ spec/requests/load_suggestions_spec.rb | 3 +- spec/spec_helper.rb | 18 ++ tasks/blacklight.rake | 41 +++- 35 files changed, 1474 insertions(+), 18 deletions(-) create mode 100644 lib/blacklight/elastic_search.rb create mode 100644 lib/blacklight/elastic_search/document.rb create mode 100644 lib/blacklight/elastic_search/facet_paginator.rb create mode 100644 
lib/blacklight/elastic_search/repository.rb create mode 100644 lib/blacklight/elastic_search/request.rb create mode 100644 lib/blacklight/elastic_search/response.rb create mode 100644 lib/blacklight/elastic_search/search_builder_behavior.rb create mode 100644 lib/generators/blacklight/templates/config/blacklight.elasticsearch.yml create mode 100644 spec/models/blacklight/elastic_search/document_spec.rb create mode 100644 spec/models/blacklight/elastic_search/repository_spec.rb create mode 100644 spec/models/blacklight/elastic_search/request_spec.rb create mode 100644 spec/models/blacklight/elastic_search/response_spec.rb create mode 100644 spec/models/blacklight/elastic_search/search_builder_behavior_spec.rb diff --git a/.github/matrix.json b/.github/matrix.json index b5144d1fc3..c9eed53101 100644 --- a/.github/matrix.json +++ b/.github/matrix.json @@ -50,6 +50,14 @@ "rails_version": "7.2.3", "additional_engine_cart_rails_options": "-a propshaft --css=bootstrap --js=esbuild", "additional_name": "| Rails 7.2 + Propshaft, esbuild" + }, + { + "ruby": "3.4", + "rails_version": "8.1.2", + "search_engine": "elasticsearch", + "experimental": true, + "additional_engine_cart_rails_options": "--css=bootstrap", + "additional_name": "| Elasticsearch" } ] } diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b3fa165c3a..942ab05154 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -26,6 +26,9 @@ jobs: needs: set_matrix runs-on: ubuntu-latest name: ruby ${{ matrix.ruby }} | rails ${{ matrix.rails_version }} ${{ matrix.additional_name }} + # The Elasticsearch configuration is still reaching feature parity with Solr, + # so it is allowed to fail without failing the overall build. 
+ continue-on-error: ${{ matrix.experimental == true }} strategy: fail-fast: false matrix: ${{fromJson(needs.set_matrix.outputs.matrix)}} @@ -35,6 +38,8 @@ jobs: VIEW_COMPONENT_VERSION: ${{ matrix.view_component_version }} BOOTSTRAP_VERSION: ${{ matrix.bootstrap_version }} BLACKLIGHT_API_TEST: ${{ matrix.api }} + BLACKLIGHT_ADAPTER: ${{ matrix.search_engine }} + ELASTICSEARCH_URL: http://localhost:9200 ENGINE_CART_RAILS_OPTIONS: "--skip-git --skip-listen --skip-spring --skip-keeps --skip-kamal --skip-thruster --skip-solid --skip-coffee --skip-test ${{ matrix.additional_engine_cart_rails_options }}" BUNDLER_VERSION: ${{ matrix.bundler_version || '2.7.2' }} steps: diff --git a/README.md b/README.md index 496abd6a25..eba00fb553 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,45 @@ Code contributions are always welcome, instructions for contributing can be foun ## Configuring Apache Solr You'll also want some information about how Blacklight expects [Apache Solr](http://lucene.apache.org/solr ) to run, which you can find in [Solr Configuration](https://github.com/projectblacklight/blacklight/wiki/Solr-Configuration#solr-configuration) +## Using Elasticsearch / OpenSearch (experimental) +Blacklight defaults to Apache Solr, but it can also run against an Elasticsearch +(or API-compatible OpenSearch) cluster. Select the adapter in +`config/blacklight.yml`: + +```yaml +development: + adapter: elasticsearch + url: http://127.0.0.1:9200 + index: blacklight-core +``` + +Applications using this adapter must add an Elasticsearch client to their +`Gemfile` (the gem is not a runtime dependency of Blacklight): + +```sh +bundle add elasticsearch +# or, for OpenSearch: +# bundle add opensearch-ruby +``` + +With the adapter selected, the generated `SearchBuilder` and `SolrDocument` +classes automatically mix in the correct behavior (`include
+ +Some features that depend on Solr-specific functionality are not available when +using Elasticsearch and are automatically disabled: spellcheck/"did you mean", +result grouping, pivot and query facets, more-like-this, autocomplete +suggestions, and the Solr JSON Query DSL advanced search. + +`rails blacklight:index:seed` will create the index (if needed) and load the +sample data. The default index mapping understands Blacklight's Solr field +naming conventions: fields ending in a text suffix (e.g. `title_tsim`) are +mapped as analyzed `text` for full-text search, while other string fields +(e.g. `format`, `language_ssim`, `pub_date_si`) are mapped as `keyword` for +filtering, sorting, and faceting. Override the mapping with +`config.elasticsearch_index_settings` in your `CatalogController` if you need +full control over the schema. + ## Building the javascript The javascript includes some derivative combination files that are built at release time, that can be used by some javascript pipelines. The derivatives are placed at `app/assets/javascripts/blacklight`, and files there should not be edited by hand. diff --git a/blacklight.gemspec b/blacklight.gemspec index a8b2914683..e6d190b766 100644 --- a/blacklight.gemspec +++ b/blacklight.gemspec @@ -35,6 +35,7 @@ Gem::Specification.new do |s| s.add_dependency "zeitwerk" s.add_development_dependency "rsolr", ">= 1.0.6", "< 3" # Library for interacting with rSolr. + s.add_development_dependency "elasticsearch", ">= 8.0", "< 10" # Client for the optional Elasticsearch/OpenSearch adapter (client major version should match the server). 
s.add_development_dependency "rspec-rails", "~> 7.0" s.add_development_dependency "rspec-collection_matchers", ">= 1.0" s.add_development_dependency 'axe-core-rspec' diff --git a/compose.yaml b/compose.yaml index 5244dd084e..de895558f2 100644 --- a/compose.yaml +++ b/compose.yaml @@ -36,3 +36,23 @@ services: - /opt/solr/conf - "-Xms256m" - "-Xmx512m" + + # Optional search engine for running Blacklight against Elasticsearch/OpenSearch + # instead of Solr. Enable by setting BLACKLIGHT_ADAPTER=elasticsearch. + elasticsearch: + image: "elasticsearch:${ELASTICSEARCH_VERSION:-9.0.4}" + environment: + - discovery.type=single-node + - xpack.security.enabled=false + - "ES_JAVA_OPTS=-Xms512m -Xmx512m" + ports: + - "${ELASTICSEARCH_PORT:-9200}:9200" + healthcheck: + test: + [ + "CMD-SHELL", + "curl -fsS http://localhost:9200/_cluster/health || exit 1", + ] + interval: 10s + timeout: 5s + retries: 12 diff --git a/lib/blacklight.rb b/lib/blacklight.rb index 40d8872fb3..7e8dbf23dc 100644 --- a/lib/blacklight.rb +++ b/lib/blacklight.rb @@ -36,6 +36,8 @@ def self.repository_class case connection_config&.fetch(:adapter) || 'solr' when 'solr' Blacklight::Solr::Repository + when 'elasticsearch', 'elastic_search', 'opensearch' + Blacklight::ElasticSearch::Repository when /::/ connection_config[:adapter].constantize else @@ -43,6 +45,36 @@ def self.repository_class end end + ## + # The response model class appropriate for the configured adapter. + # @return [Class] + def self.default_response_model + repository_class.try(:response_model) || Blacklight::Solr::Response + end + + ## + # The facet paginator class appropriate for the configured adapter. + # @return [Class] + def self.default_facet_paginator_class + repository_class.try(:facet_paginator_class) || Blacklight::Solr::FacetPaginator + end + + ## + # The SearchBuilder behavior module appropriate for the configured adapter. + # Intended to be included into an application's SearchBuilder. 
+ # @return [Module] + def self.search_builder_behavior + repository_class.try(:search_builder_behavior) || Blacklight::Solr::SearchBuilderBehavior + end + + ## + # The document mixin appropriate for the configured adapter. + # Intended to be included into an application's document model. + # @return [Module] + def self.document_mixin + repository_class.try(:document_mixin) || Blacklight::Solr::Document + end + ## # The default Blacklight configuration. def self.default_configuration diff --git a/lib/blacklight/configuration.rb b/lib/blacklight/configuration.rb index 632684189a..de82b28df7 100644 --- a/lib/blacklight/configuration.rb +++ b/lib/blacklight/configuration.rb @@ -106,7 +106,7 @@ def initialized_default_configuration? # @!attribute response_model # model that maps index responses to the blacklight response model # @return [Class] - property :response_model, default: Blacklight::Solr::Response + property :response_model, default: Blacklight.default_response_model # @!attribute document_model # the model to use for each response document # @return [Class] @@ -118,7 +118,30 @@ def initialized_default_configuration? # @!attribute facet_paginator_class # Class for paginating long lists of facet fields # @return [Class] - property :facet_paginator_class, default: Blacklight::Solr::FacetPaginator + property :facet_paginator_class, default: Blacklight.default_facet_paginator_class + + # @!attribute elasticsearch_index + # @since v9.0.0 + # @return [String, nil] the Elasticsearch index (or alias) to search against. + # Only used by the Elasticsearch adapter; may also be set via the `index` + # key in blacklight.yml. + property :elasticsearch_index, default: nil + # @!attribute elasticsearch_query_fields + # @since v9.0.0 + # @return [Array, nil] the fields a full-text query should target. + # When nil, a simple_query_string across all fields is used. 
+ property :elasticsearch_query_fields, default: nil + # @!attribute elasticsearch_source_fields + # @since v9.0.0 + # @return [Array, nil] restrict the Elasticsearch `_source` fields + # returned for each document. When nil, the full source is returned. + property :elasticsearch_source_fields, default: nil + # @!attribute elasticsearch_index_settings + # @since v9.0.0 + # @return [Hash, nil] the body (settings + mappings) used when the + # Elasticsearch adapter creates the index. When nil, a default mapping + # based on Blacklight's Solr field-naming conventions is used. + property :elasticsearch_index_settings, default: nil # @!attribute connection_config # repository connection configuration # @since v5.13.0 diff --git a/lib/blacklight/elastic_search.rb b/lib/blacklight/elastic_search.rb new file mode 100644 index 0000000000..8d2a23c58e --- /dev/null +++ b/lib/blacklight/elastic_search.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module Blacklight + # Adapter for using an Elasticsearch (or API-compatible OpenSearch) cluster as + # the search index backing a Blacklight application. + # + # The adapter is selected by setting `adapter: elasticsearch` in + # `config/blacklight.yml`. Some Solr-specific features (spellcheck/"did you + # mean", result grouping, pivot/query facets, more-like-this, and the Solr + # JSON Query DSL advanced search) are not provided by Elasticsearch and are + # automatically disabled when this adapter is in use. + module ElasticSearch + end +end diff --git a/lib/blacklight/elastic_search/document.rb b/lib/blacklight/elastic_search/document.rb new file mode 100644 index 0000000000..92c4e047dd --- /dev/null +++ b/lib/blacklight/elastic_search/document.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Blacklight::ElasticSearch + # Mixin for a class representing a single document returned from + # Elasticsearch. This is the Elasticsearch analog to + # Blacklight::Solr::Document. 
+ module Document + extend ActiveSupport::Concern + include Blacklight::Document + include Blacklight::Document::ActiveModelShim + + # More-like-this is not supported by the Elasticsearch adapter. + def more_like_this + [] + end + + def has_highlight_field?(field) + highlighting = self['_highlighting'] + return false if highlighting.blank? + + highlighting.key?(field.to_s) + end + + def highlight_field(field) + return unless has_highlight_field?(field) + + Array(self['_highlighting'][field.to_s]).map(&:html_safe) + end + end +end diff --git a/lib/blacklight/elastic_search/facet_paginator.rb b/lib/blacklight/elastic_search/facet_paginator.rb new file mode 100644 index 0000000000..943406654b --- /dev/null +++ b/lib/blacklight/elastic_search/facet_paginator.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Blacklight::ElasticSearch + # Pagination for facet values returned by Elasticsearch terms aggregations. + # + # Like Solr, Elasticsearch terms aggregations don't return a total count of + # distinct values, so we request `limit + 1` values to detect whether more + # values are available. + class FacetPaginator < Blacklight::FacetPaginator + end +end diff --git a/lib/blacklight/elastic_search/repository.rb b/lib/blacklight/elastic_search/repository.rb new file mode 100644 index 0000000000..5c47e699ec --- /dev/null +++ b/lib/blacklight/elastic_search/repository.rb @@ -0,0 +1,227 @@ +# frozen_string_literal: true + +module Blacklight::ElasticSearch + # Repository adapter for an Elasticsearch (or API-compatible OpenSearch) + # cluster. + # + # The connection is built from `config/blacklight.yml`. Supported keys: + # * `url` - the cluster URL, e.g. 
http://localhost:9200 + # * `index` - the index (or alias) to search against + # * `client_options` - a hash passed through to the client constructor + # * `client_class` - a fully-qualified client class name to use instead + # of auto-detecting Elasticsearch::Client / OpenSearch::Client + class Repository < Blacklight::AbstractRepository # rubocop:disable Metrics/ClassLength + # The Blacklight response model that understands Elasticsearch responses. + def self.response_model + Blacklight::ElasticSearch::Response + end + + # The facet paginator used for Elasticsearch facet values. + def self.facet_paginator_class + Blacklight::ElasticSearch::FacetPaginator + end + + # The SearchBuilder behavior mixed in to the application's SearchBuilder. + def self.search_builder_behavior + Blacklight::ElasticSearch::SearchBuilderBehavior + end + + # The document mixin included in the application's document model. + def self.document_mixin + Blacklight::ElasticSearch::Document + end + + # Find a single document by its id. + # @param [String] id document's unique key value + # @param [Hash] _params additional query parameters (unused) + # @return [Blacklight::ElasticSearch::Response] + def find(id, _params = {}) + response = search_request(query: { ids: { values: Array(id) } }, size: Array(id).size) + result = build_response(response, {}) + raise Blacklight::Exceptions::RecordNotFound if result.documents.empty? + + result + end + + # Find multiple documents by id (or any other query). + # @param [Hash, Blacklight::SearchBuilder] params query parameters + def find_many(params) + search(params: params) + end + + # Execute a search query against Elasticsearch. + # @param [Hash, Blacklight::SearchBuilder] params the request body (Query DSL) + # @return [Blacklight::ElasticSearch::Response] + def search(pos_params = nil, params: nil, **kwargs) + if pos_params + Blacklight.deprecation.warn("Passing positional arguments to search() is deprecated. 
Use the params kwarg instead.") + end + + request_params = params || pos_params || kwargs + body = body_for(request_params) + + response = search_request(body) + build_response(response, request_params) + end + + # @return [Boolean] true if the cluster is reachable + def ping? + !!connection.ping + end + alias ping ping? + + # Query the fields that exist in the index from its mapping. + # @return [Hash] + def reflect_fields + mapping = connection.indices.get_mapping(index: index_name).to_h + properties = mapping.values.first&.dig('mappings', 'properties') || {} + properties.transform_values(&:to_h) + rescue StandardError => e + Blacklight.logger&.warn("Unable to reflect fields from Elasticsearch: #{e.inspect}") + {} + end + + # Suggestions ("autocomplete") are not supported by this adapter. + def suggestions(_request_params) + Blacklight.logger&.debug("Suggestions are not supported by the Elasticsearch adapter") + Blacklight::Suggest::Response.new({}, {}, nil, nil) + end + + ## + # Indexing helpers used by the `blacklight:index:seed` rake task. + ## + + # Add (index) one or more documents. + # @param [Array, Hash] documents + def add(documents, **) + docs = Array.wrap(documents) + return if docs.empty? + + operations = docs.flat_map do |doc| + doc = doc.to_h if doc.respond_to?(:to_h) + id = doc[unique_key.to_s] || doc[unique_key.to_sym] + id = Array(id).first + [{ index: { _index: index_name, _id: id } }, doc] + end + + connection.bulk(body: operations) + end + + # Refresh the index so newly added documents are searchable. This is the + # Elasticsearch analog to Solr's commit. + def commit(**) + connection.indices.refresh(index: index_name) + end + + # Create the search index if it does not already exist. + # + # The default mapping understands Blacklight's Solr field-naming + # conventions: fields ending in a text suffix (e.g. `_tsim`, `_tesim`) are + # mapped as analyzed `text` for full-text search, while all other string + # fields (e.g. 
`_ssim`, `_si`, `format`, `id`) are mapped as `keyword` so + # they can be used for exact-match filtering, sorting, and aggregations. + # + # Override the mapping by setting `blacklight_config.elasticsearch_index_settings`. + def create_index! + return if connection.indices.exists?(index: index_name) + + connection.indices.create(index: index_name, body: index_settings) + end + + # @return [String] the configured index name + def index_name + connection_config[:index] || blacklight_config&.elasticsearch_index || 'blacklight' + end + + private + + def search_request(body) + response = connection.search(index: index_name, body: body) + response.respond_to?(:to_h) ? response.to_h : response + rescue Errno::ECONNREFUSED => e + raise Blacklight::Exceptions::ECONNREFUSED, "Unable to connect to Elasticsearch instance using #{connection.inspect}: #{e.inspect}" + rescue StandardError => e + raise Blacklight::Exceptions::InvalidRequest, e.message + end + + def build_response(response, request_params) + benchmark("Elasticsearch fetch", level: :debug) do + result = blacklight_config.response_model.new( + response, + request_params, + document_model: blacklight_config.document_model, + blacklight_config: blacklight_config + ) + Blacklight.logger&.debug("Elasticsearch query: #{index_name} #{body_for(request_params).inspect}") + result + end + end + + def body_for(request_params) + if request_params.respond_to?(:to_hash) + request_params.to_hash + else + request_params || {} + end + end + + def unique_key + (blacklight_config&.document_model || ::SolrDocument).unique_key + end + + def build_connection + options = (connection_config[:client_options] || {}).symbolize_keys + options[:url] ||= connection_config[:url] if connection_config[:url] + + client_class.new(**options) + end + + def client_class + if connection_config[:client_class] + connection_config[:client_class].to_s.constantize + elsif defined?(::Elasticsearch::Client) + ::Elasticsearch::Client + elsif 
defined?(::OpenSearch::Client) + ::OpenSearch::Client + else + begin + require 'elasticsearch' + ::Elasticsearch::Client + rescue LoadError + require 'opensearch' + ::OpenSearch::Client + end + end + end + + def index_settings + blacklight_config&.elasticsearch_index_settings || default_index_settings + end + + def default_index_settings + { + mappings: { + dynamic_templates: [ + # Blacklight text fields (e.g. title_tsim, author_tsim) -> analyzed text + { + text_fields: { + match_pattern: 'regex', + match: '.*_t[a-z]*$', + mapping: { type: 'text' } + } + }, + # All other string fields (e.g. format, *_ssim, *_si, id) -> keyword, + # so they support filtering, sorting, and aggregations. Long values + # (e.g. stored MARC) are kept in _source but not indexed as terms. + { + string_fields: { + match_mapping_type: 'string', + mapping: { type: 'keyword', ignore_above: 8192 } + } + } + ] + } + } + end + end +end diff --git a/lib/blacklight/elastic_search/request.rb b/lib/blacklight/elastic_search/request.rb new file mode 100644 index 0000000000..1f288e7216 --- /dev/null +++ b/lib/blacklight/elastic_search/request.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: true + +module Blacklight::ElasticSearch + # Accumulates an Elasticsearch Query DSL request body as the SearchBuilder + # processor chain runs. This is the Elasticsearch analog to + # Blacklight::Solr::Request. + # + # The resulting hash is sent to Elasticsearch as the search request `body`. + class Request < Hash + def initialize(constructor = {}) + super() + merge!(constructor) if constructor.is_a?(Hash) + end + + # The boolean query container, lazily initialized. + # @return [Hash] + def bool + self[:query] ||= {} + self[:query][:bool] ||= { must: [], filter: [], must_not: [], should: [] } + end + + # Add a query clause that documents must match (and that contributes to scoring) + def append_query(query) + return if query.blank? 
+ + bool[:must] << query + end + + # Add a non-scoring filter clause that documents must match + def append_filter_query(query) + return if query.blank? + + bool[:filter] << query + end + + # Add a clause that documents must not match + def append_must_not(query) + return if query.blank? + + bool[:must_not] << query + end + + # Add a terms (or other) aggregation used to compute facet values + def append_aggregation(name, aggregation) + self[:aggs] ||= {} + self[:aggs][name.to_s] = aggregation + end + + # Add a field to highlight in the response + def append_highlight_field(field) + self[:highlight] ||= { fields: {} } + self[:highlight][:fields][field.to_s] ||= {} + end + end +end diff --git a/lib/blacklight/elastic_search/response.rb b/lib/blacklight/elastic_search/response.rb new file mode 100644 index 0000000000..bbba47bff5 --- /dev/null +++ b/lib/blacklight/elastic_search/response.rb @@ -0,0 +1,160 @@ +# frozen_string_literal: true + +module Blacklight::ElasticSearch + # Normalizes an Elasticsearch search response into the same interface that + # the rest of Blacklight expects from Blacklight::Solr::Response, so views, + # presenters, and components work unchanged regardless of the configured + # adapter. + # + # Solr-only concepts (spelling suggestions, result grouping, and + # more-like-this) are represented with null/empty implementations. + class Response < ActiveSupport::HashWithIndifferentAccess + include Blacklight::Solr::Response::PaginationMethods + + # A stand-in for Blacklight::Solr::Response::Spelling, which Elasticsearch + # does not provide an equivalent of. 
+ class NullSpelling + def words + [] + end + + def collation + nil + end + end + + attr_reader :request_params, :search_builder + attr_accessor :blacklight_config, :options + + delegate :document_factory, to: :blacklight_config + + # @param [Hash] data the raw Elasticsearch response + # @param [Hash, Blacklight::SearchBuilder] request_params a SearchBuilder or a Hash of parameters + def initialize(data, request_params, options = {}) + @search_builder = request_params if request_params.is_a?(Blacklight::SearchBuilder) + + super(ActiveSupport::HashWithIndifferentAccess.new(to_hash_safe(data))) + + @request_params = ActiveSupport::HashWithIndifferentAccess.new(to_hash_safe(request_params)) + self.blacklight_config = options[:blacklight_config] + self.options = options + end + + def hits + self['hits'] || {} + end + + def documents + @documents ||= (hits['hits'] || []).map do |hit| + source = (hit['_source'] || {}).dup + source[unique_key] ||= hit['_id'] + source['_highlighting'] = hit['highlight'] if hit['highlight'].present? + source['score'] ||= hit['_score'] if hit['_score'] + + document_factory.build(source, self, options) + end + end + alias docs documents + + # The total number of matching documents. + def total + raw = hits['total'] + + if raw.is_a?(Hash) + raw['value'].to_i + else + raw.to_i + end + end + + def start + search_builder&.start || request_params[:from].to_i + end + + def rows + search_builder&.rows || request_params[:size]&.to_i || documents.length + end + + def sort + search_builder&.sort + end + + def empty? + total.zero? + end + + # @return [ActiveSupport::HashWithIndifferentAccess] the request parameters + def params + request_params + end + + # Elasticsearch does not provide result grouping in the way Solr does. + def grouped? + false + end + + # @return [NullSpelling] spelling suggestions are not supported + def spelling + @spelling ||= NullSpelling.new + end + + # More-like-this is not supported by this adapter. 
+ def more_like(_document) + [] + end + + def export_formats + documents.map { |x| x.export_formats.keys }.flatten.uniq + end + + # Convert Elasticsearch aggregations into the hash of + # Blacklight::Solr::Response::Facets::FacetField objects that Blacklight's + # facet display expects. + def aggregations + @aggregations ||= begin + result = default_aggregations + + (self['aggregations'] || {}).each do |field_name, data| + next unless data.is_a?(Hash) && data['buckets'] + + items = data['buckets'].map do |bucket| + Blacklight::Solr::Response::Facets::FacetItem.new(value: bucket['key'], hits: bucket['doc_count']) + end + + facet_field = Blacklight::Solr::Response::Facets::FacetField.new(field_name, items, response: self) + result[field_name] = facet_field + + next unless blacklight_config + + blacklight_config.facet_fields.select { |_k, v| v.field == field_name }.each_key do |key| + result[key] = facet_field + end + end + + result + end + end + + private + + def unique_key + (blacklight_config&.document_model || ::SolrDocument).unique_key + end + + # @return [HashWithIndifferentAccess] hash with a null-object default for missing facet fields + def default_aggregations + h = Hash.new do |_hash, key| + Blacklight::Solr::Response::Facets::NullFacetField.new(key, response: self) + end + h.with_indifferent_access + end + + def to_hash_safe(value) + if value.respond_to?(:to_hash) + value.to_hash + else + value || {} + end + end + end +end diff --git a/lib/blacklight/elastic_search/search_builder_behavior.rb b/lib/blacklight/elastic_search/search_builder_behavior.rb new file mode 100644 index 0000000000..cb11cbf010 --- /dev/null +++ b/lib/blacklight/elastic_search/search_builder_behavior.rb @@ -0,0 +1,185 @@ +# frozen_string_literal: true + +module Blacklight::ElasticSearch + # Translates Blacklight request parameters into an Elasticsearch Query DSL + # request body. This is the Elasticsearch analog to + # Blacklight::Solr::SearchBuilderBehavior. 
+ # + # Features that Elasticsearch does not provide in the same way Solr does + # (spellcheck, result grouping, pivot/query facets, and the Solr JSON Query + # DSL advanced search) are intentionally omitted from the processor chain. + module SearchBuilderBehavior + extend ActiveSupport::Concern + + included do + self.default_processor_chain = [ + :add_query_to_request, + :add_filters_to_request, + :add_facetting_to_request, + :add_paging_to_request, + :add_sorting_to_request, + :add_highlighting_to_request, + :add_source_fields_to_request, + :add_additional_filters_to_request + ] + end + + # Add the user-entered query to the request as a full-text query. + def add_query_to_request(request) + query = search_state.query_param + + return if query.blank? || query.is_a?(Hash) + + fields = query_fields + + request.append_query( + if fields.present? + { multi_match: { query: query, fields: fields, type: 'best_fields', operator: 'and' } } + else + { simple_query_string: { query: query, default_operator: 'and' } } + end + ) + end + + # Map the applied facet filters (Blacklight's `f` parameter) to + # Elasticsearch term filters. + def add_filters_to_request(request) + search_state.filters.each do |filter| + field = filter_field_name(filter) + values = Array(filter.values).compact_blank + + if values.delete(Blacklight::SearchState::FilterField::MISSING) + request.append_must_not(exists: { field: field }) + end + + next if values.empty? + + request.append_filter_query(terms: { field => values }) + end + end + + # Build terms aggregations for each configured facet field. Pivot and query + # facets are not supported by this adapter and are skipped. 
+ def add_facetting_to_request(request) + facet_fields_to_include_in_request.each do |field_name, facet| + if facet.pivot || facet.query + Blacklight.logger&.debug("Skipping facet '#{field_name}': pivot and query facets are not supported by the Elasticsearch adapter") + next + end + + size = facet_limit_with_pagination(field_name) || blacklight_config.default_facet_limit + order = facet.sort == 'index' ? { '_key' => 'asc' } : { '_count' => 'desc' } + + request.append_aggregation(field_name, terms: { field: facet.field, size: size, order: order }) + end + end + + # Translate Blacklight paging into Elasticsearch from/size. + def add_paging_to_request(request) + request[:size] = rows + request[:from] = start if start.nonzero? + # Ensure the reported hit total is exact (Elasticsearch otherwise caps the + # tracked total at 10,000), so pagination shows the correct number of pages. + request[:track_total_hits] = true + end + + # Translate the Solr-style sort string (e.g. "score desc, title_si asc") + # into the Elasticsearch sort syntax. + def add_sorting_to_request(request) + return if sort.blank? + + request[:sort] = sort.split(',').filter_map do |clause| + field, direction = clause.strip.split(/\s+/) + next if field.blank? + + field = '_score' if field == 'score' + { field => { order: direction.presence || 'asc' } } + end + end + + # Request highlight snippets for index fields configured with `highlight: true`. + def add_highlighting_to_request(request) + blacklight_config.index_fields.each_value do |field| + request.append_highlight_field(field.field) if field.highlight + end + end + + # Limit the returned _source fields when configured to do so. By default we + # return the full source document. + def add_source_fields_to_request(request) + fields = source_fields + request[:_source] = fields if fields.present? + end + + # Support SearchBuilder#where, which is used to fetch documents by id. 
+ def add_additional_filters_to_request(request, additional_filters = nil) + filters = additional_filters || @additional_filters + + return if filters.blank? + + if filters.values.any?(&:blank?) + request.append_filter_query(ids: { values: [] }) + return + end + + filters.each do |field, values| + if field.to_s == blacklight_config.document_model.unique_key + request.append_filter_query(ids: { values: Array(values) }) + else + request.append_filter_query(terms: { field => Array(values) }) + end + end + end + + private + + # @return [Blacklight::ElasticSearch::Request] + def request + Blacklight::ElasticSearch::Request.new + end + + # The fields a full-text query should target. When a search field is + # selected, its `elastic_query_fields` (configured in the controller) scope + # the query; otherwise the configuration-wide default is used (and when that + # is also unset, a simple_query_string across all fields is used). + # @return [Array, nil] + def query_fields + search_field&.elastic_query_fields || blacklight_config.elasticsearch_query_fields + end + + # @return [Array, nil] + def source_fields + blacklight_config.elasticsearch_source_fields + end + + def filter_field_name(filter) + filter.config&.field || filter.key + end + + def facet_fields_to_include_in_request + blacklight_config.facet_fields.select do |_field_name, facet| + facet.include_in_request || (facet.include_in_request.nil? && blacklight_config.add_facet_fields_to_solr_request) + end + end + + # Look up the configured limit for a facet field. Mirrors the Solr behavior. + def facet_limit_for(facet_field) + facet = blacklight_config.facet_fields[facet_field] + return if facet.blank? + + return unless facet.limit + + facet.limit == true ? blacklight_config.default_facet_limit : facet.limit + end + + # Request one more than the display limit so the facet paginator can detect + # whether more values are available. 
+ def facet_limit_with_pagination(field_name) + limit = facet_limit_for(field_name) + + return if limit.nil? + + limit.positive? ? limit + 1 : limit + end + end +end diff --git a/lib/blacklight/solr/repository.rb b/lib/blacklight/solr/repository.rb index 2cb1ad60b1..dac28062fd 100644 --- a/lib/blacklight/solr/repository.rb +++ b/lib/blacklight/solr/repository.rb @@ -2,6 +2,26 @@ module Blacklight::Solr class Repository < Blacklight::AbstractRepository + # The Blacklight response model that understands Solr responses. + def self.response_model + Blacklight::Solr::Response + end + + # The facet paginator used for Solr facet values. + def self.facet_paginator_class + Blacklight::Solr::FacetPaginator + end + + # The SearchBuilder behavior mixed in to the application's SearchBuilder. + def self.search_builder_behavior + Blacklight::Solr::SearchBuilderBehavior + end + + # The document mixin included in the application's document model. + def self.document_mixin + Blacklight::Solr::Document + end + ## # Find a single solr document result (by id) using the document configuration # @param [String] id document's unique key value diff --git a/lib/generators/blacklight/models_generator.rb b/lib/generators/blacklight/models_generator.rb index 25fd437c64..833442c48e 100644 --- a/lib/generators/blacklight/models_generator.rb +++ b/lib/generators/blacklight/models_generator.rb @@ -16,9 +16,19 @@ class ModelsGenerator < Rails::Generators::Base 2. Creates config/blacklight.yml with a default configuration EOS - # Copy all files in templates/config directory to host config + # Copy the connection configuration to the host's config directory. The + # adapter is resolved at generation time from the BLACKLIGHT_ADAPTER + # environment variable (defaulting to Solr), so the generated + # config/blacklight.yml is static and the choice does not need to be + # re-supplied on every boot. 
def create_configuration_files - copy_file "config/blacklight.yml", "config/blacklight.yml" + source = if ENV['BLACKLIGHT_ADAPTER'].to_s =~ /elastic|opensearch/ + "config/blacklight.elasticsearch.yml" + else + "config/blacklight.yml" + end + + copy_file source, "config/blacklight.yml" gsub_file 'config/blacklight.yml', '__VERSION__', Blacklight::VERSION end diff --git a/lib/generators/blacklight/templates/catalog_controller.rb b/lib/generators/blacklight/templates/catalog_controller.rb index 3f99915fc9..b542ae9836 100644 --- a/lib/generators/blacklight/templates/catalog_controller.rb +++ b/lib/generators/blacklight/templates/catalog_controller.rb @@ -227,6 +227,9 @@ class <%= controller_name.classify %>Controller < ApplicationController qf: '${title_qf}', pf: '${title_pf}' } + # elastic_query_fields scopes the query when using the Elasticsearch + # adapter (it is ignored by the Solr adapter). + field.elastic_query_fields = %w[title_tsim title_addl_tsim] end config.add_search_field('author') do |field| @@ -235,6 +238,7 @@ class <%= controller_name.classify %>Controller < ApplicationController qf: '${author_qf}', pf: '${author_pf}' } + field.elastic_query_fields = %w[author_tsim author_addl_tsim] end # Specifying a :qt only to show it's possible, and so our internal automated @@ -247,6 +251,7 @@ class <%= controller_name.classify %>Controller < ApplicationController qf: '${subject_qf}', pf: '${subject_pf}' } + field.elastic_query_fields = %w[subject_tsim] end # Set up a default advanced search configuration by using the current diff --git a/lib/generators/blacklight/templates/config/blacklight.elasticsearch.yml b/lib/generators/blacklight/templates/config/blacklight.elasticsearch.yml new file mode 100644 index 0000000000..681f269fca --- /dev/null +++ b/lib/generators/blacklight/templates/config/blacklight.elasticsearch.yml @@ -0,0 +1,13 @@ +load_defaults: __VERSION__ +# Blacklight is configured to use Elasticsearch/OpenSearch. 
Note that some +# Solr-specific features (spellcheck, result grouping, pivot/query facets, +# more-like-this, and the JSON Query DSL advanced search) are not available +# when using this adapter. +development: &development + adapter: elasticsearch + url: <%= ENV['ELASTICSEARCH_URL'] || "http://127.0.0.1:9200" %> + index: <%= ENV['ELASTICSEARCH_INDEX'] || "blacklight-core" %> +test: + <<: *development +production: + <<: *development diff --git a/lib/generators/blacklight/templates/search_builder.rb b/lib/generators/blacklight/templates/search_builder.rb index 43497bfff1..991c578f2b 100644 --- a/lib/generators/blacklight/templates/search_builder.rb +++ b/lib/generators/blacklight/templates/search_builder.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class <%= model_name.classify %> < Blacklight::SearchBuilder - include Blacklight::Solr::SearchBuilderBehavior + # Mixes in the behavior appropriate for the search index adapter configured + # in config/blacklight.yml (Solr by default, or Elasticsearch). + include Blacklight.search_builder_behavior ## # @example Adding a new step to the processor chain diff --git a/lib/generators/blacklight/templates/solr_document.rb b/lib/generators/blacklight/templates/solr_document.rb index 49801ce2ab..10e97511b1 100644 --- a/lib/generators/blacklight/templates/solr_document.rb +++ b/lib/generators/blacklight/templates/solr_document.rb @@ -1,8 +1,10 @@ # frozen_string_literal: true -# Represents a single document returned from Solr +# Represents a single document returned from the search index class <%= model_name.classify %> - include Blacklight::Solr::Document + # Mixes in the behavior appropriate for the search index adapter configured + # in config/blacklight.yml (Solr by default, or Elasticsearch). 
+ include Blacklight.document_mixin # self.unique_key = 'id' diff --git a/lib/generators/blacklight/test_support_generator.rb b/lib/generators/blacklight/test_support_generator.rb index b5ed21be43..15116bd006 100644 --- a/lib/generators/blacklight/test_support_generator.rb +++ b/lib/generators/blacklight/test_support_generator.rb @@ -29,7 +29,12 @@ def alternate_controller end def solr_document_config - insert_into_file 'app/models/solr_document.rb', after: "include Blacklight::Solr::Document" do + # Match whichever document mixin the generated SolrDocument uses: the + # adapter-specific include (`Blacklight.document_mixin`) or the legacy + # explicit `Blacklight::Solr::Document` include. + sentinel = /include Blacklight(\.document_mixin|::Solr::Document)\n/ + + insert_into_file 'app/models/solr_document.rb', after: sentinel do <<-EOF field_semantics.merge!( diff --git a/lib/railties/blacklight.rake b/lib/railties/blacklight.rake index 993e5874e4..50caaaf7ae 100644 --- a/lib/railties/blacklight.rake +++ b/lib/railties/blacklight.rake @@ -23,9 +23,20 @@ namespace :blacklight do file = ENV.fetch('FILE') { (app_file && File.exist?(app_file) && app_file) } || File.join(Blacklight.root, 'spec', 'fixtures', 'sample_solr_documents.yml') docs = YAML.safe_load(File.open(file)) - conn = Blacklight.default_index.connection - conn.add docs - conn.commit + repository = Blacklight.default_index + + # Repositories that index through their connection (e.g. Solr via RSolr) + # use conn.add/conn.commit. Other adapters (e.g. Elasticsearch) implement + # add/commit (and an optional create_index!) on the repository itself. + if repository.respond_to?(:add) && repository.respond_to?(:commit) + repository.create_index! if repository.respond_to?(:create_index!) 
+ repository.add(docs) + repository.commit + else + conn = repository.connection + conn.add docs + conn.commit + end end end diff --git a/spec/controllers/catalog_controller_spec.rb b/spec/controllers/catalog_controller_spec.rb index b1b3e30c78..50b29905ef 100644 --- a/spec/controllers/catalog_controller_spec.rb +++ b/spec/controllers/catalog_controller_spec.rb @@ -74,7 +74,7 @@ expect(assigns(:response).docs).to be_empty end - it "has a spelling suggestion for an appropriately poor query", :integration do + it "has a spelling suggestion for an appropriately poor query", :integration, :solr_only do get :index, params: { q: 'boo' } expect(assigns(:response).spelling.words).not_to be_nil end diff --git a/spec/features/advanced_search_spec.rb b/spec/features/advanced_search_spec.rb index 8264684a84..0c1b2c0396 100644 --- a/spec/features/advanced_search_spec.rb +++ b/spec/features/advanced_search_spec.rb @@ -2,7 +2,9 @@ require 'spec_helper' -RSpec.describe "Blacklight Advanced Search Form" do +# The advanced search form builds queries using Solr's JSON Query DSL, which is +# not supported by the Elasticsearch adapter. +RSpec.describe "Blacklight Advanced Search Form", :solr_only do describe "advanced search form" do before do visit '/catalog/advanced' diff --git a/spec/features/did_you_mean_spec.rb b/spec/features/did_you_mean_spec.rb index 0d3fd6ab47..73b72f51ef 100644 --- a/spec/features/did_you_mean_spec.rb +++ b/spec/features/did_you_mean_spec.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true -RSpec.describe "Did You Mean" do +# Spellcheck / "did you mean" is a Solr-only feature. 
+RSpec.describe "Did You Mean", :solr_only do before { visit root_path } describe "searching all fields" do diff --git a/spec/features/facets_spec.rb b/spec/features/facets_spec.rb index 44eb4e8ab1..8af60b193a 100644 --- a/spec/features/facets_spec.rb +++ b/spec/features/facets_spec.rb @@ -63,7 +63,8 @@ expect(page).to have_css('#facet-format', visible: true) # assert that it didn't re-collapse end - it 'is able to expand pivot facets when javascript is enabled', :js do + # Pivot facets are a Solr-only feature. + it 'is able to expand pivot facets when javascript is enabled', :js, :solr_only do visit root_path click_on 'Pivot Field' diff --git a/spec/lib/blacklight_spec.rb b/spec/lib/blacklight_spec.rb index aabf2e5066..c873eaa7f3 100644 --- a/spec/lib/blacklight_spec.rb +++ b/spec/lib/blacklight_spec.rb @@ -53,6 +53,26 @@ end end + context 'for an elasticsearch index' do + before do + allow(described_class).to receive(:connection_config).and_return(adapter: 'elasticsearch') + end + + it 'resolves to the Elasticsearch repository implementation' do + expect(described_class.repository_class).to eq Blacklight::ElasticSearch::Repository + end + end + + context 'for an opensearch index' do + before do + allow(described_class).to receive(:connection_config).and_return(adapter: 'opensearch') + end + + it 'resolves to the Elasticsearch repository implementation' do + expect(described_class.repository_class).to eq Blacklight::ElasticSearch::Repository + end + end + context 'for an explicitly provided class' do before do stub_const("CustomSearch::Repository", double) @@ -65,6 +85,30 @@ end end + describe 'adapter-aware defaults' do + context 'for a solr index' do + before { allow(described_class).to receive(:connection_config).and_return(adapter: 'solr') } + + it 'returns the Solr companion classes' do + expect(described_class.default_response_model).to eq Blacklight::Solr::Response + expect(described_class.default_facet_paginator_class).to eq 
Blacklight::Solr::FacetPaginator + expect(described_class.search_builder_behavior).to eq Blacklight::Solr::SearchBuilderBehavior + expect(described_class.document_mixin).to eq Blacklight::Solr::Document + end + end + + context 'for an elasticsearch index' do + before { allow(described_class).to receive(:connection_config).and_return(adapter: 'elasticsearch') } + + it 'returns the Elasticsearch companion classes' do + expect(described_class.default_response_model).to eq Blacklight::ElasticSearch::Response + expect(described_class.default_facet_paginator_class).to eq Blacklight::ElasticSearch::FacetPaginator + expect(described_class.search_builder_behavior).to eq Blacklight::ElasticSearch::SearchBuilderBehavior + expect(described_class.document_mixin).to eq Blacklight::ElasticSearch::Document + end + end + end + describe '.default_configuration' do it 'is a Blacklight configuration' do expect(described_class.default_configuration).to be_a Blacklight::Configuration diff --git a/spec/models/blacklight/elastic_search/document_spec.rb b/spec/models/blacklight/elastic_search/document_spec.rb new file mode 100644 index 0000000000..0bfa0ab8cc --- /dev/null +++ b/spec/models/blacklight/elastic_search/document_spec.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +RSpec.describe Blacklight::ElasticSearch::Document, :api do + let(:document_class) do + Class.new do + include Blacklight::ElasticSearch::Document + + def self.name + 'ElasticDocument' + end + end + end + + let(:document) { document_class.new(source) } + + let(:source) do + { 'id' => 'abc', 'title_tsim' => ['A Title'], '_highlighting' => { 'title_tsim' => ['A Title'] } } + end + + describe '#has_highlight_field?' 
do + it 'is true when highlight data is present for the field' do + expect(document.has_highlight_field?('title_tsim')).to be true + end + + it 'is false when there is no highlight data for the field' do + expect(document.has_highlight_field?('author_tsim')).to be false + end + + context 'without any highlight data' do + let(:source) { { 'id' => 'abc' } } + + it 'is false' do + expect(document.has_highlight_field?('title_tsim')).to be false + end + end + end + + describe '#highlight_field' do + it 'returns html-safe highlight snippets' do + snippets = document.highlight_field('title_tsim') + expect(snippets).to eq ['A Title'] + expect(snippets).to all(be_html_safe) + end + end + + describe '#more_like_this' do + it 'is empty (not supported by the Elasticsearch adapter)' do + expect(document.more_like_this).to eq [] + end + end +end diff --git a/spec/models/blacklight/elastic_search/repository_spec.rb b/spec/models/blacklight/elastic_search/repository_spec.rb new file mode 100644 index 0000000000..cbbc21bd46 --- /dev/null +++ b/spec/models/blacklight/elastic_search/repository_spec.rb @@ -0,0 +1,160 @@ +# frozen_string_literal: true + +require 'elasticsearch' + +RSpec.describe Blacklight::ElasticSearch::Repository, :api do + subject(:repository) do + described_class.new(blacklight_config).tap { |repo| repo.connection = connection } + end + + let(:blacklight_config) do + Blacklight::Configuration.new.tap do |config| + config.response_model = Blacklight::ElasticSearch::Response + config.document_model = SolrDocument + config.connection_config = { adapter: 'elasticsearch', url: 'http://localhost:9200', index: 'blacklight-test' } + end + end + + let(:connection) { instance_double(Elasticsearch::Client) } + + let(:search_response) do + { + 'hits' => { + 'total' => { 'value' => 1 }, + 'hits' => [{ '_id' => '123', '_source' => { 'id' => '123' } }] + } + } + end + + describe 'adapter class methods' do + it 'declares the Elasticsearch companion classes' do + 
expect(described_class.response_model).to eq Blacklight::ElasticSearch::Response + expect(described_class.facet_paginator_class).to eq Blacklight::ElasticSearch::FacetPaginator + expect(described_class.search_builder_behavior).to eq Blacklight::ElasticSearch::SearchBuilderBehavior + expect(described_class.document_mixin).to eq Blacklight::ElasticSearch::Document + end + end + + describe '#index_name' do + it 'reads the index from the connection config' do + expect(repository.index_name).to eq 'blacklight-test' + end + end + + describe '#search' do + before { allow(connection).to receive(:search).and_return(search_response) } + + it 'sends the request body to the configured index and wraps the response' do + response = repository.search(params: { query: { match_all: {} } }) + + expect(connection).to have_received(:search).with(index: 'blacklight-test', body: { query: { match_all: {} } }) + expect(response).to be_a Blacklight::ElasticSearch::Response + expect(response.total).to eq 1 + end + + it 'warns when called with positional arguments' do + allow(Blacklight.deprecation).to receive(:warn) + repository.search({ query: { match_all: {} } }) + expect(Blacklight.deprecation).to have_received(:warn) + end + + context 'when the cluster is unreachable' do + before { allow(connection).to receive(:search).and_raise(Errno::ECONNREFUSED) } + + it 'raises a Blacklight exception' do + expect { repository.search(params: {}) }.to raise_exception(Blacklight::Exceptions::ECONNREFUSED, /Unable to connect to Elasticsearch/) + end + end + end + + describe '#find' do + it 'queries by id' do + allow(connection).to receive(:search).and_return(search_response) + expect(repository.find('123')).to be_a Blacklight::ElasticSearch::Response + expect(connection).to have_received(:search).with(index: 'blacklight-test', body: hash_including(query: { ids: { values: ['123'] } })) + end + + it 'raises when nothing is found' do + allow(connection).to receive(:search).and_return('hits' => { 'total' 
=> { 'value' => 0 }, 'hits' => [] }) + expect { repository.find('missing') }.to raise_exception(Blacklight::Exceptions::RecordNotFound) + end + end + + describe '#ping?' do + it 'delegates to the client' do + allow(connection).to receive(:ping).and_return(true) + expect(repository.ping?).to be true + end + end + + describe '#add' do + it 'bulk indexes documents' do + allow(connection).to receive(:bulk) + repository.add([{ 'id' => '123' }]) + expect(connection).to have_received(:bulk).with(body: [ + { index: { _index: 'blacklight-test', _id: '123' } }, + { 'id' => '123' } + ]) + end + end + + describe '#commit' do + it 'refreshes the index' do + indices = double('indices') + allow(connection).to receive(:indices).and_return(indices) + allow(indices).to receive(:refresh) + repository.commit + expect(indices).to have_received(:refresh).with(index: 'blacklight-test') + end + end + + describe '#create_index!' do + let(:indices) { double('indices') } + + before { allow(connection).to receive(:indices).and_return(indices) } + + context 'when the index does not exist' do + before do + allow(indices).to receive(:exists?).and_return(false) + allow(indices).to receive(:create) + end + + it 'creates the index, mapping text fields as text and other strings as keyword' do + repository.create_index! 
+ + expect(indices).to have_received(:create) do |index:, body:| + expect(index).to eq 'blacklight-test' + templates = body.dig(:mappings, :dynamic_templates) + text_rule = templates.find { |t| t.key?(:text_fields) }[:text_fields] + string_rule = templates.find { |t| t.key?(:string_fields) }[:string_fields] + text_matcher = Regexp.new(text_rule[:match]) + expect(text_matcher.match?('title_tsim')).to be true + expect(text_matcher.match?('pub_date_si')).to be false + expect(text_rule.dig(:mapping, :type)).to eq 'text' + expect(string_rule.dig(:mapping, :type)).to eq 'keyword' + end + end + + context 'with a configured index mapping' do + before { blacklight_config.elasticsearch_index_settings = { settings: { number_of_shards: 1 } } } + + it 'uses the configured mapping' do + repository.create_index! + expect(indices).to have_received(:create).with(index: 'blacklight-test', body: { settings: { number_of_shards: 1 } }) + end + end + end + + context 'when the index already exists' do + before do + allow(indices).to receive(:exists?).and_return(true) + allow(indices).to receive(:create) + end + + it 'does not recreate it' do + repository.create_index! 
+ expect(indices).not_to have_received(:create) + end + end + end +end diff --git a/spec/models/blacklight/elastic_search/request_spec.rb b/spec/models/blacklight/elastic_search/request_spec.rb new file mode 100644 index 0000000000..e814d325f3 --- /dev/null +++ b/spec/models/blacklight/elastic_search/request_spec.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +RSpec.describe Blacklight::ElasticSearch::Request do + subject(:request) { described_class.new } + + describe '#append_query' do + it 'adds a clause to the bool must list' do + request.append_query(simple_query_string: { query: 'foo' }) + expect(request.dig(:query, :bool, :must)).to eq [{ simple_query_string: { query: 'foo' } }] + end + + it 'ignores blank queries' do + request.append_query(nil) + expect(request[:query]).to be_nil + end + end + + describe '#append_filter_query' do + it 'adds a clause to the bool filter list' do + request.append_filter_query(terms: { 'format' => ['Book'] }) + expect(request.dig(:query, :bool, :filter)).to eq [{ terms: { 'format' => ['Book'] } }] + end + end + + describe '#append_must_not' do + it 'adds a clause to the bool must_not list' do + request.append_must_not(exists: { field: 'format' }) + expect(request.dig(:query, :bool, :must_not)).to eq [{ exists: { field: 'format' } }] + end + end + + describe '#append_aggregation' do + it 'adds the aggregation under the aggs key' do + request.append_aggregation('format', terms: { field: 'format', size: 11 }) + expect(request[:aggs]).to eq('format' => { terms: { field: 'format', size: 11 } }) + end + end + + describe '#append_highlight_field' do + it 'registers the field for highlighting' do + request.append_highlight_field('title') + expect(request.dig(:highlight, :fields)).to eq('title' => {}) + end + end +end diff --git a/spec/models/blacklight/elastic_search/response_spec.rb b/spec/models/blacklight/elastic_search/response_spec.rb new file mode 100644 index 0000000000..ae038ef4f1 --- /dev/null +++ 
b/spec/models/blacklight/elastic_search/response_spec.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +RSpec.describe Blacklight::ElasticSearch::Response, :api do + subject(:response) do + described_class.new(raw_response, request_params, blacklight_config: blacklight_config, document_model: SolrDocument) + end + + let(:blacklight_config) do + Blacklight::Configuration.new.tap do |config| + config.add_facet_field 'format' + end + end + + let(:request_params) { { from: 10, size: 10 } } + + let(:raw_response) do + { + 'took' => 5, + 'hits' => { + 'total' => { 'value' => 42 }, + 'hits' => [ + { '_id' => 'abc', '_score' => 1.2, '_source' => { 'title_tsim' => ['A Title'] }, 'highlight' => { 'title_tsim' => ['A Title'] } }, + { '_id' => 'def', '_score' => 0.9, '_source' => { 'id' => 'def', 'title_tsim' => ['Another'] } } + ] + }, + 'aggregations' => { + 'format' => { + 'buckets' => [ + { 'key' => 'Book', 'doc_count' => 30 }, + { 'key' => 'Journal', 'doc_count' => 12 } + ] + } + } + } + end + + describe '#total' do + it 'reads the hit total' do + expect(response.total).to eq 42 + end + + context 'when the total is a plain integer (older ES / OpenSearch)' do + let(:raw_response) { { 'hits' => { 'total' => 7, 'hits' => [] } } } + + it 'still works' do + expect(response.total).to eq 7 + end + end + end + + describe '#documents' do + it 'builds documents from the hits, deriving the id from _id when absent' do + expect(response.documents.size).to eq 2 + expect(response.documents.first.id).to eq 'abc' + expect(response.documents.first['title_tsim']).to eq ['A Title'] + end + + it 'attaches highlight data to the document source' do + expect(response.documents.first['_highlighting']).to eq('title_tsim' => ['A Title']) + end + end + + describe '#aggregations' do + it 'converts ES aggregations into Blacklight facet fields' do + facet = response.aggregations['format'] + expect(facet.items.map(&:value)).to eq %w[Book Journal] + expect(facet.items.map(&:hits)).to eq [30, 12] + 
end + + it 'returns a null facet field for unknown facets' do + expect(response.aggregations['unknown']).to be_a Blacklight::Solr::Response::Facets::NullFacetField + end + end + + describe 'pagination' do + it 'reports start and rows from the request' do + expect(response.start).to eq 10 + expect(response.rows).to eq 10 + end + end + + describe 'disabled Solr features' do + it 'is never grouped' do + expect(response.grouped?).to be false + end + + it 'returns empty spelling suggestions' do + expect(response.spelling.words).to eq [] + expect(response.spelling.collation).to be_nil + end + + it 'returns no more-like-this documents' do + expect(response.more_like(response.documents.first)).to eq [] + end + end +end diff --git a/spec/models/blacklight/elastic_search/search_builder_behavior_spec.rb b/spec/models/blacklight/elastic_search/search_builder_behavior_spec.rb new file mode 100644 index 0000000000..9db12a44d8 --- /dev/null +++ b/spec/models/blacklight/elastic_search/search_builder_behavior_spec.rb @@ -0,0 +1,122 @@ +# frozen_string_literal: true + +RSpec.describe Blacklight::ElasticSearch::SearchBuilderBehavior, :api do + subject(:body) { search_builder.with(user_params).to_hash } + + let(:search_builder_class) do + Class.new(Blacklight::SearchBuilder) do + include Blacklight::ElasticSearch::SearchBuilderBehavior + end + end + let(:search_builder) { search_builder_class.new(context) } + let(:context) { CatalogController.new } + let(:user_params) { {} } + + let(:blacklight_config) do + Blacklight::Configuration.new.tap do |config| + config.add_facet_field 'format' + config.add_facet_field 'language_ssim', limit: 5 + config.add_index_field 'title_tsim', highlight: true + config.add_sort_field 'relevance', sort: 'score desc, title_si asc' + config.add_facet_fields_to_solr_request! 
+ end + end + + before { allow(context).to receive(:blacklight_config).and_return(blacklight_config) } + + describe 'the default processor chain' do + it 'does not include Solr-only steps' do + expect(search_builder.processor_chain).not_to include(:add_group_config_to_solr, :add_adv_search_clauses) + end + end + + describe '#add_query_to_request' do + context 'with a query' do + let(:user_params) { { q: 'history' } } + + it 'adds a simple_query_string clause' do + expect(body.dig(:query, :bool, :must)).to include( + simple_query_string: { query: 'history', default_operator: 'and' } + ) + end + end + + context 'with configured query fields' do + let(:user_params) { { q: 'history' } } + + before { blacklight_config.elasticsearch_query_fields = %w[title_tsim author_tsim] } + + it 'uses a multi_match clause' do + expect(body.dig(:query, :bool, :must)).to include( + multi_match: { query: 'history', fields: %w[title_tsim author_tsim], type: 'best_fields', operator: 'and' } + ) + end + end + + context 'with a selected search field that scopes the query fields' do + let(:user_params) { { q: 'history', search_field: 'title' } } + + before do + blacklight_config.add_search_field('title') { |field| field.elastic_query_fields = %w[title_tsim title_addl_tsim] } + end + + it 'scopes the multi_match to the search field fields' do + expect(body.dig(:query, :bool, :must)).to include( + multi_match: { query: 'history', fields: %w[title_tsim title_addl_tsim], type: 'best_fields', operator: 'and' } + ) + end + end + + context 'without a query' do + it 'does not add a query clause' do + expect(body[:query]).to be_nil + end + end + end + + describe '#add_filters_to_request' do + let(:user_params) { { f: { 'format' => ['Book'] } } } + + it 'adds a terms filter' do + expect(body.dig(:query, :bool, :filter)).to include(terms: { 'format' => ['Book'] }) + end + + context 'when filtering on a missing value' do + let(:user_params) { { f: { '-format' => 
[Blacklight::Engine.config.blacklight.facet_missing_param] } } } + + it 'adds a must_not exists clause' do + expect(body.dig(:query, :bool, :must_not)).to include(exists: { field: 'format' }) + end + end + end + + describe '#add_facetting_to_request' do + it 'adds a terms aggregation for each facet field, requesting limit + 1 values' do + expect(body[:aggs]['format'][:terms]).to include(field: 'format') + expect(body[:aggs]['language_ssim'][:terms]).to include(field: 'language_ssim', size: 6) + end + end + + describe '#add_paging_to_request' do + let(:user_params) { { per_page: 20, page: 3 } } + + it 'maps page/per_page to size/from' do + expect(body[:size]).to eq 20 + expect(body[:from]).to eq 40 + end + end + + describe '#add_sorting_to_request' do + let(:user_params) { { sort: 'relevance' } } + + it 'translates the Solr-style sort string into ES sort syntax' do + expect(body[:sort]).to eq [{ '_score' => { order: 'desc' } }, { 'title_si' => { order: 'asc' } }] + end + end + + describe '#add_highlighting_to_request' do + it 'requests highlighting for fields configured with highlight: true' do + expect(body.dig(:highlight, :fields)).to have_key('title_tsim') + end + end +end diff --git a/spec/requests/load_suggestions_spec.rb b/spec/requests/load_suggestions_spec.rb index 8a1bd15647..29911e514c 100644 --- a/spec/requests/load_suggestions_spec.rb +++ b/spec/requests/load_suggestions_spec.rb @@ -2,7 +2,8 @@ require 'spec_helper' -RSpec.describe 'GET /catalog/suggest' do +# Autocomplete suggestions are backed by the Solr suggester component. +RSpec.describe 'GET /catalog/suggest', :solr_only do it 'returns suggestions' do get '/catalog/suggest?q=new' expect(response.body).to eq <<-RESULT diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index ea6bf95931..61d8a8f87f 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -48,6 +48,24 @@ # When we're testing the API, only run the api tests config.filter_run api: true if ENV['BLACKLIGHT_API_TEST'].present? 
+ # When running the suite against the Elasticsearch adapter, skip examples that + # exercise Solr-only features that are intentionally disabled for that adapter + # (spellcheck/"did you mean", result grouping, pivot/query facets, + # more-like-this, autocomplete suggestions, and the Solr JSON Query DSL + # advanced search). Tag such examples or groups with `:solr_only`. + # + # The adapter is detected from the (statically generated) config/blacklight.yml + # so the suite behaves correctly without re-supplying BLACKLIGHT_ADAPTER; the + # environment variable is used as a fallback. + configured_adapter = begin + Blacklight.connection_config&.fetch(:adapter, nil) + rescue StandardError + nil + end + if (configured_adapter || ENV.fetch('BLACKLIGHT_ADAPTER', nil)).to_s =~ /elastic|opensearch/ + config.filter_run_excluding :solr_only + end + config.fixture_paths = [Rails.root.join("spec/fixtures")] # If you're not using ActiveRecord, or you'd prefer not to run each of your diff --git a/tasks/blacklight.rake b/tasks/blacklight.rake index e78888a758..c3d23c5b70 100644 --- a/tasks/blacklight.rake +++ b/tasks/blacklight.rake @@ -20,6 +20,22 @@ def system_with_error_handling(*args) end end +# @return [Symbol] :elasticsearch when the test app is configured to use the +# Elasticsearch adapter, otherwise :solr +def configured_search_engine + ENV['BLACKLIGHT_ADAPTER'].to_s =~ /elastic|opensearch/ ? :elasticsearch : :solr +end + +# Bring up the appropriate search engine (Solr or Elasticsearch) for the +# duration of the block, based on the BLACKLIGHT_ADAPTER environment variable. 
+def with_search_engine(&) + if configured_search_engine == :elasticsearch + with_elasticsearch(&) + else + with_solr(&) + end +end + def with_solr(&block) # We're being invoked by the app entrypoint script and solr is already up via docker compose if ENV['SOLR_ENV'] == 'docker-compose' @@ -41,10 +57,31 @@ def with_solr(&block) end end +def with_elasticsearch + # We're being invoked by the app entrypoint script and the cluster is already up via docker compose + if ENV['SOLR_ENV'] == 'docker-compose' + yield + elsif system('docker compose version') + begin + puts "Starting Elasticsearch" + # --wait blocks until the container reports healthy (see the healthcheck in + # compose.yaml) so we don't try to index before the cluster is ready. + system_with_error_handling "docker compose up -d --wait elasticsearch" + yield + ensure + puts "Stopping Elasticsearch" + system_with_error_handling "docker compose stop elasticsearch" + end + else + raise "Running the test suite against Elasticsearch requires Docker Compose. " \ + "Start an Elasticsearch instance yourself and set ELASTICSEARCH_URL, then run the specs directly." 
+ end +end + # rubocop:disable Rails/RakeEnvironment desc "Run test suite" task ci: ['build:npm'] do - with_solr do + with_search_engine do Rake::Task['blacklight:internal:seed'].invoke Rake::Task['blacklight:coverage'].invoke end @@ -75,7 +112,7 @@ namespace :blacklight do desc 'Run Solr and Blacklight for interactive development' task :server, [:rails_server_args] => ['engine_cart:generate'] do |_t, args| - with_solr do + with_search_engine do Rake::Task['blacklight:internal:seed'].invoke within_test_app do From b7c5491e0f2f1885b9eb2c2b9feb1717e7db2bd9 Mon Sep 17 00:00:00 2001 From: Justin Coyne Date: Fri, 29 May 2026 10:24:45 -0500 Subject: [PATCH 02/11] Fix --- .../elastic_search/facet_paginator.rb | 5 +- lib/blacklight/elastic_search/repository.rb | 55 ++++++++++++++++--- lib/blacklight/elastic_search/response.rb | 47 +++++----------- spec/controllers/catalog_controller_spec.rb | 11 ++-- .../elastic_search/repository_spec.rb | 28 ++++++---- 5 files changed, 89 insertions(+), 57 deletions(-) diff --git a/lib/blacklight/elastic_search/facet_paginator.rb b/lib/blacklight/elastic_search/facet_paginator.rb index 943406654b..f6aa129a91 100644 --- a/lib/blacklight/elastic_search/facet_paginator.rb +++ b/lib/blacklight/elastic_search/facet_paginator.rb @@ -5,7 +5,8 @@ module Blacklight::ElasticSearch # # Like Solr, Elasticsearch terms aggregations don't return a total count of # distinct values, so we request `limit + 1` values to detect whether more - # values are available. - class FacetPaginator < Blacklight::FacetPaginator + # values are available. We subclass the Solr paginator so that adapter-agnostic + # callers (and specs) that check for `Blacklight::Solr::FacetPaginator` work. 
+ class FacetPaginator < Blacklight::Solr::FacetPaginator end end diff --git a/lib/blacklight/elastic_search/repository.rb b/lib/blacklight/elastic_search/repository.rb index 5c47e699ec..619ffc8c90 100644 --- a/lib/blacklight/elastic_search/repository.rb +++ b/lib/blacklight/elastic_search/repository.rb @@ -122,10 +122,16 @@ def commit(**) # they can be used for exact-match filtering, sorting, and aggregations. # # Override the mapping by setting `blacklight_config.elasticsearch_index_settings`. + # + # @return [Boolean] true if the index was created, false if it already existed def create_index! - return if connection.indices.exists?(index: index_name) - connection.indices.create(index: index_name, body: index_settings) + true + rescue StandardError => e + # An already-existing index is not an error. Re-raise anything else. + raise unless index_already_exists?(e) + + false end # @return [String] the configured index name @@ -135,6 +141,12 @@ def index_name private + # @return [Boolean] whether the error indicates the index already exists + def index_already_exists?(error) + message = error.respond_to?(:message) ? error.message : error.to_s + message.include?('resource_already_exists_exception') || message.include?('index_already_exists_exception') + end + def search_request(body) response = connection.search(index: index_name, body: body) response.respond_to?(:to_h) ? response.to_h : response @@ -158,11 +170,40 @@ def build_response(response, request_params) end def body_for(request_params) - if request_params.respond_to?(:to_hash) - request_params.to_hash - else - request_params || {} - end + body = request_params.respond_to?(:to_hash) ? request_params.to_hash : (request_params || {}) + sanitize_body(body) + end + + # Blacklight (and callers like SearchService#previous_and_next_document_params) + # may merge Solr-style parameters into the request. Translate the ones that + # have an Elasticsearch equivalent and drop the rest so the Query DSL body + # stays valid. 
+ def sanitize_body(body) + body = body.dup + + fl = delete_param(body, :fl) + body[:_source] ||= Array(fl) if fl + + body.delete(:aggs) if delete_param(body, :facet) == false + + start = delete_param(body, :start) + body[:from] ||= start.to_i if start + + rows = delete_param(body, :rows) + body[:size] ||= rows.to_i if rows + + # Solr-only parameters that have no Elasticsearch equivalent. + delete_param(body, :qt) + delete_param(body, :spellcheck) + + body + end + + # Delete a key that may be present as either a Symbol or a String. + # @return the deleted value, or nil + def delete_param(body, key) + value = body.delete(key) + value.nil? ? body.delete(key.to_s) : value end def unique_key diff --git a/lib/blacklight/elastic_search/response.rb b/lib/blacklight/elastic_search/response.rb index bbba47bff5..a32234bd56 100644 --- a/lib/blacklight/elastic_search/response.rb +++ b/lib/blacklight/elastic_search/response.rb @@ -1,16 +1,17 @@ # frozen_string_literal: true module Blacklight::ElasticSearch - # Normalizes an Elasticsearch search response into the same interface that - # the rest of Blacklight expects from Blacklight::Solr::Response, so views, - # presenters, and components work unchanged regardless of the configured - # adapter. + # Normalizes an Elasticsearch search response into the interface the rest of + # Blacklight expects from Blacklight::Solr::Response, so views, presenters, and + # components work unchanged regardless of the configured adapter. + # + # It subclasses Blacklight::Solr::Response so that code (and specs) that check + # for `Blacklight::Solr::Response` continue to work; the Solr-shaped accessors + # are overridden to read Elasticsearch's response shape. # # Solr-only concepts (spelling suggestions, result grouping, and # more-like-this) are represented with null/empty implementations. 
- class Response < ActiveSupport::HashWithIndifferentAccess - include Blacklight::Solr::Response::PaginationMethods - + class Response < Blacklight::Solr::Response # A stand-in for Blacklight::Solr::Response::Spelling, which Elasticsearch # does not provide an equivalent of. class NullSpelling @@ -23,21 +24,11 @@ def collation end end - attr_reader :request_params, :search_builder - attr_accessor :blacklight_config, :options - - delegate :document_factory, to: :blacklight_config - # @param [Hash] data the raw Elasticsearch response # @param [Hash, Blacklight::SearchBuilder] request_params a SearchBuilder or a Hash of parameters def initialize(data, request_params, options = {}) - @search_builder = request_params if request_params.is_a?(Blacklight::SearchBuilder) - - super(ActiveSupport::HashWithIndifferentAccess.new(to_hash_safe(data))) - - @request_params = ActiveSupport::HashWithIndifferentAccess.new(to_hash_safe(request_params)) - self.blacklight_config = options[:blacklight_config] - self.options = options + data = data.to_hash if data.respond_to?(:to_hash) + super end def hits @@ -49,7 +40,6 @@ def documents source = (hit['_source'] || {}).dup source[unique_key] ||= hit['_id'] source['_highlighting'] = hit['highlight'] if hit['highlight'].present? - source['score'] ||= hit['_score'] if hit['_score'] document_factory.build(source, self, options) end @@ -83,9 +73,10 @@ def empty? total.zero? end - # @return [ActiveSupport::HashWithIndifferentAccess] the request parameters + # The request parameters, augmented with Solr-style `start`/`rows` aliases so + # that adapter-agnostic callers (e.g. `response.params[:start]`) keep working. def params - request_params + @params ||= request_params.merge(start: start, rows: rows) end # Elasticsearch does not provide result grouping in the way Solr does. 
@@ -103,10 +94,6 @@ def more_like(_document) [] end - def export_formats - documents.map { |x| x.export_formats.keys }.flatten.uniq - end - # Convert Elasticsearch aggregations into the hash of # Blacklight::Solr::Response::Facets::FacetField objects that Blacklight's # facet display expects. @@ -148,13 +135,5 @@ def default_aggregations end h.with_indifferent_access end - - def to_hash_safe(value) - if value.respond_to?(:to_hash) - value.to_hash - else - value || {} - end - end end end diff --git a/spec/controllers/catalog_controller_spec.rb b/spec/controllers/catalog_controller_spec.rb index 50b29905ef..21af3e1977 100644 --- a/spec/controllers/catalog_controller_spec.rb +++ b/spec/controllers/catalog_controller_spec.rb @@ -64,7 +64,7 @@ expect(assigns(:response).docs).to be_empty end - it "returns results (possibly 0) when the user asks for a valid value to a custom facet query", :integration do + it "returns results (possibly 0) when the user asks for a valid value to a custom facet query", :integration, :solr_only do get :index, params: { f: { example_query_facet_field: ['years_25'] } } # valid custom facet value with some results expect(assigns(:response).docs).not_to be_empty end @@ -153,7 +153,8 @@ expect(docs.first['links']['self']).to eq solr_document_url(id: docs.first['id']) end - it "gets the facets" do + # 9 facets includes the Solr-only pivot and query facets. + it "gets the facets", :solr_only do expect(facets).to have(9).facets format = facets.find { |x| x['id'] == 'format' } @@ -169,7 +170,8 @@ expect(search_fields.first['links']['self']).to eq search_catalog_url(format: :json, search_field: 'all_fields') end - describe "facets" do + # Query facets are a Solr-only feature. 
+ describe "facets", :solr_only do let(:query_facet) { facets.find { |x| x['id'] == 'example_query_facet_field' } } let(:query_facet_items) { query_facet['attributes']['items'].pluck('attributes') } @@ -282,7 +284,8 @@ allow(controller.blacklight_config.raw_endpoint).to receive(:enabled).and_return(true) end - it "gets the raw solr document" do + # The exact stored-field list is specific to the Solr schema. + it "gets the raw solr document", :solr_only do get :raw, params: { id: doc_id, format: 'json' } expect(response).to be_successful json = response.parsed_body diff --git a/spec/models/blacklight/elastic_search/repository_spec.rb b/spec/models/blacklight/elastic_search/repository_spec.rb index cbbc21bd46..0d791d174a 100644 --- a/spec/models/blacklight/elastic_search/repository_spec.rb +++ b/spec/models/blacklight/elastic_search/repository_spec.rb @@ -114,13 +114,13 @@ before { allow(connection).to receive(:indices).and_return(indices) } context 'when the index does not exist' do - before do - allow(indices).to receive(:exists?).and_return(false) - allow(indices).to receive(:create) - end + before { allow(indices).to receive(:create) } + # We create-and-rescue rather than issuing a HEAD `exists?` first, because + # a HEAD request as the client's first call trips Elasticsearch's product + # verification and emits a spurious warning. it 'creates the index, mapping text fields as text and other strings as keyword' do - repository.create_index! 
+ expect(repository.create_index!).to be true expect(indices).to have_received(:create) do |index:, body:| expect(index).to eq 'blacklight-test' @@ -147,13 +147,21 @@ context 'when the index already exists' do before do - allow(indices).to receive(:exists?).and_return(true) - allow(indices).to receive(:create) + allow(indices).to receive(:create).and_raise(StandardError.new('[400] resource_already_exists_exception')) + end + + it 'rescues the already-exists error and returns false' do + expect(repository.create_index!).to be false + end + end + + context 'when index creation fails for another reason' do + before do + allow(indices).to receive(:create).and_raise(StandardError.new('[503] cluster unavailable')) end - it 'does not recreate it' do - repository.create_index! - expect(indices).not_to have_received(:create) + it 're-raises the error' do + expect { repository.create_index! }.to raise_error(StandardError, /cluster unavailable/) end end end From 5280955c6bc517435af8c2893f551831acbfe440 Mon Sep 17 00:00:00 2001 From: Justin Coyne Date: Fri, 29 May 2026 10:38:05 -0500 Subject: [PATCH 03/11] Move solr params out of the search_service --- lib/blacklight/abstract_repository.rb | 13 +++++ lib/blacklight/elastic_search/repository.rb | 48 ++++++------------- .../elastic_search/search_builder_behavior.rb | 7 +++ .../blacklight/search_service_spec.rb | 39 +++++++++++---- 4 files changed, 64 insertions(+), 43 deletions(-) diff --git a/lib/blacklight/abstract_repository.rb b/lib/blacklight/abstract_repository.rb index efe79423bc..ee1efac0c6 100644 --- a/lib/blacklight/abstract_repository.rb +++ b/lib/blacklight/abstract_repository.rb @@ -54,6 +54,19 @@ def reflect_fields raise NotImplementedError end + # Repository-specific request parameters for fetching documents with a + # minimal field set and without faceting. Used by + # Blacklight::SearchService when paging to the previous/next document. 
+ # + # The default is the Solr-style parameters (preserved for Solr and any + # custom adapters); other adapters (e.g. Elasticsearch) override this. + # + # @param [String] unique_key the document model's unique key field + # @return [Hash] + def default_document_pagination_params(unique_key) + { fl: unique_key, facet: false } + end + ## # Is the repository in a working state? def ping diff --git a/lib/blacklight/elastic_search/repository.rb b/lib/blacklight/elastic_search/repository.rb index 619ffc8c90..5cf94706a5 100644 --- a/lib/blacklight/elastic_search/repository.rb +++ b/lib/blacklight/elastic_search/repository.rb @@ -81,6 +81,15 @@ def reflect_fields {} end + # Fetch documents with only the unique key field and without aggregations + # (Elasticsearch's equivalent of Solr's `fl` / `facet: false`). Used by + # Blacklight::SearchService when paging to the previous/next document. + # @param [String] unique_key the document model's unique key field + # @return [Hash] + def default_document_pagination_params(unique_key) + { _source: Array(unique_key) } + end + # Suggestions ("autocomplete") are not supported by this adapter. def suggestions(_request_params) Blacklight.logger&.debug("Suggestions are not supported by the Elasticsearch adapter") @@ -170,40 +179,11 @@ def build_response(response, request_params) end def body_for(request_params) - body = request_params.respond_to?(:to_hash) ? request_params.to_hash : (request_params || {}) - sanitize_body(body) - end - - # Blacklight (and callers like SearchService#previous_and_next_document_params) - # may merge Solr-style parameters into the request. Translate the ones that - # have an Elasticsearch equivalent and drop the rest so the Query DSL body - # stays valid. 
- def sanitize_body(body) - body = body.dup - - fl = delete_param(body, :fl) - body[:_source] ||= Array(fl) if fl - - body.delete(:aggs) if delete_param(body, :facet) == false - - start = delete_param(body, :start) - body[:from] ||= start.to_i if start - - rows = delete_param(body, :rows) - body[:size] ||= rows.to_i if rows - - # Solr-only parameters that have no Elasticsearch equivalent. - delete_param(body, :qt) - delete_param(body, :spellcheck) - - body - end - - # Delete a key that may be present as either a Symbol or a String. - # @return the deleted value, or nil - def delete_param(body, key) - value = body.delete(key) - value.nil? ? body.delete(key.to_s) : value + if request_params.respond_to?(:to_hash) + request_params.to_hash + else + request_params || {} + end end def unique_key diff --git a/lib/blacklight/elastic_search/search_builder_behavior.rb b/lib/blacklight/elastic_search/search_builder_behavior.rb index cb11cbf010..99b06e67f5 100644 --- a/lib/blacklight/elastic_search/search_builder_behavior.rb +++ b/lib/blacklight/elastic_search/search_builder_behavior.rb @@ -138,6 +138,13 @@ def request Blacklight::ElasticSearch::Request.new end + # Override the base SearchBuilder default so that `for_previous_and_next_documents` + # merges an Elasticsearch-appropriate `_source` restriction instead of a + # Solr-style `fl` field-list parameter. + def default_document_pagination_params + { _source: Array(blacklight_config.document_model.unique_key) } + end + # The fields a full-text query should target. 
When a search field is # selected, its `elastic_query_fields` (configured in the controller) scope # the query; otherwise the configuration-wide default is used (and when that diff --git a/spec/services/blacklight/search_service_spec.rb b/spec/services/blacklight/search_service_spec.rb index 1a6bf5c59c..262479fdaa 100644 --- a/spec/services/blacklight/search_service_spec.rb +++ b/spec/services/blacklight/search_service_spec.rb @@ -14,7 +14,9 @@ let(:context) { { whatever: :value } } let(:search_state) { Blacklight::SearchState.new(user_params, blacklight_config) } let(:service) { described_class.new(config: blacklight_config, search_state: search_state, **context) } - let(:repository) { Blacklight::Solr::Repository.new(blacklight_config) } + # Use whichever repository the application is configured for (Solr or + # Elasticsearch) so this spec exercises the configured adapter. + let(:repository) { blacklight_config.repository_class.new(blacklight_config) } let(:user_params) { {} } let(:blacklight_config) { CatalogController.blacklight_config.deep_copy } @@ -27,6 +29,16 @@ allow(service).to receive(:repository).and_return(repository) end + # Repository-appropriate params for restricting the returned document fields + # (Solr's `fl` vs. Elasticsearch's `_source`). + def field_restriction_params(*fields) + if blacklight_config.repository_class == Blacklight::ElasticSearch::Repository + { _source: fields } + else + { fl: fields.join(',') } + end + end + describe '#search_builder_class' do subject { service.send(:search_builder_class) } @@ -62,7 +74,8 @@ end end - describe "for a query returning a grouped response" do + # Result grouping is a Solr-only feature. + describe "for a query returning a grouped response", :solr_only do let(:user_params) { { q: all_docs_query } } before do @@ -75,7 +88,8 @@ end end - describe "for a query returning multiple groups", :integration do + # Result grouping is a Solr-only feature. 
+ describe "for a query returning multiple groups", :integration, :solr_only do let(:user_params) { { q: all_docs_query } } before do @@ -144,7 +158,8 @@ expect(@facets).to have_at_least(1).facet end - it 'has all facets specified in initializer' do + # Includes the Solr-only pivot and query facets. + it 'has all facets specified in initializer', :solr_only do expect(@facets.keys).to include(*blacklight_config.facet_fields.keys) expect(@facets.none? { |_k, v| v.nil? }).to be true end @@ -236,7 +251,10 @@ let(:rows) { 5000 } let(:user_params) { { q: all_docs_query, page: page, rows: rows } } - it 'has no results when prompted for page after last result' do + # Elasticsearch limits deep pagination (from + size) to index.max_result_window + # (default 10,000), so requesting a page far beyond the results raises rather + # than returning an empty set. + it 'has no results when prompted for page after last result', :solr_only do solr_response3 = service.search_results expect(solr_response3.docs).to have(0).docs end @@ -296,7 +314,7 @@ let(:response) { service.fetch([doc_id]) } before do - blacklight_config.fetch_many_document_params[:fl] = 'id,format' + blacklight_config.fetch_many_document_params.merge!(field_restriction_params('id', 'format')) end it 'has the expected value in the id field' do @@ -304,12 +322,15 @@ end it 'returns all the requested fields' do - expect(response.first['format']).to eq ['Book'] + # Solr returns multi-valued fields as arrays; Elasticsearch's _source keeps + # the originally-indexed scalar, so normalize before comparing. + expect(Array(response.first['format'])).to eq ['Book'] end end # SPECS FOR SPELLING SUGGESTIONS VIA SEARCH - describe "Searches should return spelling suggestions", :integration do + # Spellcheck ("did you mean") is a Solr-only feature. 
+ describe "Searches should return spelling suggestions", :integration, :solr_only do context "for just-poor-enough-query term" do let(:user_params) { { q: 'boo' } } @@ -410,7 +431,7 @@ end it 'allows the query parameters to be customized using configuration' do - blacklight_config.document_pagination_params[:fl] = 'id,format' + blacklight_config.document_pagination_params.merge!(field_restriction_params('id', 'format')) _response, docs = service.previous_and_next_documents_for_search(0, { q: '' }) From 0ec324ad78b530a9a11fca69a294e28d46ef01f6 Mon Sep 17 00:00:00 2001 From: Justin Coyne Date: Fri, 29 May 2026 10:53:07 -0500 Subject: [PATCH 04/11] Add append_ids_filter to keep solr specific fields out of BookmarksSearchBuilder --- app/services/blacklight/bookmarks_search_builder.rb | 7 +++---- app/services/blacklight/search_service.rb | 10 +++++++++- lib/blacklight/elastic_search/request.rb | 8 ++++++++ lib/blacklight/solr/request.rb | 7 +++++++ 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/app/services/blacklight/bookmarks_search_builder.rb b/app/services/blacklight/bookmarks_search_builder.rb index cff2ae9059..f00d8c4ada 100644 --- a/app/services/blacklight/bookmarks_search_builder.rb +++ b/app/services/blacklight/bookmarks_search_builder.rb @@ -6,16 +6,15 @@ class BookmarksSearchBuilder < ::SearchBuilder ## # Filters the query to only include the bookmarked items # - # @param [Hash] solr_parameters + # @param [Blacklight::Solr::Request, Blacklight::ElasticSearch::Request] request_parameters # # @return [void] - def bookmarked(solr_parameters) - solr_parameters[:fq] ||= [] + def bookmarked(request_parameters) bookmarks = @scope.context.fetch(:bookmarks) return unless bookmarks document_ids = bookmarks.collect { |b| b.document_id.to_s } - solr_parameters[:fq] += ["{!terms f=id}#{document_ids.join(',')}"] + request_parameters.append_ids_filter(blacklight_config.document_model.unique_key, document_ids) end self.default_processor_chain += 
[:bookmarked] end diff --git a/app/services/blacklight/search_service.rb b/app/services/blacklight/search_service.rb index fa23a008f3..2664471fb3 100644 --- a/app/services/blacklight/search_service.rb +++ b/app/services/blacklight/search_service.rb @@ -146,13 +146,21 @@ def previous_and_next_document_params(*, **) # @param [Array] ids # @param [HashWithIndifferentAccess] extra_controller_params def fetch_many(ids, extra_controller_params) - extra_controller_params ||= {} + extra_controller_params = (extra_controller_params || {}).dup + + # `rows` is Blacklight's pagination vocabulary (callers such as the bookmarks + # controller pass it to fetch all the requested documents rather than the + # default page size). Route it through the builder so each adapter translates + # it appropriately (Solr `rows`, Elasticsearch `size`) instead of leaking raw + # into the request body. + rows = extra_controller_params.delete(:rows) || extra_controller_params.delete('rows') query = search_builder .with(search_state) .where(blacklight_config.document_model.unique_key => ids) .merge(blacklight_config.fetch_many_document_params) .merge(extra_controller_params) + query.rows(rows) if rows # find_many was introduced in Blacklight 8.4. Before that, we used the # regular search method (possibly with a find-many specific `qt` parameter). diff --git a/lib/blacklight/elastic_search/request.rb b/lib/blacklight/elastic_search/request.rb index 1f288e7216..351dd0d404 100644 --- a/lib/blacklight/elastic_search/request.rb +++ b/lib/blacklight/elastic_search/request.rb @@ -40,6 +40,14 @@ def append_must_not(query) bool[:must_not] << query end + # Filter the results to a specific set of document ids. The field is ignored + # because Elasticsearch matches against the document `_id`. 
+ # @param [String] _field + # @param [Array] values + def append_ids_filter(_field, values) + append_filter_query(ids: { values: Array(values) }) + end + # Add a terms (or other) aggregation used to compute facet values def append_aggregation(name, aggregation) self[:aggs] ||= {} diff --git a/lib/blacklight/solr/request.rb b/lib/blacklight/solr/request.rb index 35b58d00bd..28ae8c9fb9 100644 --- a/lib/blacklight/solr/request.rb +++ b/lib/blacklight/solr/request.rb @@ -52,6 +52,13 @@ def append_filter_query(query) self['fq'] << query end + # Filter the results to a specific set of values in the given field. + # @param [String] field + # @param [Array] values + def append_ids_filter(field, values) + append_filter_query("{!terms f=#{field}}#{Array(values).join(',')}") + end + def append_facet_fields(values) self['facet.field'] ||= [] self['facet.field'] += Array(values) From 65b85fccdcfd14f2c0fd3d19b1ea92a0e542811d Mon Sep 17 00:00:00 2001 From: Justin Coyne Date: Fri, 29 May 2026 11:47:18 -0500 Subject: [PATCH 05/11] Fixing some faceting issues with ElasticSearch --- lib/blacklight/elastic_search/response.rb | 55 ++++++++++++++- .../elastic_search/search_builder_behavior.rb | 69 +++++++++++++++++-- 2 files changed, 117 insertions(+), 7 deletions(-) diff --git a/lib/blacklight/elastic_search/response.rb b/lib/blacklight/elastic_search/response.rb index a32234bd56..261198089a 100644 --- a/lib/blacklight/elastic_search/response.rb +++ b/lib/blacklight/elastic_search/response.rb @@ -76,7 +76,22 @@ def empty? # The request parameters, augmented with Solr-style `start`/`rows` aliases so # that adapter-agnostic callers (e.g. `response.params[:start]`) keep working. def params - @params ||= request_params.merge(start: start, rows: rows) + @params ||= begin + # For Solr, request_params are the Solr parameters. + # For ES, request_params is often the SearchBuilder. 
+ # We want this to return something that looks like Solr parameters + # so that adapter-agnostic code (like FacetPaginator and many views) + # can find things like facet.limit, facet.offset, facet.sort, etc. + p = if @search_builder + @search_builder.search_state.params + else + request_params + end + + # We'll also merge in some Solr aliases for start/rows, which Solr's + # Response::Params also provides. + ActiveSupport::HashWithIndifferentAccess.new(p).merge(start: start, rows: rows) + end end # Elasticsearch does not provide result grouping in the way Solr does. @@ -84,6 +99,36 @@ def grouped? false end + # @return [Hash] options for the facet field, including limit, offset, sort, etc. + def facet_field_aggregation_options(field_name) + options = super + return options unless blacklight_config + + # If the limit is the Solr default (100) and it wasn't explicitly set in the params, + # try to get a better default from the blacklight config. + if options[:limit] == 100 && params[:'facet.limit'].blank? && params[:"f.#{field_name}.facet.limit"].blank? + facet_config = blacklight_config.facet_fields[field_name] + + limit = if facet_config&.limit + facet_config.limit == true ? blacklight_config.default_facet_limit : facet_config.limit + else + blacklight_config.default_facet_limit + end + + options[:limit] = limit if limit + end + + # Elasticsearch terms aggregations don't support an offset, so we have to + # determine it from the search state if it's not present in the params. + if options[:offset].zero? 
&& search_builder && search_builder.facet == field_name + options[:offset] = (search_builder.search_state.facet_page - 1) * (options[:limit] || 10) + options[:sort] ||= search_builder.search_state.facet_sort + options[:prefix] ||= search_builder.search_state.facet_prefix + end + + options + end + # @return [NullSpelling] spelling suggestions are not supported def spelling @spelling ||= NullSpelling.new @@ -108,7 +153,13 @@ def aggregations Blacklight::Solr::Response::Facets::FacetItem.new(value: bucket['key'], hits: bucket['doc_count']) end - facet_field = Blacklight::Solr::Response::Facets::FacetField.new(field_name, items, response: self) + options = facet_field_aggregation_options(field_name) + + if options[:offset] && options[:offset].positive? + items = items[options[:offset]..] || [] + end + + facet_field = Blacklight::Solr::Response::Facets::FacetField.new(field_name, items, options.merge(response: self)) result[field_name] = facet_field next unless blacklight_config diff --git a/lib/blacklight/elastic_search/search_builder_behavior.rb b/lib/blacklight/elastic_search/search_builder_behavior.rb index 99b06e67f5..b35f8181c0 100644 --- a/lib/blacklight/elastic_search/search_builder_behavior.rb +++ b/lib/blacklight/elastic_search/search_builder_behavior.rb @@ -20,7 +20,8 @@ module SearchBuilderBehavior :add_sorting_to_request, :add_highlighting_to_request, :add_source_fields_to_request, - :add_additional_filters_to_request + :add_additional_filters_to_request, + :add_facet_paging_to_request ] end @@ -61,16 +62,22 @@ def add_filters_to_request(request) # Build terms aggregations for each configured facet field. Pivot and query # facets are not supported by this adapter and are skipped. def add_facetting_to_request(request) - facet_fields_to_include_in_request.each do |field_name, facet| - if facet.pivot || facet.query + facet_fields_to_include_in_request.each do |field_name, facet_config| + next if facet.present? 
&& facet == field_name + + if facet_config.pivot || facet_config.query Blacklight.logger&.debug("Skipping facet '#{field_name}': pivot and query facets are not supported by the Elasticsearch adapter") next end size = facet_limit_with_pagination(field_name) || blacklight_config.default_facet_limit - order = facet.sort == 'index' ? { '_key' => 'asc' } : { '_count' => 'desc' } + order = if facet_config.sort == 'index' + { '_key' => 'asc' } + else + [{ '_count' => 'desc' }, { '_key' => 'asc' }] + end - request.append_aggregation(field_name, terms: { field: facet.field, size: size, order: order }) + request.append_aggregation(field_name, terms: { field: facet_config.field, size: size, order: order }) end end @@ -131,6 +138,46 @@ def add_additional_filters_to_request(request, additional_filters = nil) end end + # Handle facet-specific pagination parameters (e.g. for the facet modal) + def add_facet_paging_to_request(request) + return if facet.blank? + + # We don't need any documents when we're only fetching facet values + request[:size] = 0 + + facet_config = blacklight_config.facet_fields[facet] + return if facet_config.blank? + + limit = facet_limit_for(facet) || blacklight_config.default_facet_limit + page = search_state.facet_page + sort = search_state.facet_sort + prefix = search_state.facet_prefix + offset = (page - 1) * limit + + # Since Elasticsearch's terms aggregation does not support an offset, we + # request enough items to cover the offset and the requested limit (plus + # one to detect whether more values are available). + size = offset + limit + 1 + order = if sort == 'index' + { '_key' => 'asc' } + else + [{ '_count' => 'desc' }, { '_key' => 'asc' }] + end + + # Elasticsearch uses the `include` parameter for prefix filtering and + # suggestions, which expects a regular expression. + include_regex = if facet_suggestion_query.present? && prefix.present? 
+ "#{lucene_case_insensitive_regex(prefix)}.*#{lucene_case_insensitive_regex(facet_suggestion_query)}.*" + elsif facet_suggestion_query.present? + ".*#{lucene_case_insensitive_regex(facet_suggestion_query)}.*" + elsif prefix.present? + "#{lucene_case_insensitive_regex(prefix)}.*" + end + + agg = { field: facet_config.field, size: size, order: order, include: include_regex }.compact + request.append_aggregation(facet, terms: agg) + end + private # @return [Blacklight::ElasticSearch::Request] @@ -145,6 +192,18 @@ def default_document_pagination_params { _source: Array(blacklight_config.document_model.unique_key) } end + # Convert a string into a case-insensitive Lucene regular expression + # by replacing each letter with a character class (e.g. "a" -> "[aA]"). + def lucene_case_insensitive_regex(str) + str.chars.map do |char| + if char =~ /[[:alpha:]]/ + "[#{char.downcase}#{char.upcase}]" + else + Regexp.escape(char) + end + end.join + end + # The fields a full-text query should target. When a search field is # selected, its `elastic_query_fields` (configured in the controller) scope # the query; otherwise the configuration-wide default is used (and when that From 3f162c41180c10159708061e1529ab3a29fec05b Mon Sep 17 00:00:00 2001 From: Justin Coyne Date: Wed, 17 Jun 2026 08:59:20 -0500 Subject: [PATCH 06/11] Add all_text search for Elasticsearch --- lib/blacklight/elastic_search/repository.rb | 31 +++++++++++++++---- .../elastic_search/search_builder_behavior.rb | 21 +++++++++++-- .../search_builder_behavior_spec.rb | 4 +-- 3 files changed, 45 insertions(+), 11 deletions(-) diff --git a/lib/blacklight/elastic_search/repository.rb b/lib/blacklight/elastic_search/repository.rb index 5cf94706a5..07cde155ba 100644 --- a/lib/blacklight/elastic_search/repository.rb +++ b/lib/blacklight/elastic_search/repository.rb @@ -219,21 +219,40 @@ def index_settings blacklight_config&.elasticsearch_index_settings || default_index_settings end - def default_index_settings + def 
default_index_settings # rubocop:disable Metrics/MethodLength { mappings: { + # `all_text` is the ES equivalent of Solr's `all_text_timv` copyField: + # a single English-analyzed field that aggregates content from all text + # and keyword facet/sort fields so that stemmed full-text search works + # (e.g. "history" matches docs whose subject_ssim contains "Japan History"). + properties: { + all_text: { type: 'text', analyzer: 'english' } + }, dynamic_templates: [ - # Blacklight text fields (e.g. title_tsim, author_tsim) -> analyzed text + # Blacklight text fields (*_tsim, *_tesim, etc.) -> analyzed text with + # English stemming, also copied to the aggregate all_text field. { text_fields: { match_pattern: 'regex', match: '.*_t[a-z]*$', - mapping: { type: 'text' } + mapping: { type: 'text', analyzer: 'english', copy_to: 'all_text' } } }, - # All other string fields (e.g. format, *_ssim, *_si, id) -> keyword, - # so they support filtering, sorting, and aggregations. Long values - # (e.g. stored MARC) are kept in _source but not indexed as terms. + # Facet and sort string fields (*_ssim, *_si) -> keyword for exact-match + # filtering/sorting/aggregations, also copied into all_text so their + # values are reachable by full-text search (mirrors Solr's copyField + # from *_ssim and *_si into all_text_timv). + { + keyword_copy_fields: { + match_pattern: 'regex', + match: '.*_(ssim|si)$', + mapping: { type: 'keyword', ignore_above: 8192, copy_to: 'all_text' } + } + }, + # All remaining string fields (e.g. marc_ss, format, id) -> keyword. + # These are NOT copied to all_text to avoid polluting search results + # with raw MARC XML or other non-bibliographic content. 
{ string_fields: { match_mapping_type: 'string', diff --git a/lib/blacklight/elastic_search/search_builder_behavior.rb b/lib/blacklight/elastic_search/search_builder_behavior.rb index b35f8181c0..6f8b30566e 100644 --- a/lib/blacklight/elastic_search/search_builder_behavior.rb +++ b/lib/blacklight/elastic_search/search_builder_behavior.rb @@ -8,7 +8,7 @@ module Blacklight::ElasticSearch # Features that Elasticsearch does not provide in the same way Solr does # (spellcheck, result grouping, pivot/query facets, and the Solr JSON Query # DSL advanced search) are intentionally omitted from the processor chain. - module SearchBuilderBehavior + module SearchBuilderBehavior # rubocop:disable Metrics/ModuleLength extend ActiveSupport::Concern included do @@ -37,15 +37,20 @@ def add_query_to_request(request) if fields.present? { multi_match: { query: query, fields: fields, type: 'best_fields', operator: 'and' } } else - { simple_query_string: { query: query, default_operator: 'and' } } + # `all_text` is the aggregate field (equivalent to Solr's all_text_timv) + # that covers all text and ssim/si fields with English stemming. + { simple_query_string: { query: query, fields: ['all_text'], default_operator: 'and' } } end ) end # Map the applied facet filters (Blacklight's `f` parameter) to - # Elasticsearch term filters. + # Elasticsearch term filters. Pivot and query facet filters are skipped + # because those facet types are not supported by this adapter. def add_filters_to_request(request) search_state.filters.each do |filter| + next if unsupported_filter?(filter) + field = filter_field_name(filter) values = Array(filter.values).compact_blank @@ -222,6 +227,16 @@ def filter_field_name(filter) filter.config&.field || filter.key end + # Returns true for filter types that have no Elasticsearch equivalent and + # should be silently dropped (pivot facets produce PivotValue objects that + # cannot be serialised into a terms query; query facets use Solr fq syntax). 
+ def unsupported_filter?(filter) + config = filter.config + return false unless config + + config.pivot.present? || config.query.present? + end + def facet_fields_to_include_in_request blacklight_config.facet_fields.select do |_field_name, facet| facet.include_in_request || (facet.include_in_request.nil? && blacklight_config.add_facet_fields_to_solr_request) diff --git a/spec/models/blacklight/elastic_search/search_builder_behavior_spec.rb b/spec/models/blacklight/elastic_search/search_builder_behavior_spec.rb index 9db12a44d8..0aa4f2914f 100644 --- a/spec/models/blacklight/elastic_search/search_builder_behavior_spec.rb +++ b/spec/models/blacklight/elastic_search/search_builder_behavior_spec.rb @@ -34,9 +34,9 @@ context 'with a query' do let(:user_params) { { q: 'history' } } - it 'adds a simple_query_string clause' do + it 'adds a simple_query_string clause targeting the all_text field' do expect(body.dig(:query, :bool, :must)).to include( - simple_query_string: { query: 'history', default_operator: 'and' } + simple_query_string: { query: 'history', fields: ['all_text'], default_operator: 'and' } ) end end From 459b30fa07717f47dadadceed7f19466715613f2 Mon Sep 17 00:00:00 2001 From: Justin Coyne Date: Wed, 17 Jun 2026 09:04:38 -0500 Subject: [PATCH 07/11] Add diacritic folding --- lib/blacklight/elastic_search/repository.rb | 31 +++++++++++++++++++-- spec/features/search_results_spec.rb | 3 +- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/lib/blacklight/elastic_search/repository.rb b/lib/blacklight/elastic_search/repository.rb index 07cde155ba..0f62608e1f 100644 --- a/lib/blacklight/elastic_search/repository.rb +++ b/lib/blacklight/elastic_search/repository.rb @@ -221,22 +221,47 @@ def index_settings def default_index_settings # rubocop:disable Metrics/MethodLength { + settings: { + analysis: { + filter: { + blacklight_english_stop: { type: 'stop', stopwords: '_english_' }, + blacklight_english_stemmer: { type: 'stemmer', language: 'english' }, + 
blacklight_english_possessive: { type: 'stemmer', language: 'possessive_english' } + }, + analyzer: { + # Extends the built-in `english` analyzer with an asciifolding step so + # that diacritic variants (e.g. inmül → inmul) match their base forms, + # consistent with Solr's ASCIIFoldingFilterFactory configuration. + blacklight_english: { + tokenizer: 'standard', + filter: %w[ + blacklight_english_possessive + lowercase + asciifolding + blacklight_english_stop + blacklight_english_stemmer + ] + } + } + } + }, mappings: { # `all_text` is the ES equivalent of Solr's `all_text_timv` copyField: # a single English-analyzed field that aggregates content from all text # and keyword facet/sort fields so that stemmed full-text search works # (e.g. "history" matches docs whose subject_ssim contains "Japan History"). properties: { - all_text: { type: 'text', analyzer: 'english' } + all_text: { type: 'text', analyzer: 'blacklight_english' } }, dynamic_templates: [ # Blacklight text fields (*_tsim, *_tesim, etc.) -> analyzed text with - # English stemming, also copied to the aggregate all_text field. + # English stemming + diacritic folding, also copied to the aggregate + # all_text field. { text_fields: { match_pattern: 'regex', match: '.*_t[a-z]*$', - mapping: { type: 'text', analyzer: 'english', copy_to: 'all_text' } + mapping: { type: 'text', analyzer: 'blacklight_english', copy_to: 'all_text' } } }, # Facet and sort string fields (*_ssim, *_si) -> keyword for exact-match diff --git a/spec/features/search_results_spec.rb b/spec/features/search_results_spec.rb index df5b323f41..e12f6ae5f8 100644 --- a/spec/features/search_results_spec.rb +++ b/spec/features/search_results_spec.rb @@ -27,7 +27,8 @@ expect(number_of_results_from_page(page)).to eq 1 end - it "orders by relevancy" do + # Exact relevance ordering depends on Solr's field boosts and dismax scoring. 
+ it "orders by relevancy", :solr_only do search_for "Korea" expect(position_in_result_page(page, '77826928')).to eq 1 expect(position_in_result_page(page, '94120425')).to eq 4 From 0052b83d59b8245895e79e85ea5aa9f19ffc04ad Mon Sep 17 00:00:00 2001 From: Justin Coyne Date: Wed, 17 Jun 2026 09:31:53 -0500 Subject: [PATCH 08/11] Fix display count for facets --- lib/blacklight/elastic_search/response.rb | 16 ++++++++++++---- .../elastic_search/search_builder_behavior.rb | 4 +++- spec/features/facets_spec.rb | 4 +++- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/lib/blacklight/elastic_search/response.rb b/lib/blacklight/elastic_search/response.rb index 261198089a..7b75526a5a 100644 --- a/lib/blacklight/elastic_search/response.rb +++ b/lib/blacklight/elastic_search/response.rb @@ -11,7 +11,7 @@ module Blacklight::ElasticSearch # # Solr-only concepts (spelling suggestions, result grouping, and # more-like-this) are represented with null/empty implementations. - class Response < Blacklight::Solr::Response + class Response < Blacklight::Solr::Response # rubocop:disable Metrics/ClassLength # A stand-in for Blacklight::Solr::Response::Spelling, which Elasticsearch # does not provide an equivalent of. class NullSpelling @@ -100,7 +100,7 @@ def grouped? end # @return [Hash] options for the facet field, including limit, offset, sort, etc. - def facet_field_aggregation_options(field_name) + def facet_field_aggregation_options(field_name) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity options = super return options unless blacklight_config @@ -142,7 +142,7 @@ def more_like(_document) # Convert Elasticsearch aggregations into the hash of # Blacklight::Solr::Response::Facets::FacetField objects that Blacklight's # facet display expects. 
- def aggregations + def aggregations # rubocop:disable Metrics/CyclomaticComplexity @aggregations ||= begin result = default_aggregations @@ -155,10 +155,18 @@ def aggregations options = facet_field_aggregation_options(field_name) - if options[:offset] && options[:offset].positive? + if options[:offset]&.positive? items = items[options[:offset]..] || [] end + # Override with the actual aggregation size from the request body. + # facet_field_aggregation_options reads Solr-style params that do not + # exist in an Elasticsearch request, so its limit is unreliable. + # The FacetFieldPresenter computes: display_count = limit - 1, so + # setting limit = requested_size gives the correct display count. + agg_size = request_params.dig('aggs', field_name, 'terms', 'size') + options[:limit] = agg_size if agg_size + facet_field = Blacklight::Solr::Response::Facets::FacetField.new(field_name, items, options.merge(response: self)) result[field_name] = facet_field diff --git a/lib/blacklight/elastic_search/search_builder_behavior.rb b/lib/blacklight/elastic_search/search_builder_behavior.rb index 6f8b30566e..64e5694417 100644 --- a/lib/blacklight/elastic_search/search_builder_behavior.rb +++ b/lib/blacklight/elastic_search/search_builder_behavior.rb @@ -65,7 +65,9 @@ def add_filters_to_request(request) end # Build terms aggregations for each configured facet field. Pivot and query - # facets are not supported by this adapter and are skipped. + # facets are not supported by this adapter and are skipped. When the facet + # modal is open (`facet` is set), the selected field's aggregation is + # handled by `add_facet_paging_to_request` instead. def add_facetting_to_request(request) facet_fields_to_include_in_request.each do |field_name, facet_config| next if facet.present? 
&& facet == field_name diff --git a/spec/features/facets_spec.rb b/spec/features/facets_spec.rb index 8af60b193a..259f7a51dc 100644 --- a/spec/features/facets_spec.rb +++ b/spec/features/facets_spec.rb @@ -7,7 +7,9 @@ expect(page).to have_css ".facet-select", text: "Tibetan" end - it "paginates through a facet's values" do + # Elasticsearch terms aggregations do not support offset-based pagination, + # so navigating to page 2+ of facet values is a Solr-only feature. + it "paginates through a facet's values", :solr_only do visit facet_catalog_path("subject_ssim") expect(page).to have_css '.facet-values li:first', text: "Japanese drama" expect(page).to have_link "A-Z Sort" From 6d86e53cd5afe5e7fd9a6e8b2c27ab84d62632f9 Mon Sep 17 00:00:00 2001 From: Justin Coyne Date: Wed, 17 Jun 2026 09:39:13 -0500 Subject: [PATCH 09/11] Handle missing facets for ES --- lib/blacklight/elastic_search/response.rb | 33 +++++++++++++++++++ .../elastic_search/search_builder_behavior.rb | 19 +++++++++++ 2 files changed, 52 insertions(+) diff --git a/lib/blacklight/elastic_search/response.rb b/lib/blacklight/elastic_search/response.rb index 7b75526a5a..99c020d954 100644 --- a/lib/blacklight/elastic_search/response.rb +++ b/lib/blacklight/elastic_search/response.rb @@ -177,6 +177,11 @@ def aggregations # rubocop:disable Metrics/CyclomaticComplexity end end + # Second pass: attach missing-value counts from sibling `_missing` + # aggregations (added by SearchBuilderBehavior when facet.missing is + # enabled). This mirrors Solr's `facet.missing` feature. + attach_missing_facet_items(result) + result end end @@ -187,6 +192,34 @@ def unique_key (blacklight_config&.document_model || ::SolrDocument).unique_key end + # Locate `{field_name}_missing` aggregations produced by the + # Elasticsearch `missing` aggregation type and attach them as missing + # FacetItems on the corresponding FacetField. 
+ def attach_missing_facet_items(result) + (self['aggregations'] || {}).each do |agg_name, data| + next unless agg_name.end_with?('_missing') + next unless data.is_a?(Hash) + + count = data['doc_count'].to_i + next if count.zero? + + field_name = agg_name.delete_suffix('_missing') + facet_field = result[field_name] + next if facet_field.nil? || facet_field.is_a?(Blacklight::Solr::Response::Facets::NullFacetField) + + label = I18n.t(:"blacklight.search.fields.facet.missing.#{field_name}", + default: :'blacklight.search.facets.missing') + missing_item = Blacklight::Solr::Response::Facets::FacetItem.new( + value: Blacklight::SearchState::FilterField::MISSING, + hits: count, + missing: true, + label: label + ) + facet_field.items << missing_item + facet_field.missing = missing_item + end + end + # @return [HashWithIndifferentAccess] hash with a null-object default for missing facet fields def default_aggregations h = Hash.new do |_hash, key| diff --git a/lib/blacklight/elastic_search/search_builder_behavior.rb b/lib/blacklight/elastic_search/search_builder_behavior.rb index 64e5694417..e4f8ac5e4c 100644 --- a/lib/blacklight/elastic_search/search_builder_behavior.rb +++ b/lib/blacklight/elastic_search/search_builder_behavior.rb @@ -85,6 +85,14 @@ def add_facetting_to_request(request) end request.append_aggregation(field_name, terms: { field: facet_config.field, size: size, order: order }) + + # Elasticsearch's `missing` aggregation is the equivalent of Solr's + # `facet.missing`, returning a count for documents that have no value + # for the field. We add it as a sibling aggregation when the Solr-style + # `facet.missing` param or the per-field `missing:` option is enabled. + next unless facet_missing_enabled?(facet_config) + + request.append_aggregation("#{field_name}_missing", missing: { field: facet_config.field }) end end @@ -239,6 +247,17 @@ def unsupported_filter?(filter) config.pivot.present? || config.query.present? 
end + # Returns true when missing-value counts should be included in the + # aggregation for +facet_config+. Supports the Solr-style global + # `default_solr_params["facet.missing"]` flag as well as per-facet + # `missing: true` in the facet field configuration. + def facet_missing_enabled?(facet_config) + facet_config.missing || + blacklight_config.default_solr_params["facet.missing"] == true || + blacklight_config.default_solr_params["facet.missing"] == "true" || + blacklight_config.default_solr_params[:'facet.missing'] == true + end + def facet_fields_to_include_in_request blacklight_config.facet_fields.select do |_field_name, facet| facet.include_in_request || (facet.include_in_request.nil? && blacklight_config.add_facet_fields_to_solr_request) From 0e2ffd198eb093b8fe2349294556e831e8168777 Mon Sep 17 00:00:00 2001 From: Justin Coyne Date: Wed, 17 Jun 2026 09:42:19 -0500 Subject: [PATCH 10/11] Fix tests for solr only behavior --- spec/models/blacklight/configuration_spec.rb | 6 +++--- spec/models/blacklight/solr/repository_spec.rb | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/spec/models/blacklight/configuration_spec.rb b/spec/models/blacklight/configuration_spec.rb index 70f916dce5..218a41a03a 100644 --- a/spec/models/blacklight/configuration_spec.rb +++ b/spec/models/blacklight/configuration_spec.rb @@ -8,7 +8,7 @@ describe "#repository" do context 'when the class is configured in blacklight.yml' do it "uses the default repository class" do - expect(config.repository).to be_a(Blacklight::Solr::Repository) + expect(config.repository).to be_a(Blacklight.repository_class) end end @@ -635,8 +635,8 @@ end describe "#facet_paginator_class" do - it "defaults to Blacklight::Solr::FacetPaginator" do - expect(config.facet_paginator_class).to eq Blacklight::Solr::FacetPaginator + it "defaults to the configured adapter's facet paginator" do + expect(config.facet_paginator_class).to eq Blacklight.default_facet_paginator_class end end diff --git 
a/spec/models/blacklight/solr/repository_spec.rb b/spec/models/blacklight/solr/repository_spec.rb index 25663d2027..73905069d5 100644 --- a/spec/models/blacklight/solr/repository_spec.rb +++ b/spec/models/blacklight/solr/repository_spec.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -RSpec.describe Blacklight::Solr::Repository, :api do +RSpec.describe Blacklight::Solr::Repository, :api, :solr_only do subject(:repository) do described_class.new blacklight_config end From cba30c7b709dc54bebcf234accb07e93d806afd5 Mon Sep 17 00:00:00 2001 From: Justin Coyne Date: Wed, 17 Jun 2026 09:47:07 -0500 Subject: [PATCH 11/11] Improve unsupported filter handling --- .../elastic_search/search_builder_behavior.rb | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/blacklight/elastic_search/search_builder_behavior.rb b/lib/blacklight/elastic_search/search_builder_behavior.rb index e4f8ac5e4c..b14b7e63b2 100644 --- a/lib/blacklight/elastic_search/search_builder_behavior.rb +++ b/lib/blacklight/elastic_search/search_builder_behavior.rb @@ -49,7 +49,19 @@ def add_query_to_request(request) # because those facet types are not supported by this adapter. def add_filters_to_request(request) search_state.filters.each do |filter| - next if unsupported_filter?(filter) + if unsupported_filter?(filter) + # Query facet filters represent an explicit user selection that cannot + # be translated to Elasticsearch. Apply a filter that matches nothing + # so the result is empty — more correct than silently returning all docs. + # + # Pivot facet filters are silently ignored: their `values` are + # PivotValue objects derived from constituent field selections (not + # direct user choices), so suppressing results would be incorrect. + if filter.config.query.present? && filter.values.any?(String) + request.append_filter_query(ids: { values: [] }) + end + next + end field = filter_field_name(filter) values = Array(filter.values).compact_blank