Skip to content

Commit

Permalink
Configure Opensearch model to search single fields
Browse files Browse the repository at this point in the history
Why these changes are being introduced:

The search model needs to be able to target any of the fields defined
as single-field searchable in v2.0 of the data model.

Relevant ticket(s):

https://mitlibraries.atlassian.net/browse/RDI-102

How this addresses that need:

This enables matching on the following fields in the Opensearch model:

* citation
* contributors
* funding_information
* identifiers
* locations
* subjects

(Note that title was already made searchable as part of a previous
commit.)

Side effects of this change:

* These fields are not yet enabled in GraphQL, so you'll need to use
REST to confirm this behavior.
* While this allows us to search a single nested subfield, it does not
allow for searching multiple nested subfields. For example,
searching contributors should search both 'value' and 'identifier', but
right now we can only search 'value'. RDI-116 will confirm expectations
on which subfields must be searchable.
* The way this targets individual fields seems to require an exact match.
This may be resolvable for nested fields by using `multi_match`, which
would also allow us to search multiple subfields. We plan to explore
this more as part of the graphql work.
  • Loading branch information
jazairi committed May 20, 2022
1 parent 40ca6e2 commit 5fefc47
Show file tree
Hide file tree
Showing 5 changed files with 438 additions and 8 deletions.
55 changes: 47 additions & 8 deletions app/models/opensearch.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def query

def multisearch
return unless @params[:q].present?

[
{
prefix: {
Expand Down Expand Up @@ -73,14 +74,14 @@ def matches
}
}
end

if @params[:title].present?
m << {
match: {
title: @params[:title].downcase
}
}
end
match_single_field(:citation, m)
match_single_field(:title, m)

match_single_field_nested(:contributors, m)
match_single_field_nested(:funding_information, m)
match_single_field_nested(:identifiers, m)
match_single_field_nested(:locations, m)
match_single_field_nested(:subjects, m)
m
end

Expand Down Expand Up @@ -198,4 +199,42 @@ def aggregations
}
}
end

private

# Appends a `match` clause for a top-level (non-nested) field when the request supplied a value for it.
#
# @param field [Symbol] key read from @params; also used as the field name inside the clause
# @param match_array [Array<Hash>] list of query clauses; mutated in place
# @return [Array<Hash>, nil] match_array when a clause was appended, nil when the field was skipped
def match_single_field(field, match_array)
  # `present?` rather than bare truthiness: an empty string is truthy in Ruby, so a blank param
  # (e.g. `?title=`) would otherwise add a clause with an empty search term. This mirrors the
  # `@params[:q].present?` guard used in multisearch.
  return unless @params[field].present?

  match_array << { match: { field => @params[field].downcase } }
end

# Appends a `nested` query clause that matches one subfield of a nested field, when the request
# supplied a value for that field. The subfield searched is chosen by nested_field.
#
# @param field [Symbol] key read from @params; its string form is used as the nested path
# @param match_array [Array<Hash>] list of query clauses; mutated in place
# @return [Array<Hash>, nil] match_array when a clause was appended, nil when the field was skipped
def match_single_field_nested(field, match_array)
  # `present?` rather than bare truthiness so a blank param (e.g. `?subjects=`) is ignored
  # instead of adding a clause with an empty search term.
  return unless @params[field].present?

  # Fully qualified subfield name, e.g. :"contributors.value". Kept as a Symbol key, as before.
  subfield_key = :"#{field}.#{nested_field(field)}"

  match_array << {
    nested: {
      path: field.to_s,
      query: {
        bool: {
          must: [
            { match: { subfield_key => @params[field].downcase } }
          ]
        }
      }
    }
  }
end

# Picks the subfield to search inside a nested field. Nearly every nested field is searched on
# 'value'; the exceptions are listed explicitly below.
#
# @param field [Symbol] nested field name
# @return [String] name of the subfield to match against
def nested_field(field)
  case field
  when :funding_information then 'funder_name'
  else 'value'
  end
end
end
88 changes: 88 additions & 0 deletions test/models/opensearch_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
require 'test_helper'

# Unit tests for the query clauses built by Opensearch#matches, plus VCR-backed searches.
#
# The clause assertions use `any?` and follow the full path into each clause. Asserting on the
# result of `select` always passes, because an Array is truthy even when it is empty.
class OpensearchTest < ActiveSupport::TestCase
  test 'matches citation' do
    os = Opensearch.new
    os.instance_variable_set(:@params, { citation: 'foo' })
    assert single_field_clause?(os.matches, :citation, 'foo')
  end

  test 'matches title' do
    os = Opensearch.new
    os.instance_variable_set(:@params, { title: 'foo' })
    assert single_field_clause?(os.matches, :title, 'foo')
  end

  test 'matches contributors' do
    os = Opensearch.new
    os.instance_variable_set(:@params, { contributors: 'foo' })
    assert nested_field_clause?(os.matches, 'contributors', 'value', 'foo')
  end

  test 'matches funding_information' do
    os = Opensearch.new
    os.instance_variable_set(:@params, { funding_information: 'foo' })
    assert nested_field_clause?(os.matches, 'funding_information', 'funder_name', 'foo')
  end

  test 'matches identifiers' do
    os = Opensearch.new
    os.instance_variable_set(:@params, { identifiers: 'foo' })
    assert nested_field_clause?(os.matches, 'identifiers', 'value', 'foo')
  end

  test 'matches locations' do
    os = Opensearch.new
    os.instance_variable_set(:@params, { locations: 'foo' })
    assert nested_field_clause?(os.matches, 'locations', 'value', 'foo')
  end

  test 'matches subjects' do
    os = Opensearch.new
    os.instance_variable_set(:@params, { subjects: 'foo' })
    assert nested_field_clause?(os.matches, 'subjects', 'value', 'foo')
  end

  test 'matches everything' do
    os = Opensearch.new
    os.instance_variable_set(:@params, { q: 'this', citation: 'here', title: 'is', contributors: 'a',
                                         funding_information: 'real', identifiers: 'search', locations: 'rest',
                                         subjects: 'assured' })
    matches = os.matches
    # NOTE(review): the keyword (q) clause is built by code outside this file's view, so this only
    # checks that the term appears somewhere in a clause. Tighten once its shape is confirmed.
    assert(matches.any? { |clause| clause_contains_value?(clause, 'this') })
    assert single_field_clause?(matches, :citation, 'here')
    assert single_field_clause?(matches, :title, 'is')
    assert nested_field_clause?(matches, 'contributors', 'value', 'a')
    assert nested_field_clause?(matches, 'funding_information', 'funder_name', 'real')
    assert nested_field_clause?(matches, 'identifiers', 'value', 'search')
    assert nested_field_clause?(matches, 'locations', 'value', 'rest')
    assert nested_field_clause?(matches, 'subjects', 'value', 'assured')
  end

  test 'searches a single field' do
    VCR.use_cassette('opensearch single field') do
      params = { title: 'spice' }
      results = Opensearch.new.search(0, params, Timdex::OSClient)
      assert_equal "Spice it up! the best of Paquito D'Rivera.",
                   results['hits']['hits'].first['_source']['title']
    end
  end

  test 'searches a single field with nested subfields' do
    VCR.use_cassette('opensearch single field nested') do
      params = { contributors: 'mcternan' }
      results = Opensearch.new.search(0, params, Timdex::OSClient)
      assert_equal "A common table : 80 recipes and stories from my shared cultures /",
                   results['hits']['hits'].first['_source']['title']
    end
  end

  test 'searches multiple fields' do
    VCR.use_cassette('opensearch multiple fields') do
      params = { q: 'chinese', title: 'common', contributors: 'mcternan' }
      results = Opensearch.new.search(0, params, Timdex::OSClient)
      assert_equal "A common table : 80 recipes and stories from my shared cultures /",
                   results['hits']['hits'].first['_source']['title']
    end
  end

  private

  # True when some clause is `{ match: { field => value } }` for a top-level field.
  def single_field_clause?(matches, field, value)
    matches.any? { |clause| clause.dig(:match, field) == value }
  end

  # True when some clause is a nested query on `path` whose `must` list matches
  # "<path>.<subfield>" against value. The model builds that key as a Symbol.
  def nested_field_clause?(matches, path, subfield, value)
    key = :"#{path}.#{subfield}"
    matches.any? do |clause|
      clause.dig(:nested, :path) == path &&
        Array(clause.dig(:nested, :query, :bool, :must)).any? { |inner| inner.dig(:match, key) == value }
    end
  end

  # Recursively checks whether value appears as a leaf anywhere inside a clause.
  def clause_contains_value?(node, value)
    case node
    when Hash then node.each_value.any? { |child| clause_contains_value?(child, value) }
    when Array then node.any? { |child| clause_contains_value?(child, value) }
    else node == value
    end
  end
end
113 changes: 113 additions & 0 deletions test/vcr_cassettes/opensearch_multiple_fields.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 5fefc47

Please sign in to comment.