Skip to content

Commit

Permalink
Merge pull request #551 from MITLibraries/rdi-247-support-for-multiva…
Browse files Browse the repository at this point in the history
…lue-facets

OpenSearch and GraphQL multivalue facets
  • Loading branch information
JPrevost authored Aug 1, 2022
2 parents f648363 + 9d53ac2 commit 647292a
Show file tree
Hide file tree
Showing 6 changed files with 319 additions and 51 deletions.
2 changes: 1 addition & 1 deletion app/graphql/types/aggregations_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ class AggregationCountType < Types::BaseObject
end

class AggregationsType < Types::BaseObject
field :content_format, [Types::AggregationCountType], null: true
field :format, [Types::AggregationCountType], null: true
field :content_type, [Types::AggregationCountType], null: true
field :contributors, [Types::AggregationCountType], null: true
field :languages, [Types::AggregationCountType], null: true
Expand Down
7 changes: 2 additions & 5 deletions app/graphql/types/query_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@ def record_id(id:, index:)
description: 'It is not recommended to provide an index value unless we have provided you with one for your specific use case'

# applied facets
argument :collection_facet, [String], required: false, default_value: nil
argument :content_type_facet, String, required: false, default_value: nil
argument :content_type_facet, [String], required: false, default_value: nil
argument :contributors_facet, [String], required: false, default_value: nil
argument :format_facet, [String], required: false, default_value: nil
argument :languages_facet, [String], required: false, default_value: nil
Expand Down Expand Up @@ -177,10 +176,8 @@ def collapse_buckets(es_aggs)
subjects: es_aggs['subjects']['subject_names']['buckets'],
languages: es_aggs['languages']['buckets'],
literary_form: es_aggs['literary_form']['buckets'],

content_format: es_aggs['content_format']['buckets'],
format: es_aggs['content_format']['buckets'],
content_type: es_aggs['content_type']['buckets']

}
end
else
Expand Down
76 changes: 32 additions & 44 deletions app/models/opensearch.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def query
bool: {
should: multisearch,
must: matches,
filter: filters
filter: filters(@params)
}
}
end
Expand Down Expand Up @@ -106,58 +106,51 @@ def matches
end

# https://www.elastic.co/guide/en/elasticsearch/reference/current/query-filter-context.html
def filters
def filters(params)
f = []
f.push filter(@params[:collection_facet], 'collections') if @params[:collection_facet]
f.push filter(@params[:contributors_facet], 'contributors') if @params[:contributors_facet]

f.push filter_single(@params[:content_type_facet], 'content_type') if @params[:content_type_facet]

f.push filter(@params[:content_format_type], 'format') if @params[:content_format_type]
if params[:contributors_facet].present?
params[:contributors_facet].each do |p|
f.push filter_field_by_value('contributors.value.keyword', p)
end
end

f.push filter(@params[:languages_facet], 'languages') if @params[:languages_facet]
if params[:content_type_facet].present?
params[:content_type_facet].each do |p|
f.push filter_field_by_value('content_type', p)
end
end

f.push filter_single(@params[:literary_form_facet], 'literary_form') if @params[:literary_form_facet]
if params[:content_format_facet].present?
params[:content_format_facet].each do |p|
f.push filter_field_by_value('format', p)
end
end

f.push filter_sources(@params[:source_facet]) if @params[:source_facet]
if params[:languages_facet].present?
params[:languages_facet].each do |p|
f.push filter_field_by_value('languages', p)
end
end

f.push filter(@params[:subjects_facet], 'subjects') if @params[:subjects_facet]
f
end
# literary_form is a single value aggregation
f.push filter_field_by_value('literary_form', params[:literary_form_facet]) if params[:literary_form_facet].present?

# use `filter` when we accept multiple of the same parameter in our data
# model
def filter(param, field)
terms = []
# source aggregation is "OR" and not "AND" so it does not use the filter_field_by_value method
f.push filter_sources(params[:source_facet]) if params[:source_facet]

param.each do |t|
if field == 'contributors'
terms.push(
nested: {
path: 'contributors',
query: {
bool: {
must: [{
match: {
'contributors.value.keyword': t
}
}]
}
}
}
)
else
terms.push(term: { "#{field}.keyword": t })
if params[:subjects_facet].present?
params[:subjects_facet].each do |p|
f.push filter_field_by_value('subjects.value.keyword', p)
end
end

terms
f
end

# use `filter_single` when we only accept a single value in our data model
def filter_single(param, field)
def filter_field_by_value(field, value)
{
term: { "#{field}": param }
term: { "#{field}": value }
}
end

Expand All @@ -184,11 +177,6 @@ def source_array(param)
# https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html
def aggregations
{
collections: {
terms: {
field: 'collections.keyword'
}
},
contributors: {
nested: {
path: 'contributors'
Expand Down
46 changes: 45 additions & 1 deletion test/controllers/graphql_controller_v2_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ def setup
end
end

test 'graphqlv2 can retrive a record from a specified index' do
test 'graphqlv2 can retrieve a record from a specified index' do
# fragile test: specific item expected in specified index
VCR.use_cassette('graphql v2 retrieve from rdi* index') do
post '/graphql', params: { query:
Expand All @@ -438,4 +438,48 @@ def setup
assert_equal('zenodo:5728409', json['data']['recordId']['timdexRecordId'])
end
end

test 'graphqlv2 can apply multi-value facets' do
  # Fragile test: the recorded data must contain at least 2 records that carry
  # both the `dataset` and the `still image` contentTypes.
  VCR.use_cassette('graphql v2 apply multiple content types facets') do
    # First request: restrict the search to the `dataset` content type only.
    dataset_query = '{
search(index: "rdi*", contentTypeFacet:["dataset"]) {
hits
aggregations {
contentType {
key
docCount
}
}
}
}'
    post '/graphql', params: { query: dataset_query }

    dataset_search = JSON.parse(response.body)['data']['search']
    initial_hits_count = dataset_search['hits']
    still_image_bucket = dataset_search['aggregations']['contentType'].find do |bucket|
      bucket['key'] == 'still image'
    end
    initial_still_images_count = still_image_bucket['docCount']

    # Second request: require both `dataset` and `still image`.
    dataset_still_image_query = '{
search(index: "rdi*", contentTypeFacet:["dataset", "still image"]) {
hits
aggregations {
contentType {
key
docCount
}
}
}
}'
    post '/graphql', params: { query: dataset_still_image_query }

    final_hits_count = JSON.parse(response.body)['data']['search']['hits']

    # Adding a second facet value narrows the result set, and the narrowed
    # total equals the `still image` bucket count reported by the first query.
    assert(initial_hits_count > final_hits_count)
    assert_equal(final_hits_count, initial_still_images_count)
  end
end
end
131 changes: 131 additions & 0 deletions test/models/opensearch_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -120,4 +120,135 @@ class OpensearchTest < ActiveSupport::TestCase

assert_equal(expected, Opensearch.new.filter_sources(sources))
end

test 'filter_field_by_value query structure' do
  # The helper wraps the field/value pair in a single `term` clause.
  term_filter = Opensearch.new.filter_field_by_value('fakefield', 'i am a fake value')

  assert_equal({ term: { fakefield: 'i am a fake value' } }, term_filter)
end

test 'filters query structure when no filters passed' do
  # With no facet params supplied, the filter list is empty.
  built_filters = Opensearch.new.filters({})

  assert_equal([], built_filters)
end

test 'filters query structure for single contributors_facet' do
  # One contributor value produces one term filter on the keyword subfield.
  facets = { contributors_facet: ['Lastname, Firstname'] }
  built_filters = Opensearch.new.filters(facets)

  assert_equal([{ term: { 'contributors.value.keyword': 'Lastname, Firstname' } }], built_filters)
end

test 'filters query structure for multiple contributors_facet' do
  # Each contributor value gets its own term filter, in the order supplied.
  names = ['Lastname, Firstname', 'Another name']
  wanted = names.map { |name| { term: { 'contributors.value.keyword': name } } }
  facets = { contributors_facet: names }

  assert_equal(wanted, Opensearch.new.filters(facets))
end

test 'filters query structure for single content_type_facet' do
  # One content type value produces one term filter on `content_type`.
  facets = { content_type_facet: ['cheese'] }
  built_filters = Opensearch.new.filters(facets)

  assert_equal([{ term: { content_type: 'cheese' } }], built_filters)
end

test 'filters query structure for multiple content_type_facet' do
  # Each content type value gets its own term filter, in the order supplied.
  values = ['cheese', 'ice cream']
  wanted = values.map { |value| { term: { content_type: value } } }
  facets = { content_type_facet: values }

  assert_equal(wanted, Opensearch.new.filters(facets))
end

test 'filters query structure for single content_format_facet' do
  # The content_format_facet param is expected to filter on the `format` field.
  facets = { content_format_facet: ['cheese'] }
  built_filters = Opensearch.new.filters(facets)

  assert_equal([{ term: { format: 'cheese' } }], built_filters)
end

test 'filters query structure for multiple content_format_facet' do
  # Each format value gets its own term filter on `format`, in the order supplied.
  values = ['cheese', 'ice cream']
  wanted = values.map { |value| { term: { format: value } } }
  facets = { content_format_facet: values }

  assert_equal(wanted, Opensearch.new.filters(facets))
end

test 'filters query structure for single languages_facet' do
  # One language value produces one term filter on `languages`.
  facets = { languages_facet: ['cheese'] }
  built_filters = Opensearch.new.filters(facets)

  assert_equal([{ term: { languages: 'cheese' } }], built_filters)
end

test 'filters query structure for multiple languages_facet' do
  # Each language value gets its own term filter, in the order supplied.
  values = ['cheese', 'ice cream']
  wanted = values.map { |value| { term: { languages: value } } }
  facets = { languages_facet: values }

  assert_equal(wanted, Opensearch.new.filters(facets))
end

# literary form is only single value
test 'filters query structure for literary_form_facet' do
  # The facet is passed as a plain string here (not an array) and is expected
  # to yield exactly one term filter on `literary_form`.
  facets = { literary_form_facet: 'cheese' }
  built_filters = Opensearch.new.filters(facets)

  assert_equal([{ term: { literary_form: 'cheese' } }], built_filters)
end

test 'filters query structure for single subjects_facet' do
  # One subject value produces one term filter on the keyword subfield.
  facets = { subjects_facet: ['cheese'] }
  built_filters = Opensearch.new.filters(facets)

  assert_equal([{ term: { 'subjects.value.keyword': 'cheese' } }], built_filters)
end

test 'filters query structure for multiple subjects_facet' do
  # Each subject value gets its own term filter, in the order supplied.
  values = ['cheese', 'ice cream']
  wanted = values.map { |value| { term: { 'subjects.value.keyword': value } } }
  facets = { subjects_facet: values }

  assert_equal(wanted, Opensearch.new.filters(facets))
end
end
Loading

0 comments on commit 647292a

Please sign in to comment.