Skip to content

Commit

Permalink
Merge pull request #19 from mlibrary/solrcloud_0.3_compatible
Browse files Browse the repository at this point in the history
Change to use solrcloud gem v0.3.0
  • Loading branch information
niquerio authored Dec 7, 2023
2 parents 696b70b + aac7437 commit f17e6c8
Show file tree
Hide file tree
Showing 8 changed files with 111 additions and 22 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
with:
ruby-version: '3.2'
bundler-cache: true
- name: run tests
run: bundle exec rspec
- name: run tests but not the ones that touch solrcloud
run: bundle exec rspec --tag ~solrcloud
env:
GHA_TEST: "true"
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ gem "canister"
gem "rubyzip"
gem "semantic_logger"
gem "thor"
gem "solr_cloud-connection"
gem "solr_cloud-connection", ">= 0.3.0"

gem "sqlite3", "~> 1.4", platforms: :mri
gem "jdbc-sqlite3", "~> 3.28", platforms: :jruby
Expand Down
4 changes: 2 additions & 2 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ GEM
simplecov_json_formatter (~> 0.1)
simplecov-html (0.12.3)
simplecov_json_formatter (0.1.4)
solr_cloud-connection (0.1.0)
solr_cloud-connection (0.3.0)
faraday (~> 2.7.12)
httpx (~> 1.1.5)
rubyzip (~> 2.3.0)
Expand Down Expand Up @@ -142,7 +142,7 @@ DEPENDENCIES
semantic_logger
sequel (~> 5.60)
simplecov
solr_cloud-connection
solr_cloud-connection (>= 0.3.0)
sqlite3 (~> 1.4)
standardrb
thor
Expand Down
37 changes: 23 additions & 14 deletions lib/authority_browse/solr.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ def self.solr_conf_dir
#
# @return[Nil]
def self.create_configset_if_needed
unless S.solrcloud.configset?(configset_name)
if S.solrcloud.has_configset?(configset_name)
S.solrcloud.get_configset(configset_name)
else
S.solrcloud.create_configset(
name: configset_name,
confdir: solr_conf_dir
Expand All @@ -62,6 +64,10 @@ def self.create_daily_collection
)
end

# Returns the newest authority_browse collection, i.e. the last entry of
# _sorted_collections (which orders collections by the trailing
# "_"-separated segment of their names).
#
# @return [SolrCloud::Collection, nil] nil when there are no matching
#   collections
def self.latest_daily_collection
  oldest_to_newest = _sorted_collections
  oldest_to_newest.last
end

# This creates the daily collection and then sets the reindex alias to that
# collection
#
Expand All @@ -75,48 +81,51 @@ def self.set_up_daily_collection
#
# @return[Nil]
def self.set_daily_reindex_alias
S.solrcloud.create_alias(name: reindex_alias, collection_name: collection_name, force: true)
latest_daily_collection.alias_as(reindex_alias)
end

# This sets the production alias to today's collection.
#
# @return[Nil]
def self.set_production_alias
S.solrcloud.create_alias(name: production_alias, collection_name: collection_name, force: true)
latest_daily_collection.alias_as(production_alias)
end

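# This verifies that the latest daily collection has enough documents in it.
# The collection must contain more than `min_records` documents; the default
# comes from S.min_authority_browse_record_count (registered as 7_000_000).
#
# @param min_records [Integer] document count the collection must exceed
# @raise [NotEnoughDocsError] if there aren't enough docs in the collection
# @return[Nil]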
def self.verify_reindex
body = S.solrcloud.get("solr/#{collection_name}/select", {q: "*:*"}).body
raise NotEnoughDocsError unless body["response"]["numFound"] > 7000000
# Checks that the latest daily collection holds strictly more than
# `min_records` documents.
#
# @param min_records [Integer] count the collection must exceed; defaults to
#   S.min_authority_browse_record_count
# @raise [NotEnoughDocsError] when the count is less than or equal to
#   `min_records`
# @return [nil]
def self.verify_reindex(min_records: S.min_authority_browse_record_count)
  return if latest_daily_collection.count > min_records

  raise NotEnoughDocsError
end

# This deletes all authority_browse collections with dates that are older
# than the newest three authority_browse collections.
#
# @return[Nil]
def self.prune_old_collections(keep: 3)
S.logger.info "Pruning the following collections: #{list_old_collections}"
list_old_collections(keep: keep).each do |coll|
# Deletes old authority_browse collections.
#
# `keep` is declared before `collections` on purpose: Ruby evaluates keyword
# defaults left to right, so a default may only reference parameters declared
# earlier. With the opposite order, `keep` inside the `collections` default is
# resolved as a method call and raises NameError whenever `collections:` is
# omitted.
#
# @param keep [Integer] how many of the newest collections to retain when the
#   default list is computed
# @param collections [Array<SolrCloud::Collection>] collections to delete;
#   defaults to list_old_collections(keep: keep)
# @return [Array<SolrCloud::Collection>] the collections that were deleted
def self.prune_old_collections(keep: 3, collections: list_old_collections(keep: keep))
  S.logger.info "Pruning the following collections: #{collections}"
  collections.each(&:delete!)
end

# Lists the authority_browse collections that are older than the newest
# three authority_browse collections
#
# @param list [Array]<SolrCloud::Collection> Array of all SolrCloud collections
# @param list [Array<SolrCloud::Collection>] Array of all SolrCloud collections
# @param keep [Integer] how many versions to keep, even if they're old
# @return [Array]<SolrCloud::Collection> Array of old authority browse Solrcloud collections
def self.list_old_collections(list: S.solrcloud.collections, keep: 3)
# @return [Array<SolrCloud::Collection>] Array of old authority browse Solrcloud collections
def self.list_old_collections(list: S.solrcloud.only_collections, keep: 3)
  oldest_first = _sorted_collections(list: list)
  # A negative end index drops the last `keep` (i.e. newest) entries; when
  # there are `keep` or fewer entries the slice is empty.
  oldest_first[0..-(keep + 1)]
end

def self._sorted_collections(list: S.solrcloud.only_collections)
list.select do |item|
item.name.match?("authority_browse")
end.sort do |a, b|
a.name.split("_").last <=> b.name.split("_").last
end[0..(0 - keep - 1)]
end
end
end
end
6 changes: 3 additions & 3 deletions lib/authority_browse/solr/uploader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@ module AuthorityBrowse
module Solr
class Uploader
def initialize(collection:)
@conn = S.solrcloud.alias collection
@collection = S.solrcloud.get_collection collection
@endpoint = "solr/#{collection}/update"
end

# Uploads docs to solr
# @param docs [Array] Array of json strings of docs
def upload(docs)
body = "[" + docs.join(",") + "]"
@conn.post(@endpoint, body)
@collection.post(@endpoint, body)
end

def commit
@conn.get(@endpoint, commit: "true")
@collection.commit
end
end
end
Expand Down
5 changes: 5 additions & 0 deletions lib/services.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
end

S.register(:today) { Time.now.strftime "%Y-%m-%d-%H-%M-%S" }
S.register(:min_authority_browse_record_count) { 7_000_000 }

# Solr stuff

Expand All @@ -73,6 +74,10 @@
S.register(:solr_configuration) { ENV["SOLR_CONFIGURATION"] || "authority_browse" }
S.register(:solr_collection) { ENV["SOLR_COLLECTION"] || "authority_browse" }
S.register(:biblio_solr) { ENV["BIBLIO_SOLR"] }

# @!method S.solrcloud
# @return [SolrCloud::Connection]
# @!scope S
S.register(:solrcloud) do
SolrCloud::Connection.new(
url: S.solr_host,
Expand Down
70 changes: 70 additions & 0 deletions spec/integrations/solrcloud_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Integration specs for the Solr CLI commands. The examples make real HTTP
# calls through S.solrcloud, so WebMock's network block is lifted for the
# whole group and restored afterwards. The `solrcloud: true` tag lets a run
# exclude them (e.g. `rspec --tag ~solrcloud`).
RSpec.describe Browse::CLI::Solr, solrcloud: true do
  subject { described_class.new }

  before(:all) { WebMock.allow_net_connect! }
  after(:all) { WebMock.disable_net_connect! }

  before(:each) do
    @today_collection_name = AuthorityBrowse::Solr.collection_name
  end

  # Clean up whatever the example created: the collection's aliases, then the
  # collection itself, then the configset.
  after(:each) do
    collection = S.solrcloud.get_collection(@today_collection_name)
    configset = S.solrcloud.get_configset(AuthorityBrowse::Solr.configset_name)
    unless collection.nil?
      collection.aliases.each(&:delete!)
      collection.delete!
    end
    configset&.delete!
  end

  context "browse set_up_daily_authority_browse_collection" do
    it "creates the daily collection and sets the reindex alias" do
      cset_name = AuthorityBrowse::Solr.configset_name

      # Precondition: neither the configset nor today's collection exists yet.
      expect(S.solrcloud.has_configset?(cset_name)).to eq(false)
      expect(S.solrcloud.get_collection(@today_collection_name)).to be_nil

      subject.invoke(:set_up_daily_authority_browse_collection)

      expect(S.solrcloud.has_configset?(cset_name)).to eq(true)
      created = S.solrcloud.get_collection(@today_collection_name)
      expect(created).not_to be_nil
      expect(created.has_alias?(AuthorityBrowse::Solr.reindex_alias)).to eq(true)
    end
  end

  context "browse verify_and_deploy_authority_browse_collection" do
    it "verifies that the collection has enough records and then sets the production alias to it" do
      subject.invoke(:set_up_daily_authority_browse_collection)

      # A single document, already serialised to JSON as Uploader#upload expects.
      docs = [
        {
          id: "twain mark 1835 1910\u001fname",
          browse_field: "name",
          term: "Twain, Mark, 1835-1910",
          count: 7,
          date_of_index: "2023-09-02T00:00:00Z"
        }.to_json
      ]

      todays_collection = S.solrcloud.get_collection(@today_collection_name)
      uploader = AuthorityBrowse::Solr::Uploader.new(collection: @today_collection_name)
      uploader.upload(docs)
      uploader.commit

      subject.invoke(:verify_and_deploy_authority_browse_collection)

      expect(todays_collection.has_alias?(AuthorityBrowse::Solr.production_alias)).to eq(true)
    end
  end

  context "AuthorityBrowse::Solr.prune_old_collections" do
    # The library method is called directly (not through the CLI) so the
    # collections to prune can be injected.
    it "prunes the old collections" do
      subject.invoke(:set_up_daily_authority_browse_collection)
      todays_collection = S.solrcloud.get_collection(@today_collection_name)
      expect(todays_collection).not_to be_nil
      todays_collection.aliases.each(&:delete!)

      # The call under test.
      AuthorityBrowse::Solr.prune_old_collections(collections: [todays_collection], keep: 0)

      expect(S.solrcloud.only_collection_names).not_to include(@today_collection_name)
    end
  end
end
5 changes: 5 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
S.register(:database_host) { "127.0.0.1" }
end

# S.register(:git_tag) { "my.test.tag" }
S.register(:git_tag) { "version" }
S.register(:today) { "2099-12-01-00-00-00" }
S.register(:min_authority_browse_record_count) { 0 }

Services.register(:database) do
root = Sequel.connect(
adapter: Services[:database_adapter],
Expand Down

0 comments on commit f17e6c8

Please sign in to comment.