diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e48c3474..9042fc5e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -33,7 +33,7 @@ jobs: with: ruby-version: '3.2' bundler-cache: true - - name: run tests - run: bundle exec rspec + - name: run tests but not the ones that touch solrcloud + run: bundle exec rspec --tag ~solrcloud env: GHA_TEST: "true" diff --git a/Gemfile b/Gemfile index b4c4e932..c74adc16 100644 --- a/Gemfile +++ b/Gemfile @@ -12,7 +12,7 @@ gem "canister" gem "rubyzip" gem "semantic_logger" gem "thor" -gem "solr_cloud-connection" +gem "solr_cloud-connection", ">= 0.3.0" gem "sqlite3", "~> 1.4", platforms: :mri gem "jdbc-sqlite3", "~> 3.28", platforms: :jruby diff --git a/Gemfile.lock b/Gemfile.lock index 633763dc..04d625d0 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -91,7 +91,7 @@ GEM simplecov_json_formatter (~> 0.1) simplecov-html (0.12.3) simplecov_json_formatter (0.1.4) - solr_cloud-connection (0.1.0) + solr_cloud-connection (0.3.0) faraday (~> 2.7.12) httpx (~> 1.1.5) rubyzip (~> 2.3.0) @@ -142,7 +142,7 @@ DEPENDENCIES semantic_logger sequel (~> 5.60) simplecov - solr_cloud-connection + solr_cloud-connection (>= 0.3.0) sqlite3 (~> 1.4) standardrb thor diff --git a/lib/authority_browse/solr.rb b/lib/authority_browse/solr.rb index 8d0837ae..aacfc17b 100644 --- a/lib/authority_browse/solr.rb +++ b/lib/authority_browse/solr.rb @@ -43,7 +43,9 @@ def self.solr_conf_dir # # @return[Nil] def self.create_configset_if_needed - unless S.solrcloud.configset?(configset_name) + if S.solrcloud.has_configset?(configset_name) + S.solrcloud.get_configset(configset_name) + else S.solrcloud.create_configset( name: configset_name, confdir: solr_conf_dir @@ -62,6 +64,10 @@ def self.create_daily_collection ) end + def self.latest_daily_collection + _sorted_collections.last + end + # This creates the daily collection and then sets the reindex alias to that # collection # @@ -75,32 +81,31 @@ def self.set_up_daily_collection # # @return[Nil] def self.set_daily_reindex_alias - S.solrcloud.create_alias(name: reindex_alias, collection_name: collection_name, force: true) + latest_daily_collection.alias_as(reindex_alias) end # This sets the production alias to today's collection. # # @return[Nil] def self.set_production_alias - S.solrcloud.create_alias(name: production_alias, collection_name: collection_name, force: true) + latest_daily_collection.alias_as(production_alias) end # This verifies that today's collection has enough documents in it. For now # the collection must have more than 7_000_000 documents in it. - # + # @raise [NotEnoughDocsError] if there aren't enough docs in the collection # @return[Nil] - def self.verify_reindex - body = S.solrcloud.get("solr/#{collection_name}/select", {q: "*:*"}).body - raise NotEnoughDocsError unless body["response"]["numFound"] > 7000000 + def self.verify_reindex(min_records: S.min_authority_browse_record_count) + raise NotEnoughDocsError unless latest_daily_collection.count > min_records end # This deletes all authority_browse collections with dates that are older # than the newest three authority_browse collections. # # @return[Nil] - def self.prune_old_collections(keep: 3) - S.logger.info "Pruning the following collections: #{list_old_collections}" - list_old_collections(keep: keep).each do |coll| + def self.prune_old_collections(collections: list_old_collections(keep: keep), keep: 3) + S.logger.info "Pruning the following collections: #{collections}" + collections.each do |coll| coll.delete! end end @@ -108,15 +113,19 @@ def self.prune_old_collections(keep: 3) # Lists the authority_browse collections that are older than the newest # three authority_browse collections # - # @param list [Array] Array of all SolrCloud collections + # @param list [Array] Array of all SolrCloud collections # @param keep [Integer] how many versions to keep, even if they're old - # @return [Array] Array of old authority browse Solrcloud collections - def self.list_old_collections(list: S.solrcloud.collections, keep: 3) + # @return [Array] Array of old authority browse Solrcloud collections + def self.list_old_collections(list: S.solrcloud.only_collections, keep: 3) + _sorted_collections(list: list)[0..(0 - keep - 1)] + end + + def self._sorted_collections(list: S.solrcloud.only_collections) list.select do |item| item.name.match?("authority_browse") end.sort do |a, b| a.name.split("_").last <=> b.name.split("_").last - end[0..(0 - keep - 1)] + end end end end diff --git a/lib/authority_browse/solr/uploader.rb b/lib/authority_browse/solr/uploader.rb index c78ffc7d..7be43196 100644 --- a/lib/authority_browse/solr/uploader.rb +++ b/lib/authority_browse/solr/uploader.rb @@ -2,7 +2,7 @@ module AuthorityBrowse module Solr class Uploader def initialize(collection:) - @conn = S.solrcloud.alias collection + @collection = S.solrcloud.get_collection collection @endpoint = "solr/#{collection}/update" end @@ -10,11 +10,11 @@ def initialize(collection:) # @param docs [Array] Array of json strings of docs def upload(docs) body = "[" + docs.join(",") + "]" - @conn.post(@endpoint, body) + @collection.post(@endpoint, body) end def commit - @conn.get(@endpoint, commit: "true") + @collection.commit end end end diff --git a/lib/services.rb b/lib/services.rb index 2b45454d..b2e491be 100644 --- a/lib/services.rb +++ b/lib/services.rb @@ -64,6 +64,7 @@ end S.register(:today) { Time.now.strftime "%Y-%m-%d-%H-%M-%S" } +S.register(:min_authority_browse_record_count) { 7_000_000 } # Solr stuff @@ -73,6 +74,10 @@ S.register(:solr_configuration) { ENV["SOLR_CONFIGURATION"] || "authority_browse" } S.register(:solr_collection) { ENV["SOLR_COLLECTION"] || "authority_browse" } S.register(:biblio_solr) { ENV["BIBLIO_SOLR"] } + +# @!method S.solrcloud +# @return [SolrCloud::Connection] +# @!scope S S.register(:solrcloud) do SolrCloud::Connection.new( url: S.solr_host, diff --git a/spec/integrations/solrcloud_spec.rb b/spec/integrations/solrcloud_spec.rb new file mode 100644 index 00000000..e4a24b77 --- /dev/null +++ b/spec/integrations/solrcloud_spec.rb @@ -0,0 +1,70 @@ +RSpec.describe Browse::CLI::Solr, solrcloud: true do + before(:all) do + WebMock.allow_net_connect! + end + after(:all) do + WebMock.disable_net_connect! + end + before(:each) do + @today_collection_name = AuthorityBrowse::Solr.collection_name + end + after(:each) do + coll = S.solrcloud.get_collection(@today_collection_name) + cset = S.solrcloud.get_configset(AuthorityBrowse::Solr.configset_name) + if !coll.nil? + coll.aliases.each { |x| x.delete! } + coll.delete! + end + cset&.delete! + end + subject do + described_class.new + end + context "browse set_up_daily_authority_browse_collection" do + it "creates the daily collection and sets the reindex alias" do + configset_name = AuthorityBrowse::Solr.configset_name + expect(S.solrcloud.has_configset?(configset_name)).to eq(false) + expect(S.solrcloud.get_collection(@today_collection_name)).to be_nil + subject.invoke(:set_up_daily_authority_browse_collection) + expect(S.solrcloud.has_configset?(configset_name)).to eq(true) + collection = S.solrcloud.get_collection(@today_collection_name) + expect(collection).not_to be_nil + expect(collection.has_alias?(AuthorityBrowse::Solr.reindex_alias)).to eq(true) + end + end + + context "browse verify_and_deploy_authority_browse_collection" do + it "verifies that the collection has enough records and then sets the production alias to it" do + subject.invoke(:set_up_daily_authority_browse_collection) + one_doc = [{ + id: "twain mark 1835 1910\u001fname", + browse_field: "name", + term: "Twain, Mark, 1835-1910", + count: 7, + date_of_index: "2023-09-02T00:00:00Z" + }.to_json] + + collection = S.solrcloud.get_collection(@today_collection_name) + uploader = AuthorityBrowse::Solr::Uploader.new(collection: @today_collection_name) + uploader.upload(one_doc) + uploader.commit + + subject.invoke(:verify_and_deploy_authority_browse_collection) + + expect(collection.has_alias?(AuthorityBrowse::Solr.production_alias)).to eq(true) + end + end + context "AuthorityBrowse::Solr.prune_old_collections" do + # not testing the cli invocation because we want to inject collections to prune + it "prunes the old collections" do + subject.invoke(:set_up_daily_authority_browse_collection) + col = S.solrcloud.get_collection(@today_collection_name) + expect(col).not_to be_nil + col.aliases.each { |x| x.delete! } + # actual subject + AuthorityBrowse::Solr.prune_old_collections(collections: [col], keep: 0) + + expect(S.solrcloud.only_collection_names).not_to include(@today_collection_name) + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index afb78286..b13d4a08 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -15,6 +15,11 @@ S.register(:database_host) { "127.0.0.1" } end +# S.register(:git_tag) { "my.test.tag" } +S.register(:git_tag) { "version" } +S.register(:today) { "2099-12-01-00-00-00" } +S.register(:min_authority_browse_record_count) { 0 } + Services.register(:database) do root = Sequel.connect( adapter: Services[:database_adapter],