From b0a8163b7561cf46c3291634fead9247c1e43456 Mon Sep 17 00:00:00 2001 From: Bill Dueber Date: Thu, 30 Nov 2023 19:57:55 -0500 Subject: [PATCH 1/3] Change to using published solrcloud gem and allow specifying how many old collections to keep. --- Gemfile | 4 ++-- Gemfile.lock | 27 ++++++++++++--------------- lib/authority_browse/solr.rb | 27 +++++++-------------------- spec/authority_browse/solr_spec.rb | 12 +++++++++--- 4 files changed, 30 insertions(+), 40 deletions(-) diff --git a/Gemfile b/Gemfile index 83ab17ee..b4c4e932 100644 --- a/Gemfile +++ b/Gemfile @@ -1,7 +1,7 @@ source "https://rubygems.org" git_source(:github) { |repo| "https://github.com/#{repo}.git" } -gem "httpx", "~> 0.21" +gem "httpx" # , "~> 0.21" gem "zinzout", "~> 0.1" gem "faraday", "~> 2.5" gem "faraday-follow_redirects" @@ -12,7 +12,7 @@ gem "canister" gem "rubyzip" gem "semantic_logger" gem "thor" -gem "solr_cloud-connection", git: "https://github.com/mlibrary/solr_cloud-connection" +gem "solr_cloud-connection" gem "sqlite3", "~> 1.4", platforms: :mri gem "jdbc-sqlite3", "~> 3.28", platforms: :jruby diff --git a/Gemfile.lock b/Gemfile.lock index df382eb6..633763dc 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,12 +1,3 @@ -GIT - remote: https://github.com/mlibrary/solr_cloud-connection - revision: ea3c856c5611d5fe070677dd09b6b663bc5a2944 - specs: - solr_cloud-connection (0.1.0) - faraday - httpx - rubyzip - GEM remote: https://rubygems.org/ specs: @@ -24,7 +15,7 @@ GEM diff-lcs (1.5.0) docile (1.4.0) dotenv (2.8.1) - faraday (2.7.11) + faraday (2.7.12) base64 faraday-net_http (>= 2.0, < 3.1) ruby2_keywords (>= 0.0.4) @@ -35,9 +26,9 @@ GEM ffi-icu (0.5.1) ffi (~> 1.0, >= 1.0.9) hashdiff (1.0.1) - http-2-next (0.5.1) - httpx (0.24.7) - http-2-next (< 1.0.0) + http-2-next (1.0.1) + httpx (1.1.5) + http-2-next (>= 1.0.1) json (2.6.3) language_server-protocol (3.17.0.3) lint_roller (1.1.0) @@ -100,6 +91,11 @@ GEM simplecov_json_formatter (~> 0.1) simplecov-html (0.12.3) simplecov_json_formatter (0.1.4) + solr_cloud-connection (0.1.0) + faraday (~> 2.7.12) + httpx (~> 1.1.5) + rubyzip (~> 2.3.0) + sqlite3 (1.6.7-arm64-darwin) sqlite3 (1.6.7-x86_64-linux) standard (1.31.2) language_server-protocol (~> 3.17.0.2) @@ -124,6 +120,7 @@ GEM zinzout (0.1.1) PLATFORMS + arm64-darwin-22 x86_64-linux DEPENDENCIES @@ -134,7 +131,7 @@ DEPENDENCIES faraday (~> 2.5) faraday-follow_redirects ffi-icu - httpx (~> 0.21) + httpx jdbc-sqlite3 (~> 3.28) milemarker (~> 1.0) mysql2 @@ -145,7 +142,7 @@ DEPENDENCIES semantic_logger sequel (~> 5.60) simplecov - solr_cloud-connection! + solr_cloud-connection sqlite3 (~> 1.4) standardrb thor diff --git a/lib/authority_browse/solr.rb b/lib/authority_browse/solr.rb index ff39ca51..b4d76c8e 100644 --- a/lib/authority_browse/solr.rb +++ b/lib/authority_browse/solr.rb @@ -75,28 +75,14 @@ def self.set_up_daily_collection # # @return[Nil] def self.set_daily_reindex_alias - S.solrcloud.get( - "solr/admin/collections", - { - action: "CREATEALIAS", - name: reindex_alias, - collections: [collection_name] - } - ) + S.solrcloud.create_alias(name: reindex_alias, collection_name: collection_name) end # This sets the production alias to today's collection. # # @return[Nil] def self.set_production_alias - S.solrcloud.get( - "solr/admin/collections", - { - action: "CREATEALIAS", - name: production_alias, - collections: [collection_name] - } - ) + S.solrcloud.create_alias(name: production_alias, collection_name: collection_name) end # This verifies that today's collection has enough documents in it. For now @@ -114,8 +100,8 @@ def self.verify_reindex # @return[Nil] def self.prune_old_collections S.logger.info "Pruning the following collections: #{list_old_collections}" - list_old_collections.each do |coll| - S.solrcloud.get("/solr/admin/collections", {action: "DELETE", name: coll, wt: "json"}) + list_old_collections.map { |c| S.solrcloud.connection(c) }.each do |coll| + coll.delete! end end @@ -123,14 +109,15 @@ def self.prune_old_collections # three authority_browse collections # # @param list [Array] Array of all SolrCloud collections + # @param keep [Integer] how many versions to keep, even if they're old # @return [Array] Array of old authority browse Solrcloud collection # strings - def self.list_old_collections(list = S.solrcloud.collections) + def self.list_old_collections(list = S.solrcloud.collections, keep: 3) list.select do |item| item.match?("authority_browse") end.sort do |a, b| Date.parse(a.split("_").last) <=> Date.parse(b.split("_").last) - end[0..-4] + end[0..(0 - keep - 1)] end end end diff --git a/spec/authority_browse/solr_spec.rb b/spec/authority_browse/solr_spec.rb index 31f860ab..5120191f 100644 --- a/spec/authority_browse/solr_spec.rb +++ b/spec/authority_browse/solr_spec.rb @@ -1,7 +1,7 @@ RSpec.describe AuthorityBrowse::Solr do context "#list_old_collections" do - it "returns authority_browse collections older than the newest three" do - list = [ + before(:all) do + @list = [ "something_11d2069_2023-11-16", "something_11d2069_2023-11-15", "something_11d2069_2023-11-14", @@ -12,10 +12,16 @@ "authority_browse_11d2069_2023-11-15", "authority_browse_11d2069_2023-11-14" ] - expect(described_class.list_old_collections(list)).to eq([ + end + it "returns authority_browse collections older than the newest three" do + expect(described_class.list_old_collections(@list)).to eq([ "authority_browse_1.0.1_2023-11-13", "authority_browse_11d2069_2023-11-13" ]) end + + it "returns old collections with a custom keep_at_least" do + expect(described_class.list_old_collections(@list, keep: 4)).to eq(["authority_browse_1.0.1_2023-11-13"]) + end end end From 89c457b4d2a1e03cffaf8f35f4dfc72490a58326 Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Fri, 1 Dec 2023 08:57:16 -0500 Subject: [PATCH 2/3] fixes for using SolrCloud gem --- lib/authority_browse/solr.rb | 19 +++++++++---------- spec/authority_browse/solr_spec.rb | 8 ++++---- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/lib/authority_browse/solr.rb b/lib/authority_browse/solr.rb index b4d76c8e..579dd7da 100644 --- a/lib/authority_browse/solr.rb +++ b/lib/authority_browse/solr.rb @@ -75,14 +75,14 @@ def self.set_up_daily_collection # # @return[Nil] def self.set_daily_reindex_alias - S.solrcloud.create_alias(name: reindex_alias, collection_name: collection_name) + S.solrcloud.create_alias(name: reindex_alias, collection_name: collection_name, force: true) end # This sets the production alias to today's collection. # # @return[Nil] def self.set_production_alias - S.solrcloud.create_alias(name: production_alias, collection_name: collection_name) + S.solrcloud.create_alias(name: production_alias, collection_name: collection_name, force: true) end # This verifies that today's collection has enough documents in it. For now @@ -98,9 +98,9 @@ def self.verify_reindex # than the newest three authority_browse collections. # # @return[Nil] - def self.prune_old_collections + def self.prune_old_collections(keep: 3) S.logger.info "Pruning the following collections: #{list_old_collections}" - list_old_collections.map { |c| S.solrcloud.connection(c) }.each do |coll| + list_old_collections(keep: keep).each do |coll| coll.delete! end end @@ -108,15 +108,14 @@ def self.prune_old_collections # Lists the authority_browse collections that are older than the newest # three authority_browse collections # - # @param list [Array] Array of all SolrCloud collections + # @param list [Array] Array of all SolrCloud collections # @param keep [Integer] how many versions to keep, even if they're old - # @return [Array] Array of old authority browse Solrcloud collection - # strings - def self.list_old_collections(list = S.solrcloud.collections, keep: 3) + # @return [Array] Array of old authority browse Solrcloud collections + def self.list_old_collections(list: S.solrcloud.collections, keep: 3) list.select do |item| - item.match?("authority_browse") + item.name.match?("authority_browse") end.sort do |a, b| - Date.parse(a.split("_").last) <=> Date.parse(b.split("_").last) + Date.parse(a.name.split("_").last) <=> Date.parse(b.name.split("_").last) end[0..(0 - keep - 1)] end end diff --git a/spec/authority_browse/solr_spec.rb b/spec/authority_browse/solr_spec.rb index 5120191f..deba6075 100644 --- a/spec/authority_browse/solr_spec.rb +++ b/spec/authority_browse/solr_spec.rb @@ -1,6 +1,6 @@ RSpec.describe AuthorityBrowse::Solr do context "#list_old_collections" do - before(:all) do + before(:each) do @list = [ "something_11d2069_2023-11-16", "something_11d2069_2023-11-15", @@ -11,17 +11,17 @@ "authority_browse_11d2069_2023-11-13", "authority_browse_11d2069_2023-11-15", "authority_browse_11d2069_2023-11-14" - ] + ].map { |x| instance_double(SolrCloud::Collection, name: x) } end it "returns authority_browse collections older than the newest three" do - expect(described_class.list_old_collections(@list)).to eq([ + expect(described_class.list_old_collections(list: @list).map { |x| x.name }).to eq([ "authority_browse_1.0.1_2023-11-13", "authority_browse_11d2069_2023-11-13" ]) end it "returns old collections with a custom keep_at_least" do - expect(described_class.list_old_collections(@list, keep: 4)).to eq(["authority_browse_1.0.1_2023-11-13"]) + expect(described_class.list_old_collections(list: @list, keep: 4).map { |x| x.name }).to eq(["authority_browse_1.0.1_2023-11-13"]) end end end From b855f7c8c4be5257a5d0f1048e562044c49897e4 Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Fri, 1 Dec 2023 11:25:15 -0500 Subject: [PATCH 3/3] change daily date string to have hours minutes seconds --- lib/authority_browse/solr.rb | 2 +- lib/services.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/authority_browse/solr.rb b/lib/authority_browse/solr.rb index 579dd7da..8d0837ae 100644 --- a/lib/authority_browse/solr.rb +++ b/lib/authority_browse/solr.rb @@ -115,7 +115,7 @@ def self.list_old_collections(list: S.solrcloud.collections, keep: 3) list.select do |item| item.name.match?("authority_browse") end.sort do |a, b| - Date.parse(a.name.split("_").last) <=> Date.parse(b.name.split("_").last) + a.name.split("_").last <=> b.name.split("_").last end[0..(0 - keep - 1)] end end diff --git a/lib/services.rb b/lib/services.rb index 00fbd079..bbe9cc14 100644 --- a/lib/services.rb +++ b/lib/services.rb @@ -63,7 +63,7 @@ tag end -S.register(:today) { Date.today.strftime "%Y-%m-%d" } +S.register(:today) { Time.now.strftime "%Y-%m-%d-%H-%M-%S" } # Solr stuff