Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A script to identify invalid clusters in Print Holdings MongoDB #291

Merged
merged 1 commit into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions bin/cluster_validator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
require "fileutils"

require "cluster"
require "services"
# NOTE(review): presumably sets up the MongoDB connection that the Cluster
# queries below rely on — confirm against the Services definition.
Services.mongo!

# Goes through all clusters, checks if they are valid,
# and prints the first ocn of any invalid cluster to a file.

# Iterates over every Cluster, asks each one whether it is `valid?`, and
# writes the first OCN of each invalid cluster to a dated report file.
#
# The report is written to
# <Settings.local_report_path>/cluster_validator_<YYYY-MM-DD>.txt and always
# contains a "#"-prefixed header line and footer line, with one line per
# invalid cluster in between.
class ClusterValidator
  # @return [String] full path of the report file written by #run
  attr_reader :output_path

  # Creates the report directory if it does not exist yet and computes the
  # dated output path. Nothing is written until #run is called.
  def initialize
    ymd = Time.now.strftime("%Y-%m-%d")
    dir = Settings.local_report_path
    FileUtils.mkdir_p(dir)
    # File.join avoids a doubled separator when the configured directory
    # already ends in "/".
    @output_path = File.join(dir, "cluster_validator_#{ymd}.txt")
  end

  # Writes the report, truncating any file already present at output_path.
  #
  # @return [void]
  def run
    puts "Writing to #{output_path}"
    File.open(output_path, "w") do |outf|
      outf.puts "# These are ocns of invalid clusters:"
      Cluster.each do |cluster|
        # NOTE(review): if an invalid cluster has no ocns, `first` is nil and
        # a blank line is written — confirm whether that case can occur.
        outf.puts(cluster.ocns.first) unless cluster.valid?
      end
      outf.puts "# Done"
    end
  end
end

# Run the validator only when this file is executed directly,
# not when it is loaded via require/require_relative (e.g. from the spec).
ClusterValidator.new.run if __FILE__ == $PROGRAM_NAME
53 changes: 53 additions & 0 deletions spec/cluster_validator_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# frozen_string_literal: true

require "spec_helper"
require "loader/cluster_loader"
require_relative "../bin/cluster_validator"

RSpec.describe ClusterValidator do
  let(:cluster_validator) { described_class.new }
  # The output file always has a header and a footer line, even with no body,
  # so a file with exactly 2 lines is "empty" for the purposes of these tests.
  let(:empty_file_line_count) { 2 }
  let(:one_invalid_cluster_line_count) { 3 }
  # The fixtures differ only in the commitment's phase (0 in valid, 999 in invalid).
  let(:valid_cluster_fixt) { fixture("single_cluster_valid.json") }
  let(:invalid_cluster_fixt) { fixture("single_cluster_invalid.json") }

  before(:each) do
    Cluster.collection.find.delete_many
    # The output path only varies by date, so a report left behind by an
    # earlier example (or an earlier run on the same day) must be removed;
    # otherwise the "does not exist yet" expectation depends on example order.
    stale_report = cluster_validator.output_path
    File.delete(stale_report) if File.exist?(stale_report)
  end

  # Runs the validator and returns the lines of the report it wrote.
  def get_output_lines
    # Run and read through the same instance so the path cannot diverge.
    cluster_validator.run
    File.read(cluster_validator.output_path).split("\n")
  end

  it "makes an outfile when it runs" do
    expect(File.exist?(cluster_validator.output_path)).to be false
    cluster_validator.run
    expect(File.exist?(cluster_validator.output_path)).to be true
  end

  it "makes an empty-ish outfile if there are no clusters" do
    # empty-ish meaning it'll only have the header and footer, which begin with "#".
    lines = get_output_lines
    expect(lines.count).to eq empty_file_line_count
    expect(lines[0]).to start_with("#")
    expect(lines[1]).to start_with("#")
  end

  it "does NOT count valid clusters" do
    Loader::ClusterLoader.new.load(valid_cluster_fixt)
    # Verify we have exactly one cluster and that it is valid,
    # then verify it does not count towards the report.
    expect(Cluster.count).to eq 1
    expect(Cluster.first.valid?).to be true
    expect(get_output_lines.count).to eq empty_file_line_count
  end

  it "DOES count invalid clusters" do
    Loader::ClusterLoader.new.load(invalid_cluster_fixt)
    # Verify we have exactly one cluster and that it is invalid,
    # then verify it does count towards the report.
    expect(Cluster.count).to eq 1
    expect(Cluster.first.valid?).to be false
    expect(get_output_lines.count).to eq one_invalid_cluster_line_count
  end
end
61 changes: 61 additions & 0 deletions spec/fixtures/single_cluster_invalid.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
[
{
"ocns": [
5
],
"last_modified": "2023-10-13 13:15:31 UTC",
"holdings": [
{
"enum_chron": "",
"n_enum": "",
"n_chron": "",
"n_enum_chron": "",
"ocn": 5,
"organization": "umich",
"country_code": "us",
"weight": 1.0,
"local_id": "loc_1",
"mono_multi_serial": "mix",
"date_received": "2023-10-13 00:00:00 UTC",
"condition": "",
"issn": null,
"status": null,
"uuid": "3bbd8c32-7d53-42f2-9f92-52690b047881",
"gov_doc_flag": false
}
],
"ht_items": [
{
"ocns": [
5
],
"enum_chron": "",
"n_enum": "",
"n_chron": "",
"n_enum_chron": "",
"item_id": "test.140236",
"ht_bib_key": 486522,
"rights": "pd",
"access": "allow",
"bib_fmt": "BK",
"collection_code": "MIU",
"billing_entity": "umich"
}
],
"commitments": [
{
"policies": [

],
"phase": 999,
"facsimile": true,
"uuid": "b5f06bf7-60f6-41b9-8101-59c7501a5ce9",
"committed_date": "2021-08-18 00:00:00 UTC",
"organization": "umich",
"ocn": 5,
"local_id": "loc_1",
"oclc_sym": "uiu"
}
]
}
]
61 changes: 61 additions & 0 deletions spec/fixtures/single_cluster_valid.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
[
{
"ocns": [
5
],
"last_modified": "2023-10-13 13:15:31 UTC",
"holdings": [
{
"enum_chron": "",
"n_enum": "",
"n_chron": "",
"n_enum_chron": "",
"ocn": 5,
"organization": "umich",
"country_code": "us",
"weight": 1.0,
"local_id": "loc_1",
"mono_multi_serial": "mix",
"date_received": "2023-10-13 00:00:00 UTC",
"condition": "",
"issn": null,
"status": null,
"uuid": "3bbd8c32-7d53-42f2-9f92-52690b047881",
"gov_doc_flag": false
}
],
"ht_items": [
{
"ocns": [
5
],
"enum_chron": "",
"n_enum": "",
"n_chron": "",
"n_enum_chron": "",
"item_id": "test.140236",
"ht_bib_key": 486522,
"rights": "pd",
"access": "allow",
"bib_fmt": "BK",
"collection_code": "MIU",
"billing_entity": "umich"
}
],
"commitments": [
{
"policies": [

],
"phase": 0,
"facsimile": true,
"uuid": "b5f06bf7-60f6-41b9-8101-59c7501a5ce9",
"committed_date": "2021-08-18 00:00:00 UTC",
"organization": "umich",
"ocn": 5,
"local_id": "loc_1",
"oclc_sym": "uiu"
}
]
}
]