From 96d75a79f2ef22d5ef4e0a075b015c79f6226591 Mon Sep 17 00:00:00 2001 From: Brian Moses Hall Date: Fri, 20 Oct 2023 15:07:44 -0400 Subject: [PATCH] Appease Standardrb --- Rakefile | 2 +- exe/catchup | 36 ++++++------- exe/swap_production_and_reindex | 23 ++++---- hathifiles_database.gemspec | 26 +++++---- lib/hathifiles_database.rb | 6 +-- lib/hathifiles_database/cli.rb | 15 ++---- lib/hathifiles_database/columns.rb | 6 +-- lib/hathifiles_database/constants.rb | 9 ++-- lib/hathifiles_database/datafile.rb | 11 ++-- lib/hathifiles_database/db/connection.rb | 54 ++++++++----------- .../db/migrations/001_create.rb | 15 +++--- .../db/migrations/100_created_no_indexes.rb | 5 +- .../db/migrations/101_add_index.rb | 21 ++++---- lib/hathifiles_database/db/writer.rb | 22 +++----- lib/hathifiles_database/line.rb | 11 ++-- lib/hathifiles_database/linespec.rb | 43 +++++++-------- 16 files changed, 133 insertions(+), 172 deletions(-) diff --git a/Rakefile b/Rakefile index b7e9ed5..c92b11e 100644 --- a/Rakefile +++ b/Rakefile @@ -3,4 +3,4 @@ require "rspec/core/rake_task" RSpec::Core::RakeTask.new(:spec) -task :default => :spec +task default: :spec diff --git a/exe/catchup b/exe/catchup index 5eea36f..42695ff 100755 --- a/exe/catchup +++ b/exe/catchup @@ -1,17 +1,17 @@ #!/usr/bin/env ruby -require 'date_named_file' -require 'hathifiles_database' -require 'dotenv' -require 'date' +require "date_named_file" +require "hathifiles_database" +require "dotenv" +require "date" -HF_FILES = '/htapps/archive/hathifiles' -LOGFILE_DIR ='../logs/hathifiles_database' +HF_FILES = "/htapps/archive/hathifiles" +LOGFILE_DIR = "../logs/hathifiles_database" def usage puts " - Usage: + Usage: ruby #{__FILE__} <'dev' or 'production'> e.g ruby #{__FILE__} 20211101 production ruby #{__FILE__} first_of_month dev # or just 'fom' @@ -25,15 +25,15 @@ usage if ARGV.size != 2 devprod = ARGV[1].downcase envfilename = case devprod.downcase - when 'dev' - '.devenv' - when 'prod' - '.env' - else - puts "\nUnknown target '#{devprod}'" - usage - exit 1 - end +when "dev" + ".devenv" +when "prod" + ".env" +else + puts "\nUnknown target '#{devprod}'" + usage + exit 1 +end envfile = Pathname.new(__dir__).parent + envfilename start_date = ARGV[0].downcase @@ -44,11 +44,9 @@ elsif %w[yesterday].include? 
start_date start_date = today - 1 end - - Dotenv.load(envfile) -connection_string = ENV['HATHIFILES_MYSQL_CONNECTION'] +connection_string = ENV["HATHIFILES_MYSQL_CONNECTION"] connection = HathifilesDatabase.new(connection_string) connection.logger.info "Connecting to #{connection_string}" diff --git a/exe/swap_production_and_reindex b/exe/swap_production_and_reindex index beacb3b..827d513 100755 --- a/exe/swap_production_and_reindex +++ b/exe/swap_production_and_reindex @@ -1,23 +1,23 @@ #!/usr/bin/env ruby -require 'date_named_file' -require 'hathifiles_database' -require 'dotenv' +require "date_named_file" +require "hathifiles_database" +require "dotenv" -HF_FILES = '/htapps/archive/hathifiles' -LOGFILE_DIR ='../logs/hathifiles_database' +HF_FILES = "/htapps/archive/hathifiles" +LOGFILE_DIR = "../logs/hathifiles_database" -envfile = Pathname.new(__dir__).parent + '.env' -devenvfile = Pathname.new(__dir__).parent + '.devenv' +envfile = Pathname.new(__dir__).parent + ".env" +# devenvfile = Pathname.new(__dir__).parent + ".devenv" Dotenv.load(envfile) -connection_string = ENV['HATHIFILES_MYSQL_CONNECTION'] +connection_string = ENV["HATHIFILES_MYSQL_CONNECTION"] connection = HathifilesDatabase.new(connection_string) production = connection.rawdb Dotenv.load(envfile) -connection_string = ENV['HATHIFILES_MYSQL_CONNECTION'] +connection_string = ENV["HATHIFILES_MYSQL_CONNECTION"] reindex_connection = HathifilesDatabase.new(connection_string) reindex = reindex_connection.rawdb @@ -34,12 +34,11 @@ def ri(t) "hathifiles_reindex.#{t}" end - # Get tables that are in both tables = production.tables.intersection(reindex.tables) -renames = tables.flat_map{|t| [[prod(t), tmp(t)], [ri(t), prod(t)], [tmp(t), ri(t)]]} -sql = "RENAME TABLE " + renames.map{|x| x.join(" TO ")}.join(', ') +renames = tables.flat_map { |t| [[prod(t), tmp(t)], [ri(t), prod(t)], [tmp(t), ri(t)]] } +sql = "RENAME TABLE " + renames.map { |x| x.join(" TO ") }.join(", ") production.run(sql) diff --git a/hathifiles_database.gemspec b/hathifiles_database.gemspec index 9f9b2b2..10ee809 100644 --- a/hathifiles_database.gemspec +++ b/hathifiles_database.gemspec @@ -8,7 +8,7 @@ Gem::Specification.new do |spec| spec.authors = ["Bill Dueber"] spec.email = ["bill@dueber.com"] - spec.summary = %q{Keep a database of the data in the hathifiles} + spec.summary = "Keep a database of the data in the hathifiles" spec.homepage = "https://github.com/billdueber/hathifiles_database" spec.license = "MIT" @@ -16,11 +16,11 @@ Gem::Specification.new do |spec| spec.metadata["homepage_uri"] = spec.homepage spec.metadata["source_code_uri"] = spec.homepage - spec.metadata["changelog_uri"] = spec.homepage + '/CHANGELOG.md' + spec.metadata["changelog_uri"] = spec.homepage + "/CHANGELOG.md" # Specify which files should be added to the gem when it is released. # The `git ls-files -z` loads the files in the RubyGem that have been added into git. 
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do + spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) } end spec.bindir = "exe" @@ -33,16 +33,14 @@ Gem::Specification.new do |spec| spec.add_development_dependency "pry" spec.add_development_dependency "yard" - spec.add_dependency 'dotenv' - spec.add_dependency 'ettin' # config - spec.add_dependency 'library_stdnums' # normalize - spec.add_dependency 'sequel' - spec.add_dependency 'hanami-cli' # command line - - spec.add_dependency 'sqlite3' - spec.add_dependency 'mysql2' - spec.add_dependency 'tty-prompt' - spec.add_dependency 'date_named_file' - + spec.add_dependency "dotenv" + spec.add_dependency "ettin" # config + spec.add_dependency "library_stdnums" # normalize + spec.add_dependency "sequel" + spec.add_dependency "hanami-cli" # command line + spec.add_dependency "sqlite3" + spec.add_dependency "mysql2" + spec.add_dependency "tty-prompt" + spec.add_dependency "date_named_file" end diff --git a/lib/hathifiles_database.rb b/lib/hathifiles_database.rb index 78c0dc0..053b047 100644 --- a/lib/hathifiles_database.rb +++ b/lib/hathifiles_database.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true -require 'hathifiles_database/version' -require 'hathifiles_database/datafile' -require 'hathifiles_database/db/connection' +require "hathifiles_database/version" +require "hathifiles_database/datafile" +require "hathifiles_database/db/connection" module HathifilesDatabase def self.new(connection_string) diff --git a/lib/hathifiles_database/cli.rb b/lib/hathifiles_database/cli.rb index bfc9afd..e54e9bb 100644 --- a/lib/hathifiles_database/cli.rb +++ b/lib/hathifiles_database/cli.rb @@ -1,5 +1,5 @@ -require 'date' -require 'hanami/cli' +require "date" +require "hanami/cli" module HathifilesDatabase module CLI @@ -7,30 +7,25 @@ module Commands extend Hanami::CLI::Registry class Date8 < Date - def to_s - self.strftime('%Y%m%d') + strftime("%Y%m%d") end def self.range_since(dt) - dateify(dt).upto self.today + dateify(dt).upto today end def self.dateify(dt) if dt.respond_to? 
:to_date dt.to_date else - self.parse(dt.to_s) + parse(dt.to_s) end end end class Update - - - end - end end end diff --git a/lib/hathifiles_database/columns.rb b/lib/hathifiles_database/columns.rb index a288314..d296f36 100644 --- a/lib/hathifiles_database/columns.rb +++ b/lib/hathifiles_database/columns.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true # Ordered as they are in the hathifiles -require 'library_stdnums' +require "library_stdnums" module HathifilesDatabase module Columns @@ -13,13 +13,13 @@ class Column # @param [Proc] transform_lambda Code to transform the data before storing def initialize(column, table, transform_lambda = nil) @column = column - @table = table.to_sym + @table = table.to_sym @transform_lambda = transform_lambda end # @return [Boolean] def scalar - raise 'Override #scalar for column types' + raise "Override #scalar for column types" end end diff --git a/lib/hathifiles_database/constants.rb b/lib/hathifiles_database/constants.rb index 9ef7401..2c05503 100644 --- a/lib/hathifiles_database/constants.rb +++ b/lib/hathifiles_database/constants.rb @@ -1,13 +1,12 @@ # frozen_string_literal: true # Ordered as they are in the hathifiles -require 'library_stdnums' -require 'logger' +require "library_stdnums" +require "logger" module HathifilesDatabase module Constants - - LOGGER = Logger.new(STDERR) + LOGGER = Logger.new($stderr) # Database table names MAINTABLE = :hf @@ -34,7 +33,5 @@ module Constants content_provider_code ] - end end - diff --git a/lib/hathifiles_database/datafile.rb b/lib/hathifiles_database/datafile.rb index 67234a4..041f6b5 100644 --- a/lib/hathifiles_database/datafile.rb +++ b/lib/hathifiles_database/datafile.rb @@ -1,9 +1,9 @@ # frozen_string_literal: true -require 'zlib' -require_relative 'linespec' -require 'hathifiles_database/db/writer' -require 'delegate' +require "zlib" +require_relative "linespec" +require "hathifiles_database/db/writer" +require "delegate" module HathifilesDatabase class Datafile < SimpleDelegator @@ -56,7 +56,7 @@ def dump_files_for_data_import(destination_dir, nodate_suffix: false) filepaths = w_class.outputfile_paths_from_linespec(@linespec, output_dir: destination_dir, nodate_suffix: nodate_suffix) writer = w_class.new(outputfile_paths: filepaths, maintable_name: @linespec.maintable_name) line_number = 1 - self.each do |line| + each do |line| logger.info "#{line_number} lines processed" if line_number % 500_000 == 0 writer << line line_number += 1 @@ -65,6 +65,5 @@ def dump_files_for_data_import(destination_dir, nodate_suffix: false) logger.info "" filepaths end - end end diff --git a/lib/hathifiles_database/db/connection.rb b/lib/hathifiles_database/db/connection.rb index 6d13be1..ffd010f 100644 --- a/lib/hathifiles_database/db/connection.rb +++ b/lib/hathifiles_database/db/connection.rb @@ -1,24 +1,23 @@ # frozen_string_literal: true -require 'hathifiles_database/line' -require 'hathifiles_database/linespec' -require 'hathifiles_database/constants' -require 'hathifiles_database/exceptions' -require 'hathifiles_database/db/writer' -require 'logger' +require "hathifiles_database/line" +require "hathifiles_database/linespec" +require "hathifiles_database/constants" +require "hathifiles_database/exceptions" +require "hathifiles_database/db/writer" +require "logger" -require 'sequel' +require "sequel" Sequel.extension(:migration) module HathifilesDatabase class DB class Connection - extend HathifilesDatabase::Exception - LOGGER = Logger.new(STDERR) - MIGRATION_DIR = Pathname.new(__dir__) + 'migrations' + LOGGER = 
Logger.new($stderr) + MIGRATION_DIR = Pathname.new(__dir__) + "migrations" attr_accessor :logger, :rawdb @@ -30,31 +29,30 @@ class Connection # @param [#info] logger A logger object that responds to, e.g., `#warn`, # `#info`, etc. def initialize(connection_string, logger: LOGGER) - @rawdb = Sequel.connect(connection_string + '?local_infile=1&CharSet=utf8mb4') + @rawdb = Sequel.connect(connection_string + "?local_infile=1&CharSet=utf8mb4") # __setobj__(@rawdb) - @main_table = @rawdb[Constants::MAINTABLE] + @main_table = @rawdb[Constants::MAINTABLE] @foreign_tables = Constants::FOREIGN_TABLES.values.each_with_object({}) do |tablename, h| h[tablename] = @rawdb[tablename] end - @logger = logger + @logger = logger end # Update the tables from a file just by directly deleting/inserting # the values. It's slow, but not so slow that it's not fine for a normal # nightly changefile, and it's a lot less screwing around. def update_from_file(filepath, linespec = LineSpec.default_linespec, logger: Constants::LOGGER) - path = Pathname.new(filepath) + # path = Pathname.new(filepath) datafile = HathifilesDatabase::Datafile.new(filepath, linespec, logger: logger) upsert(datafile) end - # Update the database with data from a bunch of HathifileDatabase::Line # objects. # @param [Enumerable] lines An enumeration of # lines (generally just a datafile, which has the right interface) def upsert(lines) - slice_size = 100 + slice_size = 100 log_report_chunk_size = 5000 mysql_set_foreign_key_checks(:on) @rawdb.transaction do @@ -74,7 +72,6 @@ def upsert(lines) end end - def delete_existing_data(lines) @main_table.where(htid: lines.map(&:htid)).delete @foreign_tables.each_pair do |_tablename, table| @@ -99,31 +96,30 @@ def add(lines) # Migration targets TABLES_CREATED_NO_INDEXES = 100 - DROP_EVERYTHING = 0 - + DROP_EVERYTHING = 0 # Create all the tables needed def create_tables! Sequel::Migrator.run(@rawdb, MIGRATION_DIR, - allow_missing_migration_files: true, - target: TABLES_CREATED_NO_INDEXES) + allow_missing_migration_files: true, + target: TABLES_CREATED_NO_INDEXES) end def drop_tables! Sequel::Migrator.run(@rawdb, MIGRATION_DIR, - allow_missing_migration_files: true, - target: DROP_EVERYTHING) + allow_missing_migration_files: true, + target: DROP_EVERYTHING) end def add_indexes! Sequel::Migrator.run(@rawdb, MIGRATION_DIR, - allow_missing_migration_files: true) + allow_missing_migration_files: true) end def drop_indexes! Sequel::Migrator.run(@rawdb, MIGRATION_DIR, - allow_missing_migration_files: true, - target: TABLES_CREATED_NO_INDEXES) + allow_missing_migration_files: true, + target: TABLES_CREATED_NO_INDEXES) end def recreate_tables! @@ -131,7 +127,6 @@ def recreate_tables! create_tables! end - # Load the given filepath into the table named. # Note that we have to explicitly state that there's isn't an escape character # (hence "ESCAPED BY ''") because some fields end with a backslash -- the default @@ -140,13 +135,11 @@ def recreate_tables! 
# @param [Symbol] tablename # @param [Pathname, String] filepath Path to the tab-delimited file to load def load_tab_delimited_file(tablename, filepath) - @rawdb.run("LOAD DATA LOCAL INFILE '#{filepath}' INTO TABLE #{tablename} CHARACTER SET utf8mb4 FIELDS TERMINATED BY '\t' ESCAPED BY ''") + @rawdb.run("LOAD DATA LOCAL INFILE '#{filepath}' INTO TABLE #{tablename} CHARACTER SET utf8mb4 FIELDS TERMINATED BY '\t' ESCAPED BY ''") end - # Start from scratch def start_from_scratch(fullfile, linespec: LineSpec.default_linespec, destination_dir: Dir.tmpdir) - datafile = Datafile.new(fullfile, linespec) logger.info "Dumping files to #{destination_dir} for later import" dump_file_paths = datafile.dump_files_for_data_import(destination_dir) @@ -172,7 +165,6 @@ def mysql_set_foreign_key_checks(on_or_off) raise ArgumentError.new("mysql_set_foreign_key_checks must be send :on or :off") end end - end end end diff --git a/lib/hathifiles_database/db/migrations/001_create.rb b/lib/hathifiles_database/db/migrations/001_create.rb index ec024f4..c8e2f9c 100644 --- a/lib/hathifiles_database/db/migrations/001_create.rb +++ b/lib/hathifiles_database/db/migrations/001_create.rb @@ -1,6 +1,5 @@ -require 'sequel' -require 'hathifiles_database/constants' -include HathifilesDatabase::Constants +require "sequel" +require "hathifiles_database/constants" Sequel.extension :migration @@ -14,7 +13,8 @@ # Sequel.migration do up do - create_table(MAINTABLE, collate: "utf8_general_ci", charset: "utf8") do + create_table(HathifilesDatabase::Constants::MAINTABLE, collate: "utf8_general_ci", + charset: "utf8") do String :htid, null: false TrueClass :access String :rights_code @@ -44,19 +44,18 @@ end FOREIGN_TABLES.values.each do |table| - create_table(table, collate: "utf8_general_ci", charset: 'utf8') do + create_table(table, collate: "utf8_general_ci", charset: "utf8") do String :htid, null: false String :value, null: false end end - end down do - FOREIGN_TABLES.values.each do |table| + HathifilesDatabase::Constants::FOREIGN_TABLES.values.each do |table| drop_table(table) end - drop_table(MAINTABLE) + drop_table(HathifilesDatabase::Constants::MAINTABLE) end end diff --git a/lib/hathifiles_database/db/migrations/100_created_no_indexes.rb b/lib/hathifiles_database/db/migrations/100_created_no_indexes.rb index 58ea312..d33db3b 100644 --- a/lib/hathifiles_database/db/migrations/100_created_no_indexes.rb +++ b/lib/hathifiles_database/db/migrations/100_created_no_indexes.rb @@ -1,6 +1,5 @@ -require 'sequel' -require 'hathifiles_database/constants' -include HathifilesDatabase::Constants +require "sequel" +require "hathifiles_database/constants" Sequel.extension :migration diff --git a/lib/hathifiles_database/db/migrations/101_add_index.rb b/lib/hathifiles_database/db/migrations/101_add_index.rb index 26d9597..553c7da 100644 --- a/lib/hathifiles_database/db/migrations/101_add_index.rb +++ b/lib/hathifiles_database/db/migrations/101_add_index.rb @@ -1,14 +1,13 @@ -require 'sequel' -require 'hathifiles_database/constants' -include HathifilesDatabase::Constants +require "sequel" +require "hathifiles_database/constants" Sequel.extension :migration # Add indexes after importing data Sequel.migration do up do - alter_table(MAINTABLE) do - MAINTABLE_INDEXES.each do |col| + alter_table(HathifilesDatabase::Constants::MAINTABLE) do + HathifilesDatabase::Constants::MAINTABLE_INDEXES.each do |col| HathifilesDatabase::Constants::LOGGER.info("Adding index #{col} to main table") add_index [col] HathifilesDatabase::Constants::LOGGER.info(" #{col} 
index added.") @@ -16,18 +15,18 @@ end HathifilesDatabase::Constants::LOGGER.info("Done with main table") - FOREIGN_TABLES.values.each do |table| + HathifilesDatabase::Constants::FOREIGN_TABLES.values.each do |table| alter_table(table) do - HathifilesDatabase::Constants::LOGGER.info("Adding htid/value index to #{table}") + HathifilesDatabase::Constants::LOGGER.info("Adding htid/value index to #{table}") add_index [:htid] add_index [:value] - HathifilesDatabase::Constants::LOGGER.info("Done with table #{table}") + HathifilesDatabase::Constants::LOGGER.info("Done with table #{table}") end end end down do - FOREIGN_TABLES.values.each do |table| + HathifilesDatabase::Constants::FOREIGN_TABLES.values.each do |table| alter_table(table) do drop_index [:htid] drop_index [:value] @@ -35,11 +34,9 @@ end alter_table(MAINTABLE) do - MAINTABLE_INDEXES.each do |col| + HathifilesDatabase::Constants::MAINTABLE_INDEXES.each do |col| drop_index [col] end end - end - end diff --git a/lib/hathifiles_database/db/writer.rb b/lib/hathifiles_database/db/writer.rb index 620ca5e..26d2b4a 100644 --- a/lib/hathifiles_database/db/writer.rb +++ b/lib/hathifiles_database/db/writer.rb @@ -1,9 +1,7 @@ module HathifilesDatabase class DB module Writer - class InfileDatabaseWriter - attr_accessor :logger, :connection # @param [HathifilesDatabase::DB::Connection] connection The database connection @@ -32,26 +30,22 @@ def bulk_load_dump_files @connection.load_tab_delimited_file(tablename, filepath) end end - - end - class TempfileWriter - attr_accessor :filepaths, :output_files, :maintable_name # @param [Hash] outputfile_paths, mapping tablename to an outputfile, # as returned by TempFileWriter.outputfile_paths_from_linespec # @param [Symbol, String] maintable_name The name of the main table (e.g., :hf) def initialize(outputfile_paths:, maintable_name:) - @filepaths = outputfile_paths + @filepaths = outputfile_paths @output_files = @filepaths.each_with_object({}) do |kv, h| table, filepath = *kv filepath.parent.mkpath - h[table] = File.open(filepath, 'w:utf-8') + h[table] = File.open(filepath, "w:utf-8") end - @maintable = @output_files[maintable_name.to_sym] + @maintable = @output_files[maintable_name.to_sym] end def write(line) @@ -67,11 +61,11 @@ def write(line) alias_method :<<, :write def close - output_files.values.each {|f| f.close} + output_files.values.each { |f| f.close } end def self.outputfile_paths_from_linespec(linespec, nodate_suffix: false, output_dir: Dir.tmpdir) - ddir = Pathname.new(output_dir) + ddir = Pathname.new(output_dir) suffix = create_suffix(nodate_suffix) linespec.tables.each_with_object({}) do |table, h| filename = "#{table}#{suffix}.tsv" @@ -80,15 +74,15 @@ def self.outputfile_paths_from_linespec(linespec, nodate_suffix: false, output_d end end - private def self.create_suffix(nodate_suffix = false) if nodate_suffix - '' + "" else - '_' + DateTime.now.strftime('%Y%m%d_%H%M') + "_" + DateTime.now.strftime("%Y%m%d_%H%M") end end + private_class_method :method end end end diff --git a/lib/hathifiles_database/line.rb b/lib/hathifiles_database/line.rb index cace442..450fd76 100644 --- a/lib/hathifiles_database/line.rb +++ b/lib/hathifiles_database/line.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true -require 'hathifiles_database/exceptions' -require 'hathifiles_database/columns' -require 'hathifiles_database/linespec' +require "hathifiles_database/exceptions" +require "hathifiles_database/columns" +require "hathifiles_database/linespec" module HathifilesDatabase class Line @@ -17,7 +17,7 @@ def 
initialize(specs, values, fileline: nil)
   end
 
   def empty?
-    @htid.nil? or @htid == ''
+    @htid.nil? or @htid == ""
   end
 
   # @param [Array] specs List of column specs
@@ -27,7 +27,7 @@ def add_values!(specs, values)
       if spec.scalar
         add_to_main_table spec.transform(values[index])
       else
-        add_to_main_table(spec.transform(values[index]).join(','))
+        add_to_main_table(spec.transform(values[index]).join(","))
         add_to_foreign_table(spec.table, spec.transform(values[index]))
       end
     end
@@ -40,6 +40,5 @@ def add_to_main_table(value)
     def add_to_foreign_table(table, values)
       @foreign_table_data[table] = values.compact
     end
-
   end
 end
diff --git a/lib/hathifiles_database/linespec.rb b/lib/hathifiles_database/linespec.rb
index 1a70df5..3d35bf1 100644
--- a/lib/hathifiles_database/linespec.rb
+++ b/lib/hathifiles_database/linespec.rb
@@ -1,19 +1,16 @@
 # frozen_string_literal: true
 
-require 'hathifiles_database/exceptions'
-require 'hathifiles_database/columns'
-require 'hathifiles_database/line'
-require 'hathifiles_database/constants'
-require 'library_stdnums'
-require 'date'
-
+require "hathifiles_database/exceptions"
+require "hathifiles_database/columns"
+require "hathifiles_database/line"
+require "hathifiles_database/constants"
+require "library_stdnums"
+require "date"
 
 module HathifilesDatabase
-
   # A LineSpec is basically an array of columns (maintable or foreign) and
   # ways to get to them.
   class LineSpec
-
     include Enumerable
 
     attr_accessor :maintable_name
@@ -23,8 +20,8 @@ class LineSpec
     # @param [Array
-  ALLOW = ->(str) { str == 'allow' ? 1 : 0}
+  ALLOW = ->(str) { (str == "allow") ? 1 : 0 }
 
-  ISBN_NORMALIZE = ->(str) { str.split(/[\s,;|]+/).map{|x| StdNum::ISBN.allNormalizedValues(x)}.flatten.compact.uniq }
-  ISSN_NORMALIZE = ->(str) { str.split(/[\s,;|]+/).map{|x| StdNum::ISSN.normalize(x)}.flatten.compact.uniq }
+  ISBN_NORMALIZE = ->(str) { str.split(/[\s,;|]+/).map { |x| StdNum::ISBN.allNormalizedValues(x) }.flatten.compact.uniq }
+  ISSN_NORMALIZE = ->(str) { str.split(/[\s,;|]+/).map { |x| StdNum::ISSN.normalize(x) }.flatten.compact.uniq }
   LCCN_NORMALIZE = ->(str) { [str, StdNum::LCCN.normalize(str)] }
-  DATEIFY = ->(str) {
-    DateTime.parse(str).strftime('%Y-%m-%d %H:%M:%S')
+  DATEIFY = ->(str) {
+    DateTime.parse(str).strftime("%Y-%m-%d %H:%M:%S")
   }
 
-  DEFAULT_LINESPEC = self.new do
+  DEFAULT_LINESPEC = new do
     maintable(:htid) # 1
     maintable(:access, ALLOW) # 2
     maintable(:rights_code) # 3
@@ -123,16 +120,15 @@ def parse(rawline, fileline = nil)
     Line.new(self, split(rawline), fileline: fileline)
   end
 
-
   # Split on tabs and verify that we have the right number of columns
   # @param [String] rawline Raw line from the hathifile
   # @return [Array]
   def split(rawline)
-    vals = rawline.split(/\t/)
+    vals = rawline.split("\t")
     vals[-1].chomp!
 
     # Sometimes the author isn't there so we're one short
-    vals.push '' if author_missing?(vals)
+    vals.push "" if author_missing?(vals)
 
     # Everything look ok?
     validate!(vals)
@@ -149,8 +145,7 @@ def author_missing?(vals)
   end
 
   def validate!(vals)
-    raise HathifilesDatabase::Exception::WrongNumberOfColumns.new(htid: vals.first, count: vals.count, expected: @count ) if @count != vals.count
+    raise HathifilesDatabase::Exception::WrongNumberOfColumns.new(htid: vals.first, count: vals.count, expected: @count) if @count != vals.count
   end
 end
-