Skip to content

Commit

Permalink
Add Address name data migration rake task
Browse files Browse the repository at this point in the history
After solidusio#3908 the table spree_addresses includes the name
field, so it can be populated with historical data already present
in the table by joining the content of firstname and lastname.
Running `rails solidus:migrations:migrate_address_names:up` will
take care of the job.

The task is arranged in steps that require user input in order to
proceed:

  * the task will exit immediately (as a no-op) when no records with a
    blank name are found;
  * if records are found, the user is asked to confirm before
    proceeding;
  * the user is now asked for the record batch size to be used.
    My tests showed that 100,000 records per batch should balance the
    need to avoid locking records for too long against having the
    script complete without too much overhead;
  * before migrating data, the user is asked to confirm once again.

Running this task is required when upgrading to Solidus 3.0.
  • Loading branch information
filippoliverani authored and spaghetticode committed Mar 8, 2021
1 parent 61864f8 commit b7ba9ad
Show file tree
Hide file tree
Showing 2 changed files with 239 additions and 0 deletions.
154 changes: 154 additions & 0 deletions core/lib/tasks/migrations/migrate_address_names.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# frozen_string_literal: true

require 'thor'

namespace :solidus do
  namespace :migrations do
    namespace :migrate_address_names do
      desc 'Backfills Spree::Address name attribute using firstname and lastname
        concatenation in order to retain historical data when upgrading to new
        address name format'
      task up: :environment do
        # Lightweight stand-in for Spree::Address. Going through a bare
        # ApplicationRecord subclass bypasses any default scope, callbacks or
        # ignored-columns configuration on the real model, so `update_all`
        # touches the table directly.
        class Spree::AddressForMigration < ApplicationRecord
          self.table_name = 'spree_addresses'
        end

        # Build the relation once and reuse it for both the count and the
        # batched update, so the two cannot drift apart.
        addresses_without_name = Spree::AddressForMigration.unscoped.where(name: [nil, ''])
        count = addresses_without_name.count
        adapter_name = ActiveRecord::Base.connection.adapter_name.downcase
        shell = Thor::Shell::Basic.new
        puts "Your DB contains #{count} addresses that may be affected by this task."
        unless count.zero?
          # `TRIM` is not needed for pg or mysql when using `CONCAT_WS`: NULL
          # operands are skipped entirely, so no leading/trailing separator is
          # produced, e.g. concat_ws(' ', 'foo', null) => 'foo'. SQLite has no
          # CONCAT_WS, hence the COALESCE + '||' + TRIM fallback there.
          concat_statement =
            case adapter_name
            when /sqlite/
              "name = TRIM(COALESCE(firstname, '') || ' ' || COALESCE(lastname, ''))"
            when /postgres/, /mysql2/
              "name = CONCAT_WS(' ', firstname, lastname)"
            else
              abort "No migration path available for adapter #{adapter_name}. Please write your own."
            end
          answer = shell.ask('do you want to proceed?', limited_to: ['Y', 'N'], case_insensitive: true)
          if answer == 'Y'
            # The batch size should be limited to avoid locking the table
            # records for too long. Benchmarks with 1_000_000 records in
            # `spree_addresses`, all with distinct first/last names:
            #
            #   * single-shot update: ~178s on postgresql, ~153s on mysql;
            #   * batches of 200_000: ~28-41s per batch;
            #   * batches of 150_000: ~12-30s per batch;
            #   * batches of 100_000: ~10-21s per batch.
            #
            # The migration is much faster when there is no index on the
            # `name` column (with mysql each 200_000 batch then takes a flat
            # ~17s). If special needs arise, one can drop the index added by
            # migration 20210122110141_add_name_to_spree_addresses.rb and
            # re-add it later in non-blocking ways. For postgresql:
            #
            #   add_index(:spree_addresses, :name, algorithm: :concurrently)
            #
            # For mysql 5.6:
            #
            #   sql = "ALTER TABLE spree_addresses ADD INDEX index_spree_addresses_on_name (name), ALGORITHM=INPLACE, LOCK=NONE;"
            #   ActiveRecord::Base.connection.execute sql
            puts 'Data migration will happen in batches. The default value is 100_000, which should take less than 20 seconds on mysql or postgresql.'
            size = shell.ask('Please enter a different batch size, or press return to confirm the default')
            size = (size.presence || 100_000).to_i

            abort "Invalid batch size number #{size}, please run the task again." unless size.positive?

            puts "We're going to migrate #{count} records in batches of #{size}"
            answer = shell.ask('do you want to proceed?', limited_to: ['Y', 'N'], case_insensitive: true)
            if answer == 'Y'
              puts "Updating #{count} records in batches of #{size}, this may take a while..."

              addresses_without_name.in_batches(of: size).each do |batch|
                started_at = Time.zone.now
                batch.update_all(concat_statement)
                puts "batch took #{(Time.zone.now - started_at).to_i} seconds"
              end
            end
          end
        end
      end
    end
  end
end
85 changes: 85 additions & 0 deletions core/spec/lib/tasks/migrations/migrate_address_names_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# frozen_string_literal: true

require 'rails_helper'

path = Spree::Core::Engine.root.join('lib/tasks/migrations/migrate_address_names.rake')

RSpec.describe 'solidus:migrations:migrate_address_names' do
  # Newer Solidus versions mark the legacy firstname/lastname columns as
  # ignored on Spree::Address. Temporarily un-ignore them for the duration of
  # each example so the factories and assertions below can read and write
  # those columns, then restore the original configuration.
  around do |example|
    ignored_columns = Spree::Address.ignored_columns
    Spree::Address.ignored_columns = []
    Spree::Address.reset_column_information

    example.run
  ensure
    Spree::Address.ignored_columns = ignored_columns
    Spree::Address.reset_column_information
  end

  describe 'up' do
    include_context(
      'rake',
      task_path: path,
      task_name: 'solidus:migrations:migrate_address_names:up'
    )

    context "when there are no records to migrate" do
      it "simply exits" do
        expect { task.invoke }.to output(
          "Your DB contains 0 addresses that may be affected by this task.\n"
        ).to_stdout
      end
    end

    context "when there are records to migrate" do
      let!(:complete_address) { create(:address, firstname: 'Jane', lastname: 'Smith') }
      let!(:partial_address) { create(:address, firstname: nil, lastname: 'Doe') }

      # The factory may populate `name`; blank it out so these records are
      # picked up by the task's `where(name: [nil, ''])` relation.
      before do
        Spree::Address.update_all(name: nil)
      end

      context "when the DB adapter is not supported" do
        before do
          allow(ActiveRecord::Base.connection).to receive(:adapter_name) { 'ms_sql' }
        end

        it "exits with error" do
          expect { task.invoke }.to raise_error(SystemExit)
        end
      end

      context "when the DB adapter is supported" do
        before do
          allow_any_instance_of(Thor::Shell::Basic).to receive(:ask).with(
            'do you want to proceed?',
            limited_to: ['Y', 'N'],
            case_insensitive: true
          ).and_return('Y')

          allow_any_instance_of(Thor::Shell::Basic).to receive(:ask).with(
            'Please enter a different batch size, or press return to confirm the default'
          ).and_return(size)
        end

        context 'when providing valid batch size number' do
          let(:size) { 10 }

          it 'migrates name data by setting the actual field on the DB' do
            expect { task.invoke }.to change { complete_address.reload[:name] }.to('Jane Smith')
              .and change { partial_address.reload[:name] }.to('Doe')
          end
        end

        context 'when providing invalid batch size number' do
          # 'foobar'.to_i is 0, which fails the task's `size.positive?` guard.
          let(:size) { 'foobar' }

          it 'exits without migrating name data' do
            expect { task.invoke }.to raise_error(SystemExit)
            expect(Spree::Address.pluck(:name)).to be_all(&:nil?)
          end
        end
      end
    end
  end
end

0 comments on commit b7ba9ad

Please sign in to comment.