Skip to content

Commit

Permalink
Add Address name data migration rake task
Browse files Browse the repository at this point in the history
After solidusio#3908 the table spree_addresses includes the name
field, so it can be populated with historical data already present
in the table by joining the content of firstname and lastname.
Running `rails solidus:migrations:migrate_address_names:up` will
take care of the job.

The task is arranged in steps that require user input in order to
proceed:

  * the task will exit immediately (as a no-op) when no records with a
    blank name are found;
  * if records are found, the user is asked to confirm before
    proceeding;
  * the user is now asked for the record batch size to be used.
    My tests showed that 100,000 records per batch should balance the
    need to avoid locking records for too long against having the
    script complete without too much overhead;
  * before migrating data, the user is asked to confirm once again.

Running this task is required when upgrading to Solidus 3.0.
  • Loading branch information
filippoliverani authored and spaghetticode committed Mar 8, 2021
1 parent 61864f8 commit b7ba9ad
Show file tree
Hide file tree
Showing 2 changed files with 239 additions and 0 deletions.
154 changes: 154 additions & 0 deletions core/lib/tasks/migrations/migrate_address_names.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# frozen_string_literal: true

require 'thor'

namespace :solidus do
  namespace :migrations do
    namespace :migrate_address_names do
      desc 'Backfills Spree::Address name attribute using firstname and lastname
        concatenation in order to retain historical data when upgrading to new
        address name format'
      task up: :environment do
        # Lightweight stand-in for Spree::Address. Going through a bare
        # ApplicationRecord subclass bypasses any default scope, callbacks or
        # ignored-columns configuration on the real model, so `update_all`
        # touches the table directly.
        class Spree::AddressForMigration < ApplicationRecord
          self.table_name = 'spree_addresses'
        end

        # Build the relation once and reuse it for both the count and the
        # batched update, so the two cannot drift apart.
        addresses_without_name = Spree::AddressForMigration.unscoped.where(name: [nil, ''])
        count = addresses_without_name.count
        adapter_name = ActiveRecord::Base.connection.adapter_name.downcase
        shell = Thor::Shell::Basic.new
        puts "Your DB contains #{count} addresses that may be affected by this task."
        unless count.zero?
          # `TRIM` is not needed for pg or mysql when using `CONCAT_WS`: NULL
          # operands are skipped entirely, so no leading/trailing separator is
          # produced, e.g. concat_ws(' ', 'foo', null) => 'foo'. SQLite has no
          # CONCAT_WS, hence the COALESCE + '||' + TRIM fallback there.
          concat_statement =
            case adapter_name
            when /sqlite/
              "name = TRIM(COALESCE(firstname, '') || ' ' || COALESCE(lastname, ''))"
            when /postgres/, /mysql2/
              "name = CONCAT_WS(' ', firstname, lastname)"
            else
              abort "No migration path available for adapter #{adapter_name}. Please write your own."
            end
          answer = shell.ask('do you want to proceed?', limited_to: ['Y', 'N'], case_insensitive: true)
          if answer == 'Y'
            # The batch size should be limited to avoid locking the table
            # records for too long. Benchmarks with 1_000_000 records in
            # `spree_addresses`, all with distinct first/last names:
            #
            #   * single-shot update: ~178s on postgresql, ~153s on mysql;
            #   * batches of 200_000: ~28-41s per batch;
            #   * batches of 150_000: ~12-30s per batch;
            #   * batches of 100_000: ~10-21s per batch.
            #
            # The migration is much faster when there is no index on the
            # `name` column (with mysql each 200_000 batch then takes a flat
            # ~17s). If special needs arise, one can drop the index added by
            # migration 20210122110141_add_name_to_spree_addresses.rb and
            # re-add it later in non-blocking ways. For postgresql:
            #
            #   add_index(:spree_addresses, :name, algorithm: :concurrently)
            #
            # For mysql 5.6:
            #
            #   sql = "ALTER TABLE spree_addresses ADD INDEX index_spree_addresses_on_name (name), ALGORITHM=INPLACE, LOCK=NONE;"
            #   ActiveRecord::Base.connection.execute sql
            puts 'Data migration will happen in batches. The default value is 100_000, which should take less than 20 seconds on mysql or postgresql.'
            size = shell.ask('Please enter a different batch size, or press return to confirm the default')
            size = (size.presence || 100_000).to_i

            abort "Invalid batch size number #{size}, please run the task again." unless size.positive?

            puts "We're going to migrate #{count} records in batches of #{size}"
            answer = shell.ask('do you want to proceed?', limited_to: ['Y', 'N'], case_insensitive: true)
            if answer == 'Y'
              puts "Updating #{count} records in batches of #{size}, this may take a while..."

              addresses_without_name.in_batches(of: size).each do |batch|
                started_at = Time.zone.now
                batch.update_all(concat_statement)
                puts "batch took #{(Time.zone.now - started_at).to_i} seconds"
              end
            end
          end
        end
      end
    end
  end
end
85 changes: 85 additions & 0 deletions core/spec/lib/tasks/migrations/migrate_address_names_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# frozen_string_literal: true

require 'rails_helper'

path = Spree::Core::Engine.root.join('lib/tasks/migrations/migrate_address_names.rake')

RSpec.describe 'solidus:migrations:migrate_address_names' do
  # Newer Solidus versions mark the legacy firstname/lastname columns as
  # ignored on Spree::Address. Temporarily un-ignore them for the duration of
  # each example so the factories and assertions below can read and write
  # those columns, then restore the original configuration.
  around do |example|
    ignored_columns = Spree::Address.ignored_columns
    Spree::Address.ignored_columns = []
    Spree::Address.reset_column_information

    example.run
  ensure
    Spree::Address.ignored_columns = ignored_columns
    Spree::Address.reset_column_information
  end

  describe 'up' do
    include_context(
      'rake',
      task_path: path,
      task_name: 'solidus:migrations:migrate_address_names:up'
    )

    context "when there are no records to migrate" do
      it "simply exits" do
        expect { task.invoke }.to output(
          "Your DB contains 0 addresses that may be affected by this task.\n"
        ).to_stdout
      end
    end

    context "when there are records to migrate" do
      let!(:complete_address) { create(:address, firstname: 'Jane', lastname: 'Smith') }
      let!(:partial_address) { create(:address, firstname: nil, lastname: 'Doe') }

      # The factory may populate `name`; blank it out so these records are
      # picked up by the task's `where(name: [nil, ''])` relation.
      before do
        Spree::Address.update_all(name: nil)
      end

      context "when the DB adapter is not supported" do
        before do
          allow(ActiveRecord::Base.connection).to receive(:adapter_name) { 'ms_sql' }
        end

        it "exits with error" do
          expect { task.invoke }.to raise_error(SystemExit)
        end
      end

      context "when the DB adapter is supported" do
        before do
          allow_any_instance_of(Thor::Shell::Basic).to receive(:ask).with(
            'do you want to proceed?',
            limited_to: ['Y', 'N'],
            case_insensitive: true
          ).and_return('Y')

          allow_any_instance_of(Thor::Shell::Basic).to receive(:ask).with(
            'Please enter a different batch size, or press return to confirm the default'
          ).and_return(size)
        end

        context 'when providing valid batch size number' do
          let(:size) { 10 }

          it 'migrates name data by setting the actual field on the DB' do
            expect { task.invoke }.to change { complete_address.reload[:name] }.to('Jane Smith')
              .and change { partial_address.reload[:name] }.to('Doe')
          end
        end

        context 'when providing invalid batch size number' do
          # 'foobar'.to_i is 0, which fails the task's `size.positive?` guard.
          let(:size) { 'foobar' }

          it 'exits without migrating name data' do
            expect { task.invoke }.to raise_error(SystemExit)
            expect(Spree::Address.pluck(:name)).to be_all(&:nil?)
          end
        end
      end
    end
  end
end

0 comments on commit b7ba9ad

Please sign in to comment.