BigW Consortium Gitlab

Commit e036a374 by Grzegorz Bizon

Add walk_table_in_batches and refactor migration helpers

parent 24a4199a
......@@ -221,17 +221,19 @@ module Gitlab
# make things _more_ complex).
#
# rubocop: disable Metrics/AbcSize
def update_column_in_batches(table, column, value)
def update_column_in_batches(table, column, value, &scope)
if transaction_open?
raise 'update_column_in_batches can not be run inside a transaction, ' \
'you can disable transactions by calling disable_ddl_transaction! ' \
'in the body of your migration class'
raise <<-MSG
update_column_in_batches helper can not be run inside a transaction.
You can disable transactions by calling `disable_ddl_transaction!`
method in the body of your migration class.
MSG
end
table = Arel::Table.new(table)
table_arel = Arel::Table.new(table)
count_arel = table.project(Arel.star.count.as('count'))
count_arel = yield table, count_arel if block_given?
count_arel = table_arel.project(Arel.star.count.as('count'))
count_arel = yield table_arel, count_arel if block_given?
total = exec_query(count_arel.to_sql).to_hash.first['count'].to_i
......@@ -246,37 +248,56 @@ module Gitlab
# rows for GitLab.com.
batch_size = max_size if batch_size > max_size
walk_table_in_batches(table, of: batch_size, scope: scope) do
Arel::UpdateManager.new(ActiveRecord::Base)
.table(table_arel)
.set([[table_arel[column], value]])
end
end
def walk_table_in_batches(table, of: 1000, scope: nil)
if transaction_open?
raise <<-MSG
walk_table_in_batches helper can not be run inside a transaction.
You can disable transactions by calling `disable_ddl_transaction!`
method in the body of your migration class.
MSG
end
table = Arel::Table.new(table)
start_arel = table.project(table[:id]).order(table[:id].asc).take(1)
start_arel = yield table, start_arel if block_given?
start_id = exec_query(start_arel.to_sql).to_hash.first['id'].to_i
start_arel = scope.call(table, start_arel) if scope
start_id = exec_query(start_arel.to_sql).to_hash.first.to_h['id'].to_i
loop do
1.step do |batch|
stop_arel = table.project(table[:id])
.where(table[:id].gteq(start_id))
.order(table[:id].asc)
.take(1)
.skip(batch_size)
stop_arel = yield table, stop_arel if block_given?
stop_row = exec_query(stop_arel.to_sql).to_hash.first
.skip(of)
update_arel = Arel::UpdateManager.new(ActiveRecord::Base)
.table(table)
.set([[table[column], value]])
.where(table[:id].gteq(start_id))
stop_arel = scope.call(table, stop_arel) if scope
stop_id = exec_query(stop_arel.to_sql)
.to_hash.first.to_h['id'].to_i
if stop_row
stop_id = stop_row['id'].to_i
start_id = stop_id
update_arel = update_arel.where(table[:id].lt(stop_id))
end
action = yield(batch, start_id, stop_id)
update_arel = yield table, update_arel if block_given?
if action.is_a?(Arel::TreeManager)
exec_arel = action.where(table[:id].gteq(start_id))
exec_arel = exec_arel.where(table[:id].lt(stop_id)) if stop_id.nonzero?
exec_arel = scope.call(table, exec_arel) if scope
execute(update_arel.to_sql)
execute(exec_arel.to_sql)
end
# There are no more rows left to update.
break unless stop_row
if stop_id.zero?
# there are no more rows left to update
break
else
# next loop
start_id = stop_id
end
end
end
......
......@@ -2,9 +2,7 @@ require 'spec_helper'
describe Gitlab::Database::MigrationHelpers, lib: true do
let(:model) do
ActiveRecord::Migration.new.extend(
Gitlab::Database::MigrationHelpers
)
ActiveRecord::Migration.new.extend(described_class)
end
before do
......@@ -264,7 +262,7 @@ describe Gitlab::Database::MigrationHelpers, lib: true do
describe '#update_column_in_batches' do
context 'when running outside of a transaction' do
before do
expect(model).to receive(:transaction_open?).and_return(false)
expect(model).to receive(:transaction_open?).twice.and_return(false)
create_list(:empty_project, 5)
end
......@@ -313,6 +311,53 @@ describe Gitlab::Database::MigrationHelpers, lib: true do
end
end
describe '#walk_table_in_batches' do
context 'when running outside of a transaction' do
before do
expect(model).to receive(:transaction_open?).and_return(false)
create_list(:empty_project, 6)
end
it 'yields for each batch' do
expect { |b| model.walk_table_in_batches(:projects, of: 2, &b) }
.to yield_control.exactly(3).times
end
it 'yields successive ranges' do
expect { |b| model.walk_table_in_batches(:projects, of: 2, &b) }
.to yield_successive_args([1, Integer, Integer],
[2, Integer, Integer],
[3, Integer, 0])
end
context 'when a scope is provided' do
it 'limits the scope of the statement provided inside the block' do
first_id = Project.first.id
scope = ->(table, query) { query.where(table[:id].eq(first_id)) }
model.walk_table_in_batches(:projects, scope: scope) do
Arel::UpdateManager.new(ActiveRecord::Base)
.table(Arel::Table.new(:projects))
.set([[Arel::Table.new(:projects)[:archived], true]])
end
expect(Project.where(archived: true).count).to eq(1)
end
end
end
context 'when running inside the transaction' do
it 'raises RuntimeError' do
expect(model).to receive(:transaction_open?).and_return(true)
expect do
model.update_column_in_batches(:projects, :star_count, Arel.sql('1+1'))
end.to raise_error(RuntimeError)
end
end
end
describe '#add_column_with_default' do
context 'outside of a transaction' do
context 'when a column limit is not set' do
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment