BigW Consortium Gitlab

Commit 1507ff8a by Sean McGivern

Make MR diff background migration less likely to time out

This version does not use transactions, but individual statements. As we have unique constraints on the target tables for the inserts, we can just ignore uniqueness violations there (as long as we always insert the same batch size, in the same order). This means the spec now must use truncation, not a transaction, as the uniqueness violation means that the whole transaction for that spec would be invalid, which isn't what we'd want. In real-world use, this isn't run in a transaction anyway. This commit also wraps unhandled exceptions, for easier finding in Sentry, and logs with a consistent format, for easier searching.
parent 91719415
......@@ -3,6 +3,12 @@ module Gitlab
class DeserializeMergeRequestDiffsAndCommits
attr_reader :diff_ids, :commit_rows, :file_rows
class Error < StandardError
def backtrace
cause.backtrace
end
end
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
end
......@@ -34,6 +40,10 @@ module Gitlab
end
flush_buffers!
rescue => e
Rails.logger.info("#{self.class.name}: failed for IDs #{merge_request_diffs.map(&:id)} with #{e.class.name}")
raise Error.new(e.inspect)
end
private
......@@ -46,22 +56,28 @@ module Gitlab
def flush_buffers!
if diff_ids.any?
MergeRequestDiff.transaction do
commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice|
Gitlab::Database.bulk_insert('merge_request_diff_commits', commit_rows_slice)
end
file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice|
Gitlab::Database.bulk_insert('merge_request_diff_files', file_rows_slice)
end
commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice|
bulk_insert('merge_request_diff_commits', commit_rows_slice)
end
MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil)
file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice|
bulk_insert('merge_request_diff_files', file_rows_slice)
end
MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil)
end
reset_buffers!
end
def bulk_insert(table, rows)
Gitlab::Database.bulk_insert(table, rows)
rescue ActiveRecord::RecordNotUnique
ids = rows.map { |row| row[:merge_request_diff_id] }.uniq.sort
Rails.logger.info("#{self.class.name}: rows inserted twice for IDs #{ids}")
end
def single_diff_rows(merge_request_diff)
sha_attribute = Gitlab::Database::ShaAttribute.new
commits = YAML.load(merge_request_diff.st_commits) rescue []
......
require 'spec_helper'
describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :truncate do
describe '#perform' do
set(:merge_request) { create(:merge_request) }
set(:merge_request_diff) { merge_request.merge_request_diff }
let(:merge_request) { create(:merge_request) }
let(:merge_request_diff) { merge_request.merge_request_diff }
let(:updated_merge_request_diff) { MergeRequestDiff.find(merge_request_diff.id) }
def diffs_to_hashes(diffs)
......@@ -84,12 +84,6 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
allow(Gitlab::Database).to receive(:bulk_insert).and_call_original
end
it 'updates and continues' do
expect(described_class::MergeRequestDiff).to receive(:transaction).twice
subject.perform(start_id, stop_id)
end
it 'inserts commit rows in chunks of BUFFER_ROWS' do
# There are 29 commits in each diff, so we should have slices of 20 + 9 + 20 + 9.
stub_const("#{described_class}::BUFFER_ROWS", 20)
......@@ -119,27 +113,87 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
context 'when BUFFER_ROWS is not exceeded' do
it 'only updates once' do
expect(described_class::MergeRequestDiff).to receive(:transaction).once
expect(Gitlab::Database).to receive(:bulk_insert)
.with('merge_request_diff_commits', anything)
.once
.and_call_original
expect(Gitlab::Database).to receive(:bulk_insert)
.with('merge_request_diff_files', anything)
.once
.and_call_original
subject.perform(start_id, stop_id)
end
end
end
context 'when the merge request diff update fails' do
before do
allow(described_class::MergeRequestDiff)
.to receive(:update_all).and_raise(ActiveRecord::Rollback)
end
context 'when some rows were already inserted due to a previous failure' do
before do
subject.perform(start_id, stop_id)
it 'does not add any diff commits' do
expect { subject.perform(merge_request_diff.id, merge_request_diff.id) }
.not_to change { MergeRequestDiffCommit.count }
convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files))
convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files))
end
it 'does not raise' do
expect { subject.perform(start_id, stop_id) }.not_to raise_exception
end
it 'logs a message' do
expect(Rails.logger).to receive(:info)
.with(
a_string_matching(described_class.name).and(matching([start_id, stop_id].inspect))
)
.twice
subject.perform(start_id, stop_id)
end
it 'ends up with the correct rows' do
expect(updated_merge_request_diff.commits.count).to eq(29)
expect(updated_merge_request_diff.raw_diffs.count).to eq(20)
end
end
it 'does not add any diff files' do
expect { subject.perform(merge_request_diff.id, merge_request_diff.id) }
.not_to change { MergeRequestDiffFile.count }
context 'when the merge request diff update fails' do
let(:exception) { ActiveRecord::RecordNotFound }
let(:perform_ignoring_exceptions) do
begin
subject.perform(start_id, stop_id)
rescue described_class::Error
end
end
before do
allow_any_instance_of(described_class::MergeRequestDiff::ActiveRecord_Relation)
.to receive(:update_all).and_raise(exception)
end
it 'raises an error' do
expect { subject.perform(start_id, stop_id) }
.to raise_exception(described_class::Error)
end
it 'logs the error' do
expect(Rails.logger).to receive(:info).with(
a_string_matching(described_class.name)
.and(matching([start_id, stop_id].inspect))
.and(matching(exception.name))
)
perform_ignoring_exceptions
end
it 'still adds diff commits' do
expect { perform_ignoring_exceptions }
.to change { MergeRequestDiffCommit.count }
end
it 'still adds diff files' do
expect { perform_ignoring_exceptions }
.to change { MergeRequestDiffFile.count }
end
end
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment