BigW Consortium Gitlab

Commit 4ebbfe5d by Sean McGivern

Remove serialised diff and commit columns

The st_commits and st_diffs columns on merge_request_diffs historically held the YAML-serialised data for a merge request diff, in a variety of formats. Since 9.5, these have been migrated in the background to two new tables: merge_request_diff_commits and merge_request_diff_files. That has the advantage that we can actually query the data (for instance, to find out how many commits we've stored), and that it can't be in a variety of formats, but must match the new schema. This is the final step of that journey, where we drop those columns and remove all references to them. This is a breaking change to the importer, because we can no longer import diffs created in the old format, and we cannot guarantee the export will be in the new format unless it was generated after this commit.
parent 9cb38f04
......@@ -27,7 +27,7 @@ class Projects::MergeRequests::DiffsController < Projects::MergeRequests::Applic
@merge_request.merge_request_diff
end
@merge_request_diffs = @merge_request.merge_request_diffs.viewable.select_without_diff.order_id_desc
@merge_request_diffs = @merge_request.merge_request_diffs.viewable.order_id_desc
@comparable_diffs = @merge_request_diffs.select { |diff| diff.id < @merge_request_diff.id }
if params[:start_sha].present?
......
......@@ -283,9 +283,9 @@ class MergeRequest < ActiveRecord::Base
if persisted?
merge_request_diff.commit_shas
elsif compare_commits
compare_commits.reverse.map(&:sha)
compare_commits.to_a.reverse.map(&:sha)
else
[]
Array(diff_head_sha)
end
end
......@@ -482,7 +482,7 @@ class MergeRequest < ActiveRecord::Base
def merge_request_diff_for(diff_refs_or_sha)
@merge_request_diffs_by_diff_refs_or_sha ||= Hash.new do |h, diff_refs_or_sha|
diffs = merge_request_diffs.viewable.select_without_diff
diffs = merge_request_diffs.viewable
h[diff_refs_or_sha] =
if diff_refs_or_sha.is_a?(Gitlab::Diff::DiffRefs)
diffs.find_by_diff_refs(diff_refs_or_sha)
......@@ -906,28 +906,18 @@ class MergeRequest < ActiveRecord::Base
# Note that this could also return SHA from now dangling commits
#
def all_commit_shas
if persisted?
# MySQL doesn't support LIMIT in a subquery.
diffs_relation =
if Gitlab::Database.postgresql?
merge_request_diffs.order(id: :desc).limit(100)
else
merge_request_diffs
end
return commit_shas unless persisted?
column_shas = MergeRequestDiffCommit
.where(merge_request_diff: diffs_relation)
.limit(10_000)
.pluck('sha')
diffs_relation = merge_request_diffs
serialised_shas = merge_request_diffs.where.not(st_commits: nil).flat_map(&:commit_shas)
# MySQL doesn't support LIMIT in a subquery.
diffs_relation = diffs_relation.recent if Gitlab::Database.postgresql?
(column_shas + serialised_shas).uniq
elsif compare_commits
compare_commits.to_a.reverse.map(&:id)
else
[diff_head_sha]
end
MergeRequestDiffCommit
.where(merge_request_diff: diffs_relation)
.limit(10_000)
.pluck('sha')
.uniq
end
def merge_commit
......
class MergeRequestDiff < ActiveRecord::Base
include Sortable
include Importable
include Gitlab::EncodingHelper
include ManualInverseAssociation
include IgnorableColumn
# Prevent store of diff if commits amount more then 500
# Don't display more than 100 commits at once
COMMITS_SAFE_SIZE = 100
# Valid types of serialized diffs allowed by Gitlab::Git::Diff
VALID_CLASSES = [Hash, Rugged::Patch, Rugged::Diff::Delta].freeze
ignore_column :st_commits,
:st_diffs
belongs_to :merge_request
manual_inverse_association :merge_request, :merge_request_diff
......@@ -16,9 +16,6 @@ class MergeRequestDiff < ActiveRecord::Base
has_many :merge_request_diff_files, -> { order(:merge_request_diff_id, :relative_order) }
has_many :merge_request_diff_commits, -> { order(:merge_request_diff_id, :relative_order) }
serialize :st_commits # rubocop:disable Cop/ActiveRecordSerialize
serialize :st_diffs # rubocop:disable Cop/ActiveRecordSerialize
state_machine :state, initial: :empty do
state :collected
state :overflow
......@@ -32,6 +29,8 @@ class MergeRequestDiff < ActiveRecord::Base
scope :viewable, -> { without_state(:empty) }
scope :recent, -> { order(id: :desc).limit(100) }
# All diff information is collected from repository after object is created.
# It allows you to override variables like head_commit_sha before getting diff.
after_create :save_git_content, unless: :importing?
......@@ -40,14 +39,6 @@ class MergeRequestDiff < ActiveRecord::Base
find_by(start_commit_sha: diff_refs.start_sha, head_commit_sha: diff_refs.head_sha, base_commit_sha: diff_refs.base_sha)
end
def self.select_without_diff
select(column_names - ['st_diffs'])
end
def st_commits
super || []
end
# Collect information about commits and diff from repository
# and save it to the database as serialized data
def save_git_content
......@@ -129,11 +120,7 @@ class MergeRequestDiff < ActiveRecord::Base
end
def commit_shas
if st_commits.present?
st_commits.map { |commit| commit[:id] }
else
merge_request_diff_commits.map(&:sha)
end
merge_request_diff_commits.map(&:sha)
end
def diff_refs=(new_diff_refs)
......@@ -208,34 +195,11 @@ class MergeRequestDiff < ActiveRecord::Base
end
def commits_count
if st_commits.present?
st_commits.size
else
merge_request_diff_commits.size
end
end
def utf8_st_diffs
return [] if st_diffs.blank?
st_diffs.map do |diff|
diff.each do |k, v|
diff[k] = encode_utf8(v) if v.respond_to?(:encoding)
end
end
merge_request_diff_commits.size
end
private
# Old GitLab implementations may have generated diffs as ["--broken-diff"].
# Avoid an error 500 by ignoring bad elements. See:
# https://gitlab.com/gitlab-org/gitlab-ce/issues/20776
def valid_raw_diff?(raw)
return false unless raw.respond_to?(:each)
raw.any? { |element| VALID_CLASSES.include?(element.class) }
end
def create_merge_request_diff_files(diffs)
rows = diffs.map.with_index do |diff, index|
diff_hash = diff.to_hash.merge(
......@@ -259,9 +223,7 @@ class MergeRequestDiff < ActiveRecord::Base
end
def load_diffs(options)
return Gitlab::Git::DiffCollection.new([]) unless diffs_from_database
raw = diffs_from_database
raw = merge_request_diff_files.map(&:to_hash)
if paths = options[:paths]
raw = raw.select do |diff|
......@@ -272,22 +234,8 @@ class MergeRequestDiff < ActiveRecord::Base
Gitlab::Git::DiffCollection.new(raw, options)
end
def diffs_from_database
return @diffs_from_database if defined?(@diffs_from_database)
@diffs_from_database =
if st_diffs.present?
if valid_raw_diff?(st_diffs)
st_diffs
end
elsif merge_request_diff_files.present?
merge_request_diff_files.map(&:to_hash)
end
end
def load_commits
commits = st_commits.presence || merge_request_diff_commits
commits = commits.map { |commit| Commit.from_hash(commit.to_hash, project) }
commits = merge_request_diff_commits.map { |commit| Commit.from_hash(commit.to_hash, project) }
CommitCollection
.new(merge_request.source_project, commits, merge_request.source_branch)
......
class CleanUpFromMergeRequestDiffsAndCommits < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
include ::EachBatch
end
disable_ddl_transaction!
def up
Gitlab::BackgroundMigration.steal('DeserializeMergeRequestDiffsAndCommits')
# The literal '--- []\n' value is created by the import process and treated
# as null by the application, so we can ignore those - even if we were
# migrating, it wouldn't create any rows.
literal_prefix = Gitlab::Database.postgresql? ? 'E' : ''
non_empty = "
(st_commits IS NOT NULL AND st_commits != #{literal_prefix}'--- []\n')
OR
(st_diffs IS NOT NULL AND st_diffs != #{literal_prefix}'--- []\n')
".squish
MergeRequestDiff.where(non_empty).each_batch(of: 500) do |relation, index|
range = relation.pluck('MIN(id)', 'MAX(id)').first
Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits.new.perform(*range)
end
end
def down
end
end
......@@ -3,8 +3,19 @@ class LimitsToMysql < ActiveRecord::Migration
def up
return unless ActiveRecord::Base.configurations[Rails.env]['adapter'] =~ /^mysql/
change_column :merge_request_diffs, :st_commits, :text, limit: 2147483647
change_column :merge_request_diffs, :st_diffs, :text, limit: 2147483647
# These columns were removed in 10.3, but this is called from two places:
# 1. A migration run after they were added, but before they were removed.
# 2. A rake task which can be run at any time.
#
# Because of item 2, we need these checks.
if column_exists?(:merge_request_diffs, :st_commits)
change_column :merge_request_diffs, :st_commits, :text, limit: 2147483647
end
if column_exists?(:merge_request_diffs, :st_diffs)
change_column :merge_request_diffs, :st_diffs, :text, limit: 2147483647
end
change_column :snippets, :content, :text, limit: 2147483647
change_column :notes, :st_diff, :text, limit: 2147483647
end
......
class RemoveMergeRequestDiffStCommitsAndStDiffs < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def change
remove_column :merge_request_diffs, :st_commits, :text
remove_column :merge_request_diffs, :st_diffs, :text
end
end
......@@ -1011,8 +1011,6 @@ ActiveRecord::Schema.define(version: 20171124132536) do
create_table "merge_request_diffs", force: :cascade do |t|
t.string "state"
t.text "st_commits"
t.text "st_diffs"
t.integer "merge_request_id", null: false
t.datetime "created_at"
t.datetime "updated_at"
......
......@@ -731,8 +731,6 @@ X-Gitlab-Event: Merge Request Hook
"title": "MS-Viewport",
"created_at": "2013-12-03T17:23:34Z",
"updated_at": "2013-12-03T17:23:34Z",
"st_commits": null,
"st_diffs": null,
"milestone_id": null,
"state": "opened",
"merge_status": "unchecked",
......
......@@ -30,7 +30,8 @@ with all their related data and be moved into a new GitLab instance.
| GitLab version | Import/Export version |
| ---------------- | --------------------- |
| 10.0 to current | 0.2.0 |
| 10.3 to current | 0.2.1 |
| 10.0 | 0.2.0 |
| 9.4.0 | 0.1.8 |
| 9.2.0 | 0.1.7 |
| 8.17.0 | 0.1.6 |
......
......@@ -3,7 +3,6 @@ module Gitlab
class PlanEventFetcher < BaseEventFetcher
def initialize(*args)
@projections = [mr_diff_table[:id],
mr_diff_table[:st_commits],
issue_metrics_table[:first_mentioned_in_commit_at]]
super(*args)
......@@ -37,12 +36,7 @@ module Gitlab
def first_time_reference_commit(event)
return nil unless event && merge_request_diff_commits
commits =
if event['st_commits'].present?
YAML.load(event['st_commits'])
else
merge_request_diff_commits[event['id'].to_i]
end
commits = merge_request_diff_commits[event['id'].to_i]
return nil if commits.blank?
......
......@@ -3,7 +3,7 @@ module Gitlab
extend self
# For every version update, the version history in import_export.md has to be kept up to date.
VERSION = '0.2.0'.freeze
VERSION = '0.2.1'.freeze
FILENAME_LIMIT = 50
def export_path(relative_path:)
......
......@@ -133,8 +133,6 @@ methods:
- :type
services:
- :type
merge_request_diff:
- :utf8_st_diffs
merge_request_diff_files:
- :utf8_diff
merge_requests:
......
......@@ -58,7 +58,6 @@ module Gitlab
def setup_models
case @relation_name
when :merge_request_diff then setup_st_diff_commits
when :merge_request_diff_files then setup_diff
when :notes then setup_note
when :project_label, :project_labels then setup_label
......@@ -208,13 +207,6 @@ module Gitlab
relation_class: relation_class)
end
def setup_st_diff_commits
@relation_hash['st_diffs'] = @relation_hash.delete('utf8_st_diffs')
HashUtil.deep_symbolize_array!(@relation_hash['st_diffs'])
HashUtil.deep_symbolize_array_with_date!(@relation_hash['st_commits'])
end
def setup_diff
@relation_hash['diff'] = @relation_hash.delete('utf8_diff')
end
......
require 'spec_helper'
describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :truncate do
describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :truncate, :migration, schema: 20171114162227 do
let(:merge_request_diffs) { table(:merge_request_diffs) }
let(:merge_requests) { table(:merge_requests) }
describe '#perform' do
let(:merge_request) { create(:merge_request) }
let(:merge_request_diff) { merge_request.merge_request_diff }
let(:project) { create(:project, :repository) }
let(:merge_request) { merge_requests.create!(iid: 1, target_project_id: project.id, source_project_id: project.id, target_branch: 'feature', source_branch: 'master').becomes(MergeRequest) }
let(:merge_request_diff) { MergeRequest.find(merge_request.id).create_merge_request_diff }
let(:updated_merge_request_diff) { MergeRequestDiff.find(merge_request_diff.id) }
def diffs_to_hashes(diffs)
......@@ -68,7 +72,7 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :t
let(:stop_id) { described_class::MergeRequestDiff.maximum(:id) }
before do
merge_request.reload_diff(true)
merge_request.create_merge_request_diff
convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files))
convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files))
......@@ -288,7 +292,7 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :t
context 'when the merge request diffs are Rugged::Patch instances' do
let(:commits) { merge_request_diff.commits.map(&:to_hash) }
let(:first_commit) { merge_request.project.repository.commit(merge_request_diff.head_commit_sha) }
let(:first_commit) { project.repository.commit(merge_request_diff.head_commit_sha) }
let(:expected_commits) { commits }
let(:diffs) { first_commit.rugged_diff_from_parent.patches }
let(:expected_diffs) { [] }
......@@ -298,7 +302,7 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :t
context 'when the merge request diffs are Rugged::Diff::Delta instances' do
let(:commits) { merge_request_diff.commits.map(&:to_hash) }
let(:first_commit) { merge_request.project.repository.commit(merge_request_diff.head_commit_sha) }
let(:first_commit) { project.repository.commit(merge_request_diff.head_commit_sha) }
let(:expected_commits) { commits }
let(:diffs) { first_commit.rugged_diff_from_parent.deltas }
let(:expected_diffs) { [] }
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -95,26 +95,12 @@ describe Gitlab::ImportExport::ProjectTreeRestorer do
end
end
it 'has the correct data for merge request st_diffs' do
# makes sure we are renaming the custom method +utf8_st_diffs+ into +st_diffs+
# one MergeRequestDiff uses the new format, where st_diffs is expected to be nil
expect(MergeRequestDiff.where.not(st_diffs: nil).count).to eq(8)
end
it 'has the correct data for merge request diff files' do
expect(MergeRequestDiffFile.where.not(diff: nil).count).to eq(9)
expect(MergeRequestDiffFile.where.not(diff: nil).count).to eq(55)
end
it 'has the correct data for merge request diff commits in serialised and table formats' do
expect(MergeRequestDiff.where.not(st_commits: nil).count).to eq(7)
expect(MergeRequestDiffCommit.count).to eq(6)
end
it 'has the correct time for merge request st_commits' do
st_commits = MergeRequestDiff.where.not(st_commits: nil).first.st_commits
expect(st_commits.first[:committed_date]).to be_kind_of(Time)
it 'has the correct data for merge request diff commits' do
expect(MergeRequestDiffCommit.count).to eq(77)
end
it 'has the correct data for merge request latest_merge_request_diff' do
......
......@@ -93,10 +93,6 @@ describe Gitlab::ImportExport::ProjectTreeSaver do
expect(saved_project_json['merge_requests'].first['merge_request_diff']).not_to be_empty
end
it 'has merge requests diff st_diffs' do
expect(saved_project_json['merge_requests'].first['merge_request_diff']['utf8_st_diffs']).not_to be_nil
end
it 'has merge request diff files' do
expect(saved_project_json['merge_requests'].first['merge_request_diff']['merge_request_diff_files']).not_to be_empty
end
......@@ -172,12 +168,6 @@ describe Gitlab::ImportExport::ProjectTreeSaver do
expect(saved_project_json['custom_attributes'].count).to eq(2)
end
it 'does not complain about non UTF-8 characters in MR diffs' do
ActiveRecord::Base.connection.execute("UPDATE merge_request_diffs SET st_diffs = '---\n- :diff: !binary |-\n LS0tIC9kZXYvbnVsbAorKysgYi9pbWFnZXMvbnVjb3IucGRmCkBAIC0wLDAg\n KzEsMTY3OSBAQAorJVBERi0xLjUNJeLjz9MNCisxIDAgb2JqDTw8L01ldGFk\n YXR'")
expect(project_tree_saver.save).to be true
end
it 'does not complain about non UTF-8 characters in MR diff files' do
ActiveRecord::Base.connection.execute("UPDATE merge_request_diff_files SET diff = '---\n- :diff: !binary |-\n LS0tIC9kZXYvbnVsbAorKysgYi9pbWFnZXMvbnVjb3IucGRmCkBAIC0wLDAg\n KzEsMTY3OSBAQAorJVBERi0xLjUNJeLjz9MNCisxIDAgb2JqDTw8L01ldGFk\n YXR'")
......
......@@ -173,7 +173,6 @@ MergeRequest:
MergeRequestDiff:
- id
- state
- st_commits
- merge_request_id
- created_at
- updated_at
......
require 'spec_helper'
describe MergeRequestDiff do
let(:diff_with_commits) { create(:merge_request).merge_request_diff }
describe 'create new record' do
subject { create(:merge_request).merge_request_diff }
subject { diff_with_commits }
it { expect(subject).to be_valid }
it { expect(subject).to be_persisted }
......@@ -23,57 +25,41 @@ describe MergeRequestDiff do
end
describe '#diffs' do
let(:mr) { create(:merge_request, :with_diffs) }
let(:mr_diff) { mr.merge_request_diff }
context 'when the :ignore_whitespace_change option is set' do
it 'creates a new compare object instead of loading from the DB' do
expect(mr_diff).not_to receive(:load_diffs)
expect(mr_diff.compare).to receive(:diffs).and_call_original
expect(diff_with_commits).not_to receive(:load_diffs)
expect(diff_with_commits.compare).to receive(:diffs).and_call_original
mr_diff.raw_diffs(ignore_whitespace_change: true)
diff_with_commits.raw_diffs(ignore_whitespace_change: true)
end
end
context 'when the raw diffs are empty' do
before do
MergeRequestDiffFile.delete_all(merge_request_diff_id: mr_diff.id)
end
it 'returns an empty DiffCollection' do
expect(mr_diff.raw_diffs).to be_a(Gitlab::Git::DiffCollection)
expect(mr_diff.raw_diffs).to be_empty
end
end
context 'when the raw diffs have invalid content' do
before do
MergeRequestDiffFile.delete_all(merge_request_diff_id: mr_diff.id)
mr_diff.update_attributes(st_diffs: ["--broken-diff"])
MergeRequestDiffFile.delete_all(merge_request_diff_id: diff_with_commits.id)
end
it 'returns an empty DiffCollection' do
expect(mr_diff.raw_diffs.to_a).to be_empty
expect(mr_diff.raw_diffs).to be_a(Gitlab::Git::DiffCollection)
expect(mr_diff.raw_diffs).to be_empty
expect(diff_with_commits.raw_diffs).to be_a(Gitlab::Git::DiffCollection)
expect(diff_with_commits.raw_diffs).to be_empty
end
end
context 'when the raw diffs exist' do
it 'returns the diffs' do
expect(mr_diff.raw_diffs).to be_a(Gitlab::Git::DiffCollection)
expect(mr_diff.raw_diffs).not_to be_empty
expect(diff_with_commits.raw_diffs).to be_a(Gitlab::Git::DiffCollection)
expect(diff_with_commits.raw_diffs).not_to be_empty
end
context 'when the :paths option is set' do
let(:diffs) { mr_diff.raw_diffs(paths: ['files/ruby/popen.rb', 'files/ruby/popen.rb']) }
let(:diffs) { diff_with_commits.raw_diffs(paths: ['files/ruby/popen.rb', 'files/ruby/popen.rb']) }
it 'only returns diffs that match the (old path, new path) given' do
expect(diffs.map(&:new_path)).to contain_exactly('files/ruby/popen.rb')
end
it 'uses the diffs from the DB' do
expect(mr_diff).to receive(:load_diffs)
expect(diff_with_commits).to receive(:load_diffs)
diffs
end
......@@ -117,51 +103,29 @@ describe MergeRequestDiff do
end
describe '#commit_shas' do
it 'returns all commits SHA using serialized commits' do
subject.st_commits = [
{ id: 'sha1' },
{ id: 'sha2' }
]
expect(subject.commit_shas).to eq(%w(sha1 sha2))
it 'returns all commit SHAs using commits from the DB' do
expect(diff_with_commits.commit_shas).not_to be_empty
expect(diff_with_commits.commit_shas).to all(match(/\h{40}/))
end
end
describe '#compare_with' do
subject { create(:merge_request, source_branch: 'fix').merge_request_diff }
it 'delegates compare to the service' do
expect(CompareService).to receive(:new).and_call_original
subject.compare_with(nil)
diff_with_commits.compare_with(nil)
end
it 'uses git diff A..B approach by default' do
diffs = subject.compare_with('0b4bc9a49b562e85de7cc9e834518ea6828729b9').diffs
diffs = diff_with_commits.compare_with('0b4bc9a49b562e85de7cc9e834518ea6828729b9').diffs
expect(diffs.size).to eq(3)
expect(diffs.size).to eq(21)
end
end
describe '#commits_count' do
it 'returns number of commits using serialized commits' do
subject.st_commits = [
{ id: 'sha1' },
{ id: 'sha2' }
]
expect(subject.commits_count).to eq 2
end
end
describe '#utf8_st_diffs' do
it 'does not raise error when a hash value is in binary' do
subject.st_diffs = [
{ diff: "\0" },
{ diff: "\x05\x00\x68\x65\x6c\x6c\x6f" }
]
expect { subject.utf8_st_diffs }.not_to raise_error
expect(diff_with_commits.commits_count).to eq(29)
end
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment