1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
module Gitlab
module BackgroundMigration
class DeserializeMergeRequestDiffsAndCommits
attr_reader :diff_ids, :commit_rows, :file_rows
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
end
BUFFER_ROWS = 1000
def perform(start_id, stop_id)
merge_request_diffs = MergeRequestDiff
.select(:id, :st_commits, :st_diffs)
.where('st_commits IS NOT NULL OR st_diffs IS NOT NULL')
.where(id: start_id..stop_id)
reset_buffers!
merge_request_diffs.each do |merge_request_diff|
commits, files = single_diff_rows(merge_request_diff)
diff_ids << merge_request_diff.id
commit_rows.concat(commits)
file_rows.concat(files)
if diff_ids.length > BUFFER_ROWS ||
commit_rows.length > BUFFER_ROWS ||
file_rows.length > BUFFER_ROWS
flush_buffers!
end
end
flush_buffers!
end
private
def reset_buffers!
@diff_ids = []
@commit_rows = []
@file_rows = []
end
def flush_buffers!
if diff_ids.any?
MergeRequestDiff.transaction do
Gitlab::Database.bulk_insert('merge_request_diff_commits', commit_rows)
Gitlab::Database.bulk_insert('merge_request_diff_files', file_rows)
MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil)
end
end
reset_buffers!
end
def single_diff_rows(merge_request_diff)
sha_attribute = Gitlab::Database::ShaAttribute.new
commits = YAML.load(merge_request_diff.st_commits) rescue []
commit_rows = commits.map.with_index do |commit, index|
commit_hash = commit.to_hash.with_indifferent_access.except(:parent_ids)
sha = commit_hash.delete(:id)
commit_hash.merge(
merge_request_diff_id: merge_request_diff.id,
relative_order: index,
sha: sha_attribute.type_cast_for_database(sha)
)
end
diffs = YAML.load(merge_request_diff.st_diffs) rescue []
diffs = [] unless valid_raw_diffs?(diffs)
file_rows = diffs.map.with_index do |diff, index|
diff_hash = diff.to_hash.with_indifferent_access.merge(
binary: false,
merge_request_diff_id: merge_request_diff.id,
relative_order: index
)
diff_hash.tap do |hash|
diff_text = hash[:diff]
hash[:too_large] = !!hash[:too_large]
hash[:a_mode] ||= guess_mode(hash[:new_file], hash[:diff])
hash[:b_mode] ||= guess_mode(hash[:deleted_file], hash[:diff])
# Compatibility with old diffs created with Psych.
if diff_text.encoding == Encoding::BINARY && !diff_text.ascii_only?
hash[:binary] = true
hash[:diff] = [diff_text].pack('m0')
end
end
end
[commit_rows, file_rows]
end
# This doesn't have to be 100% accurate, because it's only used for
# display - it won't change file modes in the repository. Submodules are
# created as 600, regular files as 644.
def guess_mode(file_missing, diff)
return '0' if file_missing
diff.include?('Subproject commit') ? '160000' : '100644'
end
# Unlike MergeRequestDiff#valid_raw_diff?, don't count Rugged objects as
# valid, because we don't render them usefully anyway.
def valid_raw_diffs?(diffs)
return false unless diffs.respond_to?(:each)
diffs.all? { |diff| diff.is_a?(Hash) }
end
end
end
end