BigW Consortium Gitlab

Batchload blobs for diff generation

After installing a new gem, batch-loader, a construct can be used to queue data to be fetched in bulk. The gem was also introduced in both gitlab-org/gitlab-ce!14680 and gitlab-org/gitlab-ce!14846, but those MRs are not merged yet. For the generation of diffs, both the old blob and the new blob need to be loaded, and this has to happen for every file in the diff. Now we collect all of these requests so that only 1 fetch is done. Three `.allow_n_plus_1_calls` blocks have been removed, which I expect to be valid, but this needs to be confirmed by a full CI run. Possibly closes: - https://gitlab.com/gitlab-org/gitlab-ce/issues/37445 - https://gitlab.com/gitlab-org/gitlab-ce/issues/37599 - https://gitlab.com/gitlab-org/gitlab-ce/issues/37431
parent 6dd89059
...@@ -263,6 +263,8 @@ gem 'gettext_i18n_rails', '~> 1.8.0' ...@@ -263,6 +263,8 @@ gem 'gettext_i18n_rails', '~> 1.8.0'
gem 'gettext_i18n_rails_js', '~> 1.2.0' gem 'gettext_i18n_rails_js', '~> 1.2.0'
gem 'gettext', '~> 3.2.2', require: false, group: :development gem 'gettext', '~> 3.2.2', require: false, group: :development
gem 'batch-loader'
# Perf bar # Perf bar
gem 'peek', '~> 1.0.1' gem 'peek', '~> 1.0.1'
gem 'peek-gc', '~> 0.0.2' gem 'peek-gc', '~> 0.0.2'
......
...@@ -73,6 +73,7 @@ GEM ...@@ -73,6 +73,7 @@ GEM
thread_safe (~> 0.3, >= 0.3.1) thread_safe (~> 0.3, >= 0.3.1)
babosa (1.0.2) babosa (1.0.2)
base32 (0.3.2) base32 (0.3.2)
batch-loader (1.1.1)
bcrypt (3.1.11) bcrypt (3.1.11)
bcrypt_pbkdf (1.0.0) bcrypt_pbkdf (1.0.0)
benchmark-ips (2.3.0) benchmark-ips (2.3.0)
...@@ -982,6 +983,7 @@ DEPENDENCIES ...@@ -982,6 +983,7 @@ DEPENDENCIES
awesome_print (~> 1.2.0) awesome_print (~> 1.2.0)
babosa (~> 1.0.2) babosa (~> 1.0.2)
base32 (~> 0.3.0) base32 (~> 0.3.0)
batch-loader
bcrypt_pbkdf (~> 1.0) bcrypt_pbkdf (~> 1.0)
benchmark-ips (~> 2.3.0) benchmark-ips (~> 2.3.0)
better_errors (~> 2.1.0) better_errors (~> 2.1.0)
......
...@@ -22,12 +22,7 @@ class Projects::CommitController < Projects::ApplicationController ...@@ -22,12 +22,7 @@ class Projects::CommitController < Projects::ApplicationController
apply_diff_view_cookie! apply_diff_view_cookie!
respond_to do |format| respond_to do |format|
format.html do format.html { render }
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37599
Gitlab::GitalyClient.allow_n_plus_1_calls do
render
end
end
format.diff { render text: @commit.to_diff } format.diff { render text: @commit.to_diff }
format.patch { render text: @commit.to_patch } format.patch { render text: @commit.to_patch }
end end
...@@ -112,7 +107,7 @@ class Projects::CommitController < Projects::ApplicationController ...@@ -112,7 +107,7 @@ class Projects::CommitController < Projects::ApplicationController
end end
def commit def commit
@noteable = @commit ||= @project.commit(params[:id]) @noteable = @commit ||= @project.commit_by(oid: params[:id])
end end
def define_commit_vars def define_commit_vars
......
...@@ -10,10 +10,7 @@ class Projects::MergeRequests::DiffsController < Projects::MergeRequests::Applic ...@@ -10,10 +10,7 @@ class Projects::MergeRequests::DiffsController < Projects::MergeRequests::Applic
def show def show
@environment = @merge_request.environments_for(current_user).last @environment = @merge_request.environments_for(current_user).last
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37431 render json: { html: view_to_html_string("projects/merge_requests/diffs/_diffs") }
Gitlab::GitalyClient.allow_n_plus_1_calls do
render json: { html: view_to_html_string("projects/merge_requests/diffs/_diffs") }
end
end end
def diff_for_path def diff_for_path
......
...@@ -76,12 +76,24 @@ class Blob < SimpleDelegator ...@@ -76,12 +76,24 @@ class Blob < SimpleDelegator
new(blob, project) new(blob, project)
end end
def self.lazy(project, commit_id, path)
BatchLoader.for(commit_id: commit_id, path: path).batch do |items, loader|
project.repository.blobs_at(items.map(&:values)).each do |blob|
loader.call({ commit_id: blob.commit_id, path: blob.path }, blob) if blob
end
end
end
def initialize(blob, project = nil) def initialize(blob, project = nil)
@project = project @project = project
super(blob) super(blob)
end end
def inspect
"#<#{self.class.name} oid:#{id[0..8]} commit:#{commit_id[0..8]} path:#{path}>"
end
# Returns the data of the blob. # Returns the data of the blob.
# #
# If the blob is a text based blob the content is converted to UTF-8 and any # If the blob is a text based blob the content is converted to UTF-8 and any
...@@ -95,7 +107,10 @@ class Blob < SimpleDelegator ...@@ -95,7 +107,10 @@ class Blob < SimpleDelegator
end end
def load_all_data! def load_all_data!
super(project.repository) if project # Endpoint needed: gitlab-org/gitaly#756
Gitlab::GitalyClient.allow_n_plus_1_calls do
super(project.repository) if project
end
end end
def no_highlighting? def no_highlighting?
......
...@@ -84,7 +84,7 @@ class Commit ...@@ -84,7 +84,7 @@ class Commit
end end
def id def id
@raw.id raw.id
end end
def ==(other) def ==(other)
...@@ -361,7 +361,7 @@ class Commit ...@@ -361,7 +361,7 @@ class Commit
@deltas ||= raw.deltas @deltas ||= raw.deltas
end end
def diffs(diff_options = nil) def diffs(diff_options = {})
Gitlab::Diff::FileCollection::Commit.new(self, diff_options: diff_options) Gitlab::Diff::FileCollection::Commit.new(self, diff_options: diff_options)
end end
......
...@@ -478,6 +478,11 @@ class Repository ...@@ -478,6 +478,11 @@ class Repository
nil nil
end end
# items is an Array like: [[oid, path], [oid1, path1]]
def blobs_at(items)
raw_repository.batch_blobs(items).map { |blob| Blob.decorate(blob, project) }
end
def root_ref def root_ref
if raw_repository if raw_repository
raw_repository.root_ref raw_repository.root_ref
......
---
title: Fetch blobs in bulk when generating diffs
merge_request:
author:
type: performance
Rails.application.config.middleware.use(BatchLoader::Middleware)
...@@ -25,6 +25,10 @@ module Gitlab ...@@ -25,6 +25,10 @@ module Gitlab
@repository = repository @repository = repository
@diff_refs = diff_refs @diff_refs = diff_refs
@fallback_diff_refs = fallback_diff_refs @fallback_diff_refs = fallback_diff_refs
# Ensure items are collected in the batch
new_blob
old_blob
end end
def position(position_marker, position_type: :text) def position(position_marker, position_type: :text)
...@@ -95,21 +99,15 @@ module Gitlab ...@@ -95,21 +99,15 @@ module Gitlab
end end
def new_blob def new_blob
return @new_blob if defined?(@new_blob) return unless new_content_sha
sha = new_content_sha
return @new_blob = nil unless sha
@new_blob = repository.blob_at(sha, file_path) Blob.lazy(repository.project, new_content_sha, file_path)
end end
def old_blob def old_blob
return @old_blob if defined?(@old_blob) return unless old_content_sha
sha = old_content_sha
return @old_blob = nil unless sha
@old_blob = repository.blob_at(sha, old_path) Blob.lazy(repository.project, old_content_sha, old_path)
end end
def content_sha def content_sha
......
...@@ -22,10 +22,7 @@ module Gitlab ...@@ -22,10 +22,7 @@ module Gitlab
end end
def diff_files def diff_files
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37445 @diff_files ||= @diffs.decorate! { |diff| decorate_diff!(diff) }
Gitlab::GitalyClient.allow_n_plus_1_calls do
@diff_files ||= @diffs.decorate! { |diff| decorate_diff!(diff) }
end
end end
def diff_file_with_old_path(old_path) def diff_file_with_old_path(old_path)
......
...@@ -179,6 +179,8 @@ module Gitlab ...@@ -179,6 +179,8 @@ module Gitlab
) )
end end
end end
rescue Rugged::ReferenceError
nil
end end
def rugged_raw(repository, sha, limit:) def rugged_raw(repository, sha, limit:)
......
...@@ -1161,6 +1161,11 @@ module Gitlab ...@@ -1161,6 +1161,11 @@ module Gitlab
Gitlab::Git::Blob.find(self, sha, path) unless Gitlab::Git.blank_ref?(sha) Gitlab::Git::Blob.find(self, sha, path) unless Gitlab::Git.blank_ref?(sha)
end end
# Items should be of format [[commit_id, path], [commit_id1, path1]]
def batch_blobs(items, blob_size_limit: nil)
Gitlab::Git::Blob.batch(self, items, blob_size_limit: blob_size_limit)
end
def commit_index(user, branch_name, index, options) def commit_index(user, branch_name, index, options)
committer = user_to_committer(user) committer = user_to_committer(user)
......
require 'spec_helper' require 'spec_helper'
describe Projects::CommitController do describe Projects::CommitController do
let(:project) { create(:project, :repository) } set(:project) { create(:project, :repository) }
let(:user) { create(:user) } set(:user) { create(:user) }
let(:commit) { project.commit("master") } let(:commit) { project.commit("master") }
let(:master_pickable_sha) { '7d3b0f7cff5f37573aea97cebfd5692ea1689924' } let(:master_pickable_sha) { '7d3b0f7cff5f37573aea97cebfd5692ea1689924' }
let(:master_pickable_commit) { project.commit(master_pickable_sha) } let(:master_pickable_commit) { project.commit(master_pickable_sha) }
before do before do
sign_in(user) sign_in(user)
project.team << [user, :master] project.add_master(user)
end end
describe 'GET show' do describe 'GET show' do
......
...@@ -116,12 +116,8 @@ describe Gitlab::Diff::File do ...@@ -116,12 +116,8 @@ describe Gitlab::Diff::File do
end end
context 'when renamed' do context 'when renamed' do
let(:commit) { project.commit('6907208d755b60ebeacb2e9dfea74c92c3449a1f') } let(:commit) { project.commit('94bb47ca1297b7b3731ff2a36923640991e9236f') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('files/js/commit.coffee') } let(:diff_file) { commit.diffs.diff_file_with_new_path('CHANGELOG.md') }
before do
allow(diff_file.new_blob).to receive(:id).and_return(diff_file.old_blob.id)
end
it 'returns false' do it 'returns false' do
expect(diff_file.content_changed?).to be_falsey expect(diff_file.content_changed?).to be_falsey
......
...@@ -16,6 +16,23 @@ describe Blob do ...@@ -16,6 +16,23 @@ describe Blob do
end end
end end
describe '.lazy' do
let(:project) { create(:project, :repository) }
let(:commit) { project.commit_by(oid: 'e63f41fe459e62e1228fcef60d7189127aeba95a') }
it 'fetches all blobs when the first is accessed' do
changelog = described_class.lazy(project, commit.id, 'CHANGELOG')
contributing = described_class.lazy(project, commit.id, 'CONTRIBUTING.md')
expect(Gitlab::Git::Blob).to receive(:batch).once.and_call_original
expect(Gitlab::Git::Blob).not_to receive(:find)
# Access property so the values are loaded
changelog.id
contributing.id
end
end
describe '#data' do describe '#data' do
context 'using a binary blob' do context 'using a binary blob' do
it 'returns the data as-is' do it 'returns the data as-is' do
......
...@@ -32,10 +32,8 @@ describe DiffViewer::Base do ...@@ -32,10 +32,8 @@ describe DiffViewer::Base do
end end
context 'when the binaryness does not match' do context 'when the binaryness does not match' do
before do let(:commit) { project.commit_by(oid: 'ae73cb07c9eeaf35924a10f713b364d32b2dd34f') }
allow(diff_file.old_blob).to receive(:binary?).and_return(false) let(:diff_file) { commit.diffs.diff_file_with_new_path('Gemfile.zip') }
allow(diff_file.new_blob).to receive(:binary?).and_return(false)
end
it 'returns false' do it 'returns false' do
expect(viewer_class.can_render?(diff_file)).to be_falsey expect(viewer_class.can_render?(diff_file)).to be_falsey
...@@ -60,8 +58,7 @@ describe DiffViewer::Base do ...@@ -60,8 +58,7 @@ describe DiffViewer::Base do
context 'when the binaryness does not match' do context 'when the binaryness does not match' do
before do before do
allow(diff_file.old_blob).to receive(:binary?).and_return(true) allow_any_instance_of(Blob).to receive(:binary?).and_return(true)
allow(diff_file.new_blob).to receive(:binary?).and_return(true)
end end
it 'returns false' do it 'returns false' do
...@@ -77,12 +74,12 @@ describe DiffViewer::Base do ...@@ -77,12 +74,12 @@ describe DiffViewer::Base do
end end
context 'when the file was renamed and only the old blob is supported' do context 'when the file was renamed and only the old blob is supported' do
let(:commit) { project.commit('2f63565e7aac07bcdadb654e253078b727143ec4') } let(:commit) { project.commit_by(oid: '2f63565e7aac07bcdadb654e253078b727143ec4') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('files/images/6049019_460s.jpg') } let(:diff_file) { commit.diffs.diff_file_with_new_path('files/images/6049019_460s.jpg') }
before do before do
allow(diff_file).to receive(:renamed_file?).and_return(true) allow(diff_file).to receive(:renamed_file?).and_return(true)
allow(diff_file.new_blob).to receive(:extension).and_return('jpeg') viewer_class.extensions = %w(notjpg)
end end
it 'returns false' do it 'returns false' do
...@@ -94,8 +91,7 @@ describe DiffViewer::Base do ...@@ -94,8 +91,7 @@ describe DiffViewer::Base do
describe '#collapsed?' do describe '#collapsed?' do
context 'when the combined blob size is larger than the collapse limit' do context 'when the combined blob size is larger than the collapse limit' do
before do before do
allow(diff_file.old_blob).to receive(:raw_size).and_return(512.kilobytes) allow(diff_file).to receive(:raw_size).and_return(1025.kilobytes)
allow(diff_file.new_blob).to receive(:raw_size).and_return(513.kilobytes)
end end
it 'returns true' do it 'returns true' do
...@@ -113,8 +109,7 @@ describe DiffViewer::Base do ...@@ -113,8 +109,7 @@ describe DiffViewer::Base do
describe '#too_large?' do describe '#too_large?' do
context 'when the combined blob size is larger than the size limit' do context 'when the combined blob size is larger than the size limit' do
before do before do
allow(diff_file.old_blob).to receive(:raw_size).and_return(2.megabytes) allow(diff_file).to receive(:raw_size).and_return(6.megabytes)
allow(diff_file.new_blob).to receive(:raw_size).and_return(4.megabytes)
end end
it 'returns true' do it 'returns true' do
...@@ -132,8 +127,7 @@ describe DiffViewer::Base do ...@@ -132,8 +127,7 @@ describe DiffViewer::Base do
describe '#render_error' do describe '#render_error' do
context 'when the combined blob size is larger than the size limit' do context 'when the combined blob size is larger than the size limit' do
before do before do
allow(diff_file.old_blob).to receive(:raw_size).and_return(2.megabytes) allow(diff_file).to receive(:raw_size).and_return(6.megabytes)
allow(diff_file.new_blob).to receive(:raw_size).and_return(4.megabytes)
end end
it 'returns :too_large' do it 'returns :too_large' do
......
require 'spec_helper' require 'spec_helper'
describe DiffViewer::ServerSide do describe DiffViewer::ServerSide do
let(:project) { create(:project, :repository) } set(:project) { create(:project, :repository) }
let(:commit) { project.commit('570e7b2abdd848b95f2f578043fc23bd6f6fd24d') } let(:commit) { project.commit_by(oid: '570e7b2abdd848b95f2f578043fc23bd6f6fd24d') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('files/ruby/popen.rb') } let!(:diff_file) { commit.diffs.diff_file_with_new_path('files/ruby/popen.rb') }
let(:viewer_class) do let(:viewer_class) do
Class.new(DiffViewer::Base) do Class.new(DiffViewer::Base) do
...@@ -15,8 +15,7 @@ describe DiffViewer::ServerSide do ...@@ -15,8 +15,7 @@ describe DiffViewer::ServerSide do
describe '#prepare!' do describe '#prepare!' do
it 'loads all diff file data' do it 'loads all diff file data' do
expect(diff_file.old_blob).to receive(:load_all_data!) expect(Blob).to receive(:lazy).at_least(:twice)
expect(diff_file.new_blob).to receive(:load_all_data!)
subject.prepare! subject.prepare!
end end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment