BigW Consortium Gitlab

Commit 942bd5b4 by Douwe Maan

Merge branch 'blob-batch-methods' into 'master'

Add Gitlab::Git::Blob.batch method See merge request !13225
parents a4e394ca 29a1c5a1
...@@ -20,66 +20,7 @@ module Gitlab ...@@ -20,66 +20,7 @@ module Gitlab
if is_enabled if is_enabled
find_by_gitaly(repository, sha, path) find_by_gitaly(repository, sha, path)
else else
find_by_rugged(repository, sha, path) find_by_rugged(repository, sha, path, limit: MAX_DATA_DISPLAY_SIZE)
end
end
end
def find_by_gitaly(repository, sha, path)
path = path.sub(/\A\/*/, '')
path = '/' if path.empty?
name = File.basename(path)
entry = Gitlab::GitalyClient::CommitService.new(repository).tree_entry(sha, path, MAX_DATA_DISPLAY_SIZE)
return unless entry
case entry.type
when :COMMIT
new(
id: entry.oid,
name: name,
size: 0,
data: '',
path: path,
commit_id: sha
)
when :BLOB
new(
id: entry.oid,
name: name,
size: entry.size,
data: entry.data.dup,
mode: entry.mode.to_s(8),
path: path,
commit_id: sha,
binary: binary?(entry.data)
)
end
end
def find_by_rugged(repository, sha, path)
commit = repository.lookup(sha)
root_tree = commit.tree
blob_entry = find_entry_by_path(repository, root_tree.oid, path)
return nil unless blob_entry
if blob_entry[:type] == :commit
submodule_blob(blob_entry, path, sha)
else
blob = repository.lookup(blob_entry[:oid])
if blob
new(
id: blob.oid,
name: blob_entry[:name],
size: blob.size,
data: blob.content(MAX_DATA_DISPLAY_SIZE),
mode: blob_entry[:filemode].to_s(8),
path: path,
commit_id: sha,
binary: blob.binary?
)
end end
end end
end end
...@@ -109,6 +50,21 @@ module Gitlab ...@@ -109,6 +50,21 @@ module Gitlab
detect && detect[:type] == :binary detect && detect[:type] == :binary
end end
# Returns an array of Blob instances, specified in blob_references as
# [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0 then the
# full blob contents are returned. If blob_size_limit >= 0 then each blob will
# contain no more than limit bytes in its data attribute.
#
# Keep in mind that this method may allocate a lot of memory. It is up
# to the caller to limit the number of blobs and blob_size_limit.
#
def batch(repository, blob_references, blob_size_limit: nil)
blob_size_limit ||= MAX_DATA_DISPLAY_SIZE
blob_references.map do |sha, path|
find_by_rugged(repository, sha, path, limit: blob_size_limit)
end
end
private private
# Recursive search of blob id by path # Recursive search of blob id by path
...@@ -153,6 +109,66 @@ module Gitlab ...@@ -153,6 +109,66 @@ module Gitlab
commit_id: sha commit_id: sha
) )
end end
def find_by_gitaly(repository, sha, path)
path = path.sub(/\A\/*/, '')
path = '/' if path.empty?
name = File.basename(path)
entry = Gitlab::GitalyClient::CommitService.new(repository).tree_entry(sha, path, MAX_DATA_DISPLAY_SIZE)
return unless entry
case entry.type
when :COMMIT
new(
id: entry.oid,
name: name,
size: 0,
data: '',
path: path,
commit_id: sha
)
when :BLOB
new(
id: entry.oid,
name: name,
size: entry.size,
data: entry.data.dup,
mode: entry.mode.to_s(8),
path: path,
commit_id: sha,
binary: binary?(entry.data)
)
end
end
def find_by_rugged(repository, sha, path, limit:)
commit = repository.lookup(sha)
root_tree = commit.tree
blob_entry = find_entry_by_path(repository, root_tree.oid, path)
return nil unless blob_entry
if blob_entry[:type] == :commit
submodule_blob(blob_entry, path, sha)
else
blob = repository.lookup(blob_entry[:oid])
if blob
new(
id: blob.oid,
name: blob_entry[:name],
size: blob.size,
# Rugged::Blob#content is expensive; don't call it if we don't have to.
data: limit.zero? ? '' : blob.content(limit),
mode: blob_entry[:filemode].to_s(8),
path: path,
commit_id: sha,
binary: blob.binary?
)
end
end
end
end end
def initialize(options) def initialize(options)
......
...@@ -152,6 +152,77 @@ describe Gitlab::Git::Blob, seed_helper: true do ...@@ -152,6 +152,77 @@ describe Gitlab::Git::Blob, seed_helper: true do
end end
end end
describe '.batch' do
let(:blob_references) do
[
[SeedRepo::Commit::ID, "files/ruby/popen.rb"],
[SeedRepo::Commit::ID, 'six']
]
end
subject { described_class.batch(repository, blob_references) }
it { expect(subject.size).to eq(blob_references.size) }
context 'first blob' do
let(:blob) { subject[0] }
it { expect(blob.id).to eq(SeedRepo::RubyBlob::ID) }
it { expect(blob.name).to eq(SeedRepo::RubyBlob::NAME) }
it { expect(blob.path).to eq("files/ruby/popen.rb") }
it { expect(blob.commit_id).to eq(SeedRepo::Commit::ID) }
it { expect(blob.data[0..10]).to eq(SeedRepo::RubyBlob::CONTENT[0..10]) }
it { expect(blob.size).to eq(669) }
it { expect(blob.mode).to eq("100644") }
end
context 'second blob' do
let(:blob) { subject[1] }
it { expect(blob.id).to eq('409f37c4f05865e4fb208c771485f211a22c4c2d') }
it { expect(blob.data).to eq('') }
it 'does not mark the blob as binary' do
expect(blob).not_to be_binary
end
end
context 'limiting' do
subject { described_class.batch(repository, blob_references, blob_size_limit: blob_size_limit) }
context 'default' do
let(:blob_size_limit) { nil }
it 'limits to MAX_DATA_DISPLAY_SIZE' do
stub_const('Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE', 100)
expect(subject.first.data.size).to eq(100)
end
end
context 'positive' do
let(:blob_size_limit) { 10 }
it { expect(subject.first.data.size).to eq(10) }
end
context 'zero' do
let(:blob_size_limit) { 0 }
it { expect(subject.first.data).to eq('') }
end
context 'negative' do
let(:blob_size_limit) { -1 }
it 'ignores MAX_DATA_DISPLAY_SIZE' do
stub_const('Gitlab::Git::Blob::MAX_DATA_DISPLAY_SIZE', 100)
expect(subject.first.data.size).to eq(669)
end
end
end
end
describe 'encoding' do describe 'encoding' do
context 'file with russian text' do context 'file with russian text' do
let(:blob) { Gitlab::Git::Blob.find(repository, SeedRepo::Commit::ID, "encoding/russian.rb") } let(:blob) { Gitlab::Git::Blob.find(repository, SeedRepo::Commit::ID, "encoding/russian.rb") }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment