BigW Consortium Gitlab

Commit 9cf4e473 by Douwe Maan Committed by Mayra Cabrera

Merge branch 'security-45689-fix-archive-cache-bug' into 'security-10-7'

Serve archive requests with the correct file in all cases (10.7) See merge request gitlab/gitlabhq!2376
parent e71351d4
......@@ -20,11 +20,12 @@ class RepositoryArchiveCleanUpService
private
def clean_up_old_archives
run(%W(find #{path} -not -path #{path} -type f \( -name \*.tar -o -name \*.bz2 -o -name \*.tar.gz -o -name \*.zip \) -maxdepth 2 -mmin +#{mmin} -delete))
run(%W(find #{path} -mindepth 1 -maxdepth 3 -type f \( -name \*.tar -o -name \*.bz2 -o -name \*.tar.gz -o -name \*.zip \) -mmin +#{mmin} -delete))
end
def clean_up_empty_directories
run(%W(find #{path} -not -path #{path} -type d -empty -name \*.git -maxdepth 1 -delete))
run(%W(find #{path} -mindepth 2 -maxdepth 2 -type d -empty -delete))
run(%W(find #{path} -mindepth 1 -maxdepth 1 -type d -empty -delete))
end
def run(cmd)
......
---
title: Serve archive requests with the correct file in all cases
merge_request:
author:
type: security
......@@ -391,18 +391,6 @@ module Gitlab
nil
end
def archive_prefix(ref, sha, append_sha:)
append_sha = (ref != sha) if append_sha.nil?
project_name = self.name.chomp('.git')
formatted_ref = ref.tr('/', '-')
prefix_segments = [project_name, formatted_ref]
prefix_segments << sha if append_sha
prefix_segments.join('-')
end
def archive_metadata(ref, storage_path, format = "tar.gz", append_sha:)
ref ||= root_ref
commit = Gitlab::Git::Commit.find(self, ref)
......@@ -413,12 +401,44 @@ module Gitlab
{
'RepoPath' => path,
'ArchivePrefix' => prefix,
'ArchivePath' => archive_file_path(prefix, storage_path, format),
'ArchivePath' => archive_file_path(storage_path, commit.id, prefix, format),
'CommitId' => commit.id
}
end
def archive_file_path(name, storage_path, format = "tar.gz")
# This is both the filename of the archive (missing the extension) and the
# name of the top-level member of the archive under which all files go
#
# FIXME: The generated prefix is incorrect for projects with hashed
# storage enabled
def archive_prefix(ref, sha, append_sha:)
append_sha = (ref != sha) if append_sha.nil?
project_name = self.name.chomp('.git')
formatted_ref = ref.tr('/', '-')
prefix_segments = [project_name, formatted_ref]
prefix_segments << sha if append_sha
prefix_segments.join('-')
end
private :archive_prefix
# The full path on disk where the archive should be stored. This is used
# to cache the archive between requests.
#
# The path is a global namespace, so needs to be globally unique. This is
# achieved by including `gl_repository` in the path.
#
# Archives relating to a particular ref when the SHA is not present in the
# filename must be invalidated when the ref is updated to point to a new
# SHA. This is achieved by including the SHA in the path.
#
# As this is a full path on disk, it is not "cloud native". This should
# be resolved by either removing the cache, or moving the implementation
# into Gitaly and removing the ArchivePath parameter from the git-archive
# senddata response.
def archive_file_path(storage_path, sha, name, format = "tar.gz")
# Build file path
return nil unless name
......@@ -436,8 +456,9 @@ module Gitlab
end
file_name = "#{name}.#{extension}"
File.join(storage_path, self.name, file_name)
File.join(storage_path, self.gl_repository, sha, file_name)
end
private :archive_file_path
# Return repo size in megabytes
def size
......
......@@ -234,59 +234,72 @@ describe Gitlab::Git::Repository, seed_helper: true do
it_behaves_like 'wrapping gRPC errors', Gitlab::GitalyClient::RefService, :tag_names
end
shared_examples 'archive check' do |extenstion|
it { expect(metadata['ArchivePath']).to match(%r{tmp/gitlab-git-test.git/gitlab-git-test-master-#{SeedRepo::LastCommit::ID}}) }
it { expect(metadata['ArchivePath']).to end_with extenstion }
end
describe '#archive_metadata' do
let(:storage_path) { '/tmp' }
let(:cache_key) { File.join(repository.gl_repository, SeedRepo::LastCommit::ID) }
describe '#archive_prefix' do
let(:project_name) { 'project-name'}
let(:append_sha) { true }
let(:ref) { 'master' }
let(:format) { nil }
before do
expect(repository).to receive(:name).once.and_return(project_name)
end
let(:expected_extension) { 'tar.gz' }
let(:expected_filename) { "#{expected_prefix}.#{expected_extension}" }
let(:expected_path) { File.join(storage_path, cache_key, expected_filename) }
let(:expected_prefix) { "gitlab-git-test-#{ref}-#{SeedRepo::LastCommit::ID}" }
it 'returns parameterised string for a ref containing slashes' do
prefix = repository.archive_prefix('test/branch', 'SHA', append_sha: nil)
subject(:metadata) { repository.archive_metadata(ref, storage_path, format, append_sha: append_sha) }
expect(prefix).to eq("#{project_name}-test-branch-SHA")
it 'sets RepoPath to the repository path' do
expect(metadata['RepoPath']).to eq(repository.path)
end
it 'returns correct string for a ref containing dots' do
prefix = repository.archive_prefix('test.branch', 'SHA', append_sha: nil)
expect(prefix).to eq("#{project_name}-test.branch-SHA")
it 'sets CommitId to the commit SHA' do
expect(metadata['CommitId']).to eq(SeedRepo::LastCommit::ID)
end
it 'returns string with sha when append_sha is false' do
prefix = repository.archive_prefix('test.branch', 'SHA', append_sha: false)
expect(prefix).to eq("#{project_name}-test.branch")
it 'sets ArchivePrefix to the expected prefix' do
expect(metadata['ArchivePrefix']).to eq(expected_prefix)
end
end
describe '#archive' do
let(:metadata) { repository.archive_metadata('master', '/tmp', append_sha: true) }
it 'sets ArchivePath to the expected globally-unique path' do
# This is really important from a security perspective. Think carefully
# before changing it: https://gitlab.com/gitlab-org/gitlab-ce/issues/45689
expect(expected_path).to include(File.join(repository.gl_repository, SeedRepo::LastCommit::ID))
it_should_behave_like 'archive check', '.tar.gz'
end
describe '#archive_zip' do
let(:metadata) { repository.archive_metadata('master', '/tmp', 'zip', append_sha: true) }
expect(metadata['ArchivePath']).to eq(expected_path)
end
it_should_behave_like 'archive check', '.zip'
end
context 'append_sha varies archive path and filename' do
where(:append_sha, :ref, :expected_prefix) do
sha = SeedRepo::LastCommit::ID
describe '#archive_bz2' do
let(:metadata) { repository.archive_metadata('master', '/tmp', 'tbz2', append_sha: true) }
true | 'master' | "gitlab-git-test-master-#{sha}"
true | sha | "gitlab-git-test-#{sha}-#{sha}"
false | 'master' | "gitlab-git-test-master"
false | sha | "gitlab-git-test-#{sha}"
nil | 'master' | "gitlab-git-test-master-#{sha}"
nil | sha | "gitlab-git-test-#{sha}"
end
it_should_behave_like 'archive check', '.tar.bz2'
end
with_them do
it { expect(metadata['ArchivePrefix']).to eq(expected_prefix) }
it { expect(metadata['ArchivePath']).to eq(expected_path) }
end
end
describe '#archive_fallback' do
let(:metadata) { repository.archive_metadata('master', '/tmp', 'madeup', append_sha: true) }
context 'format varies archive path and filename' do
where(:format, :expected_extension) do
nil | 'tar.gz'
'madeup' | 'tar.gz'
'tbz2' | 'tar.bz2'
'zip' | 'zip'
end
it_should_behave_like 'archive check', '.tar.gz'
with_them do
it { expect(metadata['ArchivePrefix']).to eq(expected_prefix) }
it { expect(metadata['ArchivePath']).to eq(expected_path) }
end
end
end
describe '#size' do
......
require 'spec_helper'
describe RepositoryArchiveCleanUpService do
describe '#execute' do
subject(:service) { described_class.new }
subject(:service) { described_class.new }
describe '#execute (new archive locations)' do
let(:sha) { "0" * 40 }
it 'removes outdated archives and directories in a new-style path' do
in_directory_with_files("project-999/#{sha}", %w[tar tar.bz2 tar.gz zip], 3.hours) do |dirname, files|
service.execute
files.each { |filename| expect(File.exist?(filename)).to be_falsy }
expect(File.directory?(dirname)).to be_falsy
expect(File.directory?(File.dirname(dirname))).to be_falsy
end
end
it 'does not remove directories when they contain outdated non-archives' do
in_directory_with_files("project-999/#{sha}", %w[tar conf rb], 2.hours) do |dirname, files|
service.execute
expect(File.directory?(dirname)).to be_truthy
end
end
it 'does not remove in-date archives in a new-style path' do
in_directory_with_files("project-999/#{sha}", %w[tar tar.bz2 tar.gz zip], 1.hour) do |dirname, files|
service.execute
files.each { |filename| expect(File.exist?(filename)).to be_truthy }
end
end
end
describe '#execute (legacy archive locations)' do
context 'when the downloads directory does not exist' do
it 'does not remove any archives' do
path = '/invalid/path/'
stub_repository_downloads_path(path)
allow(File).to receive(:directory?).and_call_original
expect(File).to receive(:directory?).with(path).and_return(false)
expect(service).not_to receive(:clean_up_old_archives)
expect(service).not_to receive(:clean_up_empty_directories)
......@@ -19,7 +51,7 @@ describe RepositoryArchiveCleanUpService do
context 'when the downloads directory exists' do
shared_examples 'invalid archive files' do |dirname, extensions, mtime|
it 'does not remove files and directoy' do
it 'does not remove files and directory' do
in_directory_with_files(dirname, extensions, mtime) do |dir, files|
service.execute
......@@ -43,7 +75,7 @@ describe RepositoryArchiveCleanUpService do
end
context 'with files older than 2 hours inside invalid directories' do
it_behaves_like 'invalid archive files', 'john_doe/sample.git', %w[conf rb tar tar.gz], 2.hours
it_behaves_like 'invalid archive files', 'john/doe/sample.git', %w[conf rb tar tar.gz], 2.hours
end
context 'with files newer than 2 hours that matches valid archive extensions' do
......@@ -58,24 +90,24 @@ describe RepositoryArchiveCleanUpService do
it_behaves_like 'invalid archive files', 'sample.git', %w[conf rb tar tar.gz], 1.hour
end
end
end
def in_directory_with_files(dirname, extensions, mtime)
Dir.mktmpdir do |tmpdir|
stub_repository_downloads_path(tmpdir)
dir = File.join(tmpdir, dirname)
files = create_temporary_files(dir, extensions, mtime)
def in_directory_with_files(dirname, extensions, mtime)
Dir.mktmpdir do |tmpdir|
stub_repository_downloads_path(tmpdir)
dir = File.join(tmpdir, dirname)
files = create_temporary_files(dir, extensions, mtime)
yield(dir, files)
end
yield(dir, files)
end
end
def stub_repository_downloads_path(path)
allow(Gitlab.config.gitlab).to receive(:repository_downloads_path).and_return(path)
end
def stub_repository_downloads_path(path)
allow(Gitlab.config.gitlab).to receive(:repository_downloads_path).and_return(path)
end
def create_temporary_files(dir, extensions, mtime)
FileUtils.mkdir_p(dir)
FileUtils.touch(extensions.map { |ext| File.join(dir, "sample.#{ext}") }, mtime: Time.now - mtime)
end
def create_temporary_files(dir, extensions, mtime)
FileUtils.mkdir_p(dir)
FileUtils.touch(extensions.map { |ext| File.join(dir, "sample.#{ext}") }, mtime: Time.now - mtime)
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment