BigW Consortium Gitlab

Add service to clean up repository archive cache

Replace invocation of `find` with Ruby code that matches old cached files in a better, and safe way to avoid data-integrity issues.
parent 6ae177ef
......@@ -11,16 +11,6 @@ class Repository
attr_accessor :path_with_namespace, :project
def self.clean_old_archives
Gitlab::Metrics.measure(:clean_old_archives) do
repository_downloads_path = Gitlab.config.gitlab.repository_downloads_path
return unless File.directory?(repository_downloads_path)
Gitlab::Popen.popen(%W(find #{repository_downloads_path} -not -path #{repository_downloads_path} -mmin +120 -delete))
end
end
def initialize(path_with_namespace, project)
@path_with_namespace = path_with_namespace
@project = project
......
class RepositoryArchiveCleanUpService
ALLOWED_ARCHIVE_EXTENSIONS = %w[tar tar.bz2 tar.gz zip].join(',').freeze
LAST_MODIFIED_TIME_IN_MINUTES = 120
def initialize(mmin = LAST_MODIFIED_TIME_IN_MINUTES)
@mmin = mmin
@path = Gitlab.config.gitlab.repository_downloads_path
end
def execute
Gitlab::Metrics.measure(:repository_archive_clean_up) do
return unless File.directory?(path)
clean_up_old_archives
clean_up_empty_directories
end
end
private
attr_reader :mmin, :path
def clean_up_old_archives
Dir.glob("#{path}/**.git/*{#{ALLOWED_ARCHIVE_EXTENSIONS}}") do |filename|
File.delete(filename) if older?(filename)
end
end
def older?(filename)
File.exist?(filename) && File.new(filename).mtime < (Time.now - mmin * 60)
end
def clean_up_empty_directories
Dir.glob("#{path}/**.git/").reverse_each do |dir|
Dir.rmdir(dir) if empty?(dir)
end
end
def empty?(dir)
(Dir.entries(dir) - %w[. ..]).empty?
end
end
......@@ -4,6 +4,6 @@ class RepositoryArchiveCacheWorker
sidekiq_options queue: :default
def perform
Repository.clean_old_archives
RepositoryArchiveCleanUpService.new.execute
end
end
......@@ -1172,30 +1172,6 @@ describe Repository, models: true do
end
end
describe '.clean_old_archives' do
let(:path) { Gitlab.config.gitlab.repository_downloads_path }
context 'when the downloads directory does not exist' do
it 'does not remove any archives' do
expect(File).to receive(:directory?).with(path).and_return(false)
expect(Gitlab::Popen).not_to receive(:popen)
described_class.clean_old_archives
end
end
context 'when the downloads directory exists' do
it 'removes old archives' do
expect(File).to receive(:directory?).with(path).and_return(true)
expect(Gitlab::Popen).to receive(:popen)
described_class.clean_old_archives
end
end
end
describe "#keep_around" do
it "stores a reference to the specified commit sha so it isn't garbage collected" do
repository.keep_around(sample_commit.id)
......
require 'spec_helper'
describe RepositoryArchiveCleanUpService, services: true do
describe '#execute' do
let(:path) { Gitlab.config.gitlab.repository_downloads_path }
subject(:service) { described_class.new }
context 'when the downloads directory does not exist' do
it 'does not remove any archives' do
expect(File).to receive(:directory?).with(path).and_return(false)
expect(service).not_to receive(:clean_up_old_archives)
expect(service).not_to receive(:clean_up_empty_directories)
service.execute
end
end
context 'when the downloads directory exists' do
it 'removes old archives' do
expect(File).to receive(:directory?).with(path).and_return(true)
expect(service).to receive(:clean_up_old_archives)
expect(service).to receive(:clean_up_empty_directories)
service.execute
end
end
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment