BigW Consortium Gitlab

Commit b6ea41d1 by Michael Kozono

Find and store unhashed upload file paths

parent ab814e4d
module Gitlab
module BackgroundMigration
class PrepareUnhashedUploads
# Number of file paths collected in memory before they are handed to
# insert_file_paths as one batch.
FILE_PATH_BATCH_SIZE = 500
# Directory that is searched for upload files; built from the CarrierWave root.
UPLOAD_DIR = "#{CarrierWave.root}/uploads"
# Migration-local ActiveRecord model mapped explicitly to the
# unhashed_upload_files table. Rows are created with a `path` attribute.
class UnhashedUploadFile < ActiveRecord::Base
self.table_name = 'unhashed_upload_files'
end
......@@ -8,8 +11,8 @@ module Gitlab
# Entry point of the background migration.
#
# Returns immediately (nil) when `migrate?` is falsy. Otherwise it runs three
# steps in order:
#   1. delete whatever is already stored in the tracking table,
#   2. store the file paths currently found under the upload directory,
#   3. schedule the follow-up jobs that process the stored paths.
def perform
  return unless migrate?

  clear_unhashed_upload_file_paths
  store_unhashed_upload_file_paths
  schedule_populate_untracked_uploads_jobs
end
......@@ -19,12 +22,55 @@ module Gitlab
UnhashedUploadFile.table_exists?
end
# Removes every row from the unhashed_upload_files table so that a new run
# starts from an empty table (rows may be left over from an earlier run).
#
# Returns whatever UnhashedUploadFile.delete_all returns.
def clear_unhashed_upload_file_paths
  UnhashedUploadFile.delete_all
end
# Walks UPLOAD_DIR and stores every file path yielded by each_file_path.
#
# Paths are buffered and passed to insert_file_paths in batches of
# FILE_PATH_BATCH_SIZE; a final, smaller batch is flushed after the walk.
# Returns nil without doing anything when UPLOAD_DIR does not exist.
def store_unhashed_upload_file_paths
  # Dir.exist? is used because Dir.exists? is deprecated and no longer
  # available on newer Ruby versions.
  return unless Dir.exist?(UPLOAD_DIR)

  file_paths = []

  each_file_path(UPLOAD_DIR) do |file_path|
    file_paths << file_path
    next if file_paths.size < FILE_PATH_BATCH_SIZE

    insert_file_paths(file_paths)
    # Start a fresh buffer instead of clearing the one just handed over.
    file_paths = []
  end

  insert_file_paths(file_paths) unless file_paths.empty?
end
# Runs the command produced by build_find_command for +search_dir+ and yields
# each NUL-terminated entry of its standard output, with the trailing NUL
# stripped, to the caller's block.
#
# Raises RuntimeError ("Find command failed") when the command exits with a
# non-success status; the status is only inspected after all output was read.
def each_file_path(search_dir, &block)
  find_cmd = build_find_command(search_dir)

  Open3.popen2(*find_cmd) do |_stdin, find_out, waiter|
    # Entries are separated by "\0", so read up to each NUL byte.
    while (entry = find_out.gets("\0"))
      yield entry.chomp("\0")
    end

    exit_status = waiter.value
    raise "Find command failed" unless exit_status.success?
  end
end
# Builds the argv array for the `find` invocation that lists upload files.
#
# The command lists regular files under +search_dir+, skips everything below
# UPLOAD_DIR/@hashed and UPLOAD_DIR/tmp, and prints NUL-separated paths.
# When ionice is available the command is prefixed with `ionice -c Idle`.
#
# Returns an Array of Strings suitable for splatting into Open3.popen2.
def build_find_command(search_dir)
  cmd = [
    'find', search_dir,
    '-type', 'f',
    '!', '-path', "#{UPLOAD_DIR}/@hashed/*",
    '!', '-path', "#{UPLOAD_DIR}/tmp/*",
    '-print0'
  ]

  # The prefixed array must be assigned back; otherwise it is built and
  # thrown away and the plain find command is returned.
  cmd = %w[ionice -c Idle] + cmd if ionice_is_available?

  cmd
end
# Reports whether an `ionice` executable can be located.
#
# Returns the result of Gitlab::Utils.which('ionice') unchanged (presumably
# the executable's path, or nil when it is not found - confirm against the
# helper), or false when the lookup raises a StandardError.
def ionice_is_available?
  ionice_path = Gitlab::Utils.which('ionice')
  ionice_path
rescue StandardError => _lookup_error
  # A failed lookup is treated as "not available": the caller can still run
  # without ionice, so swallowing the error here is acceptable if not ideal.
  false
end
# Persists one UnhashedUploadFile row per entry of +file_paths+.
#
# Uses create!, so a record that cannot be saved raises instead of being
# skipped silently. Returns +file_paths+ itself.
def insert_file_paths(file_paths)
  file_paths.each { |path| UnhashedUploadFile.create!(path: path) }
end
def schedule_populate_untracked_uploads_jobs
......
require 'spec_helper'
# Specs for the PrepareUnhashedUploads background migration: they check which
# upload file paths end up in the unhashed_upload_files table after `perform`.
describe Gitlab::BackgroundMigration::PrepareUnhashedUploads, :migration, schema: 20171103140253 do
let!(:unhashed_upload_files) { table(:unhashed_upload_files) }
let(:user1) { create(:user) }
let(:user2) { create(:user) }
let(:project1) { create(:project) }
let(:project2) { create(:project) }
let(:appearance) { create(:appearance) }
context 'when files were uploaded before and after hashed storage was enabled' do
# Setup order matters: the first five uploads (user avatar, project avatar,
# appearance logo, appearance header logo, Markdown upload) happen before
# hashed storage is switched on; the project2 upload happens after.
before do
fixture = Rails.root.join('spec', 'fixtures', 'rails_sample.jpg')
uploaded_file = fixture_file_upload(fixture)
user1.update(avatar: uploaded_file)
project1.update(avatar: uploaded_file)
appearance.update(logo: uploaded_file, header_logo: uploaded_file)
uploaded_file = fixture_file_upload(fixture)
UploadService.new(project1, uploaded_file, FileUploader).execute # Markdown upload
stub_application_setting(hashed_storage_enabled: true)
# Hashed files
uploaded_file = fixture_file_upload(fixture)
UploadService.new(project2, uploaded_file, FileUploader).execute
end
# Five rows are expected: one per upload made before hashed storage was enabled.
it 'adds unhashed files to the unhashed_upload_files table' do
expect do
described_class.new.perform
end.to change { unhashed_upload_files.count }.from(0).to(5)
end
it 'does not add hashed files to the unhashed_upload_files table' do
described_class.new.perform
hashed_file_path = project2.uploads.where(uploader: 'FileUploader').first.path
# Substring match: no stored path may contain the hashed upload's path.
expect(unhashed_upload_files.where("path like '%#{hashed_file_path}%'").exists?).to be_falsey
end
# E.g. from a previous failed run of this background migration
context 'when there is existing data in unhashed_upload_files' do
before do
unhashed_upload_files.create(path: '/foo/bar.jpg')
end
# The pre-existing row must be gone; only the five freshly found paths remain.
it 'clears existing data before adding new data' do
expect do
described_class.new.perform
end.to change { unhashed_upload_files.count }.from(1).to(5)
end
end
# E.g. The installation is in use at the time of migration, and someone has
# just uploaded a file
context 'when there are files in /uploads/tmp' do
before do
# NOTE(review): assumes the tmp directory already exists and the touched
# file is never removed afterwards - TODO confirm / add cleanup.
FileUtils.touch(Rails.root.join(described_class::UPLOAD_DIR, 'tmp', 'some_file.jpg'))
end
# The count stays at five, i.e. the extra file under tmp is not stored.
it 'does not add files from /uploads/tmp' do
expect do
described_class.new.perform
end.to change { unhashed_upload_files.count }.from(0).to(5)
end
end
end
# Very new or lightly-used installations that are running this migration
# may not have an upload directory because they have no uploads.
context 'when no files were ever uploaded' do
it 'does not add to the unhashed_upload_files table (and does not raise error)' do
expect do
described_class.new.perform
end.not_to change { unhashed_upload_files.count }.from(0)
end
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment