BigW Consortium Gitlab

Commit 2ca4123c by Douwe Maan Committed by Mark Fletcher

Merge branch 'mk-fix-no-untracked-upload-files-error' into 'master'

Resolve "PrepareUntrackedUploads PostgreSQL syntax error". Closes #42881. See merge request gitlab-org/gitlab-ce!17019.
parent 8f74bb89
---
title: Resolve PrepareUntrackedUploads PostgreSQL syntax error
merge_request: 17019
author:
type: fixed
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
# Post-processing for the temporary `untracked_files_for_uploads` table.
#
# When the table exists, this migration either drops it (if it holds no rows)
# or schedules the follow-up background migration for the rows it contains.
# When the table does not exist, the migration does nothing.
class SchedulePopulateUntrackedUploadsIfNeeded < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  # Name of the background migration scheduled for the remaining rows.
  FOLLOW_UP_MIGRATION = 'PopulateUntrackedUploads'.freeze

  # Minimal model bound to the temporary table, so the migration does not
  # rely on any application model.
  class UntrackedFile < ActiveRecord::Base
    include EachBatch

    self.table_name = 'untracked_files_for_uploads'
  end

  # Runs only if the temporary table is present.
  def up
    return unless table_exists?(:untracked_files_for_uploads)

    process_or_remove_table
  end

  # Intentionally a no-op.
  def down
    # nothing
  end

  private

  # Drops the table when it is empty; otherwise schedules jobs for its rows.
  def process_or_remove_table
    return drop_temp_table if UntrackedFile.all.empty?

    schedule_populate_untracked_uploads_jobs
  end

  # Removes the temporary table; `if_exists: true` is passed to `drop_table`.
  def drop_temp_table
    drop_table :untracked_files_for_uploads, if_exists: true
  end

  # Announces the scheduling and queues the follow-up background migration
  # jobs over `UntrackedFile`.
  def schedule_populate_untracked_uploads_jobs
    say "Scheduling #{FOLLOW_UP_MIGRATION} background migration jobs since there are rows in untracked_files_for_uploads."

    bulk_queue_background_migration_jobs_by_range(UntrackedFile, FOLLOW_UP_MIGRATION)
  end
end
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180206200543) do ActiveRecord::Schema.define(version: 20180208183958) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "plpgsql" enable_extension "plpgsql"
......
...@@ -43,7 +43,11 @@ module Gitlab ...@@ -43,7 +43,11 @@ module Gitlab
store_untracked_file_paths store_untracked_file_paths
schedule_populate_untracked_uploads_jobs if UntrackedFile.all.empty?
drop_temp_table
else
schedule_populate_untracked_uploads_jobs
end
end end
private private
...@@ -92,7 +96,7 @@ module Gitlab ...@@ -92,7 +96,7 @@ module Gitlab
end end
end end
yield(paths) yield(paths) if paths.any?
end end
def build_find_command(search_dir) def build_find_command(search_dir)
...@@ -165,6 +169,11 @@ module Gitlab ...@@ -165,6 +169,11 @@ module Gitlab
bulk_queue_background_migration_jobs_by_range( bulk_queue_background_migration_jobs_by_range(
UntrackedFile, FOLLOW_UP_MIGRATION) UntrackedFile, FOLLOW_UP_MIGRATION)
end end
def drop_temp_table
UntrackedFile.connection.drop_table(:untracked_files_for_uploads,
if_exists: true)
end
end end
end end
end end
...@@ -8,8 +8,6 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do ...@@ -8,8 +8,6 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do
before do before do
DatabaseCleaner.clean DatabaseCleaner.clean
drop_temp_table_if_exists
end end
after do after do
...@@ -23,57 +21,7 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do ...@@ -23,57 +21,7 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do
end end
end end
# E.g. The installation is in use at the time of migration, and someone has shared_examples 'prepares the untracked_files_for_uploads table' do
# just uploaded a file
shared_examples 'does not add files in /uploads/tmp' do
let(:tmp_file) { Rails.root.join(described_class::ABSOLUTE_UPLOAD_DIR, 'tmp', 'some_file.jpg') }
before do
FileUtils.mkdir(File.dirname(tmp_file))
FileUtils.touch(tmp_file)
end
after do
FileUtils.rm(tmp_file)
end
it 'does not add files from /uploads/tmp' do
described_class.new.perform
expect(untracked_files_for_uploads.count).to eq(5)
end
end
it 'ensures the untracked_files_for_uploads table exists' do
expect do
described_class.new.perform
end.to change { ActiveRecord::Base.connection.table_exists?(:untracked_files_for_uploads) }.from(false).to(true)
end
it 'has a path field long enough for really long paths' do
described_class.new.perform
component = 'a' * 255
long_path = [
'uploads',
component, # project.full_path
component # filename
].flatten.join('/')
record = untracked_files_for_uploads.create!(path: long_path)
expect(record.reload.path.size).to eq(519)
end
context "test bulk insert with ON CONFLICT DO NOTHING or IGNORE" do
around do |example|
# If this is CI, we use Postgres 9.2 so this whole context should be
# skipped since we're unable to use ON CONFLICT DO NOTHING or IGNORE.
if described_class.new.send(:can_bulk_insert_and_ignore_duplicates?)
example.run
end
end
context 'when files were uploaded before and after hashed storage was enabled' do context 'when files were uploaded before and after hashed storage was enabled' do
let!(:appearance) { create_or_update_appearance(logo: uploaded_file, header_logo: uploaded_file) } let!(:appearance) { create_or_update_appearance(logo: uploaded_file, header_logo: uploaded_file) }
let!(:user) { create(:user, :with_avatar) } let!(:user) { create(:user, :with_avatar) }
...@@ -90,6 +38,21 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do ...@@ -90,6 +38,21 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do
UploadService.new(project2, uploaded_file, FileUploader).execute UploadService.new(project2, uploaded_file, FileUploader).execute
end end
it 'has a path field long enough for really long paths' do
described_class.new.perform
component = 'a' * 255
long_path = [
'uploads',
component, # project.full_path
component # filename
].flatten.join('/')
record = untracked_files_for_uploads.create!(path: long_path)
expect(record.reload.path.size).to eq(519)
end
it 'adds unhashed files to the untracked_files_for_uploads table' do it 'adds unhashed files to the untracked_files_for_uploads table' do
described_class.new.perform described_class.new.perform
...@@ -130,7 +93,10 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do ...@@ -130,7 +93,10 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do
end end
end end
# E.g. The installation is in use at the time of migration, and someone has
# just uploaded a file
context 'when there are files in /uploads/tmp' do context 'when there are files in /uploads/tmp' do
<<<<<<< HEAD
it_behaves_like 'does not add files in /uploads/tmp' it_behaves_like 'does not add files in /uploads/tmp'
end end
end end
...@@ -155,66 +121,65 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do ...@@ -155,66 +121,65 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :sidekiq do
before do before do
# Markdown upload before enabling hashed_storage # Markdown upload before enabling hashed_storage
UploadService.new(project1, uploaded_file, FileUploader).execute UploadService.new(project1, uploaded_file, FileUploader).execute
=======
let(:tmp_file) { Rails.root.join(described_class::ABSOLUTE_UPLOAD_DIR, 'tmp', 'some_file.jpg') }
>>>>>>> c133f1a7480... Merge branch 'mk-fix-no-untracked-upload-files-error' into 'master'
stub_application_setting(hashed_storage_enabled: true) before do
FileUtils.mkdir(File.dirname(tmp_file))
# Markdown upload after enabling hashed_storage FileUtils.touch(tmp_file)
UploadService.new(project2, uploaded_file, FileUploader).execute
end
it 'adds unhashed files to the untracked_files_for_uploads table' do
described_class.new.perform
expect(untracked_files_for_uploads.count).to eq(5)
end
it 'adds files with paths relative to CarrierWave.root' do
described_class.new.perform
untracked_files_for_uploads.all.each do |file|
expect(file.path.start_with?('uploads/')).to be_truthy
end end
end
it 'does not add hashed files to the untracked_files_for_uploads table' do
described_class.new.perform
hashed_file_path = project2.uploads.where(uploader: 'FileUploader').first.path after do
expect(untracked_files_for_uploads.where("path like '%#{hashed_file_path}%'").exists?).to be_falsey FileUtils.rm(tmp_file)
end end
it 'correctly schedules the follow-up background migration jobs' do it 'does not add files from /uploads/tmp' do
described_class.new.perform described_class.new.perform
expect(described_class::FOLLOW_UP_MIGRATION).to be_scheduled_migration(1, 5) expect(untracked_files_for_uploads.count).to eq(5)
expect(BackgroundMigrationWorker.jobs.size).to eq(1) end
end end
# E.g. from a previous failed run of this background migration context 'when the last batch size exactly matches the max batch size' do
context 'when there is existing data in untracked_files_for_uploads' do it 'does not raise error' do
before do stub_const("#{described_class}::FIND_BATCH_SIZE", 5)
described_class.new.perform
end
it 'does not error or produce duplicates of existing data' do
expect do expect do
described_class.new.perform described_class.new.perform
end.not_to change { untracked_files_for_uploads.count }.from(5) end.not_to raise_error
expect(untracked_files_for_uploads.count).to eq(5)
end end
end end
end
end
context 'when there are files in /uploads/tmp' do # If running on Postgres 9.2 (like on CI), this whole context is skipped
it_behaves_like 'does not add files in /uploads/tmp' # since we're unable to use ON CONFLICT DO NOTHING or IGNORE.
end context "test bulk insert with ON CONFLICT DO NOTHING or IGNORE", if: described_class.new.send(:can_bulk_insert_and_ignore_duplicates?) do
it_behaves_like 'prepares the untracked_files_for_uploads table'
end
# If running on Postgres 9.2 (like on CI), the stubbed method has no effect.
#
# If running on Postgres 9.5+ or MySQL, then this context effectively tests
# the bulk insert functionality without ON CONFLICT DO NOTHING or IGNORE.
context 'test bulk insert without ON CONFLICT DO NOTHING or IGNORE' do
before do
allow_any_instance_of(described_class).to receive(:postgresql_pre_9_5?).and_return(true)
end end
it_behaves_like 'prepares the untracked_files_for_uploads table'
end end
# Very new or lightly-used installations that are running this migration # Very new or lightly-used installations that are running this migration
# may not have an upload directory because they have no uploads. # may not have an upload directory because they have no uploads.
context 'when no files were ever uploaded' do context 'when no files were ever uploaded' do
it 'does not add to the untracked_files_for_uploads table (and does not raise error)' do it 'deletes the `untracked_files_for_uploads` table (and does not raise error)' do
described_class.new.perform described_class.new.perform
expect(untracked_files_for_uploads.count).to eq(0) expect(untracked_files_for_uploads.connection.table_exists?(:untracked_files_for_uploads)).to be_falsey
end end
end end
end end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment