BigW Consortium Gitlab
Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
G
gitlab-ce
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Forest Godfrey
gitlab-ce
Commits
edb5cac4
Commit
edb5cac4
authored
Nov 20, 2017
by
Michael Kozono
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Use bulk inserts
parent
17ce21d7
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
184 additions
and
71 deletions
+184
-71
prepare_untracked_uploads.rb
lib/gitlab/background_migration/prepare_untracked_uploads.rb
+35
-20
prepare_untracked_uploads_spec.rb
...ab/background_migration/prepare_untracked_uploads_spec.rb
+145
-47
track_untracked_uploads_spec.rb
spec/migrations/track_untracked_uploads_spec.rb
+4
-4
No files found.
lib/gitlab/background_migration/prepare_untracked_uploads.rb
View file @
edb5cac4
...
...
@@ -20,7 +20,19 @@ module Gitlab
# Runs the preparation job: makes sure the temporary tracking table exists,
# stores the untracked file paths in it, and schedules the follow-up jobs
# that populate the uploads.
def perform
  ensure_temporary_tracking_table_exists

  # Since Postgres < 9.5 does not have ON CONFLICT DO NOTHING, and since
  # doing inserts-if-not-exists without ON CONFLICT DO NOTHING would be
  # slow, start with an empty table for Postgres < 9.5.
  # That way we can do bulk inserts at ~30x the speed of individual
  # inserts (~20 minutes worth of inserts at GitLab.com scale instead of
  # ~10 hours).
  # In all other cases, installations will get both bulk inserts and the
  # ability for these jobs to retry without having to clear and reinsert.
  #
  # NOTE: this must stay a *modifier* `unless` on a single line. If `unless`
  # starts its own line it becomes a block that also swallows the two calls
  # below.
  clear_untracked_file_paths unless can_bulk_insert_and_ignore_duplicates?

  store_untracked_file_paths
  schedule_populate_untracked_uploads_jobs
end
...
...
@@ -44,6 +56,10 @@ module Gitlab
end
end
# Empties the temporary tracking table by calling `delete_all` on
# UntrackedFile. Returns whatever `delete_all` returns.
def clear_untracked_file_paths
  UntrackedFile.delete_all
end
def
store_untracked_file_paths
return
unless
Dir
.
exist?
(
ABSOLUTE_UPLOAD_DIR
)
...
...
@@ -96,36 +112,35 @@ module Gitlab
end
# Inserts a whole batch of file paths with a single SQL statement.
#
# The statement depends on the database:
# * Postgres < 9.5: plain INSERT (no way to ignore duplicates in bulk).
# * Postgres >= 9.5: INSERT ... ON CONFLICT DO NOTHING.
# * anything else (MySQL): INSERT IGNORE.
#
# file_paths - Array of path strings to store.
#
# Returns nil without touching the database when the batch is empty,
# otherwise the result of executing the statement.
def insert_file_paths(file_paths)
  # An empty batch would render "VALUES" with no tuples, which is not valid
  # SQL, so there is nothing to do.
  return if file_paths.empty?

  table_columns_and_values = table_columns_and_values_for_insert(file_paths)

  sql = if postgresql_pre_9_5?
          "INSERT INTO #{table_columns_and_values};"
        elsif postgresql?
          "INSERT INTO #{table_columns_and_values} ON CONFLICT DO NOTHING;"
        else # MySQL
          "INSERT IGNORE INTO #{table_columns_and_values};"
        end

  ActiveRecord::Base.connection.execute(sql)
end
# Builds the "<table> (columns) VALUES (...), (...)" fragment shared by all
# bulk INSERT variants. This method only builds a string; it does not
# execute anything.
#
# file_paths - Array of path strings; each becomes one "(path, created_at,
#              updated_at)" tuple. Every tuple uses the same timestamp,
#              taken once per call.
#
# Returns the SQL fragment as a String.
def table_columns_and_values_for_insert(file_paths)
  timestamp = Time.now.utc.iso8601

  # Each tuple is escaped individually through sanitize_sql_array so the
  # paths can be safely embedded in one multi-row statement.
  values = file_paths.map do |file_path|
    ActiveRecord::Base.send(:sanitize_sql_array, ['(?, ?, ?)', file_path, timestamp, timestamp]) # rubocop:disable GitlabSecurity/PublicSend
  end.join(', ')

  "#{UntrackedFile.table_name} (path, created_at, updated_at) VALUES #{values}"
end
# Whether the database is PostgreSQL, as reported by
# Gitlab::Database.postgresql?. The answer is cached per instance.
#
# `||=` is deliberately not used here: it would not cache a `false` answer,
# so the check would be repeated on every call whenever the database is not
# PostgreSQL.
def postgresql?
  return @postgresql if defined?(@postgresql)

  @postgresql = Gitlab::Database.postgresql?
end
# True unless postgresql_pre_9_5? is truthy, i.e. the database can bulk
# insert while ignoring duplicate rows.
def can_bulk_insert_and_ignore_duplicates?
  return false if postgresql_pre_9_5?

  true
end
def
postgresql_pre_9_5?
@postgresql_pre_9_5
||=
postgresql?
&&
ActiveRecord
::
Base
.
connection
.
select_value
(
'SHOW server_version_num'
).
to_i
<
90500
...
...
spec/lib/gitlab/background_migration/prepare_untracked_uploads_spec.rb
View file @
edb5cac4
...
...
@@ -53,80 +53,178 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :migration, :side
expect
(
record
.
reload
.
path
.
size
).
to
eq
(
519
)
end
context
'when files were uploaded before and after hashed storage was enabled'
do
let!
(
:appearance
)
{
create
(
:appearance
,
logo:
uploaded_file
,
header_logo:
uploaded_file
)
}
let!
(
:user
)
{
create
(
:user
,
:with_avatar
)
}
let!
(
:project1
)
{
create
(
:project
,
:with_avatar
)
}
let
(
:project2
)
{
create
(
:project
)
}
# instantiate after enabling hashed_storage
context
"test bulk insert with ON CONFLICT DO NOTHING or IGNORE"
do
around
do
|
example
|
# If this is CI, we use Postgres 9.2 so this whole context should be
# skipped since we're unable to use ON CONFLICT DO NOTHING or IGNORE.
if
described_class
.
new
.
send
(
:can_bulk_insert_and_ignore_duplicates?
)
example
.
run
end
end
before
do
# Markdown upload before enabling hashed_storage
UploadService
.
new
(
project1
,
uploaded_file
,
FileUploader
).
execute
context
'when files were uploaded before and after hashed storage was enabled'
do
let!
(
:appearance
)
{
create
(
:appearance
,
logo:
uploaded_file
,
header_logo:
uploaded_file
)
}
let!
(
:user
)
{
create
(
:user
,
:with_avatar
)
}
let!
(
:project1
)
{
create
(
:project
,
:with_avatar
)
}
let
(
:project2
)
{
create
(
:project
)
}
# instantiate after enabling hashed_storage
stub_application_setting
(
hashed_storage_enabled:
true
)
before
do
# Markdown upload before enabling hashed_storage
UploadService
.
new
(
project1
,
uploaded_file
,
FileUploader
).
execute
# Markdown upload after enabling hashed_storage
UploadService
.
new
(
project2
,
uploaded_file
,
FileUploader
).
execute
end
stub_application_setting
(
hashed_storage_enabled:
true
)
it
'adds unhashed files to the untracked_files_for_uploads table'
do
described_class
.
new
.
perform
# Markdown upload after enabling hashed_storage
UploadService
.
new
(
project2
,
uploaded_file
,
FileUploader
).
execute
end
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
end
it
'adds unhashed files to the untracked_files_for_uploads table'
do
described_class
.
new
.
perform
it
'adds files with paths relative to CarrierWave.root'
do
described_class
.
new
.
perform
untracked_files_for_uploads
.
all
.
each
do
|
file
|
expect
(
file
.
path
.
start_with?
(
'uploads/'
)).
to
be_truthy
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
end
end
it
'does not add hashed files to the untracked_files_for_uploads table
'
do
described_class
.
new
.
perform
hashed_file_path
=
project2
.
uploads
.
where
(
uploader:
'FileUploader'
).
first
.
path
expect
(
untracked_files_for_uploads
.
where
(
"path like '%
#{
hashed_file_path
}
%'"
).
exists?
).
to
be_falsey
end
it
'adds files with paths relative to CarrierWave.root
'
do
described_class
.
new
.
perform
untracked_files_for_uploads
.
all
.
each
do
|
file
|
expect
(
file
.
path
.
start_with?
(
'uploads/'
)).
to
be_truthy
end
end
it
'correctly schedules the follow-up background migration jobs
'
do
described_class
.
new
.
perform
it
'does not add hashed files to the untracked_files_for_uploads table
'
do
described_class
.
new
.
perform
expect
(
described_class
::
FOLLOW_UP_MIGRATION
).
to
be_scheduled_migration
(
1
,
5
)
expect
(
BackgroundMigrationWorker
.
jobs
.
size
).
to
eq
(
1
)
end
hashed_file_path
=
project2
.
uploads
.
where
(
uploader:
'FileUploader'
).
first
.
path
expect
(
untracked_files_for_uploads
.
where
(
"path like '%
#{
hashed_file_path
}
%'"
).
exists?
).
to
be_falsey
end
# E.g. from a previous failed run of this background migration
context
'when there is existing data in untracked_files_for_uploads'
do
before
do
it
'correctly schedules the follow-up background migration jobs'
do
described_class
.
new
.
perform
expect
(
described_class
::
FOLLOW_UP_MIGRATION
).
to
be_scheduled_migration
(
1
,
5
)
expect
(
BackgroundMigrationWorker
.
jobs
.
size
).
to
eq
(
1
)
end
it
'does not error or produce duplicates of existing data'
do
expect
do
# E.g. from a previous failed run of this background migration
context
'when there is existing data in untracked_files_for_uploads'
do
before
do
described_class
.
new
.
perform
end
.
not_to
change
{
untracked_files_for_uploads
.
count
}.
from
(
5
)
end
it
'does not error or produce duplicates of existing data'
do
expect
do
described_class
.
new
.
perform
end
.
not_to
change
{
untracked_files_for_uploads
.
count
}.
from
(
5
)
end
end
# E.g. The installation is in use at the time of migration, and someone has
# just uploaded a file
context
'when there are files in /uploads/tmp'
do
let
(
:tmp_file
)
{
Rails
.
root
.
join
(
described_class
::
ABSOLUTE_UPLOAD_DIR
,
'tmp'
,
'some_file.jpg'
)
}
before
do
FileUtils
.
touch
(
tmp_file
)
end
after
do
FileUtils
.
rm
(
tmp_file
)
end
it
'does not add files from /uploads/tmp'
do
described_class
.
new
.
perform
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
end
end
end
end
context
'test bulk insert without ON CONFLICT DO NOTHING or IGNORE'
do
before
do
# If this is CI, we use Postgres 9.2 so this stub has no effect.
#
# If this is being run on Postgres 9.5+ or MySQL, then this stub allows us
# to test the bulk insert functionality without ON CONFLICT DO NOTHING or
# IGNORE.
allow_any_instance_of
(
described_class
).
to
receive
(
:postgresql_pre_9_5?
).
and_return
(
true
)
end
# E.g. The installation is in use at the time of migration, and someone has
# just uploaded a file
context
'when there are files in /uploads/tmp'
do
let
(
:tmp_file
)
{
Rails
.
root
.
join
(
described_class
::
ABSOLUTE_UPLOAD_DIR
,
'tmp'
,
'some_file.jpg'
)
}
context
'when files were uploaded before and after hashed storage was enabled'
do
let!
(
:appearance
)
{
create
(
:appearance
,
logo:
uploaded_file
,
header_logo:
uploaded_file
)
}
let!
(
:user
)
{
create
(
:user
,
:with_avatar
)
}
let!
(
:project1
)
{
create
(
:project
,
:with_avatar
)
}
let
(
:project2
)
{
create
(
:project
)
}
# instantiate after enabling hashed_storage
before
do
FileUtils
.
touch
(
tmp_file
)
end
# Markdown upload before enabling hashed_storage
UploadService
.
new
(
project1
,
uploaded_file
,
FileUploader
).
execute
after
do
FileUtils
.
rm
(
tmp_file
)
stub_application_setting
(
hashed_storage_enabled:
true
)
# Markdown upload after enabling hashed_storage
UploadService
.
new
(
project2
,
uploaded_file
,
FileUploader
).
execute
end
it
'
does not add files from /uploads/tmp
'
do
it
'
adds unhashed files to the untracked_files_for_uploads table
'
do
described_class
.
new
.
perform
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
end
it
'adds files with paths relative to CarrierWave.root'
do
described_class
.
new
.
perform
untracked_files_for_uploads
.
all
.
each
do
|
file
|
expect
(
file
.
path
.
start_with?
(
'uploads/'
)).
to
be_truthy
end
end
it
'does not add hashed files to the untracked_files_for_uploads table'
do
described_class
.
new
.
perform
hashed_file_path
=
project2
.
uploads
.
where
(
uploader:
'FileUploader'
).
first
.
path
expect
(
untracked_files_for_uploads
.
where
(
"path like '%
#{
hashed_file_path
}
%'"
).
exists?
).
to
be_falsey
end
it
'correctly schedules the follow-up background migration jobs'
do
described_class
.
new
.
perform
expect
(
described_class
::
FOLLOW_UP_MIGRATION
).
to
be_scheduled_migration
(
1
,
5
)
expect
(
BackgroundMigrationWorker
.
jobs
.
size
).
to
eq
(
1
)
end
# E.g. from a previous failed run of this background migration
context
'when there is existing data in untracked_files_for_uploads'
do
before
do
described_class
.
new
.
perform
end
it
'does not error or produce duplicates of existing data'
do
expect
do
described_class
.
new
.
perform
end
.
not_to
change
{
untracked_files_for_uploads
.
count
}.
from
(
5
)
end
end
# E.g. The installation is in use at the time of migration, and someone has
# just uploaded a file
context
'when there are files in /uploads/tmp'
do
let
(
:tmp_file
)
{
Rails
.
root
.
join
(
described_class
::
ABSOLUTE_UPLOAD_DIR
,
'tmp'
,
'some_file.jpg'
)
}
before
do
FileUtils
.
touch
(
tmp_file
)
end
after
do
FileUtils
.
rm
(
tmp_file
)
end
it
'does not add files from /uploads/tmp'
do
described_class
.
new
.
perform
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
end
end
end
end
...
...
spec/migrations/track_untracked_uploads_spec.rb
View file @
edb5cac4
...
...
@@ -62,8 +62,8 @@ describe TrackUntrackedUploads, :migration, :sidekiq do
expect
(
appearance
.
reload
.
uploads
.
where
(
"path like '%/header_logo/%'"
).
first
.
attributes
).
to
include
(
@appearance_header_logo_attributes
)
expect
(
user2
.
reload
.
uploads
.
first
.
attributes
).
to
include
(
@user2_avatar_attributes
)
expect
(
project2
.
reload
.
uploads
.
first
.
attributes
).
to
include
(
@project2_avatar_attributes
)
expect
(
project2
.
uploads
.
la
st
.
attributes
).
to
include
(
@project2_markdown_attributes
)
expect
(
project2
.
reload
.
uploads
.
where
(
uploader:
'AvatarUploader'
).
first
.
attributes
).
to
include
(
@project2_avatar_attributes
)
expect
(
project2
.
uploads
.
where
(
uploader:
'FileUploader'
).
fir
st
.
attributes
).
to
include
(
@project2_markdown_attributes
)
end
it
'ignores already-tracked uploads'
do
...
...
@@ -71,8 +71,8 @@ describe TrackUntrackedUploads, :migration, :sidekiq do
expect
(
appearance
.
reload
.
uploads
.
where
(
"path like '%/logo/%'"
).
first
.
attributes
).
to
include
(
@appearance_logo_attributes
)
expect
(
user1
.
reload
.
uploads
.
first
.
attributes
).
to
include
(
@user1_avatar_attributes
)
expect
(
project1
.
reload
.
uploads
.
first
.
attributes
).
to
include
(
@project1_avatar_attributes
)
expect
(
project1
.
uploads
.
la
st
.
attributes
).
to
include
(
@project1_markdown_attributes
)
expect
(
project1
.
reload
.
uploads
.
where
(
uploader:
'AvatarUploader'
).
first
.
attributes
).
to
include
(
@project1_avatar_attributes
)
expect
(
project1
.
uploads
.
where
(
uploader:
'FileUploader'
).
fir
st
.
attributes
).
to
include
(
@project1_markdown_attributes
)
end
it
'the temporary table untracked_files_for_uploads no longer exists'
do
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment