BigW Consortium Gitlab
Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
G
gitlab-ce
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Forest Godfrey
gitlab-ce
Commits
edb5cac4
Commit
edb5cac4
authored
Nov 20, 2017
by
Michael Kozono
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Use bulk inserts
parent
17ce21d7
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
184 additions
and
71 deletions
+184
-71
prepare_untracked_uploads.rb
lib/gitlab/background_migration/prepare_untracked_uploads.rb
+35
-20
prepare_untracked_uploads_spec.rb
...ab/background_migration/prepare_untracked_uploads_spec.rb
+145
-47
track_untracked_uploads_spec.rb
spec/migrations/track_untracked_uploads_spec.rb
+4
-4
No files found.
lib/gitlab/background_migration/prepare_untracked_uploads.rb
View file @
edb5cac4
...
@@ -20,7 +20,19 @@ module Gitlab
...
@@ -20,7 +20,19 @@ module Gitlab
def
perform
def
perform
ensure_temporary_tracking_table_exists
ensure_temporary_tracking_table_exists
# Since Postgres < 9.5 does not have ON CONFLICT DO NOTHING, and since
# doing inserts-if-not-exists without ON CONFLICT DO NOTHING would be
# slow, start with an empty table for Postgres < 9.5.
# That way we can do bulk inserts at ~30x the speed of individual
# inserts (~20 minutes worth of inserts at GitLab.com scale instead of
# ~10 hours).
# In all other cases, installations will get both bulk inserts and the
# ability for these jobs to retry without having to clear and reinsert.
clear_untracked_file_paths
unless
can_bulk_insert_and_ignore_duplicates?
store_untracked_file_paths
store_untracked_file_paths
schedule_populate_untracked_uploads_jobs
schedule_populate_untracked_uploads_jobs
end
end
...
@@ -44,6 +56,10 @@ module Gitlab
...
@@ -44,6 +56,10 @@ module Gitlab
end
end
end
end
def
clear_untracked_file_paths
UntrackedFile
.
delete_all
end
def
store_untracked_file_paths
def
store_untracked_file_paths
return
unless
Dir
.
exist?
(
ABSOLUTE_UPLOAD_DIR
)
return
unless
Dir
.
exist?
(
ABSOLUTE_UPLOAD_DIR
)
...
@@ -96,36 +112,35 @@ module Gitlab
...
@@ -96,36 +112,35 @@ module Gitlab
end
end
def
insert_file_paths
(
file_paths
)
def
insert_file_paths
(
file_paths
)
ActiveRecord
::
Base
.
transaction
do
sql
=
if
postgresql_pre_9_5?
file_paths
.
each
do
|
file_path
|
"INSERT INTO
#{
table_columns_and_values_for_insert
(
file_paths
)
}
;"
insert_file_path
(
file_path
)
elsif
postgresql?
end
"INSERT INTO
#{
table_columns_and_values_for_insert
(
file_paths
)
}
ON CONFLICT DO NOTHING;"
end
else
# MySQL
end
"INSERT IGNORE INTO
#{
table_columns_and_values_for_insert
(
file_paths
)
}
;"
end
def
insert_file_path
(
file_path
)
ActiveRecord
::
Base
.
connection
.
execute
(
sql
)
if
postgresql_pre_9_5?
end
# No easy way to do ON CONFLICT DO NOTHING before Postgres 9.5 so just use Rails
return
UntrackedFile
.
where
(
path:
file_path
).
first_or_create
end
table_columns_and_values
=
'untracked_files_for_uploads (path, created_at, updated_at) VALUES (?, ?, ?)'
def
table_columns_and_values_for_insert
(
file_paths
)
timestamp
=
Time
.
now
.
utc
.
iso8601
sql
=
if
postgresql?
values
=
file_paths
.
map
do
|
file_path
|
"INSERT INTO
#{
table_columns_and_values
}
ON CONFLICT DO NOTHING;"
ActiveRecord
::
Base
.
send
(
:sanitize_sql_array
,
[
'(?, ?, ?)'
,
file_path
,
timestamp
,
timestamp
])
# rubocop:disable GitlabSecurity/PublicSend
else
end
.
join
(
', '
)
"INSERT IGNORE INTO
#{
table_columns_and_values
}
;"
end
timestamp
=
Time
.
now
.
utc
.
iso8601
"
#{
UntrackedFile
.
table_name
}
(path, created_at, updated_at) VALUES
#{
values
}
"
sql
=
ActiveRecord
::
Base
.
send
(
:sanitize_sql_array
,
[
sql
,
file_path
,
timestamp
,
timestamp
])
# rubocop:disable GitlabSecurity/PublicSend
ActiveRecord
::
Base
.
connection
.
execute
(
sql
)
end
end
def
postgresql?
def
postgresql?
@postgresql
||=
Gitlab
::
Database
.
postgresql?
@postgresql
||=
Gitlab
::
Database
.
postgresql?
end
end
def
can_bulk_insert_and_ignore_duplicates?
!
postgresql_pre_9_5?
end
def
postgresql_pre_9_5?
def
postgresql_pre_9_5?
@postgresql_pre_9_5
||=
postgresql?
&&
@postgresql_pre_9_5
||=
postgresql?
&&
ActiveRecord
::
Base
.
connection
.
select_value
(
'SHOW server_version_num'
).
to_i
<
90500
ActiveRecord
::
Base
.
connection
.
select_value
(
'SHOW server_version_num'
).
to_i
<
90500
...
...
spec/lib/gitlab/background_migration/prepare_untracked_uploads_spec.rb
View file @
edb5cac4
...
@@ -53,80 +53,178 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :migration, :side
...
@@ -53,80 +53,178 @@ describe Gitlab::BackgroundMigration::PrepareUntrackedUploads, :migration, :side
expect
(
record
.
reload
.
path
.
size
).
to
eq
(
519
)
expect
(
record
.
reload
.
path
.
size
).
to
eq
(
519
)
end
end
context
'when files were uploaded before and after hashed storage was enabled'
do
context
"test bulk insert with ON CONFLICT DO NOTHING or IGNORE"
do
let!
(
:appearance
)
{
create
(
:appearance
,
logo:
uploaded_file
,
header_logo:
uploaded_file
)
}
around
do
|
example
|
let!
(
:user
)
{
create
(
:user
,
:with_avatar
)
}
# If this is CI, we use Postgres 9.2 so this whole context should be
let!
(
:project1
)
{
create
(
:project
,
:with_avatar
)
}
# skipped since we're unable to use ON CONFLICT DO NOTHING or IGNORE.
let
(
:project2
)
{
create
(
:project
)
}
# instantiate after enabling hashed_storage
if
described_class
.
new
.
send
(
:can_bulk_insert_and_ignore_duplicates?
)
example
.
run
end
end
before
do
context
'when files were uploaded before and after hashed storage was enabled'
do
# Markdown upload before enabling hashed_storage
let!
(
:appearance
)
{
create
(
:appearance
,
logo:
uploaded_file
,
header_logo:
uploaded_file
)
}
UploadService
.
new
(
project1
,
uploaded_file
,
FileUploader
).
execute
let!
(
:user
)
{
create
(
:user
,
:with_avatar
)
}
let!
(
:project1
)
{
create
(
:project
,
:with_avatar
)
}
let
(
:project2
)
{
create
(
:project
)
}
# instantiate after enabling hashed_storage
stub_application_setting
(
hashed_storage_enabled:
true
)
before
do
# Markdown upload before enabling hashed_storage
UploadService
.
new
(
project1
,
uploaded_file
,
FileUploader
).
execute
# Markdown upload after enabling hashed_storage
stub_application_setting
(
hashed_storage_enabled:
true
)
UploadService
.
new
(
project2
,
uploaded_file
,
FileUploader
).
execute
end
it
'adds unhashed files to the untracked_files_for_uploads table'
do
# Markdown upload after enabling hashed_storage
described_class
.
new
.
perform
UploadService
.
new
(
project2
,
uploaded_file
,
FileUploader
).
execute
end
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
it
'adds unhashed files to the untracked_files_for_uploads table'
do
end
described_class
.
new
.
perform
it
'adds files with paths relative to CarrierWave.root'
do
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
described_class
.
new
.
perform
untracked_files_for_uploads
.
all
.
each
do
|
file
|
expect
(
file
.
path
.
start_with?
(
'uploads/'
)).
to
be_truthy
end
end
end
it
'does not add hashed files to the untracked_files_for_uploads table
'
do
it
'adds files with paths relative to CarrierWave.root
'
do
described_class
.
new
.
perform
described_class
.
new
.
perform
untracked_files_for_uploads
.
all
.
each
do
|
file
|
hashed_file_path
=
project2
.
uploads
.
where
(
uploader:
'FileUploader'
).
first
.
path
expect
(
file
.
path
.
start_with?
(
'uploads/'
)).
to
be_truthy
expect
(
untracked_files_for_uploads
.
where
(
"path like '%
#{
hashed_file_path
}
%'"
).
exists?
).
to
be_falsey
end
end
end
it
'correctly schedules the follow-up background migration jobs
'
do
it
'does not add hashed files to the untracked_files_for_uploads table
'
do
described_class
.
new
.
perform
described_class
.
new
.
perform
expect
(
described_class
::
FOLLOW_UP_MIGRATION
).
to
be_scheduled_migration
(
1
,
5
)
hashed_file_path
=
project2
.
uploads
.
where
(
uploader:
'FileUploader'
).
first
.
path
expect
(
BackgroundMigrationWorker
.
jobs
.
size
).
to
eq
(
1
)
expect
(
untracked_files_for_uploads
.
where
(
"path like '%
#{
hashed_file_path
}
%'"
).
exists?
).
to
be_falsey
end
end
# E.g. from a previous failed run of this background migration
it
'correctly schedules the follow-up background migration jobs'
do
context
'when there is existing data in untracked_files_for_uploads'
do
before
do
described_class
.
new
.
perform
described_class
.
new
.
perform
expect
(
described_class
::
FOLLOW_UP_MIGRATION
).
to
be_scheduled_migration
(
1
,
5
)
expect
(
BackgroundMigrationWorker
.
jobs
.
size
).
to
eq
(
1
)
end
end
it
'does not error or produce duplicates of existing data'
do
# E.g. from a previous failed run of this background migration
expect
do
context
'when there is existing data in untracked_files_for_uploads'
do
before
do
described_class
.
new
.
perform
described_class
.
new
.
perform
end
.
not_to
change
{
untracked_files_for_uploads
.
count
}.
from
(
5
)
end
it
'does not error or produce duplicates of existing data'
do
expect
do
described_class
.
new
.
perform
end
.
not_to
change
{
untracked_files_for_uploads
.
count
}.
from
(
5
)
end
end
end
# E.g. The installation is in use at the time of migration, and someone has
# just uploaded a file
context
'when there are files in /uploads/tmp'
do
let
(
:tmp_file
)
{
Rails
.
root
.
join
(
described_class
::
ABSOLUTE_UPLOAD_DIR
,
'tmp'
,
'some_file.jpg'
)
}
before
do
FileUtils
.
touch
(
tmp_file
)
end
after
do
FileUtils
.
rm
(
tmp_file
)
end
it
'does not add files from /uploads/tmp'
do
described_class
.
new
.
perform
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
end
end
end
end
context
'test bulk insert without ON CONFLICT DO NOTHING or IGNORE'
do
before
do
# If this is CI, we use Postgres 9.2 so this stub has no effect.
#
# If this is being run on Postgres 9.5+ or MySQL, then this stub allows us
# to test the bulk insert functionality without ON CONFLICT DO NOTHING or
# IGNORE.
allow_any_instance_of
(
described_class
).
to
receive
(
:postgresql_pre_9_5?
).
and_return
(
true
)
end
end
# E.g. The installation is in use at the time of migration, and someone has
context
'when files were uploaded before and after hashed storage was enabled'
do
# just uploaded a file
let!
(
:appearance
)
{
create
(
:appearance
,
logo:
uploaded_file
,
header_logo:
uploaded_file
)
}
context
'when there are files in /uploads/tmp'
do
let!
(
:user
)
{
create
(
:user
,
:with_avatar
)
}
let
(
:tmp_file
)
{
Rails
.
root
.
join
(
described_class
::
ABSOLUTE_UPLOAD_DIR
,
'tmp'
,
'some_file.jpg'
)
}
let!
(
:project1
)
{
create
(
:project
,
:with_avatar
)
}
let
(
:project2
)
{
create
(
:project
)
}
# instantiate after enabling hashed_storage
before
do
before
do
FileUtils
.
touch
(
tmp_file
)
# Markdown upload before enabling hashed_storage
end
UploadService
.
new
(
project1
,
uploaded_file
,
FileUploader
).
execute
after
do
stub_application_setting
(
hashed_storage_enabled:
true
)
FileUtils
.
rm
(
tmp_file
)
# Markdown upload after enabling hashed_storage
UploadService
.
new
(
project2
,
uploaded_file
,
FileUploader
).
execute
end
end
it
'
does not add files from /uploads/tmp
'
do
it
'
adds unhashed files to the untracked_files_for_uploads table
'
do
described_class
.
new
.
perform
described_class
.
new
.
perform
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
end
end
it
'adds files with paths relative to CarrierWave.root'
do
described_class
.
new
.
perform
untracked_files_for_uploads
.
all
.
each
do
|
file
|
expect
(
file
.
path
.
start_with?
(
'uploads/'
)).
to
be_truthy
end
end
it
'does not add hashed files to the untracked_files_for_uploads table'
do
described_class
.
new
.
perform
hashed_file_path
=
project2
.
uploads
.
where
(
uploader:
'FileUploader'
).
first
.
path
expect
(
untracked_files_for_uploads
.
where
(
"path like '%
#{
hashed_file_path
}
%'"
).
exists?
).
to
be_falsey
end
it
'correctly schedules the follow-up background migration jobs'
do
described_class
.
new
.
perform
expect
(
described_class
::
FOLLOW_UP_MIGRATION
).
to
be_scheduled_migration
(
1
,
5
)
expect
(
BackgroundMigrationWorker
.
jobs
.
size
).
to
eq
(
1
)
end
# E.g. from a previous failed run of this background migration
context
'when there is existing data in untracked_files_for_uploads'
do
before
do
described_class
.
new
.
perform
end
it
'does not error or produce duplicates of existing data'
do
expect
do
described_class
.
new
.
perform
end
.
not_to
change
{
untracked_files_for_uploads
.
count
}.
from
(
5
)
end
end
# E.g. The installation is in use at the time of migration, and someone has
# just uploaded a file
context
'when there are files in /uploads/tmp'
do
let
(
:tmp_file
)
{
Rails
.
root
.
join
(
described_class
::
ABSOLUTE_UPLOAD_DIR
,
'tmp'
,
'some_file.jpg'
)
}
before
do
FileUtils
.
touch
(
tmp_file
)
end
after
do
FileUtils
.
rm
(
tmp_file
)
end
it
'does not add files from /uploads/tmp'
do
described_class
.
new
.
perform
expect
(
untracked_files_for_uploads
.
count
).
to
eq
(
5
)
end
end
end
end
end
end
...
...
spec/migrations/track_untracked_uploads_spec.rb
View file @
edb5cac4
...
@@ -62,8 +62,8 @@ describe TrackUntrackedUploads, :migration, :sidekiq do
...
@@ -62,8 +62,8 @@ describe TrackUntrackedUploads, :migration, :sidekiq do
expect
(
appearance
.
reload
.
uploads
.
where
(
"path like '%/header_logo/%'"
).
first
.
attributes
).
to
include
(
@appearance_header_logo_attributes
)
expect
(
appearance
.
reload
.
uploads
.
where
(
"path like '%/header_logo/%'"
).
first
.
attributes
).
to
include
(
@appearance_header_logo_attributes
)
expect
(
user2
.
reload
.
uploads
.
first
.
attributes
).
to
include
(
@user2_avatar_attributes
)
expect
(
user2
.
reload
.
uploads
.
first
.
attributes
).
to
include
(
@user2_avatar_attributes
)
expect
(
project2
.
reload
.
uploads
.
first
.
attributes
).
to
include
(
@project2_avatar_attributes
)
expect
(
project2
.
reload
.
uploads
.
where
(
uploader:
'AvatarUploader'
).
first
.
attributes
).
to
include
(
@project2_avatar_attributes
)
expect
(
project2
.
uploads
.
la
st
.
attributes
).
to
include
(
@project2_markdown_attributes
)
expect
(
project2
.
uploads
.
where
(
uploader:
'FileUploader'
).
fir
st
.
attributes
).
to
include
(
@project2_markdown_attributes
)
end
end
it
'ignores already-tracked uploads'
do
it
'ignores already-tracked uploads'
do
...
@@ -71,8 +71,8 @@ describe TrackUntrackedUploads, :migration, :sidekiq do
...
@@ -71,8 +71,8 @@ describe TrackUntrackedUploads, :migration, :sidekiq do
expect
(
appearance
.
reload
.
uploads
.
where
(
"path like '%/logo/%'"
).
first
.
attributes
).
to
include
(
@appearance_logo_attributes
)
expect
(
appearance
.
reload
.
uploads
.
where
(
"path like '%/logo/%'"
).
first
.
attributes
).
to
include
(
@appearance_logo_attributes
)
expect
(
user1
.
reload
.
uploads
.
first
.
attributes
).
to
include
(
@user1_avatar_attributes
)
expect
(
user1
.
reload
.
uploads
.
first
.
attributes
).
to
include
(
@user1_avatar_attributes
)
expect
(
project1
.
reload
.
uploads
.
first
.
attributes
).
to
include
(
@project1_avatar_attributes
)
expect
(
project1
.
reload
.
uploads
.
where
(
uploader:
'AvatarUploader'
).
first
.
attributes
).
to
include
(
@project1_avatar_attributes
)
expect
(
project1
.
uploads
.
la
st
.
attributes
).
to
include
(
@project1_markdown_attributes
)
expect
(
project1
.
uploads
.
where
(
uploader:
'FileUploader'
).
fir
st
.
attributes
).
to
include
(
@project1_markdown_attributes
)
end
end
it
'the temporary table untracked_files_for_uploads no longer exists'
do
it
'the temporary table untracked_files_for_uploads no longer exists'
do
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment