
Run background migrations with a minimum interval

This adds a minimum interval to BackgroundMigrationWorker, ensuring background migrations of the same class only run once every 5 minutes. This prevents a thundering herd problem where scheduled migrations all run at once because their delays have expired (e.g. as the result of a queue being paused for a long time). If a job was recently executed, it is rescheduled with a delay equal to the remaining time of the job's lease. This means that if the lease expires in two minutes we only need to wait two minutes, instead of five. Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/41624
parent 0788b37f
class BackgroundMigrationWorker
  include ApplicationWorker

  # The minimum amount of time between processing two jobs of the same migration
  # class.
  #
  # This interval is set to 5 minutes so autovacuuming and other maintenance
  # related tasks have plenty of time to clean up after a migration has been
  # performed.
  MIN_INTERVAL = 5.minutes.to_i

  # Performs the background migration.
  #
  # See Gitlab::BackgroundMigration.perform for more information.
  #
  # class_name - The class name of the background migration to run.
  # arguments - The arguments to pass to the migration class.
  def perform(class_name, arguments = [])
    should_perform, ttl = perform_and_ttl(class_name)

    if should_perform
      Gitlab::BackgroundMigration.perform(class_name, arguments)
    else
      # If the lease could not be obtained this means either another process is
      # running a migration of this class or we ran one recently. In this case
      # we'll reschedule the job in such a way that it is picked up again around
      # the time the lease expires.
      self.class.perform_in(ttl || MIN_INTERVAL, class_name, arguments)
    end
  end

  def perform_and_ttl(class_name)
    if always_perform?
      # In test environments `perform_in` will run right away. This can then
      # lead to stack level errors in the above `#perform`. To work around this
      # we'll just perform the migration right away in the test environment.
      [true, nil]
    else
      lease = lease_for(class_name)

      [lease.try_obtain, lease.ttl]
    end
  end

  def lease_for(class_name)
    Gitlab::ExclusiveLease
      .new("#{self.class.name}:#{class_name}", timeout: MIN_INTERVAL)
  end

  def always_perform?
    Rails.env.test?
  end
end
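To make the effect of the lease concrete, here is a short usage sketch (not part of the change itself; the ExampleMigration class name, arguments and timings are made up for illustration):

# First job: the lease for 'BackgroundMigrationWorker:ExampleMigration' is
# free, so the worker obtains it and runs the migration immediately.
BackgroundMigrationWorker.perform_async('ExampleMigration', [1, 1000])

# A second job of the same class arriving about a minute later fails to obtain
# the lease. perform_and_ttl returns a falsy value together with the lease's
# remaining TTL (~240 seconds), so the job is rescheduled roughly 4 minutes out
# (the remaining lifetime of the lease) instead of a full 5 minutes.
BackgroundMigrationWorker.perform_async('ExampleMigration', [1001, 2000])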
---
title: Run background migrations with a minimum interval
merge_request:
author:
type: changed
@@ -842,6 +842,12 @@ into similar problems in the future (e.g. when new tables are created).
def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
  raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')

  # To not overload the worker too much we enforce a minimum interval both
  # when scheduling and performing jobs.
  if delay_interval < BackgroundMigrationWorker::MIN_INTERVAL
    delay_interval = BackgroundMigrationWorker::MIN_INTERVAL
  end

  model_class.each_batch(of: batch_size) do |relation, index|
    start_id, end_id = relation.pluck('MIN(id), MAX(id)').first
......
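For context, a post-deployment migration would typically schedule its jobs through this helper. The sketch below is illustrative only: the migration class, model, interval and batch size are made up, and the point is that the helper silently raises any requested interval below BackgroundMigrationWorker::MIN_INTERVAL:

class ScheduleExampleBackgroundMigration < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  disable_ddl_transaction!

  def up
    # A 2-minute delay is requested here, but because of the new check the
    # actual spacing between scheduled jobs becomes MIN_INTERVAL (5 minutes).
    queue_background_migration_jobs_by_range_at_intervals(
      User,                 # model to batch over (illustrative)
      'ExampleMigration',   # background migration class name (illustrative)
      2.minutes,
      batch_size: 10_000
    )
  end

  def down
    # no-op
  end
end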
@@ -71,5 +71,16 @@ module Gitlab
        redis.exists(@redis_shared_state_key)
      end
    end

    # Returns the TTL of the Redis key.
    #
    # This method will return `nil` if no TTL could be obtained.
    def ttl
      Gitlab::Redis::SharedState.with do |redis|
        ttl = redis.ttl(@redis_shared_state_key)

        ttl if ttl.positive?
      end
    end
  end
end
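The worker consumes this new method roughly as follows; the key name below is illustrative and the fallback mirrors the worker code shown earlier rather than adding anything new:

lease = Gitlab::ExclusiveLease.new(
  'BackgroundMigrationWorker:ExampleMigration', # illustrative key
  timeout: 5.minutes.to_i
)

if lease.try_obtain
  # We hold the lease: run the migration now.
else
  # Someone else ran this migration class recently. Wait for whatever time is
  # left on the lease, falling back to the full interval if the key expired.
  delay = lease.ttl || 5.minutes.to_i
end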
@@ -1006,12 +1006,12 @@ describe Gitlab::Database::MigrationHelpers do
    context 'with batch_size option' do
      it 'queues jobs correctly' do
        Sidekiq::Testing.fake! do
-         model.queue_background_migration_jobs_by_range_at_intervals(User, 'FooJob', 10.seconds, batch_size: 2)
+         model.queue_background_migration_jobs_by_range_at_intervals(User, 'FooJob', 10.minutes, batch_size: 2)

          expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id2]])
-         expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(10.seconds.from_now.to_f)
+         expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(10.minutes.from_now.to_f)
          expect(BackgroundMigrationWorker.jobs[1]['args']).to eq(['FooJob', [id3, id3]])
-         expect(BackgroundMigrationWorker.jobs[1]['at']).to eq(20.seconds.from_now.to_f)
+         expect(BackgroundMigrationWorker.jobs[1]['at']).to eq(20.minutes.from_now.to_f)
        end
      end
    end
@@ -1019,10 +1019,10 @@ describe Gitlab::Database::MigrationHelpers do
    context 'without batch_size option' do
      it 'queues jobs correctly' do
        Sidekiq::Testing.fake! do
-         model.queue_background_migration_jobs_by_range_at_intervals(User, 'FooJob', 10.seconds)
+         model.queue_background_migration_jobs_by_range_at_intervals(User, 'FooJob', 10.minutes)

          expect(BackgroundMigrationWorker.jobs[0]['args']).to eq(['FooJob', [id1, id3]])
-         expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(10.seconds.from_now.to_f)
+         expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(10.minutes.from_now.to_f)
        end
      end
    end
......
@@ -73,4 +73,19 @@ describe Gitlab::ExclusiveLease, :clean_gitlab_redis_shared_state do
      described_class.new(key, timeout: 3600).try_obtain
    end
  end

  describe '#ttl' do
    it 'returns the TTL of the Redis key' do
      lease = described_class.new('kittens', timeout: 100)

      lease.try_obtain

      expect(lease.ttl <= 100).to eq(true)
    end

    it 'returns nil when the lease does not exist' do
      lease = described_class.new('kittens', timeout: 10)

      expect(lease.ttl).to be_nil
    end
  end
end
@@ -27,11 +27,11 @@ describe NormalizeLdapExternUids, :migration, :sidekiq do
    migrate!

    expect(BackgroundMigrationWorker.jobs[0]['args']).to eq([described_class::MIGRATION, [1, 2]])
-   expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(10.seconds.from_now.to_f)
+   expect(BackgroundMigrationWorker.jobs[0]['at']).to eq(5.minutes.from_now.to_f)
    expect(BackgroundMigrationWorker.jobs[1]['args']).to eq([described_class::MIGRATION, [3, 4]])
-   expect(BackgroundMigrationWorker.jobs[1]['at']).to eq(20.seconds.from_now.to_f)
+   expect(BackgroundMigrationWorker.jobs[1]['at']).to eq(10.minutes.from_now.to_f)
    expect(BackgroundMigrationWorker.jobs[2]['args']).to eq([described_class::MIGRATION, [5, 5]])
-   expect(BackgroundMigrationWorker.jobs[2]['at']).to eq(30.seconds.from_now.to_f)
+   expect(BackgroundMigrationWorker.jobs[2]['at']).to eq(15.minutes.from_now.to_f)
    expect(BackgroundMigrationWorker.jobs.size).to eq 3
  end
end
......
require 'spec_helper'

-describe BackgroundMigrationWorker, :sidekiq do
+describe BackgroundMigrationWorker, :sidekiq, :clean_gitlab_redis_shared_state do
  let(:worker) { described_class.new }

  describe '.perform' do
    it 'performs a background migration' do
      expect(Gitlab::BackgroundMigration)
        .to receive(:perform)
        .with('Foo', [10, 20])

-     described_class.new.perform('Foo', [10, 20])
+     worker.perform('Foo', [10, 20])
    end

    it 'reschedules a migration if it was performed recently' do
      expect(worker)
        .to receive(:always_perform?)
        .and_return(false)

      worker.lease_for('Foo').try_obtain

      expect(Gitlab::BackgroundMigration)
        .not_to receive(:perform)

      expect(described_class)
        .to receive(:perform_in)
        .with(a_kind_of(Numeric), 'Foo', [10, 20])

      worker.perform('Foo', [10, 20])
    end
  end
end