1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
module RepositoryCheck
class BatchWorker
include Sidekiq::Worker
RUN_TIME = 3600
sidekiq_options retry: false
def perform
start = Time.now
# This loop will break after a little more than one hour ('a little
# more' because `git fsck` may take a few minutes), or if it runs out of
# projects to check. By default sidekiq-cron will start a new
# RepositoryCheckWorker each hour so that as long as there are repositories to
# check, only one (or two) will be checked at a time.
project_ids.each do |project_id|
break if Time.now - start >= RUN_TIME
break unless current_settings.repository_checks_enabled
next unless try_obtain_lease(project_id)
SingleRepositoryWorker.new.perform(project_id)
end
end
private
# Project.find_each does not support WHERE clauses and
# Project.find_in_batches does not support ordering. So we just build an
# array of ID's. This is OK because we do it only once an hour, because
# getting ID's from Postgres is not terribly slow, and because no user
# has to sit and wait for this query to finish.
def project_ids
limit = 10_000
never_checked_projects = Project.where('last_repository_check_at IS NULL AND created_at < ?', 24.hours.ago).
limit(limit).pluck(:id)
old_check_projects = Project.where('last_repository_check_at < ?', 1.month.ago).
reorder('last_repository_check_at ASC').limit(limit).pluck(:id)
never_checked_projects + old_check_projects
end
def try_obtain_lease(id)
# Use a 24-hour timeout because on servers/projects where 'git fsck' is
# super slow we definitely do not want to run it twice in parallel.
Gitlab::ExclusiveLease.new(
"project_repository_check:#{id}",
timeout: 24.hours
).try_obtain
end
def current_settings
# No caching of the settings! If we cache them and an admin disables
# this feature, an active RepositoryCheckWorker would keep going for up
# to 1 hour after the feature was disabled.
if Rails.env.test?
Gitlab::CurrentSettings.fake_application_settings
else
ApplicationSetting.current
end
end
end
end