# BigW Consortium Gitlab - batch_worker.rb (2.23 KB)

module RepositoryCheck
  # Sidekiq worker that runs repository checks over a batch of projects, one
  # project at a time, within a time budget of roughly RUN_TIME seconds.
  class BatchWorker
    include Sidekiq::Worker

    # Time budget for a single #perform run, in seconds (one hour).
    RUN_TIME = 3600

    sidekiq_options retry: false

    # Checks candidate projects sequentially until the time budget is used
    # up, repository checks get disabled, or there are no candidates left.
    #
    # The budget is only tested between projects, so a run lasts a little
    # more than one hour at most ('a little more' because `git fsck` may take
    # a few minutes). By default sidekiq-cron will start a new batch worker
    # each hour so that as long as there are repositories to check, only one
    # (or two) will be checked at a time.
    def perform
      started_at = monotonic_now

      project_ids.each do |project_id|
        break if monotonic_now - started_at >= RUN_TIME
        # Re-read the setting on every iteration so that disabling the
        # feature stops an already running batch.
        break unless current_settings.repository_checks_enabled

        # Skip projects whose lease could not be obtained.
        next unless try_obtain_lease(project_id)

        # Direct (inline) call: the check runs inside this job, so projects
        # are processed one after another.
        SingleRepositoryWorker.new.perform(project_id)
      end
    end

    private

    # Current reading of the monotonic clock, in seconds. Used for measuring
    # elapsed time because, unlike Time.now, it is not affected by wall-clock
    # adjustments.
    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end

    # Returns an array of project IDs to check in this run: first projects
    # that were never checked and were created more than 24 hours ago, then
    # projects whose last check is more than one month old (oldest check
    # first). Each of the two groups is capped at 10,000 IDs.
    #
    # We build a plain array of IDs instead of iterating with
    # Project.find_each / Project.find_in_batches because, per the original
    # author's note, those helpers could not be combined with the filtering
    # and ordering needed here (NOTE(review): confirm against the Rails
    # version in use). This is OK because we do it only once an hour, because
    # getting IDs from Postgres is not terribly slow, and because no user has
    # to sit and wait for this query to finish.
    def project_ids
      limit = 10_000

      never_checked_projects = Project
        .where('last_repository_check_at IS NULL AND created_at < ?', 24.hours.ago)
        .limit(limit)
        .pluck(:id)

      old_check_projects = Project
        .where('last_repository_check_at < ?', 1.month.ago)
        .reorder('last_repository_check_at ASC')
        .limit(limit)
        .pluck(:id)

      never_checked_projects + old_check_projects
    end

    # Tries to take the exclusive lease keyed on the given project ID and
    # returns the result of Gitlab::ExclusiveLease#try_obtain; #perform skips
    # the project when that result is falsy.
    def try_obtain_lease(id)
      # Use a 24-hour timeout because on servers/projects where 'git fsck' is
      # super slow we definitely do not want to run it twice in parallel.
      Gitlab::ExclusiveLease.new(
        "project_repository_check:#{id}",
        timeout: 24.hours
      ).try_obtain
    end

    # Returns the application settings, looked up afresh on every call.
    def current_settings
      # No caching of the settings! If we cache them and an admin disables
      # this feature, an active batch worker would keep going for up to
      # 1 hour after the feature was disabled.
      if Rails.env.test?
        Gitlab::CurrentSettings.fake_application_settings
      else
        ApplicationSetting.current
      end
    end
  end
end