BigW Consortium Gitlab

Commit 55ca2da7 by Sean McGivern

Merge branch 'smarter-cache-invalidation' into 'master'

Smarter cache invalidation Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/23550 See merge request !7360
parents d3236af1 289e4336
......@@ -1086,7 +1086,7 @@ class Project < ActiveRecord::Base
"refs/heads/#{branch}",
force: true)
repository.copy_gitattributes(branch)
repository.expire_avatar_cache(branch)
repository.expire_avatar_cache
reload_default_branch
end
......
......@@ -18,7 +18,9 @@ class Tree
def readme
return @readme if defined?(@readme)
available_readmes = blobs.select(&:readme?)
available_readmes = blobs.select do |blob|
Gitlab::FileDetector.type_of(blob.name) == :readme
end
previewable_readmes = available_readmes.select do |blob|
previewable?(blob.name)
......
......@@ -18,7 +18,7 @@ class GitPushService < BaseService
#
def execute
@project.repository.after_create if @project.empty_repo?
@project.repository.after_push_commit(branch_name, params[:newrev])
@project.repository.after_push_commit(branch_name)
if push_remove_branch?
@project.repository.after_remove_branch
......@@ -51,12 +51,32 @@ class GitPushService < BaseService
execute_related_hooks
perform_housekeeping
update_caches
end
def update_gitattributes
@project.repository.copy_gitattributes(params[:ref])
end
def update_caches
if is_default_branch?
paths = Set.new
@push_commits.each do |commit|
commit.raw_diffs(deltas_only: true).each do |diff|
paths << diff.new_path
end
end
types = Gitlab::FileDetector.types_in_paths(paths.to_a)
else
types = []
end
ProjectCacheWorker.perform_async(@project.id, types)
end
protected
def execute_related_hooks
......@@ -70,7 +90,6 @@ class GitPushService < BaseService
@project.execute_hooks(build_push_data.dup, :push_hooks)
@project.execute_services(build_push_data.dup, :push_hooks)
Ci::CreatePipelineService.new(@project, current_user, build_push_data).execute
ProjectCacheWorker.perform_async(@project.id)
if push_remove_branch?
AfterBranchDeleteService
......
# Worker for updating any project specific caches.
#
# This worker runs at most once every 15 minutes per project. This is to ensure
# that multiple instances of jobs for this worker don't hammer the underlying
# storage engine as much.
class ProjectCacheWorker
include Sidekiq::Worker
include DedicatedSidekiqQueue
LEASE_TIMEOUT = 15.minutes.to_i
def self.lease_for(project_id)
Gitlab::ExclusiveLease.
new("project_cache_worker:#{project_id}", timeout: LEASE_TIMEOUT)
end
# project_id - The ID of the project for which to flush the cache.
# refresh - An Array containing extra types of data to refresh such as
# `:readme` to flush the README and `:changelog` to flush the
# CHANGELOG.
def perform(project_id, refresh = [])
project = Project.find_by(id: project_id)
# Overwrite Sidekiq's implementation so we only schedule when actually needed.
def self.perform_async(project_id)
# If a lease for this project is still being held there's no point in
# scheduling a new job.
super unless lease_for(project_id).exists?
end
return unless project && project.repository.exists?
def perform(project_id)
if try_obtain_lease_for(project_id)
Rails.logger.
info("Obtained ProjectCacheWorker lease for project #{project_id}")
else
Rails.logger.
info("Could not obtain ProjectCacheWorker lease for project #{project_id}")
return
end
update_repository_size(project)
project.update_commit_count
update_caches(project_id)
project.repository.refresh_method_caches(refresh.map(&:to_sym))
end
def update_caches(project_id)
project = Project.find(project_id)
def update_repository_size(project)
return unless try_obtain_lease_for(project.id, :update_repository_size)
return unless project.repository.exists?
Rails.logger.info("Updating repository size for project #{project.id}")
project.update_repository_size
project.update_commit_count
if project.repository.root_ref
project.repository.build_cache
end
end
def try_obtain_lease_for(project_id)
self.class.lease_for(project_id).try_obtain
private
def try_obtain_lease_for(project_id, section)
Gitlab::ExclusiveLease.
new("project_cache_worker:#{project_id}:#{section}", timeout: LEASE_TIMEOUT).
try_obtain
end
end
---
title: Rework cache invalidation so only changed data is refreshed
merge_request: 7360
author:
require 'set'
module Gitlab
# Module that can be used to detect if a path points to a special file such as
# a README or a CONTRIBUTING file.
module FileDetector
PATTERNS = {
readme: /\Areadme/i,
changelog: /\A(changelog|history|changes|news)/i,
license: /\A(licen[sc]e|copying)(\..+|\z)/i,
contributing: /\Acontributing/i,
version: 'version',
gitignore: '.gitignore',
koding: '.koding.yml',
gitlab_ci: '.gitlab-ci.yml',
avatar: /\Alogo\.(png|jpg|gif)\z/
}
# Returns an Array of file types based on the given paths.
#
# This method can be used to check if a list of file paths (e.g. of changed
# files) involve any special files such as a README or a LICENSE file.
#
# Example:
#
# types_in_paths(%w{README.md foo/bar.txt}) # => [:readme]
def self.types_in_paths(paths)
types = Set.new
paths.each do |path|
type = type_of(path)
types << type if type
end
types.to_a
end
# Returns the type of a file path, or nil if none could be detected.
#
# Returned types are Symbols such as `:readme`, `:version`, etc.
#
# Example:
#
# type_of('README.md') # => :readme
# type_of('VERSION') # => :version
def self.type_of(path)
name = File.basename(path)
PATTERNS.each do |type, search|
did_match = if search.is_a?(Regexp)
name =~ search
else
name.casecmp(search) == 0
end
return type if did_match
end
nil
end
end
end
require 'spec_helper'
describe Gitlab::FileDetector do
describe '.types_in_paths' do
it 'returns the file types for the given paths' do
expect(described_class.types_in_paths(%w(README.md CHANGELOG VERSION VERSION))).
to eq(%i{readme changelog version})
end
it 'does not include unrecognized file paths' do
expect(described_class.types_in_paths(%w(README.md foo.txt))).
to eq(%i{readme})
end
end
describe '.type_of' do
it 'returns the type of a README file' do
expect(described_class.type_of('README.md')).to eq(:readme)
end
it 'returns the type of a changelog file' do
%w(CHANGELOG HISTORY CHANGES NEWS).each do |file|
expect(described_class.type_of(file)).to eq(:changelog)
end
end
it 'returns the type of a license file' do
%w(LICENSE LICENCE COPYING).each do |file|
expect(described_class.type_of(file)).to eq(:license)
end
end
it 'returns the type of a version file' do
expect(described_class.type_of('VERSION')).to eq(:version)
end
it 'returns the type of a .gitignore file' do
expect(described_class.type_of('.gitignore')).to eq(:gitignore)
end
it 'returns the type of a Koding config file' do
expect(described_class.type_of('.koding.yml')).to eq(:koding)
end
it 'returns the type of a GitLab CI config file' do
expect(described_class.type_of('.gitlab-ci.yml')).to eq(:gitlab_ci)
end
it 'returns the type of an avatar' do
%w(logo.gif logo.png logo.jpg).each do |file|
expect(described_class.type_of(file)).to eq(:avatar)
end
end
it 'returns nil for an unknown file' do
expect(described_class.type_of('foo.txt')).to be_nil
end
end
end
......@@ -1572,7 +1572,7 @@ describe Project, models: true do
end
it 'expires the avatar cache' do
expect(project.repository).to receive(:expire_avatar_cache).with(project.default_branch)
expect(project.repository).to receive(:expire_avatar_cache)
project.change_head(project.default_branch)
end
......
......@@ -14,7 +14,7 @@ describe API::API, api: true do
describe "GET /projects/:id/repository/branches" do
it "returns an array of project branches" do
project.repository.expire_cache
project.repository.expire_all_method_caches
get api("/projects/#{project.id}/repository/branches", user)
expect(response).to have_http_status(200)
......
......@@ -27,27 +27,14 @@ describe GitPushService, services: true do
it { is_expected.to be_truthy }
it 'flushes general cached data' do
expect(project.repository).to receive(:expire_cache).
with('master', newrev)
it 'calls the after_push_commit hook' do
expect(project.repository).to receive(:after_push_commit).with('master')
subject
end
it 'flushes the visible content cache' do
expect(project.repository).to receive(:expire_has_visible_content_cache)
subject
end
it 'flushes the branches cache' do
expect(project.repository).to receive(:expire_branches_cache)
subject
end
it 'flushes the branch count cache' do
expect(project.repository).to receive(:expire_branch_count_cache)
it 'calls the after_create_branch hook' do
expect(project.repository).to receive(:after_create_branch)
subject
end
......@@ -56,21 +43,8 @@ describe GitPushService, services: true do
context 'existing branch' do
it { is_expected.to be_truthy }
it 'flushes general cached data' do
expect(project.repository).to receive(:expire_cache).
with('master', newrev)
subject
end
it 'does not flush the branches cache' do
expect(project.repository).not_to receive(:expire_branches_cache)
subject
end
it 'does not flush the branch count cache' do
expect(project.repository).not_to receive(:expire_branch_count_cache)
it 'calls the after_push_commit hook' do
expect(project.repository).to receive(:after_push_commit).with('master')
subject
end
......@@ -81,27 +55,14 @@ describe GitPushService, services: true do
it { is_expected.to be_truthy }
it 'flushes the visible content cache' do
expect(project.repository).to receive(:expire_has_visible_content_cache)
subject
end
it 'flushes the branches cache' do
expect(project.repository).to receive(:expire_branches_cache)
subject
end
it 'flushes the branch count cache' do
expect(project.repository).to receive(:expire_branch_count_cache)
it 'calls the after_push_commit hook' do
expect(project.repository).to receive(:after_push_commit).with('master')
subject
end
it 'flushes general cached data' do
expect(project.repository).to receive(:expire_cache).
with('master', newrev)
it 'calls the after_remove_branch hook' do
expect(project.repository).to receive(:after_remove_branch)
subject
end
......@@ -598,6 +559,51 @@ describe GitPushService, services: true do
end
end
describe '#update_caches' do
let(:service) do
described_class.new(project,
user,
oldrev: sample_commit.parent_id,
newrev: sample_commit.id,
ref: 'refs/heads/master')
end
context 'on the default branch' do
before do
allow(service).to receive(:is_default_branch?).and_return(true)
end
it 'flushes the caches of any special files that have been changed' do
commit = double(:commit)
diff = double(:diff, new_path: 'README.md')
expect(commit).to receive(:raw_diffs).with(deltas_only: true).
and_return([diff])
service.push_commits = [commit]
expect(ProjectCacheWorker).to receive(:perform_async).
with(project.id, %i(readme))
service.update_caches
end
end
context 'on a non-default branch' do
before do
allow(service).to receive(:is_default_branch?).and_return(false)
end
it 'does not flush any conditional caches' do
expect(ProjectCacheWorker).to receive(:perform_async).
with(project.id, []).
and_call_original
service.update_caches
end
end
end
def execute_service(project, user, oldrev, newrev, ref)
service = described_class.new(project, user, oldrev: oldrev, newrev: newrev, ref: ref )
service.execute
......
......@@ -18,7 +18,7 @@ describe GitTagPushService, services: true do
end
it 'flushes general cached data' do
expect(project.repository).to receive(:expire_cache)
expect(project.repository).to receive(:before_push_tag)
subject
end
......@@ -28,12 +28,6 @@ describe GitTagPushService, services: true do
subject
end
it 'flushes the tag count cache' do
expect(project.repository).to receive(:expire_tag_count_cache)
subject
end
end
describe "Git Tag Push Data" do
......
......@@ -2,62 +2,78 @@ require 'spec_helper'
describe ProjectCacheWorker do
let(:project) { create(:project) }
let(:worker) { described_class.new }
subject { described_class.new }
describe '.perform_async' do
it 'schedules the job when no lease exists' do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:exists?).
and_return(false)
describe '#perform' do
before do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain).
and_return(true)
end
expect_any_instance_of(described_class).to receive(:perform)
context 'with a non-existing project' do
it 'does nothing' do
expect(worker).not_to receive(:update_repository_size)
described_class.perform_async(project.id)
worker.perform(-1)
end
end
it 'does not schedule the job when a lease exists' do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:exists?).
and_return(true)
context 'with an existing project without a repository' do
it 'does nothing' do
allow_any_instance_of(Repository).to receive(:exists?).and_return(false)
expect_any_instance_of(described_class).not_to receive(:perform)
expect(worker).not_to receive(:update_repository_size)
described_class.perform_async(project.id)
worker.perform(project.id)
end
end
end
describe '#perform' do
context 'when an exclusive lease can be obtained' do
before do
allow(subject).to receive(:try_obtain_lease_for).with(project.id).
and_return(true)
end
context 'with an existing project' do
it 'updates the repository size' do
expect(worker).to receive(:update_repository_size).and_call_original
it 'updates project cache data' do
expect_any_instance_of(Repository).to receive(:size)
expect_any_instance_of(Repository).to receive(:commit_count)
worker.perform(project.id)
end
expect_any_instance_of(Project).to receive(:update_repository_size)
expect_any_instance_of(Project).to receive(:update_commit_count)
it 'updates the commit count' do
expect_any_instance_of(Project).to receive(:update_commit_count).
and_call_original
subject.perform(project.id)
worker.perform(project.id)
end
it 'handles missing repository data' do
expect_any_instance_of(Repository).to receive(:exists?).and_return(false)
expect_any_instance_of(Repository).not_to receive(:size)
it 'refreshes the method caches' do
expect_any_instance_of(Repository).to receive(:refresh_method_caches).
with(%i(readme)).
and_call_original
subject.perform(project.id)
worker.perform(project.id, %i(readme))
end
end
end
context 'when an exclusive lease can not be obtained' do
it 'does nothing' do
allow(subject).to receive(:try_obtain_lease_for).with(project.id).
describe '#update_repository_size' do
context 'when a lease could not be obtained' do
it 'does not update the repository size' do
allow(worker).to receive(:try_obtain_lease_for).
with(project.id, :update_repository_size).
and_return(false)
expect(subject).not_to receive(:update_caches)
expect(project).not_to receive(:update_repository_size)
worker.update_repository_size(project)
end
end
context 'when a lease could be obtained' do
it 'updates the repository size' do
allow(worker).to receive(:try_obtain_lease_for).
with(project.id, :update_repository_size).
and_return(true)
expect(project).to receive(:update_repository_size).and_call_original
subject.perform(project.id)
worker.update_repository_size(project)
end
end
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment