BigW Consortium Gitlab

Commit 49df1f27 by Robert Speicher

Merge branch 'reuse-queries-in-reference-parsers' into 'master'

Re-use queries in reference parsers This MR changes the reference parsing pipeline to cache queried objects and re-use them on subsequent runs. Data is cached in the `RequestStore` module so it's automatically flushed after a request. Certain references are processed multiple times. For example, for every system note we check if it's a cross reference by getting the issues, MRs and commits it references. When redacting data we may end up querying these very same objects. By caching this we can quite drastically reduce timings and SQL query counts. #15607 See merge request !5020
parents 92772f85 9ac4c556
...@@ -20,6 +20,7 @@ v 8.10.0 (unreleased) ...@@ -20,6 +20,7 @@ v 8.10.0 (unreleased)
- Add Spring EmojiOne updates. - Add Spring EmojiOne updates.
- Fix viewing notification settings when a project is pending deletion - Fix viewing notification settings when a project is pending deletion
- Fix pagination when sorting by columns with lots of ties (like priority) - Fix pagination when sorting by columns with lots of ties (like priority)
- The Markdown reference parsers now re-use query results to prevent running the same queries multiple times !5020
- Updated project header design - Updated project header design
- Exclude email check from the standard health check - Exclude email check from the standard health check
- Updated layout for Projects, Groups, Users on Admin area !4424 - Updated layout for Projects, Groups, Users on Admin area !4424
......
...@@ -133,8 +133,9 @@ module Banzai ...@@ -133,8 +133,9 @@ module Banzai
return {} if nodes.empty? return {} if nodes.empty?
ids = unique_attribute_values(nodes, attribute) ids = unique_attribute_values(nodes, attribute)
rows = collection_objects_for_ids(collection, ids)
collection.where(id: ids).each_with_object({}) do |row, hash| rows.each_with_object({}) do |row, hash|
hash[row.id] = row hash[row.id] = row
end end
end end
...@@ -153,6 +154,31 @@ module Banzai ...@@ -153,6 +154,31 @@ module Banzai
values.to_a values.to_a
end end
# Queries the collection for the objects with the given IDs.
#
# If the RequestStore module is enabled this method will only query any
# objects that have not yet been queried. For objects that have already
# been queried the object is returned from the cache.
def collection_objects_for_ids(collection, ids)
if RequestStore.active?
cache = collection_cache[collection_cache_key(collection)]
to_query = ids.map(&:to_i) - cache.keys
unless to_query.empty?
collection.where(id: to_query).each { |row| cache[row.id] = row }
end
cache.values
else
collection.where(id: ids)
end
end
# Returns the cache key to use for a collection.
def collection_cache_key(collection)
collection.respond_to?(:model) ? collection.model : collection
end
# Processes the list of HTML documents and returns an Array containing all # Processes the list of HTML documents and returns an Array containing all
# the references. # the references.
def process(documents) def process(documents)
...@@ -189,7 +215,7 @@ module Banzai ...@@ -189,7 +215,7 @@ module Banzai
end end
def find_projects_for_hash_keys(hash) def find_projects_for_hash_keys(hash)
Project.where(id: hash.keys) collection_objects_for_ids(Project, hash.keys)
end end
private private
...@@ -199,6 +225,12 @@ module Banzai ...@@ -199,6 +225,12 @@ module Banzai
def lazy(&block) def lazy(&block)
Gitlab::Lazy.new(&block) Gitlab::Lazy.new(&block)
end end
def collection_cache
RequestStore[:banzai_collection_cache] ||= Hash.new do |hash, key|
hash[key] = {}
end
end
end end
end end
end end
...@@ -73,7 +73,7 @@ module Banzai ...@@ -73,7 +73,7 @@ module Banzai
def find_users(ids) def find_users(ids)
return [] if ids.empty? return [] if ids.empty?
User.where(id: ids).to_a collection_objects_for_ids(User, ids)
end end
def find_users_for_groups(ids) def find_users_for_groups(ids)
...@@ -85,7 +85,8 @@ module Banzai ...@@ -85,7 +85,8 @@ module Banzai
def find_users_for_projects(ids) def find_users_for_projects(ids)
return [] if ids.empty? return [] if ids.empty?
Project.where(id: ids).flat_map { |p| p.team.members.to_a } collection_objects_for_ids(Project, ids).
flat_map { |p| p.team.members.to_a }
end end
end end
end end
......
...@@ -234,4 +234,79 @@ describe Banzai::ReferenceParser::BaseParser, lib: true do ...@@ -234,4 +234,79 @@ describe Banzai::ReferenceParser::BaseParser, lib: true do
to eq([project]) to eq([project])
end end
end end
describe '#collection_objects_for_ids' do
context 'with RequestStore disabled' do
it 'queries the collection directly' do
collection = User.all
expect(collection).to receive(:where).twice.and_call_original
2.times do
expect(subject.collection_objects_for_ids(collection, [user.id])).
to eq([user])
end
end
end
context 'with RequestStore enabled' do
before do
cache = Hash.new { |hash, key| hash[key] = {} }
allow(RequestStore).to receive(:active?).and_return(true)
allow(subject).to receive(:collection_cache).and_return(cache)
end
it 'queries the collection on the first call' do
expect(subject.collection_objects_for_ids(User, [user.id])).
to eq([user])
end
it 'does not query previously queried objects' do
collection = User.all
expect(collection).to receive(:where).once.and_call_original
2.times do
expect(subject.collection_objects_for_ids(collection, [user.id])).
to eq([user])
end
end
it 'casts String based IDs to Fixnums before querying objects' do
2.times do
expect(subject.collection_objects_for_ids(User, [user.id.to_s])).
to eq([user])
end
end
it 'queries any additional objects after the first call' do
other_user = create(:user)
expect(subject.collection_objects_for_ids(User, [user.id])).
to eq([user])
expect(subject.collection_objects_for_ids(User, [user.id, other_user.id])).
to eq([user, other_user])
end
it 'caches objects on a per collection class basis' do
expect(subject.collection_objects_for_ids(User, [user.id])).
to eq([user])
expect(subject.collection_objects_for_ids(Project, [project.id])).
to eq([project])
end
end
end
describe '#collection_cache_key' do
it 'returns the cache key for a Class' do
expect(subject.collection_cache_key(Project)).to eq(Project)
end
it 'returns the cache key for an ActiveRecord::Relation' do
expect(subject.collection_cache_key(Project.all)).to eq(Project)
end
end
end end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment