Refactor getting user groups/projects/contributions

This new setup no longer loads any IDs into memory using "pluck", instead using SQL UNIONs to merge the various datasets together. This results in greatly improved query performance as well as a reduction of memory usage. The old setup was in particular problematic when requesting the authorized projects _including_ public/internal projects as this would result in roughly 65000 project IDs being loaded into memory. These IDs would in turn be passed to other queries.

Refactor getting user groups/projects/contributions
5fcd9986 · Yorick Peterse · bfd9855a · 5fcd9986 · 5fcd9986
Commit 5fcd9986 authored Nov 16, 2015 by Yorick Peterse
Hide whitespace changes
Inline Side-by-side

Showing with 96 additions and 20 deletions

user.rb app/models/user.rb +48 -16

user_spec.rb spec/models/user_spec.rb +48 -4

No files found.
--- a/app/models/user.rb
+++ b/app/models/user.rb
@@ -389,21 +389,40 @@ class User < ActiveRecord::Base
    end
  end

-  # Groups user has access to
-  def authorized_groups
-    @authorized_groups ||=
-      begin
-        union = Gitlab::SQL::Union.
-          new([groups.select(:id), authorized_projects.select(:namespace_id)])
+  # Returns the groups a user has access to, optionally including any public
+  # groups.
+  #
+  # public_internal - When set to "true" all public groups and groups of public
+  #                   projects are also included.
+  #
+  # Returns an ActiveRecord::Relation
+  def authorized_groups(public_internal = false)
+    union = Gitlab::SQL::Union.
+      new([groups.select(:id), authorized_projects(public_internal).
+           select(:namespace_id)])

-        Group.where("namespaces.id IN (#{union.to_sql})")
-      end
+    sql = "namespaces.id IN (#{union.to_sql})"
+
+    if public_internal
+      sql << ' OR public IS TRUE'
+    end
+
+    Group.where(sql)
  end

-  # Projects user has access to
-  def authorized_projects
-    @authorized_projects ||=
-      Project.where("projects.id IN (#{projects_union.to_sql})")
+  # Returns the groups a user is authorized to access.
+  #
+  # public_internal - When set to "true" all public/internal projects will also
+  #                   be included.
+  def authorized_projects(public_internal = false)
+    base = "projects.id IN (#{projects_union.to_sql})"
+
+    if public_internal
+      Project.where("#{base} OR projects.visibility_level IN (?)",
+                    Project.public_and_internal_levels)
+    else
+      Project.where(base)
+    end
  end

  def owned_projects
@@ -726,12 +745,25 @@ class User < ActiveRecord::Base
    Doorkeeper::AccessToken.where(resource_owner_id: self.id, revoked_at: nil)
  end

-  def contributed_projects_ids
-    Event.contributions.where(author_id: self).
+  # Returns the projects a user contributed to in the last year.
+  #
+  # This method relies on a subquery as this performs significantly better
+  # compared to a JOIN when coupled with, for example,
+  # `Project.visible_to_user`. That is, consider the following code:
+  #
+  #     some_user.contributed_projects.visible_to_user(other_user)
+  #
+  # If this method were to use a JOIN the resulting query would take roughly 200
+  # ms on a database with a similar size to gitlab.com's database. On the other
+  # hand, using a subquery means we can get the exact same data in about 40 ms.
+  def contributed_projects
+    events = Event.select(:project_id).
+      contributions.where(author_id: self).
      where("created_at > ?", Time.now - 1.year).
-      reorder(project_id: :desc).
      uniq.
-      pluck(:project_id)
+      reorder(nil)
+
+    Project.where(id: events)
  end

  def restricted_signup_domains

--- a/spec/models/user_spec.rb
+++ b/spec/models/user_spec.rb
@@ -686,7 +686,7 @@ describe User do
    end
  end

-  describe "#contributed_projects_ids" do
+  describe "#contributed_projects" do
    subject { create(:user) }
    let!(:project1) { create(:project) }
    let!(:project2) { create(:project, forked_from_project: project3) }
@@ -701,15 +701,15 @@ describe User do
    end

    it "includes IDs for projects the user has pushed to" do
-      expect(subject.contributed_projects_ids).to include(project1.id)
+      expect(subject.contributed_projects).to include(project1)
    end

    it "includes IDs for projects the user has had merge requests merged into" do
-      expect(subject.contributed_projects_ids).to include(project3.id)
+      expect(subject.contributed_projects).to include(project3)
    end

    it "doesn't include IDs for unrelated projects" do
-      expect(subject.contributed_projects_ids).not_to include(project2.id)
+      expect(subject.contributed_projects).not_to include(project2)
    end
  end

@@ -758,4 +758,48 @@ describe User do
      expect(subject.recent_push).to eq(nil)
    end
  end
+
+  describe '#authorized_groups' do
+    let!(:user) { create(:user) }
+    let!(:private_group) { create(:group) }
+    let!(:public_group) { create(:group, public: true) }
+
+    before do
+      private_group.add_user(user, Gitlab::Access::MASTER)
+    end
+
+    describe 'excluding public groups' do
+      subject { user.authorized_groups }
+
+      it { is_expected.to eq([private_group]) }
+    end
+
+    describe 'including public groups' do
+      subject { user.authorized_groups(true) }
+
+      it { is_expected.to eq([public_group, private_group]) }
+    end
+  end
+
+  describe '#authorized_projects' do
+    let!(:user) { create(:user) }
+    let!(:private_project) { create(:project, :private) }
+    let!(:public_project) { create(:project, :public) }
+
+    before do
+      private_project.team << [user, Gitlab::Access::MASTER]
+    end
+
+    describe 'excluding public projects' do
+      subject { user.authorized_projects }
+
+      it { is_expected.to eq([private_project]) }
+    end
+
+    describe 'including public projects' do
+      subject { user.authorized_projects(true) }
+
+      it { is_expected.to eq([public_project, private_project]) }
+    end
+  end
 end