BigW Consortium Gitlab

import.rb 13.5 KB
Newer Older
1
require_relative 'error'
2 3
module Github
  class Import
4
    include Gitlab::ShellAdapter
5

6 7 8 9 10 11 12 13 14
    # Import-local variant of ::MergeRequest: bound to the same
    # 'merge_requests' table, with the save, commit, update and validate
    # callback chains cleared.
    class MergeRequest < ::MergeRequest
      self.table_name = 'merge_requests'

      %i[save commit update validate].each do |callback_kind|
        reset_callbacks callback_kind
      end
    end

15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
    # Import-local variant of ::Issue: bound to the same 'issues' table,
    # with the save, commit, update and validate callback chains cleared.
    class Issue < ::Issue
      self.table_name = 'issues'

      %i[save commit update validate].each do |callback_kind|
        reset_callbacks callback_kind
      end
    end

    # Import-local variant of ::Note: bound to the same 'notes' table,
    # with the save, commit, update and validate callback chains cleared.
    class Note < ::Note
      self.table_name = 'notes'

      %i[save commit update validate].each do |callback_kind|
        reset_callbacks callback_kind
      end
    end

33 34 35 36 37 38 39 40
    # Import-local variant of ::LegacyDiffNote, bound to the 'notes' table.
    # Only the commit, update and validate callback chains are cleared here;
    # unlike the sibling import classes, the :save chain is left in place.
    class LegacyDiffNote < ::LegacyDiffNote
      self.table_name = 'notes'

      %i[commit update validate].each do |callback_kind|
        reset_callbacks callback_kind
      end
    end

41
    # Read-only accessors for the state assigned in #initialize.
    attr_reader :project, :repository, :repo, :options, :errors, :cached, :verbose
42

43
    # Prepares an import run for +project+.
    #
    # project - the project being imported into; its repository and
    #           import_source are captured here. import_source is later
    #           interpolated into the GitHub API paths and clone URLs.
    # options - Hash of import options. :verbose (default false) turns on
    #           progress output in #execute.
    def initialize(project, options)
      @project    = project
      @repository = project.repository
      @repo       = project.import_source
      @options    = options
      @verbose    = options.fetch(:verbose, false)
      # Two-level cache: every top-level key lazily receives its own Hash.
      @cached     = Hash.new { |hash, key| hash[key] = {} }
      # Collected as { type:, url:, error: } hashes; see #error.
      @errors     = []
    end

53
    # rubocop: disable Rails/Output

    # Runs every import step in order, printing a progress line before each
    # step when +verbose+ is set.
    #
    # Returns true when all steps ran, or false when a step raised
    # Github::RepositoryFetchError. Other exceptions propagate. In every case
    # #keep_track_of_errors runs afterwards.
    def execute
      announce = ->(message) { puts message.color(:aqua) if verbose }

      announce.call('Fetching repository...')
      fetch_repository
      announce.call('Fetching labels...')
      fetch_labels
      announce.call('Fetching milestones...')
      fetch_milestones
      announce.call('Fetching pull requests...')
      fetch_pull_requests
      announce.call('Fetching issues...')
      fetch_issues
      announce.call('Cloning wiki repository...')
      fetch_wiki_repository
      announce.call('Expiring repository cache...')
      expire_repository_cache

      true
    rescue Github::RepositoryFetchError
      false
    ensure
      keep_track_of_errors
    end
    # rubocop: enable Rails/Output

    private

    # Creates the project repository when it does not exist yet, registers
    # GitHub as a mirror remote named 'github' and force-fetches it.
    #
    # Raises Github::RepositoryFetchError, after recording the failure via
    # #error, when Gitlab::Shell::Error is raised by any of these steps.
    def fetch_repository
      project.create_repository unless project.repository.exists?

      # The token has to be interpolated with "#{...}"; a bare "{...}" is
      # passed to git as literal text and the fetch is unauthenticated.
      remote_url = "https://#{options.fetch(:token)}@github.com/#{repo}.git"

      project.repository.add_remote('github', remote_url)
      project.repository.set_remote_as_mirror('github')
      project.repository.fetch_remote('github', forced: true)
    rescue Gitlab::Shell::Error => e
      # Report the credential-free URL so the token is not stored with the error.
      error(:project, "https://github.com/#{repo}.git", e.message)
      raise Github::RepositoryFetchError
    end
90

91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
    # Imports the GitHub wiki repository through gitlab_shell unless the
    # project wiki repository already exists.
    #
    # A Gitlab::Shell::Error is recorded via #error, except for the
    # "repository not exported" case described below, which is ignored.
    def fetch_wiki_repository
      # The token has to be interpolated with "#{...}"; a bare "{...}" is
      # passed on as literal text.
      wiki_url  = "https://#{options.fetch(:token)}@github.com/#{repo}.wiki.git"
      wiki_path = "#{project.path_with_namespace}.wiki"

      return if project.wiki.repository_exists?

      gitlab_shell.import_repository(project.repository_storage_path, wiki_path, wiki_url)
    rescue Gitlab::Shell::Error => e
      # GitHub error message when the wiki repo has not been created,
      # this means that repo has wiki enabled, but have no pages. So,
      # we can skip the import.
      #
      # #error (not the `errors` reader) records the entry and sanitizes the URL.
      error(:wiki, wiki_url, e.message) if e.message !~ /repository not exported/
    end

107 108
    # Walks the paginated GitHub labels endpoint, finds or creates a project
    # label for each entry and remembers title => id in cached[:label_ids]
    # (read later by #label_ids).
    #
    # A failure on a single label is recorded via #error and the loop moves on.
    def fetch_labels
      url = "/repos/#{repo}/labels"

      while url
        response = Github::Client.new(options).get(url)

        response.body.each do |raw|
          begin
            representation = Github::Representation::Label.new(raw)

            # The block only runs for newly created labels.
            label = project.labels.find_or_create_by!(title: representation.title) do |new_label|
              new_label.color = representation.color
            end

            cached[:label_ids][label.title] = label.id
          rescue => e
            error(:label, representation.url, e.message)
          end
        end

        # Follow pagination until there is no :next link.
        url = response.rels[:next]
      end
    end
130

131 132
    # Walks the paginated GitHub milestones endpoint (all states) and creates
    # a project milestone for every entry whose iid is not present yet.
    #
    # A failure on a single milestone is recorded via #error and the loop
    # moves on.
    def fetch_milestones
      url = "/repos/#{repo}/milestones"

      while url
        response = Github::Client.new(options).get(url, state: :all)

        response.body.each do |raw|
          begin
            milestone = Github::Representation::Milestone.new(raw)
            # Skip milestones that already exist in the project.
            next if project.milestones.where(iid: milestone.iid).exists?

            project.milestones.create!(
              iid: milestone.iid,
              title: milestone.title,
              description: milestone.description,
              due_date: milestone.due_date,
              state: milestone.state,
              created_at: milestone.created_at,
              updated_at: milestone.updated_at
            )
          rescue => e
            error(:milestone, milestone.url, e.message)
          end
        end

        # Follow pagination until there is no :next link.
        url = response.rels[:next]
      end
    end
159

160 161
    # Walks the paginated GitHub pulls endpoint (all states, oldest first) and
    # creates one merge request, with its review comments and regular comments,
    # per pull request not imported yet.
    #
    # Branches missing locally are restored before the merge request is saved
    # and cleaned up again afterwards (see #clean_up_restored_branches). A
    # failure on a single pull request is recorded via #error.
    def fetch_pull_requests
      url = "/repos/#{repo}/pulls"

      while url
        response = Github::Client.new(options).get(url, state: :all, sort: :created, direction: :asc)

        response.body.each do |raw|
          pull_request  = Github::Representation::PullRequest.new(raw, options.merge(project: project))
          # NOTE(review): this lookup keys on source_project_id while
          # #fetch_issues looks merge requests up by target_project_id - confirm
          # that the difference is intended.
          merge_request = MergeRequest.find_or_initialize_by(iid: pull_request.iid, source_project_id: project.id)
          next unless merge_request.new_record? && pull_request.valid?

          begin
            restore_branches(pull_request)

            # user_id has to run first: it fills cached[:gitlab_user_ids],
            # which format_description reads for the same author.
            author_id   = user_id(pull_request.author, project.creator_id)
            description = format_description(pull_request.description, pull_request.author)

            merge_request.attributes = {
              iid: pull_request.iid,
              title: pull_request.title,
              description: description,
              source_project: pull_request.source_project,
              source_branch: pull_request.source_branch_name,
              source_branch_sha: pull_request.source_branch_sha,
              target_project: pull_request.target_project,
              target_branch: pull_request.target_branch_name,
              target_branch_sha: pull_request.target_branch_sha,
              state: pull_request.state,
              milestone_id: milestone_id(pull_request.milestone),
              author_id: author_id,
              assignee_id: user_id(pull_request.assignee),
              created_at: pull_request.created_at,
              updated_at: pull_request.updated_at
            }

            merge_request.save!(validate: false)
            merge_request.merge_request_diffs.create

            # Fetch review comments
            review_comments_url = "/repos/#{repo}/pulls/#{pull_request.iid}/comments"
            fetch_comments(merge_request, :review_comment, review_comments_url, LegacyDiffNote)

            # Fetch comments
            comments_url = "/repos/#{repo}/issues/#{pull_request.iid}/comments"
            fetch_comments(merge_request, :comment, comments_url)
          rescue => e
            error(:pull_request, pull_request.url, e.message)
          ensure
            clean_up_restored_branches(pull_request)
          end
        end

        # Follow pagination until there is no :next link.
        url = response.rels[:next]
      end
    end
215

216 217
    # Walks the paginated GitHub issues endpoint (all states, oldest first).
    # Entries that are pull requests only contribute their labels to the
    # already imported merge request; every other entry becomes a new Issue
    # (plus its comments) unless one with the same iid already exists.
    #
    # A failure on a single entry is recorded via #error.
    def fetch_issues
      url = "/repos/#{repo}/issues"

      while url
        response = Github::Client.new(options).get(url, state: :all, sort: :created, direction: :asc)

        response.body.each do |raw|
          representation = Github::Representation::Issue.new(raw, options)

          begin
            # Every pull request is an issue, but not every issue
            # is a pull request. For this reason, "shared" actions
            # for both features, like manipulating assignees, labels
            # and milestones, are provided within the Issues API.
            if representation.pull_request?
              next unless representation.has_labels?

              merge_request = MergeRequest.find_by!(target_project_id: project.id, iid: representation.iid)
              merge_request.update_attribute(:label_ids, label_ids(representation.labels))
            else
              next if Issue.where(iid: representation.iid, project_id: project.id).exists?

              # user_id has to run before format_description: it fills
              # cached[:gitlab_user_ids], which format_description reads.
              author_id          = user_id(representation.author, project.creator_id)
              issue              = Issue.new
              issue.iid          = representation.iid
              issue.project_id   = project.id
              issue.title        = representation.title
              issue.description  = format_description(representation.description, representation.author)
              issue.state        = representation.state
              issue.label_ids    = label_ids(representation.labels)
              issue.milestone_id = milestone_id(representation.milestone)
              issue.author_id    = author_id
              issue.assignee_id  = user_id(representation.assignee)
              issue.created_at   = representation.created_at
              issue.updated_at   = representation.updated_at
              issue.save!(validate: false)

              # Fetch comments
              if representation.has_comments?
                comments_url = "/repos/#{repo}/issues/#{issue.iid}/comments"
                fetch_comments(issue, :comment, comments_url)
              end
            end
          rescue => e
            error(:issue, representation.url, e.message)
          end
        end

        # Follow pagination until there is no :next link.
        url = response.rels[:next]
      end
    end

268
    # Walks a paginated GitHub comments endpoint and stores each comment as a
    # note on +noteable+.
    #
    # noteable - record the notes are attached to (an imported issue or
    #            merge request in this file).
    # type     - Symbol used as the error type when a comment fails.
    # url      - first page of the comments endpoint.
    # klass    - note class to instantiate (Note by default; LegacyDiffNote
    #            is passed for review comments).
    #
    # Each page is saved inside ActiveRecord::Base.no_touching. A failure on a
    # single comment is recorded via #error.
    def fetch_comments(noteable, type, url, klass = Note)
      while url
        comments = Github::Client.new(options).get(url)

        ActiveRecord::Base.no_touching do
          comments.body.each do |raw|
            begin
              representation  = Github::Representation::Comment.new(raw, options)
              # user_id has to run before format_description: it fills
              # cached[:gitlab_user_ids], which format_description reads.
              author_id       = user_id(representation.author, project.creator_id)

              note            = klass.new
              note.project_id = project.id
              note.noteable   = noteable
              note.note       = format_description(representation.note, representation.author)
              note.commit_id  = representation.commit_id
              note.line_code  = representation.line_code
              note.author_id  = author_id
              note.created_at = representation.created_at
              note.updated_at = representation.updated_at
              note.save!(validate: false)
            rescue => e
              error(type, representation.url, e.message)
            end
          end
        end

        # Follow pagination until there is no :next link.
        url = comments.rels[:next]
      end
    end

298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
    # Walks the paginated GitHub releases endpoint and stores a ::Release for
    # every valid entry whose (project, tag) pair is not persisted yet.
    #
    # A failure while saving a single release is recorded via #error.
    def fetch_releases
      url = "/repos/#{repo}/releases"

      while url
        response = Github::Client.new(options).get(url)

        response.body.each do |raw|
          representation = Github::Representation::Release.new(raw)
          next unless representation.valid?

          release = ::Release.find_or_initialize_by(project_id: project.id, tag: representation.tag)
          # Releases that already exist are left untouched.
          next unless release.new_record?

          begin
            release.description = representation.description
            release.created_at  = representation.created_at
            release.updated_at  = representation.updated_at
            release.save!(validate: false)
          rescue => e
            error(:release, representation.url, e.message)
          end
        end

        # Follow pagination until there is no :next link.
        url = response.rels[:next]
      end
    end

325 326 327 328 329
    # Recreates whichever of the pull request's source and target branches
    # the representation reports as missing. The source is handled first.
    def restore_branches(pull_request)
      source_missing = !pull_request.source_branch_exists?
      restore_source_branch(pull_request) if source_missing

      return if pull_request.target_branch_exists?

      restore_target_branch(pull_request)
    end

330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350
    # Creates the pull request's source branch at its recorded SHA.
    def restore_source_branch(pull_request)
      branch_name = pull_request.source_branch_name
      branch_sha  = pull_request.source_branch_sha

      repository.create_branch(branch_name, branch_sha)
    end

    # Creates the pull request's target branch at its recorded SHA.
    def restore_target_branch(pull_request)
      branch_name = pull_request.target_branch_name
      branch_sha  = pull_request.target_branch_sha

      repository.create_branch(branch_name, branch_sha)
    end

    # Deletes branch +name+ from the repository. A Rugged::ReferenceError is
    # not re-raised; it is appended to +errors+ as a :branch entry with no URL.
    def remove_branch(name)
      begin
        repository.delete_branch(name)
      rescue Rugged::ReferenceError
        failure = { type: :branch, url: nil, error: "Could not clean up restored branch: #{name}" }
        errors << failure
      end
    end

    # For pull requests that are not open, removes the source and target
    # branches that the representation reports as not existing. Open pull
    # requests are left alone.
    def clean_up_restored_branches(pull_request)
      return if pull_request.opened?

      source_gone = !pull_request.source_branch_exists?
      remove_branch(pull_request.source_branch_name) if source_gone

      return if pull_request.target_branch_exists?

      remove_branch(pull_request.target_branch_name)
    end

351 352
    # Maps raw GitHub label hashes to cached label ids.
    #
    # labels - Enumerable of Hashes with a 'name' key (KeyError if absent).
    #
    # Returns an Array of the ids stored in cached[:label_ids]; names with no
    # cached id are dropped.
    def label_ids(labels)
      labels.map { |attrs| cached[:label_ids][attrs.fetch('name')] }.compact
    end

355 356 357 358 359 360 361 362
    # Looks up the id of the project milestone whose iid matches +milestone+.
    #
    # Returns nil when +milestone+ is not present or no such milestone exists.
    def milestone_id(milestone)
      if milestone.present?
        match = project.milestones.select(:id).find_by(iid: milestone.iid)
        match&.id
      end
    end

    # Resolves a GitHub user to a GitLab user id, memoizing the answer.
    #
    # user        - user representation responding to #id and #email; nil or
    #               blank yields nil.
    # fallback_id - id returned (and cached) when no GitLab user matches.
    #
    # The lookup tries the external GitHub uid first and the email second.
    # cached[:gitlab_user_ids] records whether a real GitLab user was found;
    # #format_description relies on that flag.
    #
    # NOTE(review): the fallback is cached together with the result, so a
    # later call for the same user with a different +fallback_id+ returns the
    # first call's value - confirm this is intended.
    def user_id(user, fallback_id = nil)
      return unless user.present?
      return cached[:user_ids][user.id] if cached[:user_ids].key?(user.id)

      gitlab_user_id = user_id_by_external_uid(user.id) || user_id_by_email(user.email)

      cached[:gitlab_user_ids][user.id] = gitlab_user_id.present?
      cached[:user_ids][user.id] = gitlab_user_id || fallback_id
    end

371
    # Returns the id of the user found by ::User.find_by_any_email, or nil
    # when +email+ is nil/false or no user matches.
    def user_id_by_email(email)
      return nil unless email

      ::User.find_by_any_email(email)&.id
    end

377
    # Returns the id of the first user that has a :github identity whose
    # extern_uid equals +id+, or nil when +id+ is nil/false or nobody matches.
    def user_id_by_external_uid(id)
      return nil unless id

      ::User.select(:id)
            .joins(:identities)
            .merge(::Identity.where(provider: :github, extern_uid: id))
            .first&.id
    end

386
    # Returns +body+ unchanged when cached[:gitlab_user_ids] flags the author
    # as a known GitLab user; otherwise prefixes it with a
    # "*Created by: <username>*" line.
    #
    # The flag is written by #user_id, so callers resolve the author through
    # #user_id before formatting.
    def format_description(body, author)
      return body if cached[:gitlab_user_ids][author.id]

      "*Created by: #{author.username}*\n\n#{body}"
    end

392 393 394 395
    # Asks the project repository to expire its content cache.
    def expire_repository_cache
      target = repository
      target.expire_content_cache
    end

396
    # Persists the collected errors, as JSON, in the project's import_error
    # column. Does nothing when no errors were recorded.
    def keep_track_of_errors
      return unless errors.any?

      project.update_column(:import_error, {
        message: 'The remote data could not be fully imported.',
        errors: errors
      }.to_json)
    end

405 406 407 408 409
    # Appends a { type:, url:, error: } entry to +errors+; the URL is passed
    # through Gitlab::UrlSanitizer.sanitize before it is stored.
    def error(type, url, message)
      safe_url = Gitlab::UrlSanitizer.sanitize(url)
      entry    = { type: type, url: safe_url, error: message }

      errors << entry
    end
  end
end