# Gitaly note: JV: needs RPC for Gitlab::Git::Diff.between.

# Gitlab::Git::Diff is a wrapper around native Rugged::Diff object
module Gitlab
  module Git
    class Diff
      # Error class owned by this wrapper. It is not raised by any code in
      # this class body -- presumably raised by callers; confirm before removing.
      TimeoutError = Class.new(StandardError)

      # NOTE(review): presumably the source of `encode!` and `detect_binary?`,
      # which the instance methods below call without defining.
      include Gitlab::EncodingHelper

      # Diff properties
      attr_accessor :old_path, :new_path, :a_mode, :b_mode, :diff

      # Stats properties
      attr_accessor :new_file, :renamed_file, :deleted_file

      alias_method :new_file?, :new_file
      alias_method :deleted_file?, :deleted_file
      alias_method :renamed_file?, :renamed_file

      attr_accessor :expanded
      attr_writer :too_large

      alias_method :expanded?, :expanded

      # Attributes copied out by `to_hash` and copied back in by
      # `init_from_hash`.
      SERIALIZE_KEYS = %i(diff new_path old_path a_mode b_mode new_file renamed_file deleted_file too_large).freeze

      # The maximum size of a diff to display.
      SIZE_LIMIT = 100.kilobytes

      # The maximum size before a diff is collapsed.
      COLLAPSE_LIMIT = 10.kilobytes
      class << self
        # Diff +head+ against +base+ (or against their merge base) in +repo+.
        #
        # repo    - object responding to +merge_base_commit(head, base)+ and
        #           +diff(from, to, options, *paths)+.
        # head    - the revision whose changes are shown.
        # base    - the revision compared against.
        # options - Hash. +:straight+ is consumed here; the remainder goes
        #           through +filter_diff_options+. +nil+ is treated as empty.
        # paths   - forwarded unchanged to +repo.diff+.
        #
        # Returns whatever +repo.diff+ returns.
        def between(repo, head, base, options = {}, *paths)
          # Copy before deleting so the caller's hash is left untouched, and
          # accept nil before the first hash operation rather than after it.
          options = options ? options.dup : {}
          straight = options.delete(:straight) || false

          common_commit =
            if straight
              base
            else
              # Only show what is new in the source branch compared to the
              # target branch, not the other way around. The line below with
              # merge_base is equivalent to diff with three dots (git diff
              # branch1...branch2). From the git documentation:
              # "git diff A...B" is equivalent to "git diff
              # $(git-merge-base A B) B"
              repo.merge_base_commit(head, base)
            end

          repo.diff(common_commit, head, filter_diff_options(options), *paths)
        end

        # Return a copy of the +options+ hash containing only keys that can be
        # passed to Rugged.  Allowed options are:
        #
        #  :ignore_whitespace_change ::
        #    If true, changes in amount of whitespace will be ignored.
        #
        #  :disable_pathspec_match ::
        #    If true, the given +*paths+ will be applied as exact matches,
        #    instead of as fnmatch patterns.
        #
        #  :paths, :max_files, :max_lines, :limits, :expanded ::
        #    Also let through unchanged; their meaning is defined by whoever
        #    consumes the returned hash.
        #
        # +default_options+ is filtered the same way and supplies values for
        # keys that +options+ does not set. Either argument may be nil, and
        # neither is modified.
        def filter_diff_options(options, default_options = {})
          allowed_options = [:ignore_whitespace_change,
                             :disable_pathspec_match, :paths,
                             :max_files, :max_lines, :limits, :expanded]

          # A nil/false argument counts as an empty hash.
          only_allowed = lambda do |hash|
            (hash || {}).select { |key, _value| allowed_options.include?(key) }
          end

          # Explicit options win over defaults for the same key.
          only_allowed.call(default_options).merge(only_allowed.call(options))
        end

        # Return a binary diff message like:
        #
        # "Binary files a/file/path and b/file/path differ\n"
        #
        # This is used when we detect that a diff is binary
        # using CharlockHolmes when Rugged treats it as text.
        #
        # The paths are interpolated exactly as given; no "a/" or "b/" prefix
        # is added here.
        def binary_message(old_path, new_path)
          "Binary files #{old_path} and #{new_path} differ\n"
        end
      end

      # Build a Diff from one of the supported raw representations.
      #
      # raw_diff - a Hash (as produced by `to_hash`), a Rugged::Patch or
      #            Rugged::Diff::Delta, a Gitlab::GitalyClient::Diff, or a
      #            Gitaly::CommitDelta.
      # expanded - stored as-is; when falsy, diffs at or above COLLAPSE_LIMIT
      #            are collapsed.
      #
      # Raises RuntimeError when raw_diff is nil or of any other type.
      def initialize(raw_diff, expanded: true)
        @expanded = expanded

        case raw_diff
        when Hash
          init_from_hash(raw_diff)
          prune_diff_if_eligible
        when Rugged::Patch, Rugged::Diff::Delta
          # No prune call here: for patches the size limits are applied
          # inside init_from_rugged_patch while the patch is being read.
          init_from_rugged(raw_diff)
        when Gitlab::GitalyClient::Diff
          init_from_gitaly(raw_diff)
          prune_diff_if_eligible
        when Gitaly::CommitDelta
          # NOTE(review): not pruned -- presumably a delta carries no patch
          # text (init_from_gitaly only reads `patch` when present); confirm.
          init_from_gitaly(raw_diff)
        when nil
          raise "Nil as raw diff passed"
        else
          raise "Invalid raw diff type: #{raw_diff.class}"
        end
      end

      # Serialize this diff into a Hash keyed by SERIALIZE_KEYS, reading each
      # value through the reader method of the same name.
      def to_hash
        SERIALIZE_KEYS.each_with_object({}) do |key, hash|
          hash[key] = send(key) # rubocop:disable GitlabSecurity/PublicSend
        end
      end

      # Truthy only when both the old and the new mode are set and they are
      # not equal. When either mode is missing, that missing (falsy) value is
      # what gets returned.
      def mode_changed?
        return a_mode unless a_mode
        return b_mode unless b_mode

        a_mode != b_mode
      end

      # True when either side of the diff carries mode '160000', the mode
      # string this class treats as marking a submodule entry.
      def submodule?
        [a_mode, b_mode].include?('160000')
      end

      # Number of lines in the diff text as reported by Util.count_lines.
      # The result is cached in @line_count after the first call.
      def line_count
        return @line_count if @line_count

        @line_count = Util.count_lines(@diff)
      end

      # Whether the diff counts as too large to display.
      #
      # If the flag has not been set yet (it is nil), it is computed from the
      # byte size of @diff against SIZE_LIMIT and cached. A flag that was
      # already set -- by `too_large=`, `too_large!` or an earlier call -- is
      # returned unchanged.
      def too_large?
        @too_large = @diff.bytesize >= SIZE_LIMIT if @too_large.nil?

        @too_large
      end

      # This is used by `to_hash` and `init_from_hash`.
      # `to_hash` reads every SERIALIZE_KEYS entry by sending its name, and
      # `:too_large` is one of those entries, so a reader without the
      # question mark has to exist. The matching writer that `init_from_hash`
      # sends to is the `attr_writer :too_large` declared on the class.
      alias_method :too_large, :too_large?

      # Mark the diff as too large: the diff text is dropped, the cached line
      # count is reset to zero and the too-large flag is set.
      def too_large!
        @diff = ''
        @line_count = 0
        @too_large = true
      end

      # Whether the diff is collapsed.
      #
      # Once @collapsed has been assigned (here or by `collapse!`) that value
      # is returned as-is. Otherwise the diff is collapsed when it was not
      # requested expanded and its byte size reaches COLLAPSE_LIMIT.
      def collapsed?
        unless defined?(@collapsed)
          @collapsed = !expanded && @diff.bytesize >= COLLAPSE_LIMIT
        end

        @collapsed
      end

      # Collapse the diff: the diff text is dropped, the cached line count is
      # reset to zero and the collapsed flag is set.
      def collapse!
        @diff = ''
        @line_count = 0
        @collapsed = true
      end

      # The diff text, or a one-line "Binary files ... differ" message built
      # from the old and new paths when `detect_binary?` reports the text as
      # binary.
      def json_safe_diff
        if detect_binary?(@diff)
          # the diff is binary, let's make a message for it
          Diff.binary_message(@old_path, @new_path)
        else
          @diff
        end
      end

      # True when the diff text begins with the word 'Binary', which is how
      # the message built by `binary_message` starts.
      def has_binary_notice?
        notice_prefix = 'Binary'

        @diff.start_with?(notice_prefix)
      end

      private

      # Populate paths, modes and the added/renamed/deleted flags from a
      # Rugged object.
      #
      # rugged - a Rugged::Patch, in which case the diff text is loaded first
      #          and its `delta` supplies the metadata; anything else is
      #          treated as the delta itself.
      def init_from_rugged(rugged)
        delta =
          if rugged.is_a?(Rugged::Patch)
            init_from_rugged_patch(rugged)
            rugged.delta
          else
            rugged
          end

        @new_path = encode!(delta.new_file[:path])
        @old_path = encode!(delta.old_file[:path])
        # Modes are kept as base-8 strings (e.g. "100644"); assumes the
        # delta exposes them as integers -- TODO confirm.
        @a_mode = delta.old_file[:mode].to_s(8)
        @b_mode = delta.new_file[:mode].to_s(8)
        @new_file = delta.added?
        @renamed_file = delta.renamed?
        @deleted_file = delta.deleted?
      end

      # Load the diff text from a patch unless the patch gets pruned.
      #
      # Don't bother initializing diffs that are too large. If a diff is
      # binary we're not going to display anything so we skip the size check.
      #
      # Returns nil when `prune_large_patch` pruned the patch (in which case
      # @diff is whatever the prune step set), otherwise the stored text.
      def init_from_rugged_patch(patch)
        return if !patch.delta.binary? && prune_large_patch(patch)

        @diff = encode!(strip_diff_headers(patch.to_s))
      end

      # Populate the diff from a Hash such as the one `to_hash` produces.
      #
      # hash - Hash with String or Symbol keys. Every SERIALIZE_KEYS entry is
      #        assigned through its writer; keys absent from the hash assign
      #        nil, and keys outside SERIALIZE_KEYS are ignored.
      def init_from_hash(hash)
        raw_diff = hash.symbolize_keys

        # SERIALIZE_KEYS already holds symbols, so they index raw_diff directly.
        SERIALIZE_KEYS.each do |key|
          send(:"#{key}=", raw_diff[key]) # rubocop:disable GitlabSecurity/PublicSend
        end
      end

      # Populate the diff from a Gitaly diff or delta object.
      #
      # diff - object exposing to_path, from_path, old_mode, new_mode,
      #        from_id and to_id. `patch` and `collapsed` are optional and
      #        only consulted when the object responds to them.
      def init_from_gitaly(diff)
        @diff = encode!(diff.patch) if diff.respond_to?(:patch)
        # Paths are duplicated before encoding so the source object's own
        # strings are not the ones handed to encode!.
        @new_path = encode!(diff.to_path.dup)
        @old_path = encode!(diff.from_path.dup)
        @a_mode = diff.old_mode.to_s(8)
        @b_mode = diff.new_mode.to_s(8)
        # A blank id on the "from" side means the file is new; on the "to"
        # side it means the file was deleted.
        @new_file = diff.from_id == BLANK_SHA
        @renamed_file = diff.from_path != diff.to_path
        @deleted_file = diff.to_id == BLANK_SHA

        collapse! if diff.respond_to?(:collapsed) && diff.collapsed
      end
      # Drop the diff text when it is over a limit. The too-large check takes
      # precedence; the collapsed check only runs when the diff is not too
      # large. Does nothing when neither applies.
      def prune_diff_if_eligible
        if too_large?
          too_large!
        elsif collapsed?
          collapse!
        end
      end

      # If the patch surpasses any of the diff limits it calls the appropriate
      # prune method and returns true. Otherwise returns false.
      #
      # The size is the sum of the byte sizes of every line's content. Reading
      # stops as soon as SIZE_LIMIT is reached, so the rest of an oversized
      # patch is not walked. COLLAPSE_LIMIT is only checked once all lines
      # have been read, and only when the diff was not requested expanded.
      def prune_large_patch(patch)
        size = 0

        patch.each_hunk do |hunk|
          hunk.each_line do |line|
            size += line.content.bytesize
            next if size < SIZE_LIMIT

            too_large!
            return true
          end
        end

        return false if expanded || size < COLLAPSE_LIMIT

        collapse!
        true
      end

      # Remove the header that precedes the actual patch body so the text
      # matches Grit's output.
      #
      # Everything before the first line beginning with '---' or 'Binary' is
      # cut off, modifying +diff_text+ in place. If no such line exists the
      # empty string is returned instead (behavior kept from before the
      # refactor; the reason is not known).
      def strip_diff_headers(diff_text)
        diff_text.sub!(/\A.*?^(---|Binary)/m, '\1')

        return '' unless diff_text.start_with?('---', 'Binary')

        diff_text
      end
    end
  end
end