BigW Consortium Gitlab

untrusted_regexp.rb 1.35 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
module Gitlab
  # An untrusted regular expression is any regexp containing patterns sourced
  # from user input.
  #
  # Ruby's built-in regular expression library allows patterns which complete in
  # exponential time, permitting denial-of-service attacks.
  #
  # Not all regular expression features are available in untrusted regexes, and
  # there is a strict limit on total execution time. See the RE2 documentation
  # at https://github.com/google/re2/wiki/Syntax for more details.
  class UntrustedRegexp
    # `===` is forwarded to the wrapped RE2 regexp.
    delegate :===, to: :regexp

    # Compiles +pattern+ with RE2.
    #
    # @param pattern [String] the pattern to compile, in RE2 syntax
    # @raise [RegexpError] when RE2 reports the compiled pattern as not ok;
    #   the exception message is RE2's own error text
    def initialize(pattern)
      # RE2's error logging is disabled; an invalid pattern is surfaced through
      # the exception raised below instead.
      @regexp = RE2::Regexp.new(pattern, log_errors: false)

      raise RegexpError, regexp.error unless regexp.ok?
    end

    # Passes +text+, the wrapped regexp and +rewrite+ to RE2.GlobalReplace and
    # returns its result.
    #
    # NOTE(review): whether +text+ is modified in place or copied depends on
    # the re2 gem version in use - confirm before relying on either.
    def replace_all(text, rewrite)
      RE2.GlobalReplace(text, regexp, rewrite)
    end

    # Collects every match of the pattern in +text+ into an array.
    #
    # When the pattern has no capture groups, each result is unwrapped to its
    # first element, so the array is flat. Otherwise the entries are returned
    # exactly as the RE2 scanner yields them.
    #
    # @param text [String] the text to search
    # @return [Array] the collected matches
    def scan(text)
      matches = scan_regexp.scan(text).to_a
      # scan_regexp added one synthetic group in this case; strip it again.
      matches.map!(&:first) if regexp.number_of_capturing_groups.zero?
      matches
    end

    # Passes +text+, the wrapped regexp and +rewrite+ to RE2.Replace and
    # returns its result.
    #
    # NOTE(review): whether +text+ is modified in place or copied depends on
    # the re2 gem version in use - confirm before relying on either.
    def replace(text, rewrite)
      RE2.Replace(text, regexp, rewrite)
    end

    private

    attr_reader :regexp

    # RE2 scan operates differently to Ruby scan when there are no capture
    # groups, so work around it: a pattern without groups is wrapped in a
    # single capturing group. The resulting regexp is memoized.
    def scan_regexp
      @scan_regexp ||=
        if regexp.number_of_capturing_groups.zero?
          RE2::Regexp.new("(#{regexp.source})")
        else
          regexp
        end
    end
  end
end