# html_parser.rb (704 bytes) -- viewed on BigW Consortium GitLab
module Gitlab
  module Email
    # Converts the HTML body of an email into text after stripping every
    # <blockquote> and <table> element from it.
    #
    # Parsing is delegated to Nokogiri and the final conversion to Html2Text.
    # Each intermediate result (document, filtered HTML, filtered text) is
    # memoized on the instance, so a parser is meant to be used for a single
    # body.
    class HTMLParser
      # Convenience entry point: builds a parser for +raw_body+ and returns
      # its filtered text.
      #
      # @param raw_body the HTML source; passed unchanged to
      #   Nokogiri::HTML.parse (presumably a String -- confirm with callers)
      # @return whatever Html2Text.convert returns for the filtered HTML
      def self.parse_reply(raw_body)
        new(raw_body).filtered_text
      end

      # The HTML source exactly as it was given to the constructor.
      attr_reader :raw_body

      # @param raw_body the HTML source to parse lazily on first use
      def initialize(raw_body)
        @raw_body = raw_body
      end

      # The Nokogiri document built from +raw_body+, parsed once and reused.
      def document
        @document ||= Nokogiri::HTML.parse(raw_body)
      end

      # Removes every <blockquote> and <table> node from +document+ in place.
      # Judging by the method name these are treated as quoted reply content;
      # the code itself only matches on the element names.
      def filter_replies!
        document.xpath('//blockquote').each(&:remove)
        document.xpath('//table').each(&:remove)
      end

      # The document's inner HTML after +filter_replies!+ has run.
      #
      # The filtering mutates the memoized +document+, so it is performed
      # inside the memoization block and therefore happens at most once
      # through this method.
      def filtered_html
        @filtered_html ||= begin
          filter_replies!
          document.inner_html
        end
      end

      # The filtered HTML converted by Html2Text, computed once and reused.
      def filtered_text
        @filtered_text ||= Html2Text.convert(filtered_html)
      end
    end
  end
end