# BigW Consortium Gitlab - querying.rb
module Banzai
  # Helpers for running CSS queries against Nokogiri documents and for
  # narrowing the matched nodes down to those that lead their parent.
  module Querying
    module_function

    # Searches a Nokogiri document using a CSS query, optionally optimizing it
    # whenever possible.
    #
    # document          - A document/element to search.
    # query             - The CSS query to use.
    # reference_options - A hash with nodes filter options. Only the :location
    #                     key is read; the value :beginning turns filtering on.
    #
    # Returns an array of Nokogiri::XML::Element objects if reference_options
    # has location: :beginning. Otherwise it returns the result of
    # document.xpath unchanged (a Nokogiri::XML::NodeSet).
    def css(document, query, reference_options = {})
      # When using "a.foo" Nokogiri compiles this to "//a[...]" but
      # "descendant::a[...]" is quite a bit faster and achieves the same result.
      # Only the first XPath expression produced for the query is used, and
      # only its leading "//" is rewritten (hence the \A anchor).
      xpath = Nokogiri::CSS.xpath_for(query)[0].sub(%r{\A//}, 'descendant::')
      xpath = restrict_to_p_nodes_at_root(xpath) if filter_nodes_at_beginning?(reference_options)

      filter_nodes(document.xpath(xpath), reference_options)
    end

    # Rewrites every "descendant::" axis in the expression to "./p/", so the
    # query only looks inside <p> children of the context node.
    #
    # xpath - The XPath expression String to rewrite.
    #
    # Returns a new String.
    def restrict_to_p_nodes_at_root(xpath)
      xpath.gsub('descendant::', './p/')
    end

    # Applies the "beginning" filter when it is requested.
    #
    # nodes             - The collection of nodes to filter.
    # reference_options - A hash with nodes filter options (may be nil).
    #
    # Returns the filtered Array when location: :beginning is set, otherwise
    # the given nodes untouched.
    def filter_nodes(nodes, reference_options)
      return nodes unless filter_nodes_at_beginning?(reference_options)

      filter_nodes_at_beginning(nodes)
    end

    # Checks whether the options ask for nodes located at the beginning.
    #
    # reference_options - A hash with nodes filter options (may be nil).
    #
    # Returns a truthy value only when reference_options[:location] is
    # :beginning; nil or false otherwise.
    def filter_nodes_at_beginning?(reference_options)
      reference_options && reference_options[:location] == :beginning
    end

    # Selects child nodes if they are present in the beginning among other siblings.
    #
    # For each parent, its children are walked in order, skipping children whose
    # text is blank. A child is kept while it is equal to the next matched node
    # of that parent; the walk for that parent stops at the first mismatch.
    #
    # nodes - A Nokogiri::XML::NodeSet.
    #
    # Returns an array of Nokogiri::XML::Element objects.
    def filter_nodes_at_beginning(nodes)
      nodes.group_by(&:parent).each_with_object([]) do |(parent, matched), leading|
        # `matched` is the array built by group_by, so consuming it with
        # shift never touches the caller's collection.
        parent.children.each do |child|
          next if child.text.blank?

          node = matched.shift
          # A nil node (matches exhausted) or a different sibling ends the
          # leading run for this parent.
          break unless node == child

          leading << node
        end
      end
    end
  end
end