Module: Traject::Macros::NokogiriMacros

Included in:
Indexer::NokogiriIndexer
Defined in:
lib/traject/macros/nokogiri_macros.rb

Instance Method Summary collapse

Instance Method Details

#default_namespaces ⇒ Object



5
6
7
8
9
10
11
# File 'lib/traject/macros/nokogiri_macros.rb', line 5

# Namespace prefix => URI mappings taken from the "nokogiri.namespaces"
# setting, falling back to an empty hash when the setting is absent.
#
# The value is memoized in @default_namespaces after the first successful
# lookup. A value that fails validation is never cached, because the raise
# happens before the assignment completes.
#
# @return [Hash] the configured namespaces (the very object stored in settings)
# @raise [ArgumentError] if the configured value is not a Hash
def default_namespaces
  @default_namespaces ||= begin
    configured = settings["nokogiri.namespaces"] || {}

    unless configured.is_a?(Hash)
      raise ArgumentError, "nokogiri.namespaces must be a hash, not: #{configured.inspect}"
    end

    configured
  end
end

#extract_xpath(xpath, ns: {}, to_text: true) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/traject/macros/nokogiri_macros.rb', line 13

# Builds an indexing step that evaluates +xpath+ against a record and appends
# the matches to the accumulator.
#
# Namespaces are resolved once, when the macro is built: a non-empty +ns+ is
# merged over default_namespaces (per-call entries win), otherwise
# default_namespaces is used unchanged.
#
# @param xpath [String] XPath expression handed to record.xpath
# @param ns [Hash, nil] extra namespace mappings; nil or empty means "defaults only"
# @param to_text [Boolean] when truthy, convert each match to a String;
#   otherwise the matched nodes themselves are accumulated
# @return [Proc] lambda taking (record, accumulator); it returns the accumulator
def extract_xpath(xpath, ns: {}, to_text: true)
  namespaces = (ns && ns.length > 0) ? default_namespaces.merge(ns) : default_namespaces

  lambda do |record, accumulator|
    matches = record.xpath(xpath, namespaces)

    values =
      if to_text
        matches.map do |node|
          # An attribute match contributes its value as-is.
          next node.value if node.kind_of?(Nokogiri::XML::Attr)

          # For any other node, gather every descendant text node, drop the
          # ones that are only whitespace (the indentation "between the
          # children"), and join what remains with single spaces.
          node.xpath('.//text()')
              .map(&:text)
              .reject { |str| str =~ (/\A\s+\z/) }
              .join(" ")
        end
      else
        # Raw mode: accumulate the matched nodes themselves.
        matches.to_a
      end

    accumulator.concat values
  end
end