Module: Traject::Macros::NokogiriMacros
- Included in:
- Indexer::NokogiriIndexer
- Defined in:
- lib/traject/macros/nokogiri_macros.rb
Instance Method Summary collapse
Instance Method Details
#default_namespaces ⇒ Object
5 6 7 8 9 10 11 |
# File 'lib/traject/macros/nokogiri_macros.rb', line 5
#
# Returns the Hash stored under the "nokogiri.namespaces" setting, or an
# empty Hash when that setting is nil/false. The value is validated once and
# then cached in @default_namespaces, so later calls return the same object.
#
# @return [Hash] the configured namespaces (used by #extract_xpath as the
#   namespace argument of `xpath`)
# @raise [ArgumentError] if the setting holds something other than a Hash
def default_namespaces
  return @default_namespaces if @default_namespaces

  configured = settings["nokogiri.namespaces"] || {}
  unless configured.is_a?(Hash)
    raise ArgumentError, "nokogiri.namespaces must be a hash, not: #{configured.inspect}"
  end

  # Only cache after validation, so a bad setting raises on every call.
  @default_namespaces = configured
end
#extract_xpath(xpath, ns: {}, to_text: true) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/traject/macros/nokogiri_macros.rb', line 13
#
# Builds an indexing step: a lambda that evaluates +xpath+ against a record
# and appends the matches to the accumulator.
#
# @param xpath [String] XPath expression passed to `record.xpath`
# @param ns [Hash, nil] extra namespace mappings merged over
#   #default_namespaces (entries given here win on conflict); nil or empty
#   means the defaults are used as-is
# @param to_text [Boolean] when truthy, every match is reduced to a String;
#   otherwise the matched nodes themselves are appended
# @return [Proc] lambda taking (record, accumulator); it returns the
#   accumulator after appending to it
def extract_xpath(xpath, ns: {}, to_text: true)
  # Resolved once, when the step is built, not for every record.
  namespaces = (ns && !ns.empty?) ? default_namespaces.merge(ns) : default_namespaces

  lambda do |record, accumulator|
    matches = record.xpath(xpath, namespaces)

    values =
      if to_text
        matches.map do |node|
          if node.kind_of?(Nokogiri::XML::Attr)
            # Attributes carry their content in #value.
            node.value
          else
            # Gather every text node at or below the match, drop the
            # whitespace-only ones (indentation "between the children") and
            # join the rest with single spaces.
            #
            # `descendant-or-self::text()` is used instead of `.//text()`:
            # the latter only looks at children, so a match that is itself a
            # text node (an XPath ending in `/text()`) would yield "".
            node.xpath("descendant-or-self::text()")
                .map(&:text)
                .reject { |s| s =~ /\A\s+\z/ }
                .join(" ")
          end
        end
      else
        # Hand the matched nodes over untouched.
        matches.to_a
      end

    accumulator.concat(values)
  end
end