Class: Sc::Selector
- Inherits:
- Object
- Inheritance hierarchy: Object → Sc::Selector
- Includes:
- RDF::NodeProxy, Scrappy::Formats
- Defined in:
- lib/scrappy/extractor/selector.rb
Direct Known Subclasses
BaseUriSelector, NewUriSelector, RootSelector, SectionSelector, SliceSelector, UriPatternSelector, UriSelector, VisualSelector, XPathSelector
Instance Method Summary collapse
Methods included from Scrappy::Formats
Instance Method Details
#select(doc) ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/scrappy/extractor/selector.rb', line 7

# Applies this selector to a fragment and post-processes the matches.
#
# The fragment is first handed to +filter+ (defined in each subclass). The
# resulting fragments are then optionally coerced to boolean strings
# (sc::boolean), linearly normalized (sc::normalize_max / sc::normalize_min /
# sc::normalize_in_range) and stripped of empty values (sc::nonempty). If the
# selector has nested selectors (sc::selector), every surviving fragment is
# passed to each of them and the flattened results are returned instead.
#
# @param doc [Hash] fragment to process; this method reads its :uri,
#   :content and :value entries (only for debug output). Any further keys
#   are up to the subclass-specific +filter+.
# @return [Array<Hash>] the selected fragments, each exposing at least a
#   :value entry. Hashes produced by +filter+ are never modified in place.
def select(doc)
  # Evaluate the debug predicate once; it guards both the "before" and the
  # "after" dump below.
  debugging = sc::debug.first == "true" &&
              Scrappy::Agent::Options.debug &&
              (Scrappy::Agent::Options.debug_key.nil? ||
               doc[:value].downcase.include?(Scrappy::Agent::Options.debug_key))

  if debugging
    puts '== DEBUG'
    puts '== Selector:'
    puts node.serialize(:yarf, false)
    puts '== On fragment:'
    puts "URI: #{doc[:uri]}"
    puts "Content: #{doc[:content]}"
    puts "Value: #{doc[:value]}"
  end

  # Process selector
  # Filter method is defined in each subclass
  results = filter doc

  if sc::boolean.first == "true"
    affirmations = ["yes", "true"]
    negations    = ["no", "none", "false", "-", "--"]

    # Map recognised words to "true"/"false"; anything else is dropped.
    results = results.map do |r|
      text = r[:value].downcase
      if negations.include?(text)
        r.merge :value => "false"
      elsif affirmations.include?(text)
        r.merge :value => "true"
      end
    end.compact
  end

  if sc::normalize_max.first
    max      = sc::normalize_max.first.to_f
    min      = sc::normalize_min.first.to_f
    in_range = sc::normalize_in_range.first == "true"
    span     = max - min

    scaled = results.map { |r| [r, (r[:value].to_f - min) / span] }

    # Check the range on the Float itself: when max == min the division
    # yields NaN or Infinity, whose string form parses back as 0.0 and would
    # otherwise be accepted as "in range".
    scaled = scaled.select { |_, v| v >= 0.0 && v <= 1.0 } if in_range

    # Build new hashes so the fragments returned by +filter+ stay untouched.
    results = scaled.map { |r, v| r.merge :value => v.to_s }
  end

  if sc::nonempty.first == "true"
    results = results.select { |r| r[:value] != "" }
  end

  if debugging
    puts "== No results" if results.empty?
    results.each_with_index do |result, i|
      puts "== Result ##{i}:"
      puts "URI: #{result[:uri]}"
      puts "Content: #{result[:content]}"
      puts "Value: #{result[:value].inspect}"
    end
    puts
  end

  # Return results if no nested selectors
  return results if sc::selector.empty?

  # Process nested selectors
  results.map do |result|
    sc::selector.map { |s| graph.node(s).select result }
  end.flatten
end