Class: Extractor

Inherits:
Object
  • Object
show all
Defined in:
lib/feed_ninja/extractor.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#doc ⇒ Object

Returns the value of attribute doc.



5
6
7
# File 'lib/feed_ninja/extractor.rb', line 5

# Reader for the +@doc+ instance variable, which #fetch assigns from
# +Nokogiri::HTML+.
#
# @return [Object, nil] the current value of +@doc+ (nil if never assigned)
def doc
  @doc
end

Instance Method Details

#extract_image(xpath) ⇒ Object



23
24
25
26
27
# File 'lib/feed_ninja/extractor.rb', line 23

# Resolves every match of +xpath+ against the stored base URI.
#
# Reads the +@doc+ and +@base_uri+ instance variables, both of which
# #fetch assigns.
#
# @param xpath [Object] XPath expression handed to +@doc.xpath+
# @return [Array] one +URI.join(@base_uri, match)+ result per match,
#   in the order the matches were yielded
def extract_image(xpath)
  matches = @doc.xpath(xpath)
  matches.map { |href_node| URI.join(@base_uri, href_node) }
end

#extract_images(xpaths) ⇒ Object



15
16
17
18
19
20
21
# File 'lib/feed_ninja/extractor.rb', line 15

# Runs #extract_image for one or many XPath expressions and returns the
# results as a single flat list.
#
# @param xpaths [Object] a single expression or a collection of them;
#   it is splatted into an array, so nil yields an empty result
# @return [Array] the concatenated #extract_image results, in input order
def extract_images(xpaths)
  LOGGER.debug { "collecting images for #{xpaths}" }
  expressions = [*xpaths]
  expressions.flat_map do |expression|
    LOGGER.debug { "collecting image:xpath #{expression}" }
    extract_image(expression)
  end
end

#extract_xml(xpaths) ⇒ Object



29
30
31
32
33
34
35
36
37
38
# File 'lib/feed_ninja/extractor.rb', line 29

# Collects the string form of every node matched by the given XPath
# expression(s) in +@doc+.
#
# @param xpaths [Object] a single expression or a collection of them;
#   it is splatted into an array, so nil yields an empty result
# @return [Array<String>] +to_s+ of each match, grouped by expression in
#   input order and flattened into one list
def extract_xml(xpaths)
  LOGGER.debug { "collecting text" }
  expressions = [*xpaths]
  expressions.flat_map do |expression|
    LOGGER.debug { "collecting text:xpath #{expression}" }
    matches = @doc.xpath(expression)
    matches.map do |node|
      LOGGER.debug { "collecting text:result #{node}" }
      node.to_s
    end
  end
end

#fetch(uri) ⇒ Object



7
8
9
10
11
12
13
# File 'lib/feed_ninja/extractor.rb', line 7

# Opens +uri+, parses the response with +Nokogiri::HTML+ and remembers
# both the parsed document (+@doc+) and the response's +base_uri+
# (+@base_uri+) for the extract_* methods.
#
# The resource is opened through the URI object's own +open+ (provided by
# open-uri) instead of the bare +Kernel#open+. The bare form no longer
# understands URLs on Ruby 3.0+ and would spawn a subprocess for strings
# starting with "|".
#
# @param uri [String, #open] URL string, or an object that already responds
#   to +open+ (e.g. a URI instance), which is used as is
# @return [Object] the value of +site.base_uri+, i.e. the block's last
#   expression
# @raise [URI::InvalidURIError] when a String argument is not a valid URI
def fetch(uri)
  source = uri.respond_to?(:open) ? uri : URI(uri)
  source.open do |site|
    @doc = Nokogiri::HTML(site)
    @base_uri = site.base_uri
  end
end