Class: Extractor
- Inherits:
-
Object
- Object
- Extractor
- Defined in:
- lib/feed_ninja/extractor.rb
Instance Attribute Summary collapse
-
#doc ⇒ Object
Returns the value of attribute doc.
Instance Method Summary collapse
- #extract_image(xpath) ⇒ Object
- #extract_images(xpaths) ⇒ Object
- #extract_xml(xpaths) ⇒ Object
- #fetch(uri) ⇒ Object
Instance Attribute Details
#doc ⇒ Object
Returns the value of attribute doc.
5 6 7 |
# File 'lib/feed_ninja/extractor.rb', line 5
# Reader for the +doc+ attribute: hands back whatever is currently stored
# in @doc (nil until something has assigned it).
def doc
  @doc
end
Instance Method Details
#extract_image(xpath) ⇒ Object
23 24 25 26 27 |
# File 'lib/feed_ninja/extractor.rb', line 23
# Resolves every match of +xpath+ in @doc against @base_uri.
#
# Each value yielded by @doc.xpath(xpath) is passed as the second argument
# to URI.join with @base_uri as the first, so relative references come back
# absolute. Returns the list of joined URIs, in match order.
def extract_image(xpath)
  matches = @doc.xpath(xpath)
  matches.map { |href| URI.join(@base_uri, href) }
end
#extract_images(xpaths) ⇒ Object
15 16 17 18 19 20 21 |
# File 'lib/feed_ninja/extractor.rb', line 15
# Gathers image URIs for one or several XPath expressions.
#
# +xpaths+ is splatted into an array, so a single expression and a list of
# expressions are both accepted. Every expression is logged at debug level
# and handed to extract_image; the per-expression results are concatenated
# into one flat list, which is the return value.
def extract_images(xpaths)
  LOGGER.debug { "collecting images for #{xpaths}" }
  [*xpaths].flat_map do |expression|
    LOGGER.debug { "collecting image:xpath #{expression}" }
    extract_image(expression)
  end
end
#extract_xml(xpaths) ⇒ Object
29 30 31 32 33 34 35 36 37 38 |
# File 'lib/feed_ninja/extractor.rb', line 29
# Serialises everything matched by one or several XPath expressions.
#
# +xpaths+ is splatted into an array, so a single expression and a list of
# expressions are both accepted. Each expression is evaluated against @doc,
# every match is converted with #to_s, and all resulting strings are
# returned as one flat list. Progress is logged at debug level.
def extract_xml(xpaths)
  LOGGER.debug { "collecting text" }
  [*xpaths].flat_map do |expression|
    LOGGER.debug { "collecting text:xpath #{expression}" }
    @doc.xpath(expression).map do |node|
      LOGGER.debug { "collecting text:result #{node}" }
      node.to_s
    end
  end
end
#fetch(uri) ⇒ Object
7 8 9 10 11 12 13 |
# File 'lib/feed_ninja/extractor.rb', line 7
# Downloads +uri+ and keeps the parsed page for the extract_* methods.
#
# The opened resource is parsed with Nokogiri::HTML into @doc, and its
# base_uri is stored in @base_uri (extract_image joins references against
# it).
#
# The resource is opened through its URI object instead of Kernel#open:
# open-uri no longer hooks Kernel#open for URL strings on Ruby 3.0+, and
# Kernel#open treats a string starting with "|" as a command to run, which
# is unsafe for addresses that come from feed content.
#
# @param uri [String, URI, #open] address of the page; objects that already
#   respond to #open are used as they are, anything else goes through URI()
# @return [Object] the value of the block, i.e. the resource's base_uri
# @raise [ArgumentError, URI::InvalidURIError] when +uri+ cannot be turned
#   into a URI
def fetch(uri)
  # Strings do not publicly respond to #open, so they always take the URI() path.
  target = uri.respond_to?(:open) ? uri : URI(uri)
  target.open do |site|
    @doc = Nokogiri::HTML(site)
    @base_uri = site.base_uri
  end
end