Class: Extractor
- Inherits:
-
Object
- Object
- Extractor
- Defined in:
- lib/feed_ninja/extractor.rb
Instance Attribute Summary collapse
-
#doc ⇒ Object
Returns the value of attribute doc.
Instance Method Summary collapse
- #extract_image(base_url, xpath) ⇒ Object
- #extract_images(base_url, xpaths) ⇒ Object
- #extract_xml(xpaths) ⇒ Object
- #fetch(uri) ⇒ Object
Instance Attribute Details
#doc ⇒ Object
Returns the value of attribute doc.
5 6 7 |
# File 'lib/feed_ninja/extractor.rb', line 5
# Reader for the +doc+ attribute.
#
# @return [Object] the current value of @doc (nil if nothing has been
#   assigned yet)
def doc
  @doc
end
Instance Method Details
#extract_image(base_url, xpath) ⇒ Object
22 23 24 25 26 27 28 29 30 31 |
# File 'lib/feed_ninja/extractor.rb', line 22
# Turns every node matched by +xpath+ in @doc into an absolute image URL.
#
# Sources that already start with http:// or https:// are returned untouched.
# Anything else (relative, root-relative or protocol-relative) is resolved
# against +base_url+ with standard URI reference resolution.
#
# @param base_url [URI, String] address of the page @doc was loaded from
# @param xpath [String] XPath expression selecting the image sources
# @return [Array<String>] one URL per matched node, in document order
def extract_image(base_url, xpath)
  @doc.xpath(xpath).map do |picture_src|
    source = picture_src.to_s
    next source if source.start_with?('http://', 'https://')

    LOGGER.debug { "BASE URL IS #{base_url.class}" }
    begin
      # URI.join resolves against the base's directory, keeps the port and
      # avoids the doubled slash that plain string concatenation produced.
      URI.join(base_url, source).to_s
    rescue URI::Error => e
      # A malformed source must not abort the whole extraction; hand it back
      # unresolved so the caller still sees it.
      LOGGER.debug { "cannot resolve #{source.inspect}: #{e.message}" }
      source
    end
  end
end
#extract_images(base_url, xpaths) ⇒ Object
14 15 16 17 18 19 20 |
# File 'lib/feed_ninja/extractor.rb', line 14
# Collects image URLs for one or several XPath expressions.
#
# @param base_url [String, URI] address of the page; converted with URI()
#   and handed to #extract_image for each expression
# @param xpaths [String, Array<String>, nil] a single expression or a list
# @return [Array<String>] the concatenated results of #extract_image for
#   every expression, in the order given
def extract_images(base_url, xpaths)
  LOGGER.debug { "collecting images for #{xpaths}" }
  selectors = Array(xpaths)
  # Nothing to look up: return early so base_url is not parsed at all.
  return [] if selectors.empty?

  # Parse the base once instead of once per expression.
  page_uri = URI(base_url)
  selectors.flat_map do |xpath|
    LOGGER.debug { "collecting image:xpath #{xpath}" }
    extract_image(page_uri, xpath)
  end
end
#extract_xml(xpaths) ⇒ Object
33 34 35 36 37 38 39 40 41 42 |
# File 'lib/feed_ninja/extractor.rb', line 33
# Gathers the string form of every node in @doc matched by the given XPath
# expression(s).
#
# @param xpaths [String, Array<String>, nil] a single expression or a list
# @return [Array<String>] +to_s+ of each matched node, grouped in the order
#   the expressions were given
def extract_xml(xpaths)
  LOGGER.debug { "collecting text" }
  Array(xpaths).flat_map do |xpath|
    LOGGER.debug { "collecting text:xpath #{xpath}" }
    @doc.xpath(xpath).map do |node|
      LOGGER.debug { "collecting text:result #{node}" }
      node.to_s
    end
  end
end
#fetch(uri) ⇒ Object
7 8 9 10 11 12 |
# File 'lib/feed_ninja/extractor.rb', line 7
# Opens +uri+ and parses what it returns as HTML, storing the result in @doc.
#
# @param uri [String, URI] location of the page to load
# @return [Object] the parsed document (the same object assigned to @doc)
def fetch(uri)
  # URI.open is open-uri's explicit entry point (Ruby 2.5+). Kernel#open no
  # longer accepts URLs as of Ruby 3.0, so the bare open(uri) call breaks there.
  URI.open(uri) do |site|
    @doc = Nokogiri::HTML(site)
  end
end