Class: Extractor
- Inherits:
-
Object
- Object
- Extractor
- Defined in:
- lib/feed_ninja/extractor.rb
Instance Attribute Summary collapse
-
#doc ⇒ Object
Returns the value of attribute doc.
Instance Method Summary collapse
- #extract_image(base_url, xpath) ⇒ Object
- #extract_images(base_url, *xpaths) ⇒ Object
- #extract_xml(*xpaths) ⇒ Object
- #fetch(uri) ⇒ Object
Instance Attribute Details
#doc ⇒ Object
Returns the value of attribute doc.
2 3 4 |
# File 'lib/feed_ninja/extractor.rb', line 2

# Reader for the document stored in +@doc+.
#
# @return [Object] the current value of +@doc+ (+nil+ if nothing has been
#   assigned yet)
def doc
  @doc
end
Instance Method Details
#extract_image(base_url, xpath) ⇒ Object
17 18 19 20 21 22 23 24 25 |
# File 'lib/feed_ninja/extractor.rb', line 17

# Collects the image sources selected by +xpath+ from +@doc+ and returns
# them as absolute URL strings.
#
# Sources that already start with "http" are returned unchanged. Every other
# source is resolved against +base_url+ with +URI.join+, so root-relative
# ("/img/a.png"), document-relative ("a.png") and protocol-relative
# ("//cdn/a.png") references all resolve correctly and the port of
# +base_url+ is preserved.
#
# @param base_url [URI::Generic] URL of the page the document was loaded from
#   (must respond to +scheme+, +host+, +path+ and +to_s+)
# @param xpath [String] XPath expression selecting the image sources
# @return [Array<String>] one URL per node matched by +xpath+
def extract_image(base_url, xpath)
  @doc.xpath(xpath).map do |picture_src|
    src = picture_src.to_s
    # Already absolute: pass through untouched.
    next src if src.start_with?('http')

    begin
      URI.join(base_url.to_s, src).to_s
    rescue URI::InvalidURIError
      # The source is not a parseable URI reference (e.g. it contains raw
      # spaces). Keep the previous plain concatenation instead of raising,
      # so a single malformed attribute cannot abort the whole extraction.
      "#{base_url.scheme}://#{base_url.host}/#{base_url.path}#{src}"
    end
  end
end
#extract_images(base_url, *xpaths) ⇒ Object
11 12 13 14 15 |
# File 'lib/feed_ninja/extractor.rb', line 11

# Runs #extract_image once per XPath expression and returns all results as
# a single flat list, in the order the expressions were given.
#
# @param base_url [Object] passed through unchanged to #extract_image
# @param xpaths [Array<String>] XPath expressions to evaluate
# @return [Array] concatenation of every #extract_image result
def extract_images(base_url, *xpaths)
  xpaths.flat_map { |expression| extract_image(base_url, expression) }
end
#extract_xml(*xpaths) ⇒ Object
27 28 29 30 31 32 33 |
# File 'lib/feed_ninja/extractor.rb', line 27

# Evaluates each XPath expression against +@doc+ and returns the string form
# (+to_s+) of every match as one flat list, in the order the expressions
# were given.
#
# @param xpaths [Array<String>] XPath expressions to evaluate
# @return [Array<String>] +to_s+ of each matched node
def extract_xml(*xpaths)
  xpaths.flat_map do |expression|
    matches = @doc.xpath(expression)
    matches.map(&:to_s)
  end
end
#fetch(uri) ⇒ Object
4 5 6 7 8 9 |
# File 'lib/feed_ninja/extractor.rb', line 4

# Loads +uri+ and parses the content as HTML, storing the parsed document in
# +@doc+ for the extract_* methods to query.
#
# Uses +URI.open+ (open-uri) rather than a bare +open+: the Kernel#open
# override for URLs was deprecated in Ruby 2.7 and removed in Ruby 3.0, so
# +open(uri)+ no longer performs an HTTP request there.
#
# @param uri [String, URI] location of the page to load
# @return [Object] the parsed document (the same object now held in +@doc+)
def fetch(uri)
  URI.open(uri) do |site|
    @doc = Nokogiri::HTML(site)
  end
end