Class: Extractor

Inherits:
Object
  • Object
show all
Defined in:
lib/feed_ninja/extractor.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#doc ⇒ Object

Returns the value of attribute doc.



2
3
4
# File 'lib/feed_ninja/extractor.rb', line 2

# Reader for the +doc+ attribute.
#
# @return [Object, nil] whatever is currently held in @doc
def doc; @doc; end

Instance Method Details

#extract_image(base_url, xpath) ⇒ Object



17
18
19
20
21
22
23
24
25
# File 'lib/feed_ninja/extractor.rb', line 17

# Collects the image locations matched by +xpath+ in @doc as absolute URLs.
#
# Sources that already start with "http" are returned untouched. Every other
# source is resolved against +base_url+ as a relative reference, so the
# port is preserved, no doubled slash is produced, and page file names such
# as "index.html" are dropped from the base path before joining.
#
# @param base_url [URI::Generic] URL of the page the sources were found on
# @param xpath [String] XPath expression selecting the image sources
# @return [Array<String>] one URL string per node matched by +xpath+
# @raise [URI::InvalidURIError] if a relative source is not a valid URI reference
def extract_image(base_url, xpath)
  @doc.xpath(xpath).map do |picture_src|
    src = picture_src.to_s
    # Already absolute: hand it back exactly as found in the document.
    next src if src.start_with?('http')

    base_url.merge(src).to_s
  end
end

#extract_images(base_url, *xpaths) ⇒ Object



11
12
13
14
15
# File 'lib/feed_ninja/extractor.rb', line 11

# Runs #extract_image for each given XPath expression and returns all
# results as one flat list, in the order the expressions were passed.
#
# @param base_url [Object] forwarded unchanged to #extract_image
# @param xpaths [Array<String>] XPath expressions to evaluate
# @return [Array] concatenation of the per-expression results
def extract_images(base_url, *xpaths)
  xpaths.flat_map { |expression| extract_image(base_url, expression) }
end

#extract_xml(*xpaths) ⇒ Object



27
28
29
30
31
32
33
# File 'lib/feed_ninja/extractor.rb', line 27

# Evaluates each XPath expression against @doc and returns the string form
# (+to_s+) of every matched result as a single flat list.
#
# @param xpaths [Array<String>] XPath expressions to evaluate
# @return [Array<String>] stringified matches, grouped in argument order
def extract_xml(*xpaths)
  xpaths.flat_map do |expression|
    @doc.xpath(expression).map(&:to_s)
  end
end

#fetch(uri) ⇒ Object



4
5
6
7
8
9
# File 'lib/feed_ninja/extractor.rb', line 4

# Opens +uri+, parses what it yields with Nokogiri::HTML and stores the
# parsed document in @doc.
#
# Uses URI.open rather than a bare Kernel#open: since Ruby 3.0 open-uri no
# longer makes Kernel#open handle URLs, so URI.open is the supported way to
# open them.
#
# @param uri [String, URI::Generic] location of the page to load
# @return [Object] the parsed document (also stored in @doc)
def fetch(uri)
  URI.open(uri) do |site|
    @doc = Nokogiri::HTML(site)
  end
end