Class: ContentLinkParser
- Inherits:
-
Object
- Object
- ContentLinkParser
- Defined in:
- lib/content_link_parser.rb
Instance Method Summary
- #all_links ⇒ Object
- #find_matches(array, selector, attribute) ⇒ Object
-
#initialize(url, content, options = {}) ⇒ ContentLinkParser
constructor
A new instance of ContentLinkParser.
- #link_data ⇒ Object
- #method_missing(m) ⇒ Object
Constructor Details
#initialize(url, content, options = {}) ⇒ ContentLinkParser
Returns a new instance of ContentLinkParser.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/content_link_parser.rb', line 7

# Builds a parser for the links contained in +content+.
#
# @param url [#to_s] address of the page; used as the base for resolving
#   links unless the document carries a <base href="..."> element
# @param content [String] HTML source, parsed with Nokogiri::HTML
# @param options [Hash] settings read by this constructor:
#   :ignore_default_tags - when truthy, the built-in tag groups are discarded
#   :additional_tags     - Hash of tag groups merged over whatever groups remain
#   Any :tags entry supplied by the caller is replaced.
def initialize(url, content, options = {})
  # Shallow copy: the :tags key assigned below must not leak into the
  # caller's hash.
  @options = options.dup
  @url = url
  @doc = Nokogiri::HTML(content)

  # A <base href> element, when present, wins over the page URL.
  base_tag = @doc.at("base[href]")
  base_url = base_tag ? base_tag.attr("href").to_s : @url.to_s

  @absolutize = Absolutize.new(base_url,
                               :output_debug => false,
                               :raise_exceptions => false,
                               :force_escaping => false,
                               :remove_anchors => true)

  # Each group is a list of [css_selector, attribute_name_or_regexp] pairs.
  @options[:tags] = {}
  @options[:tags][:links] = [["a[href]", "href"], ["frame[src]", "src"], ["meta[@http-equiv=\"refresh\"]", "content"], ["link[href]:not([rel])", "href"], ["area[href]", "href"]]
  @options[:tags][:images] = [["img[src]", "src"]]
  @options[:tags][:related] = [["link[rel]", "href"]]
  @options[:tags][:scripts] = [["script[src]", "src"]]
  @options[:tags][:styles] = [["link[rel='stylesheet'][href]", "href"], ["style[@type^='text/css']", /url\("?(.*?)"?\)/]]

  #clear the default tags if required
  @options[:tags] = {} if @options[:ignore_default_tags]
  @options[:tags].merge!(@options[:additional_tags]) unless @options[:additional_tags].nil?
end
Dynamic Method Handling
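This class handles dynamic methods through the method_missing method: calling a method named after a configured tag group (for example #links or #images) returns the links found for that group.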
#method_missing(m) ⇒ Object
44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/content_link_parser.rb', line 44

# Answers calls named after a configured tag group (a key of
# @options[:tags]) by collecting the links for every selector/attribute
# pair in that group.
#
# @param m [Symbol] name of the method that was called
# @return [Array] the group's links without duplicates; for an unknown
#   name a warning is printed to stdout and an empty array is returned
def method_missing(m)
  if @options[:tags].key?(m)
    links = []
    @options[:tags][m].each do |selector, attribute|
      find_matches(links, selector, attribute)
    end
    links.uniq
  else
    puts "Warning: There was no configuration on how to find #{m} links"
    []
  end
end

# Keeps respond_to? truthful for the tag-group methods served by
# method_missing. Only configured groups are reported; the warn-and-return-[]
# fallback for unknown names is deliberately not advertised.
def respond_to_missing?(name, include_private = false)
  # Nil-safe so the check cannot raise before the tag table exists.
  tags = @options && @options[:tags]
  (tags && tags.key?(name)) || super
end
Instance Method Details
#all_links ⇒ Object
39 40 41 42 |
# File 'lib/content_link_parser.rb', line 39

# Gathers the links of every configured tag group into one flat list
# with duplicates removed.
#
# @return [Array] flattened, de-duplicated values of #link_data
def all_links
  link_data.values.flatten.uniq
end
#find_matches(array, selector, attribute) ⇒ Object
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# File 'lib/content_link_parser.rb', line 57

# Appends to +array+ the absolutized links found in the nodes matching
# +selector+.
#
# @param array [Array] accumulator that receives the links (as Strings)
# @param selector [String] CSS selector passed to @doc.css
# @param attribute [String, Symbol, Regexp] a String/Symbol names the node
#   attribute holding the link; a Regexp is scanned over the node's text
#   content, taking the first capture group of each match (or the whole
#   match when the pattern has no groups). Any other type is ignored.
#
# Links that cannot be absolutized are skipped on purpose: extraction is
# best-effort and one malformed link must not hide the others.
def find_matches(array, selector, attribute)
  case attribute
  when String, Symbol
    @doc.css(selector).each do |tag|
      begin
        array << @absolutize.url(tag[attribute]).to_s
      rescue StandardError
        # skip this node, keep going
      end
    end
  when Regexp
    @doc.css(selector).each do |tag|
      tag.content.to_s.scan(attribute) do |match|
        begin
          # scan yields an Array of captures, or a String when the pattern
          # has no groups; Array() handles both.
          candidate = Array(match).first
          # to_s keeps the element type the same as in the attribute branch.
          array << @absolutize.url(candidate).to_s
        rescue StandardError
          # skip this match only; later matches in the same node still count
        end
      end
    end
  end
end
#link_data ⇒ Object
31 32 33 34 35 36 37 |
# File 'lib/content_link_parser.rb', line 31

# Collects the links of every configured tag group.
#
# @return [Hash] maps each key of @options[:tags], converted to a Symbol,
#   to the result of calling the method of that name on this object
def link_data
  @options[:tags].each_key.each_with_object({}) do |key, data|
    # Dispatch by name rather than evaluating the key as Ruby source: a
    # configuration key must never be executed as code. send still reaches
    # method_missing when no real method of that name exists.
    data[key.to_sym] = send(key.to_sym)
  end
end