Class: ContentLinkParser

Inherits:
Object
  • Object
show all
Defined in:
lib/content_link_parser.rb

Instance Method Summary collapse

Constructor Details

#initialize(url, content, options = {}) ⇒ ContentLinkParser

Returns a new instance of ContentLinkParser.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/content_link_parser.rb', line 7

# Builds a parser for the links contained in an HTML document.
#
# @param url [#to_s] address the content came from; used as the base for
#   resolving links unless the document declares a <base href> element
# @param content [String] HTML source handed to Nokogiri::HTML
# @param options [Hash] parser settings. Keys read here:
#   :ignore_default_tags - when truthy, the built-in selector groups are dropped
#   :additional_tags     - Hash of group name => [[selector, attribute], ...]
#                          merged over the (possibly emptied) defaults
#   Any :tags entry supplied by the caller is replaced.
def initialize(url, content, options = {})
  # Work on a copy: the tag configuration is written into @options below and
  # must not leak into a hash the caller may reuse elsewhere.
  @options = (options || {}).dup
  @url = url
  @doc = Nokogiri::HTML(content)

  # A <base href> element overrides the document URL when resolving links.
  base_tag = @doc.at("base[href]")
  base_url = base_tag ? base_tag.attr("href").to_s : @url.to_s
  @absolutize = Absolutize.new(base_url, :output_debug => false, :raise_exceptions => false, :force_escaping => false, :remove_anchors => true)

  # Each group maps to [selector, attribute] pairs; the attribute is either an
  # attribute name or a Regexp applied to the element's text content.
  @options[:tags] = {}
  unless @options[:ignore_default_tags]
    @options[:tags][:links] = [["a[href]", "href"], ["frame[src]", "src"], ["meta[@http-equiv=\"refresh\"]", "content"], ["link[href]:not([rel])", "href"], ["area[href]", "href"]]
    @options[:tags][:images] = [["img[src]", "src"]]
    @options[:tags][:related] = [["link[rel]", "href"]]
    @options[:tags][:scripts] = [["script[src]", "src"]]
    @options[:tags][:styles] = [["link[rel='stylesheet'][href]", "href"], ["style[@type^='text/css']", /url\("?(.*?)"?\)/]]
  end

  @options[:tags].merge!(@options[:additional_tags]) unless @options[:additional_tags].nil?
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method.

#method_missing(m) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/content_link_parser.rb', line 44

# Resolves calls named after a configured tag group (for example +links+ or
# +images+) to the unique entries collected for that group.
#
# Extra arguments are accepted and ignored so that a dynamic call made with
# arguments is handled like any other instead of failing with an arity error
# raised from inside method_missing.
#
# @param m [Symbol] name of the method that was called
# @return [Array] unique entries gathered by find_matches for every
#   selector/attribute pair of the group; an empty array (after printing a
#   warning) when no group named +m+ is configured
def method_missing(m, *_args)
  if @options[:tags].key?(m)
    links = []
    @options[:tags][m].each do |selector, attribute|
      find_matches(links, selector, attribute)
    end
    links.uniq
  else
    puts "Warning: There was no configuration on how to find #{m} links"
    []
  end
end

# Reports the configured tag groups as methods this object responds to, so
# that respond_to? agrees with what method_missing actually resolves.
#
# @param m [Symbol] method name being queried
# @param include_private [Boolean] forwarded to the default implementation
# @return [Boolean]
def respond_to_missing?(m, include_private = false)
  tags = @options && @options[:tags]
  (tags && tags.key?(m)) || super
end

Instance Method Details

#all_links ⇒ Object



39
40
41
42
# File 'lib/content_link_parser.rb', line 39

# Collects the entries of every tag group reported by link_data into a single
# flat list without duplicates.
#
# @return [Array] flattened, de-duplicated values of the link_data hash
def all_links
  link_data.values.flatten.uniq
end

#find_matches(array, selector, attribute) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/content_link_parser.rb', line 57

# Appends the absolute form of every URL found for +selector+ to +array+.
#
# @param array [Array] collection that receives the URLs (as Strings)
# @param selector [String] CSS selector run against the parsed document
# @param attribute [String, Symbol, Regexp] a String or Symbol names the
#   attribute holding the URL; a Regexp is scanned over each matching
#   element's text content, using the first capture group (or the whole match
#   when the pattern has no groups) as the URL
# @return [Object, nil] the result of iterating the matched elements, or nil
#   when +attribute+ is of an unsupported type
def find_matches(array, selector, attribute)
  case attribute
  when String, Symbol
    @doc.css(selector).each do |tag|
      append_absolute_url(array, tag[attribute])
    end
  when Regexp
    @doc.css(selector).each do |tag|
      begin
        tag.content.scan(attribute) do |match|
          # scan yields an Array of captures when the pattern has groups and
          # the matched String otherwise.
          append_absolute_url(array, match.is_a?(Array) ? match.first : match)
        end
      rescue StandardError
        # Best effort: content that cannot be scanned is skipped.
        next
      end
    end
  end
end

# Absolutizes +raw_url+ and appends it to +array+ as a String; values that
# cannot be absolutized are skipped so one bad URL does not discard the rest.
def append_absolute_url(array, raw_url)
  array << @absolutize.url(raw_url).to_s
rescue StandardError
  array
end
private :append_absolute_url


31
32
33
34
35
36
37
# File 'lib/content_link_parser.rb', line 31

# Gathers the entries for every configured tag group.
#
# Each group is resolved by sending its name to this object (normally handled
# by method_missing) rather than by evaluating the name as Ruby source, so
# group names that are not plain identifiers resolve correctly and a
# configured name is never executed as code.
#
# @return [Hash] group name (as a Symbol) => result of calling that group
def link_data
  @options[:tags].keys.each_with_object({}) do |key, data|
    group = key.to_sym
    data[group] = public_send(group)
  end
end