Module: Trackman::Urls::HtmlParser
Instance Method Summary
collapse
Methods included from CssParser
#clean_comments, #parse_css
Instance Method Details
#css(doc) ⇒ Object
22
23
24
|
# File 'lib/trackman/urls/html_parser.rb', line 22
# Collects the stylesheet URLs referenced by <link> elements of the document.
#
# A <link> is treated as a stylesheet when it declares type="text/css" OR
# when its rel attribute contains the "stylesheet" token. The type attribute
# is optional in HTML5, so matching on type alone would skip the common
# <link rel="stylesheet" href="..."> form.
#
# @param doc [#xpath] parsed document (a Nokogiri document in #parse)
# @return [Array<String>] href values that survive #refine
def css(doc)
  # Padding rel with spaces lets us match "stylesheet" as a whole token
  # (e.g. "alternate stylesheet") without matching unrelated substrings.
  # XPath 1.0 string comparison is case-sensitive, as it was before.
  stylesheet_links = '//link[@type="text/css" or ' \
                     'contains(concat(" ", normalize-space(@rel), " "), " stylesheet ")]'
  refine(doc.xpath(stylesheet_links), 'href')
end
|
#img(doc) ⇒ Object
12
13
14
15
16
17
|
# File 'lib/trackman/urls/html_parser.rb', line 12
# Collects image URLs referenced by the document: the src of every <img>
# element plus the href of every favicon <link>.
#
# Favicon links are recognised by the "icon" token inside rel, so both
# rel="icon" and the widespread rel="shortcut icon" are picked up. An exact
# comparison against "icon" would miss the latter.
#
# @param doc [#css, #xpath] parsed document (a Nokogiri document in #parse)
# @return [Array<String>] image URLs followed by icon URLs, each list
#   filtered by #refine
def img(doc)
  # Token match: rel is padded with spaces so that "icon" must be a whole
  # whitespace-separated word. Values such as "apple-touch-icon" or
  # "mask-icon" are therefore still excluded, exactly as before.
  icon_links = '//link[contains(concat(" ", normalize-space(@rel), " "), " icon ")]'

  refine(doc.css('img'), 'src') + refine(doc.xpath(icon_links), 'href')
end
|
#js(doc) ⇒ Object
19
20
21
|
# File 'lib/trackman/urls/html_parser.rb', line 19
# Gathers the script URLs referenced by the document.
#
# Every <script> element is selected and its src attribute is handed to
# #refine, which decides which values are kept.
#
# @param doc [#xpath] parsed document (a Nokogiri document in #parse)
# @return [Array<String>] whatever #refine returns for the src attributes
def js(doc)
  script_nodes = doc.xpath('//script')
  refine(script_nodes, 'src')
end
|
#parse(html) ⇒ Object
7
8
9
10
|
# File 'lib/trackman/urls/html_parser.rb', line 7
# Extracts every asset URL found in an HTML string.
#
# The markup is parsed with Nokogiri and scanned for images/icons, scripts
# and stylesheets (in that order); the raw HTML is additionally passed to
# #parse_css (included from CssParser). Duplicates are removed, keeping the
# first occurrence.
#
# @param html [String] HTML source
# @return [Array] unique URLs in discovery order
def parse(html)
  doc = Nokogiri::HTML(html)
  from_markup = img(doc) + js(doc) + css(doc)
  (from_markup + parse_css(html)).uniq
end
|
#refine(paths, node) ⇒ Object
26
27
28
29
|
# File 'lib/trackman/urls/html_parser.rb', line 26
# Turns a collection of nodes into a filtered list of URL strings.
#
# For each element the attribute named by +node+ is read, converted to a
# string (a missing attribute becomes ""), and everything from the last "?"
# up to the end of the line is stripped. A value is kept only when it
# contains at least one word character, answers true to internal_path? and
# false to embedded? (both are String predicates defined outside this
# method).
#
# @param paths [#map] elements that respond to [] with an attribute name
# @param node [String] attribute name to read, e.g. 'src' or 'href'
# @return [Array<String>] the surviving values, in input order
def refine(paths, node)
  stripped = paths.map do |element|
    element[node].to_s.gsub(/\?[^\?]*$/, '')
  end

  stripped.select do |url|
    # Blank / punctuation-only values are dropped before the predicates run.
    next false unless url =~ /\w/

    url.internal_path? && !url.embedded?
  end
end
|