Class: ImageDownloader::Parser
- Inherits:
- Object
- Inheritance chain: Object → ImageDownloader::Parser
- Defined in:
- lib/image_downloader/parser.rb
Constant Summary collapse
- A_HREF_IMAGE_PREFIX =
'_a_href_'
- STYLE_URL_IMAGE_PREFIX =
'_style_url_'
- LINK_ICON_IMAGE_PREFIX =
'_link_icon_'
- COLLECT_METHODS_PREFIX =
'collect_from_'
Instance Attribute Summary collapse
-
#argument_url ⇒ Object
Returns the value of attribute argument_url.
-
#content ⇒ Object
Returns the value of attribute content.
-
#images ⇒ Object
Returns the value of attribute images.
-
#images_hash ⇒ Object
Returns the value of attribute images_hash.
-
#url ⇒ Object
Returns the value of attribute url.
-
#user_agent ⇒ Object
Returns the value of attribute user_agent.
Class Method Summary collapse
- .all_collect_from_methods ⇒ Object (also: all_image_places)
- .clear(str) ⇒ Object
Instance Method Summary collapse
- #collect_from_a_href(path) ⇒ Object
- #collect_from_img_src(path) ⇒ Object
- #collect_from_link_icon(path) ⇒ Object
- #collect_from_style_url(path) ⇒ Object
- #get_content ⇒ Object
- #get_content_raw ⇒ Object
- #get_images(path, h = {}) ⇒ Object
- #get_images_raw(path, h = {}) ⇒ Object
- #get_images_regexp(path, regexp) ⇒ Object
- #ignore_file_without(h = {}) ⇒ Object
- #initialize(url, user_agent) ⇒ Parser (constructor)
  A new instance of Parser.
- #open_url ⇒ Object
- #push_to_images(path, src, h = {}) ⇒ Object
Constructor Details
#initialize(url, user_agent) ⇒ Parser
Returns a new instance of Parser.
24 25 26 27 28 29 30 |
# File 'lib/image_downloader/parser.rb', line 24
# Sets up a parser for one page.
#
# Keeps the +url+ string and +user_agent+ exactly as given, stores a
# URI.parse'd copy of +url+ in @url, and starts with an empty @images
# array and an empty @images_hash.
#
# @param url page address; must be accepted by URI.parse
# @param user_agent value kept in @user_agent (sent as the User-Agent
#   header by #open_url)
# @raise whatever URI.parse raises when +url+ cannot be parsed
def initialize(url, user_agent)
  @url = URI.parse(url)
  @argument_url, @user_agent = url, user_agent
  @images = []
  @images_hash = {}
end
Instance Attribute Details
#argument_url ⇒ Object
Returns the value of attribute argument_url.
17 18 19 |
# File 'lib/image_downloader/parser.rb', line 17
# Reader for the +argument_url+ attribute.
#
# @return the current value of @argument_url (assigned from the +url+
#   argument in #initialize)
def argument_url
  @argument_url
end
#content ⇒ Object
Returns the value of attribute content.
17 18 19 |
# File 'lib/image_downloader/parser.rb', line 17
# Reader for the +content+ attribute.
#
# @return the current value of @content; #get_content stores the result
#   of Nokogiri::HTML there and #get_content_raw stores the page text
def content
  @content
end
#images ⇒ Object
Returns the value of attribute images.
17 18 19 |
# File 'lib/image_downloader/parser.rb', line 17
# Reader for the +images+ attribute.
#
# @return the current value of @images (an Array created empty in
#   #initialize and appended to by #push_to_images)
def images
  @images
end
#images_hash ⇒ Object
Returns the value of attribute images_hash.
17 18 19 |
# File 'lib/image_downloader/parser.rb', line 17
# Reader for the +images_hash+ attribute.
#
# @return the current value of @images_hash (a Hash created empty in
#   #initialize; #push_to_images records each seen source as a key)
def images_hash
  @images_hash
end
#url ⇒ Object
Returns the value of attribute url.
17 18 19 |
# File 'lib/image_downloader/parser.rb', line 17
# Reader for the +url+ attribute.
#
# @return the current value of @url (the URI.parse result stored by
#   #initialize)
def url
  @url
end
#user_agent ⇒ Object
Returns the value of attribute user_agent.
17 18 19 |
# File 'lib/image_downloader/parser.rb', line 17
# Reader for the +user_agent+ attribute.
#
# @return the current value of @user_agent (assigned in #initialize and
#   sent as the User-Agent header by #open_url)
def user_agent
  @user_agent
end
Class Method Details
.all_collect_from_methods ⇒ Object Also known as: all_image_places
117 118 119 |
# File 'lib/image_downloader/parser.rb', line 117
# Lists every Parser instance method whose name begins with
# COLLECT_METHODS_PREFIX.
#
# The name must *start* with the prefix; a method that merely contains
# the prefix somewhere in its name is not a collector and is left out.
#
# NOTE(review): +to_hash_keys+ is not a core Array method. It is
# presumably a project extension that turns the array into a hash keyed
# by its elements, with the block result as each value — confirm.
#
# @return the result of +to_hash_keys+ over the collector method names
#   (as symbols), with the block supplying +true+
def self.all_collect_from_methods
  collectors = Parser.instance_methods.select do |method_name|
    # to_s: instance_methods yields Strings on Ruby 1.8 and Symbols later.
    method_name.to_s.start_with?(COLLECT_METHODS_PREFIX)
  end
  collectors.map { |method_name| method_name.to_sym }.to_hash_keys { true }
end
.clear(str) ⇒ Object
46 47 48 49 50 51 52 |
# File 'lib/image_downloader/parser.rb', line 46
# Strips a CSS-style <tt>url(...)</tt> wrapper from +str+.
#
# When +str+ contains "url" (any letter case), everything from the start
# of a line up to and including the first "url(" is removed, and so is
# every ")" character. Strings that do not contain "url" are returned
# untouched.
#
# The argument is never modified: a new string is returned when something
# has to be stripped, so frozen strings and strings the caller still
# needs are safe to pass in.
#
# @param str text that may carry a url(...) wrapper; nil is passed through
# @return the cleaned text, or +str+ itself when nothing applies
def self.clear(str)
  return str unless str =~ /url/i

  # Case-insensitive so that "URL(" is stripped the same way the /url/i
  # check above detects it.
  str.gsub(/^.*?url\(/i, '').gsub(/\)/, '')
end
Instance Method Details
#collect_from_a_href(path) ⇒ Object
74 75 76 77 78 79 80 81 |
# File 'lib/image_downloader/parser.rb', line 74
# Registers every <a> link whose +href+ contains a dot followed by one of
# Images::IMAGE_EXTENSIONS (case-insensitive). Each hit is stored with
# the A_HREF_IMAGE_PREFIX file-name prefix.
#
# Anchors without an +href+ attribute (e.g. <a name="top">) are skipped
# before the value is handed to URL.remove_new_line_symbols!.
#
# @param path catalog path forwarded to #push_to_images
def collect_from_a_href(path)
  # Built once per call instead of once per anchor.
  image_link = /\.(?:#{Images::IMAGE_EXTENSIONS.join('|')})/i

  content.xpath('//a').each do |a|
    href = a[:href]
    next unless href

    URL.remove_new_line_symbols!(href)
    next if href !~ image_link

    push_to_images(path, href, { :file_name_prefix => A_HREF_IMAGE_PREFIX })
  end
end
#collect_from_img_src(path) ⇒ Object
66 67 68 69 70 71 72 |
# File 'lib/image_downloader/parser.rb', line 66
# Registers the +src+ of every <img> element in the parsed document.
#
# <img> tags without a +src+ attribute are skipped: there is nothing to
# download, and a nil source would otherwise reach
# URL.remove_new_line_symbols! and #push_to_images.
#
# @param path catalog path forwarded to #push_to_images
def collect_from_img_src(path)
  content.xpath('//img').each do |img|
    src = img[:src]
    next unless src

    URL.remove_new_line_symbols!(src)
    push_to_images(path, src)
  end
end
#collect_from_link_icon(path) ⇒ Object
94 95 96 97 98 99 100 |
# File 'lib/image_downloader/parser.rb', line 94
# Registers the +href+ of every <link rel="shortcut icon"> element,
# stored with the LINK_ICON_IMAGE_PREFIX file-name prefix.
#
# Matching <link> elements without an +href+ attribute are skipped so
# that nil never reaches URL.remove_new_line_symbols! or
# #push_to_images.
#
# @param path catalog path forwarded to #push_to_images
def collect_from_link_icon(path)
  content.xpath('//link[@rel="shortcut icon"]').each do |link|
    src = link[:href]
    next unless src

    URL.remove_new_line_symbols!(src)
    push_to_images(path, src, { :file_name_prefix => LINK_ICON_IMAGE_PREFIX })
  end
end
#collect_from_style_url(path) ⇒ Object
83 84 85 86 87 88 89 90 91 92 |
# File 'lib/image_downloader/parser.rb', line 83
# Registers images referenced from inline styles.
#
# Looks at every element that has a +style+ attribute and, when the
# style contains <tt>background: url(...)</tt> or
# <tt>background-image: url(...)</tt> (optionally quoted), registers the
# captured address with the STYLE_URL_IMAGE_PREFIX file-name prefix.
# Only the first such declaration per element is used.
#
# @param path catalog path forwarded to #push_to_images
def collect_from_style_url(path)
  content.xpath("//*[@style]").each do |element|
    style = element[:style]
    found = style && /(?:background|background-image):\s*url\(['"]?(.*?)['"]?\)/i.match(style)
    src = found && found[1]
    next unless src

    URL.remove_new_line_symbols!(src)
    push_to_images(path, src, { :file_name_prefix => STYLE_URL_IMAGE_PREFIX })
  end
end
#get_content ⇒ Object
58 59 60 |
# File 'lib/image_downloader/parser.rb', line 58
# Downloads the page via #open_url, parses it with Nokogiri::HTML and
# stores the parsed document in @content.
#
# The handle returned by #open_url is closed once parsing has finished
# (or failed), so it is not left open until garbage collection.
#
# @return the parsed document that was stored in @content
def get_content
  io = open_url
  begin
    @content = Nokogiri::HTML(io)
  ensure
    io.close if io.respond_to?(:close)
  end
end
#get_content_raw ⇒ Object
32 33 34 35 |
# File 'lib/image_downloader/parser.rb', line 32
# Downloads the page via #open_url and stores its text in @content with
# every run of newlines, carriage returns and tabs collapsed into a
# single space.
#
# The handle returned by #open_url is closed after reading. The stored
# text is always returned, including when there was no whitespace to
# collapse.
#
# @return [String] the text stored in @content
def get_content_raw
  io = open_url
  begin
    raw = io.read
  ensure
    io.close if io.respond_to?(:close)
  end
  # Non-destructive gsub: gsub! would return nil when nothing is replaced.
  @content = raw.gsub(/[\n\r\t]+/, ' ')
end
#get_images(path, h = {}) ⇒ Object
62 63 64 |
# File 'lib/image_downloader/parser.rb', line 62
# Runs the collectors named by the keys of +h+.
#
# Every key is treated as a method name and invoked on this object with
# +path+ as its only argument. Values are ignored, so a key mapped to
# +false+ is still invoked. Because +send+ is used, any method name
# (including private ones) will be called; callers should only pass
# trusted keys.
#
# @param path catalog path handed to each collector
# @param h hash whose keys are collector method names
# @return +h+
def get_images(path, h = {})
  h.each_key do |collector|
    send(collector, path)
  end
end
#get_images_raw(path, h = {}) ⇒ Object
37 38 39 40 41 42 43 44 |
# File 'lib/image_downloader/parser.rb', line 37
# Finds image addresses in raw (string) page content using two regular
# expressions and registers each hit after passing it through
# Parser.clear.
#
# Pass 1 picks up quoted text that contains a dot followed by one of
# Images::IMAGE_EXTENSIONS. Pass 2 picks up unquoted +href=+ / +src=+
# values with such an extension. Both are case-insensitive.
#
# @param path catalog path forwarded to #push_to_images
# @param h accepted but not used by this method
# @return the receiver of the second scan, i.e. the content string
def get_images_raw(path, h = {})
  extensions = Images::IMAGE_EXTENSIONS.join('|')
  register = lambda { |captures| push_to_images(path, Parser.clear(captures[0])) }

  quoted = /['"]+([^'"]+\.(?:#{extensions}))[^'"]*['"]+/i
  unquoted = /(?:href|src)=([^\s'">]+\.(?:#{extensions}))[^\s'">]*[>\s]+/i

  content.scan(quoted, &register)
  content.scan(unquoted, &register)
end
#get_images_regexp(path, regexp) ⇒ Object
54 55 56 |
# File 'lib/image_downloader/parser.rb', line 54
# Registers every match of a caller-supplied +regexp+ found in the raw
# (string) page content.
#
# String#scan yields the whole match when +regexp+ has no capture groups
# and an Array of captures when it has some. For the Array case the
# first non-nil capture is used as the image address; calling +to_s+ on
# the Array itself would register its inspect form (e.g. ["a.png"])
# instead of the address. Matches with no usable capture are skipped.
#
# @param path catalog path forwarded to #push_to_images
# @param regexp [Regexp] pattern locating image addresses
# @return the scanned content string
def get_images_regexp(path, regexp)
  content.scan(regexp) do |match|
    src = match.is_a?(Array) ? match.compact.first : match
    push_to_images(path, src.to_s) if src
  end
end
#ignore_file_without(h = {}) ⇒ Object
111 112 113 114 115 |
# File 'lib/image_downloader/parser.rb', line 111
# Drops collected images whose file name lacks an extension.
#
# With <tt>h[:extension]</tt> set, images whose file name does not end in
# a dot followed by zero to five letters are removed. With
# <tt>h[:image_extension]</tt> set, images whose file name does not end
# in a dot followed by one of Images::IMAGE_EXTENSIONS are removed. Both
# checks are case-insensitive and both may be requested at once.
#
# @param h options hash (+:extension+, +:image_extension+); a nil/false
#   value makes the method a no-op
# @return the images array when +:image_extension+ was applied,
#   otherwise nil
def ignore_file_without(h = {})
  return unless h

  if h[:extension]
    images.delete_if { |image| image.file_name !~ /\.[a-z]{0,5}$/i }
  end

  if h[:image_extension]
    images.delete_if do |image|
      image.file_name !~ /\.(?:#{Images::IMAGE_EXTENSIONS.join('|')})$/i
    end
  end
end
#open_url ⇒ Object
125 126 127 |
# File 'lib/image_downloader/parser.rb', line 125
# Opens the page at #argument_url, sending #user_agent as the User-Agent
# request header, and returns the opened handle.
#
# Uses URI.open where open-uri provides it. On Ruby 3.0 and later
# Kernel#open no longer opens URLs, so calling it with an address would
# look for a local file instead. Older Rubies, where open-uri only
# patches Kernel#open, keep using that.
#
# Relies on open-uri having been required elsewhere in the project.
#
# @return the object produced by open-uri for the page
def open_url
  opener = URI.respond_to?(:open) ? URI : Kernel
  opener.open(argument_url, 'User-Agent' => user_agent)
end
#push_to_images(path, src, h = {}) ⇒ Object
102 103 104 105 106 107 108 109 |
# File 'lib/image_downloader/parser.rb', line 102
# Registers one image source, ignoring sources that were already seen.
#
# The first time a given +src+ is passed, it is recorded in #images_hash
# and a new Images object (built from the page host, the escaped +src+
# and the options below) is appended to #images. Later calls with the
# same +src+ do nothing.
#
# URI.escape was removed in Ruby 3.0; when it is unavailable the
# equivalent URI::DEFAULT_PARSER.escape is used instead.
#
# @param path value passed to Images as +:catalog_path+
# @param src image address as found in the page
# @param h options; +:file_name_prefix+ is forwarded (default '')
# @return the images array after a push, or nil for a duplicate
def push_to_images(path, src, h = {})
  return if images_hash.has_key?(src)

  images_hash[src] = 1
  escaped_src = URI.respond_to?(:escape) ? URI.escape(src) : URI::DEFAULT_PARSER.escape(src)
  images.push Images.new(url.host, escaped_src, {
    :catalog_path => path,
    :file_name_prefix => (h[:file_name_prefix] || '')
  })
end