Class: CURL
- Inherits:
-
Object
- Object
- CURL
- Defined in:
- lib/curl.rb
Constant Summary collapse
# Short, human-readable aliases mapped to the User-Agent string each one
# stands for. Looked up by #user_agent_alias= and by #initialize.
AGENT_ALIASES = {
  'Windows IE 6'      => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
  'Windows IE 7'      => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
  'Windows Mozilla'   => 'Mozilla/5.0 Windows; U; Windows NT 5.0; en-US; rv:1.4b Gecko/20030516 Mozilla Firebird/0.6',
  'Windows Mozilla 2' => 'Mozilla/5.0 Windows; U; Windows NT 5.0; ru-US; rv:1.4b Gecko/20030516',
  'Windows Mozilla 3' => 'Mozilla/5.0 Windows; U; Windows NT 5.0; en-UK; rv:1.4b Gecko/20060516',
  'Mac Safari'        => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/418 (KHTML, like Gecko) Safari/417.9.3',
  'Mac FireFox'       => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3',
  'Mac Mozilla'       => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
  'Linux Mozilla'     => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
  'Linux Konqueror'   => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
  'IPhone'            => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1A543a Safari/419.3',
  'IPhone Vkontakt'   => 'VKontakte/1.1.8 CFNetwork/342.1 Darwin/9.4.1',
  'Google'            => 'Googlebot/2.1 (+http://www.google.com/bot.html)',
  'Yahoo-Slurp'       => 'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)'
}
Instance Attribute Summary collapse
-
#user_agent ⇒ Object
Returns the value of attribute user_agent.
Class Method Summary collapse
Instance Method Summary collapse
- #cache_file(url) ⇒ Object
- #cache_path(url) ⇒ Object
- #clear ⇒ Object
- #cookies ⇒ Object
- #debug=(debug = false) ⇒ Object
- #debug? ⇒ Boolean
- #get(url, keys = {}) ⇒ Object
- #get_header(url, location = false) ⇒ Object
- #get_raw(url, keys = {}) ⇒ Object
- #init_cook(hash, site = '') ⇒ Object
-
#initialize(keys = {}) {|_self| ... } ⇒ CURL
constructor
A new instance of CURL.
-
#post(url, post_data, ref = nil, count = 5, header = " --header \"Content-Type: application/x-www-form-urlencoded\" ") ⇒ Object
Format of the POST data: data = { "subm" => "1", "sid" => cap.split("=").last, "country" => "1" }.
- #proxy(proxy_uri) ⇒ Object
- #save(url, path = "/tmp/curl/curl_#{rand}_#{rand}.jpg") ⇒ Object
- #save!(url, path = "/tmp/curl/curl_#{rand}_#{rand}.jpg") ⇒ Object
-
#send(url, post_data, ref = nil, count = 5) ⇒ Object
Format of the POST data: data = { "subm" => "1", "sid" => cap.split("=").last, "country" => "1" }.
- #socks(socks_uri) ⇒ Object
- #user_agent_alias=(al) ⇒ Object
Constructor Details
#initialize(keys = {}) {|_self| ... } ⇒ CURL
Returns a new instance of CURL.
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/curl.rb', line 34
# Sets up the wrapper's state and the base curl switches shared by requests.
#
# Recognised entries of +keys+ (all optional):
#   :socks_hostname  - truthy makes #socks emit --socks5-hostname instead of --socks5
#   :cache           - root directory used by #cache_path; falsy disables caching in #get
#   :cache_time      - cache lifetime in seconds (default: one day)
#   :connect_timeout - value for curl's --connect-timeout (default: 6)
#   :max_time        - value for curl's --max-time (default: 8)
#   :retry           - value for curl's --retry (default: 1)
#   :cookies_disable - truthy stores false in @cookies_enable
#   :cookies         - path of the cookie jar (default: a random file in /tmp/curl/)
#
# The hash is only read; earlier code wrote :socks_hostname back into the
# caller's hash via `||=`.
#
# Yields the new instance when a block is given.
def initialize(keys = {})
  @socks_hostname  = keys[:socks_hostname] || false
  @cache           = keys[:cache] || false
  @cache_time      = keys[:cache_time] || 3600 * 24 * 1 # 1 day cache life
  @connect_timeout = keys[:connect_timeout] || 6
  @max_time        = keys[:max_time] || 8
  @retry           = keys[:retry] || 1
  @cookies_enable  = !keys[:cookies_disable]
  @user_agent      = AGENT_ALIASES["Google"] # AGENT_ALIASES[AGENT_ALIASES.keys[rand(6)]]

  FileUtils.makedirs("/tmp/curl/")
  @cookies_file = keys[:cookies] || "/tmp/curl/curl_#{rand}_#{rand}.jar"
  # @cookies_file = "/home/ruslan/curl.jar"
  #--header "Accept-Encoding: deflate"
  # @setup_params = ' --header "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" --header "Accept-Language: en-us,en;q=0.5" --header "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7" '
  @setup_params = " --connect-timeout #{@connect_timeout} --max-time #{@max_time} --retry #{@retry} --location --compressed --silent -k "
  # @setup_params = ' --location --silent '

  yield self if block_given?
end
Instance Attribute Details
#user_agent ⇒ Object
Returns the value of attribute user_agent.
32 33 34 |
# File 'lib/curl.rb', line 32
# Reader for the User-Agent string currently stored in @user_agent.
def user_agent
  @user_agent
end
Class Method Details
.check(proxy) ⇒ Object
76 77 78 79 80 81 82 83 |
# File 'lib/curl.rb', line 76
# Probes a SOCKS5 proxy by fetching yahoo.com through it with curl.
#
# proxy - proxy address handed to curl's --socks5 option.
#
# Returns true when the response body contains "yahoo", false otherwise.
# The request runs inside catch_errors(5) (defined elsewhere in the project).
def self.check(proxy)
  out = false
  catch_errors(5) {
    # Argument-vector form: no shell is involved, so characters in +proxy+
    # cannot be interpreted as shell syntax.
    argv = ["curl", "--connect-timeout", "6", "--max-time", "8", "--silent",
            "--socks5", proxy.to_s, "yahoo.com"]
    result = IO.popen(argv) { |io| io.read }
    out = true if result.to_s.include?("yahoo")
  }
  out
end
Instance Method Details
#cache_file(url) ⇒ Object
97 98 99 |
# File 'lib/curl.rb', line 97
# Full path of the cache file for +url+: the directory from #cache_path
# plus "<md5 hex of url>.html".
def cache_file(url)
  digest = Digest::MD5.hexdigest(url)
  "#{cache_path(url)}/#{digest}.html"
end
#cache_path(url) ⇒ Object
94 95 96 |
# File 'lib/curl.rb', line 94
# Directory in which the cached copy of +url+ lives.
#
# The path is @cache followed by four nested directories named after the
# first four byte pairs of the URL's MD5 hex digest,
# e.g. "<cache>/90/01/50/98".
def cache_path(url)
  digest = Digest::MD5.hexdigest(url) # hashed once instead of four times
  "#{@cache}/#{digest[0..1]}/#{digest[2..3]}/#{digest[4..5]}/#{digest[6..7]}"
end
#clear ⇒ Object
238 239 240 |
# File 'lib/curl.rb', line 238
# Deletes the cookie jar file (@cookies_file) if it is present.
# Does nothing when the file does not exist.
def clear
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  File.delete(@cookies_file) if File.exist?(@cookies_file)
end
#cookies ⇒ Object
57 58 59 |
# File 'lib/curl.rb', line 57
# Returns the value of @cookies_file.
#
# NOTE(review): the method name was missing from this listing
# (`def @cookies_file end`, a syntax error); it is restored from the
# "#cookies" heading. Other methods scan the result of `cookies` for a
# double-quoted path, so confirm against lib/curl.rb that this is the
# definition they actually call.
def cookies
  @cookies_file
end
#debug=(debug = false) ⇒ Object
86 87 88 |
# File 'lib/curl.rb', line 86
# Turns debug output on or off by storing the flag in @debug.
# Other methods print the curl command line when the flag is truthy.
def debug=(debug = false)
  @debug = debug
end
#debug? ⇒ Boolean
90 91 92 |
# File 'lib/curl.rb', line 90
# Returns the current debug flag (the raw value of @debug, nil if never set).
def debug?
  @debug
end
#get(url, keys = {}) ⇒ Object
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
# File 'lib/curl.rb', line 101
# Fetches +url+, going through the on-disk cache when @cache is set.
#
# keys - options hash (read only, never modified):
#   :ref      - extra text placed on the curl command line by #get_raw
#   :count    - number of attempts for an empty response (default: 3)
#   :encoding - "utf-8" (default) or anything else; see #get_raw
#   :raw      - true to skip post-processing (only honoured when caching is off)
#
# With caching on, a cache file younger than @cache_time seconds is read back
# as is; otherwise the page is downloaded, written to the cache file and
# returned. Without caching the call is forwarded to #get_raw.
def get(url, keys = {})
  ref      = keys[:ref]
  count    = keys[:count] || 3
  encoding = keys[:encoding] || "utf-8"
  raw      = keys[:raw].nil? ? false : keys[:raw]

  unless @cache
    return get_raw(url, { :count => count, :ref => ref, :encoding => encoding, :raw => raw })
  end

  filename = cache_file(url)
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if File.exist?(filename) && File.ctime(filename) > Time.now - @cache_time
    puts "read from cache file '#{filename}'" if @debug
    return File.read(filename) # closes the handle, unlike open(filename).read
  end

  FileUtils.mkdir_p(cache_path(url))
  # NOTE(review): :raw is not forwarded on this path (unchanged from the
  # earlier code), so cached pages are always post-processed.
  result = get_raw(url, { :count => count, :ref => ref, :encoding => encoding })
  puts "cache to file '#{filename}'" if @debug
  File.open(filename, "w") { |f| f.puts result }
  result
end
#get_header(url, location = false) ⇒ Object
214 215 216 217 218 219 |
# File 'lib/curl.rb', line 214
# Requests +url+ with curl's -i switch, so the response headers are included
# in the output, and returns whatever open_pipe yields for the command.
#
# location - when false (default) every "--location" in the command is
#            blanked out; when true it is left in place.
#
# NOTE(review): the first interpolation was empty (`#{}`) in this listing;
# `cookies` is restored there because the identifier was stripped throughout
# the listing. Confirm against lib/curl.rb.
# NOTE(review): +url+ is interpolated into a shell command inside double
# quotes; callers should not pass untrusted URLs.
def get_header(url, location = false)
  cmd = "curl #{cookies} #{browser_type} #{@setup_params} \"#{url}\" -i "
  cmd.gsub!(/\-\-location/, ' ') unless location
  puts cmd.red if @debug
  open_pipe(cmd)
end
#get_raw(url, keys = {}) ⇒ Object
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File 'lib/curl.rb', line 125
# Downloads +url+ with curl, bypassing the cache.
#
# keys - options hash (read only, never modified):
#   :ref      - extra text inserted verbatim before the URL on the command line
#   :count    - total number of attempts while the response is empty (default: 3)
#   :encoding - "utf-8" (default) returns result.clean; any other value makes
#               the body be converted from WINDOWS-1251 to UTF-8
#   :raw      - true returns the output of open_pipe untouched
#
# Fixes relative to the earlier code:
# * the retry called `self.get(url, count)`, passing an Integer where the
#   options hash is expected; the same command is now re-run in a loop
#   (still +count+ attempts in total);
# * Iconv is not part of the standard library since Ruby 2.0; String#encode
#   performs the same WINDOWS-1251 -> UTF-8 conversion;
# * the empty `#{}` interpolation is restored to `cookies` (the identifier was
#   stripped throughout this listing) — NOTE(review): confirm against lib/curl.rb.
def get_raw(url, keys = {})
  ref      = keys[:ref]
  count    = keys[:count] || 3
  encoding = keys[:encoding] || "utf-8"
  raw      = keys[:raw].nil? ? false : keys[:raw]

  cmd = "curl #{cookies} #{browser_type} #{@setup_params} #{ref} \"#{url}\" "
  puts cmd.red if @debug

  result = open_pipe(cmd)
  while result.to_s.strip.empty?
    puts "empty result, left #{count} try".yellow if @debug
    count -= 1
    break unless count > 0
    result = open_pipe(cmd)
  end

  # result.force_encoding(encoding)
  return result if raw

  encoding == "utf-8" ? result.clean : result.encode("UTF-8", "WINDOWS-1251")
end
#init_cook(hash, site = '') ⇒ Object
242 243 244 245 246 247 248 249 |
# File 'lib/curl.rb', line 242
# Writes a Netscape-format cookie file containing one session cookie per
# entry of +hash+ and returns the text that was written.
#
# hash - cookie name => cookie value pairs
# site - value for the domain column of every row (default: '')
#
# The destination is the first double-quoted substring found in the string
# returned by `cookies`.
# NOTE(review): the receiver of `.scan` was missing in this listing
# (`File.open(.scan(...` is a syntax error); `cookies` is restored there
# because the identifier was stripped throughout the listing. Confirm
# against lib/curl.rb.
def init_cook(hash, site = '')
  header = "# Netscape HTTP Cookie File\n# http://curl.haxx.se/rfc/cookie_spec.html\n# This file was generated by libcurl! Edit at your own risk.\n\n"
  rows = hash.map { |key, val| "#{site}\tTRUE\t/\tFALSE\t0\t#{key}\t#{val}\n" }
  content = header + rows.join + "\n"

  jar_path = cookies.scan(/\"(.+?)\"/).first.first
  File.open(jar_path, "w") { |f| f.puts content }
  content
end
#post(url, post_data, ref = nil, count = 5, header = " --header \"Content-Type: application/x-www-form-urlencoded\" ") ⇒ Object
Format of the POST data: data = { "subm" => "1", "sid" => cap.split("=").last, "country" => "1" }
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
# File 'lib/curl.rb', line 155
# Sends a url-encoded POST request with curl and returns the response body.
#
# url       - target URL
# post_data - name => value pairs, e.g.
#             { "subm" => "1", "sid" => cap.split("=").last, "country" => "1" };
#             entries with a nil/false key or the key 'IDontAgreeBtn' are skipped
# ref       - extra text inserted verbatim before the URL on the command line
# count     - total number of attempts while the response is empty (default: 5)
# header    - curl switch carrying the Content-Type header
#
# Fixes relative to the earlier code:
# * URI.escape was removed in Ruby 3.0. It was only used to turn the dots
#   left by CGI.escape into %2E, which gsub now does directly;
# * the retry re-invoked #post with ref = nil and the default header, so a
#   retried request differed from the first one. The identical command is
#   now re-run in a loop;
# * the empty `#{}` interpolation is restored to `cookies` (the identifier was
#   stripped throughout this listing) — NOTE(review): confirm against lib/curl.rb.
def post(url, post_data, ref = nil, count = 5, header = " --header \"Content-Type: application/x-www-form-urlencoded\" ")
  pairs = post_data.map do |key, val|
    next if !key || key == 'IDontAgreeBtn'
    "#{key}=#{CGI.escape(val.to_s).gsub('.', '%2E')}"
  end
  post_q = "--data \"#{pairs.compact.join('&')}\""

  cmd = "curl #{cookies} #{browser_type} #{post_q} #{header} #{@setup_params} #{ref} \"#{url}\" "
  puts cmd.red if @debug

  result = open_pipe(cmd)
  while result.to_s.strip.empty?
    puts "empty result, left #{count} try".yellow if @debug
    count -= 1
    break unless count > 0
    result = open_pipe(cmd)
  end
  result
end
#proxy(proxy_uri) ⇒ Object
61 62 63 64 65 66 |
# File 'lib/curl.rb', line 61
# Appends HTTP proxy switches to the shared curl options (@setup_params).
#
# proxy_uri - a URI object, or a "host:port" / "user:pass@host:port" string
#             (parsed after prefixing "http://").
#
# Adds --proxy "host:port" and, when the URI carries a user, also
# --proxy-user "user:password". Returns the updated @setup_params, as
# #socks does.
def proxy(proxy_uri)
  # Trace file kept from the earlier code (looks like debugging residue —
  # TODO confirm it is still wanted). Interpolation replaces `String + arg`,
  # which raised TypeError for URI arguments and made the URI branch below
  # unreachable.
  File.open("/tmp/aaaaaaaa.aaa", "w") { |file| file.puts "#{Time.now}---#{proxy_uri}" }

  uri = proxy_uri.is_a?(URI) ? proxy_uri : URI.parse("http://#{proxy_uri}")
  @setup_params = "#{@setup_params} --proxy \"#{uri.host}:#{uri.port}\" "
  @setup_params = "#{@setup_params} --proxy-user \"#{uri.user}:#{uri.password}\" " if uri.user
  @setup_params
end
#save(url, path = "/tmp/curl/curl_#{rand}_#{rand}.jpg") ⇒ Object
221 222 223 224 225 226 227 |
# File 'lib/curl.rb', line 221
# Downloads +url+ into the file +path+ using the shared curl options.
#
# path - destination file (default: a random .jpg name under /tmp/curl/);
#        its directory is created when missing.
#
# Returns +path+. The exit status of curl is not inspected.
#
# NOTE(review): the first interpolation was empty (`#{}`) in this listing;
# `cookies` is restored there because the identifier was stripped throughout
# the listing. Confirm against lib/curl.rb.
def save(url, path = "/tmp/curl/curl_#{rand}_#{rand}.jpg")
  FileUtils.mkdir_p(File.dirname(path))
  cmd = "curl #{cookies} #{browser_type} #{@setup_params} \"#{url}\" --output \"#{path}\" "
  puts cmd.red if @debug
  system(cmd)
  path
end
#save!(url, path = "/tmp/curl/curl_#{rand}_#{rand}.jpg") ⇒ Object
229 230 231 232 233 234 235 |
# File 'lib/curl.rb', line 229
# Downloads +url+ into +path+ with a fixed, minimal set of curl switches
# (--location --compressed --silent); neither @setup_params nor cookie
# options are used here.
#
# Creates the destination directory when missing and returns +path+.
def save!(url, path = "/tmp/curl/curl_#{rand}_#{rand}.jpg")
  target_dir = File.dirname(path)
  FileUtils.mkdir_p(target_dir)

  cmd = "curl #{browser_type} --location --compressed --silent \"#{url}\" --output \"#{path}\" "
  if @debug
    puts cmd.red
  end

  system(cmd)
  path
end
#send(url, post_data, ref = nil, count = 5) ⇒ Object
Format of the POST data: data = { "subm" => "1", "sid" => cap.split("=").last, "country" => "1" }
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 |
# File 'lib/curl.rb', line 185
# Sends a multipart form (curl -F) request and returns the response body.
#
# url       - target URL
# post_data - field name => value pairs, e.g.
#             { "subm" => "1", "sid" => cap.split("=").last, "country" => "1" };
#             entries with a nil/false key are skipped. A field whose name
#             contains "file", "photo" or "@" gets its value prefixed with
#             "@" on the command line; "@" is removed from the field name itself.
# ref       - extra text inserted verbatim before the URL on the command line
# count     - accepted for signature compatibility; unused because the retry
#             logic is disabled (see the commented block below)
#
# NOTE(review): this method shadows Object#send for instances of the class.
# NOTE(review): the first interpolation was empty (`#{}`) in this listing;
# `cookies` is restored there because the identifier was stripped throughout
# the listing. Confirm against lib/curl.rb.
def send(url, post_data, ref = nil, count = 5)
  # " -F \"method\"=\"post\" "
  fields = post_data.map do |key, val|
    next unless key
    name = key.to_s
    pre  = %w[file photo @].any? { |marker| name.include?(marker) } ? "@" : ""
    # Only double quotes are escaped, so the value stays inside its quotes.
    " -F \"#{name.gsub('@', '')}\"=#{pre}\"#{val.to_s.gsub('"', '\"')}\" "
  end
  post_q = fields.compact.join

  cmd = "curl #{cookies} #{browser_type} #{post_q} #{@setup_params} #{ref} \"#{url}\" "
  puts cmd.red if @debug

  result = open_pipe(cmd)
  #if result.to_s.strip.size == 0
  #  puts "empty result, left #{count} try".yellow if @debug
  #  count -= 1
  #  result = self.send(url,post_data,nil,count) if count > 0
  #end
  result
end
#socks(socks_uri) ⇒ Object
68 69 70 71 72 73 74 |
# File 'lib/curl.rb', line 68
# Appends SOCKS5 proxy switches to the shared curl options (@setup_params).
#
# socks_uri - a URI object, or a "host:port" / "user:pass@host:port" string
#             (parsed after prefixing "http://").
#
# Uses --socks5-hostname when @socks_hostname is truthy, --socks5 otherwise,
# and adds --proxy-user when the URI carries a user.
# Returns the updated @setup_params.
def socks(socks_uri)
  uri = if socks_uri.is_a?(URI)
          socks_uri
        else
          URI.parse("http://#{socks_uri}")
        end

  switch = "--socks5"
  switch = "--socks5-hostname" if @socks_hostname

  @setup_params = "#{@setup_params} #{switch} \"#{uri.host}:#{uri.port}\" "
  if uri.user
    @setup_params = "#{@setup_params} --proxy-user \"#{uri.user}:#{uri.password}\" "
  end
  @setup_params
end
#user_agent_alias=(al) ⇒ Object
53 54 55 |
# File 'lib/curl.rb', line 53
# Selects the User-Agent by its alias in AGENT_ALIASES.
#
# al - one of the keys of AGENT_ALIASES (e.g. 'Mac Safari', 'Google').
#
# Raises RuntimeError ("unknown agent alias") when the alias has no entry.
def user_agent_alias=(al)
  agent = AGENT_ALIASES[al]
  raise("unknown agent alias") unless agent
  self.user_agent = agent
end