Class: CURL

Inherits:
Object
  • Object
show all
Defined in:
lib/curl.rb

Constant Summary collapse

# Maps a short, human-friendly alias to the full User-Agent header value
# that is handed to curl.
AGENT_ALIASES = {
  'Windows IE 6'      => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
  'Windows IE 7'      => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
  'Windows Mozilla'   => 'Mozilla/5.0 Windows; U; Windows NT 5.0; en-US; rv:1.4b Gecko/20030516 Mozilla Firebird/0.6',
  'Windows Mozilla 2' => 'Mozilla/5.0 Windows; U; Windows NT 5.0; ru-US; rv:1.4b Gecko/20030516',
  'Windows Mozilla 3' => 'Mozilla/5.0 Windows; U; Windows NT 5.0; en-UK; rv:1.4b Gecko/20060516',
  'Mac Safari'        => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/418 (KHTML, like Gecko) Safari/417.9.3',
  'Mac FireFox'       => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3',
  'Mac Mozilla'       => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
  'Linux Mozilla'     => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
  'Linux Konqueror'   => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
  'IPhone'            => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1A543a Safari/419.3',
  'IPhone Vkontakt'   => 'VKontakte/1.1.8 CFNetwork/342.1 Darwin/9.4.1',
  'Google'            => 'Googlebot/2.1 (+http://www.google.com/bot.html)',
  'Yahoo-Slurp'       => 'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)'
}

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(keys = {}) {|_self| ... } ⇒ CURL

Returns a new instance of CURL.

Yields:

  • (_self)

Yield Parameters:

  • _self (CURL)

    the object that the method was called on



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/curl.rb', line 34

# Builds a wrapper around the `curl` command-line tool.
#
# Recognised entries in +keys+ (all optional):
#   :socks_hostname  - stored in @socks_hostname (default false)
#   :cache           - stored in @cache; a falsy value becomes false
#   :cache_time      - cache lifetime in seconds (default 86400, i.e. one day)
#   :connect_timeout - value passed to curl --connect-timeout (default 6)
#   :max_time        - value passed to curl --max-time (default 8)
#   :retry           - value passed to curl --retry (default 1)
#   :cookies_disable - when truthy, @cookies_enable is set to false
#   :cookies         - cookie jar path (default: a random file under /tmp/curl/)
#
# The caller's hash is only read, never modified.
# Yields the new instance when a block is given.
def initialize(keys={})
  # Plain `||` instead of `||=`: the latter wrote the default back into the
  # caller's hash.
  @socks_hostname  = keys[:socks_hostname] || false
  @cache           = keys[:cache] || false
  @cache_time      = keys[:cache_time] || 3600*24*1 # 1 day cache life
  @connect_timeout = keys[:connect_timeout] || 6
  @max_time        = keys[:max_time] || 8
  @retry           = keys[:retry] || 1
  @cookies_enable  = !keys[:cookies_disable]
  @user_agent      = AGENT_ALIASES["Google"]

  # The directory is created unconditionally, even when :cookies points elsewhere.
  FileUtils.makedirs("/tmp/curl/")
  @cookies_file = keys[:cookies] || "/tmp/curl/curl_#{rand}_#{rand}.jar"

  # Options shared by every curl invocation built from this instance.
  @setup_params = " --connect-timeout #{@connect_timeout}  --max-time #{@max_time} --retry #{@retry}  --location --compressed --silent -k "
  yield self if block_given?
end

Instance Attribute Details

#user_agent ⇒ Object

Returns the value of attribute user_agent.



32
33
34
# File 'lib/curl.rb', line 32

# Returns the value currently stored in @user_agent.
def user_agent
  @user_agent
end

Class Method Details

.check(proxy) ⇒ Object



76
77
78
79
80
81
82
83
# File 'lib/curl.rb', line 76

# Probes a SOCKS5 proxy by fetching yahoo.com through it with curl.
#
# proxy - proxy address handed to curl's --socks5 option (converted with to_s).
#
# Returns true when curl's output contains "yahoo", false otherwise.
def self.check(proxy)
  out = false
  # catch_errors is a helper defined outside this method; presumably it
  # rescues failures so that `out` stays false — TODO confirm the meaning of 5.
  catch_errors(5) {
    # Argument-vector form: no shell is involved, so characters in `proxy`
    # cannot be interpreted as shell syntax.
    argv = ['curl', '--connect-timeout', '6', '--max-time', '8',
            '--silent', '--socks5', proxy.to_s, 'yahoo.com']
    result = IO.popen(argv, &:read)
    out = true if result.include?("yahoo")
  }
  out
end

Instance Method Details

#cache_file(url) ⇒ Object



97
98
99
# File 'lib/curl.rb', line 97

# Full path of the cache file for +url+: the directory from cache_path(url)
# followed by "<md5 hex of url>.html".
def cache_file(url)
  directory = cache_path(url)
  digest = Digest::MD5.hexdigest(url)
  "#{directory}/#{digest}.html"
end

#cache_path(url) ⇒ Object



94
95
96
# File 'lib/curl.rb', line 94

# Directory that holds the cache file for +url+.
#
# The path is @cache followed by four nested directories, each named after
# one of the first four two-character pairs of the URL's MD5 hex digest,
# e.g. "<cache>/0c/c1/75/b9".
def cache_path(url)
  # Hash once; the same digest was previously recomputed for every segment.
  digest = Digest::MD5.hexdigest(url)
  "#{@cache}/#{digest[0..1]}/#{digest[2..3]}/#{digest[4..5]}/#{digest[6..7]}"
end

#clear ⇒ Object



238
239
240
# File 'lib/curl.rb', line 238

# Deletes the cookie jar file at @cookies_file if it exists; does nothing
# when the file is absent.
def clear
  # File.exist? replaces File.exists?, which is deprecated and no longer
  # available on Ruby 3.2+.
  File.delete(@cookies_file) if File.exist?(@cookies_file)
end

#cookies ⇒ Object



57
58
59
# File 'lib/curl.rb', line 57

# Returns the cookie jar path stored in @cookies_file.
def cookies
  @cookies_file
end

#debug=(debug = false) ⇒ Object



86
87
88
# File 'lib/curl.rb', line 86

# Stores the given flag in @debug. Other methods print the curl command
# they are about to run when @debug is truthy.
def debug=(debug=false)
  @debug = debug
end

#debug? ⇒ Boolean

Returns:

  • (Boolean)


90
91
92
# File 'lib/curl.rb', line 90

# Returns the current value of @debug (nil when it has never been assigned).
def debug?
  @debug
end

#get(url, keys = {}) ⇒ Object



101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/curl.rb', line 101

# Fetches +url+ via get_raw, optionally through the on-disk cache.
#
# keys (all optional; the caller's hash is not modified):
#   :ref      - forwarded to get_raw (default nil)
#   :count    - forwarded to get_raw (default 3)
#   :encoding - forwarded to get_raw (default "utf-8")
#   :raw      - forwarded to get_raw only when caching is off (default false)
#
# When @cache is truthy, a cache file newer than @cache_time seconds is
# returned as-is; otherwise the page is fetched, written to the cache file
# and returned.
def get(url, keys={})
  ref      = keys[:ref]
  count    = keys[:count] || 3
  encoding = keys[:encoding] || "utf-8"
  raw      = keys[:raw].nil? ? false : keys[:raw]

  unless @cache
    return get_raw(url, {:count=>count, :ref=>ref, :encoding=>encoding, :raw=>raw})
  end

  filename = cache_file(url)
  if File.exist?(filename) && File.ctime(filename) > Time.now - @cache_time
    puts "read from cache file '#{filename}'" if @debug
    # File.read closes the handle; the former open(...).read left it open.
    return File.read(filename)
  end

  FileUtils.mkdir_p(cache_path(url))
  # NOTE(review): :raw is not forwarded on this path, matching prior behaviour.
  result = get_raw(url, {:count=>count, :ref=>ref, :encoding=>encoding})
  puts "cache to file '#{filename}'" if @debug
  # puts appends a newline if missing, so a later cache hit may return one
  # more trailing "\n" than this call does.
  File.open(filename, "w") { |f| f.puts result }
  result
end

#get_header(url, location = false) ⇒ Object



214
215
216
217
218
219
# File 'lib/curl.rb', line 214

# Runs curl with -i (include response headers in the output) against +url+
# and returns whatever open_pipe yields for that command.
#
# location - when falsy, every "--location" in the assembled command is
#            replaced by a space before it is run.
#
# NOTE(review): +url+ is placed inside a double-quoted shell argument without
# escaping.
def get_header(url, location=false)
  command = "curl #{cookies_store} #{browser_type} #{@setup_params}  \"#{url}\" -i "
  command = command.gsub(/\-\-location/, ' ') unless location
  puts command.red if @debug
  open_pipe(command)
end

#get_raw(url, keys = {}) ⇒ Object



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/curl.rb', line 125

# Fetches +url+ with curl, re-running the command while the output is blank.
#
# keys (all optional; the caller's hash is not modified):
#   :ref      - text inserted verbatim into the command line before the URL
#   :count    - maximum number of attempts (default 3; at least one is made)
#   :encoding - "utf-8" (default) passes the result through String#clean;
#               any other value converts it from WINDOWS-1251 to UTF-8
#   :raw      - when truthy, the curl output is returned untouched
#
# Returns the (possibly converted) output of the last attempt.
def get_raw(url, keys={})
  ref      = keys[:ref]
  count    = keys[:count] || 3
  encoding = keys[:encoding] || "utf-8"
  raw      = keys[:raw].nil? ? false : keys[:raw]

  cmd = "curl #{cookies_store} #{browser_type} #{@setup_params} #{ref}  \"#{url}\"  "
  puts cmd.red if @debug

  # Retry in place. The former retry called get(url, count), handing an
  # Integer where the options hash is expected, and would also have applied
  # the encoding conversion twice.
  result = open_pipe(cmd)
  while result.to_s.strip.empty?
    puts "empty result, left #{count} try".yellow if @debug
    count -= 1
    break unless count > 0
    result = open_pipe(cmd)
  end

  return result if raw

  # String#clean is defined outside this method.
  # NOTE(review): Iconv is not bundled with Ruby 2.0+; confirm it is available.
  encoding == "utf-8" ? result.clean : Iconv.new("UTF-8", "WINDOWS-1251").iconv(result)
end

#init_cook(hash, site = '') ⇒ Object



242
243
244
245
246
247
248
249
# File 'lib/curl.rb', line 242

# Writes a Netscape-format cookie file containing one line per entry of
# +hash+ (cookie name => value), all attributed to +site+.
#
# The destination is the first double-quoted substring of cookies_store.
#
# Returns the text that was written.
def init_cook(hash,site='')
  header = "# Netscape HTTP Cookie File\n# http://curl.haxx.se/rfc/cookie_spec.html\n# This file was generated by libcurl! Edit at your own risk.\n\n"
  rows = hash.map { |name, value| "#{site}\tTRUE\t/\tFALSE\t0\t#{name}\t#{value}\n" }
  contents = header + rows.join + "\n"

  jar_path = cookies_store.scan(/\"(.+?)\"/).first.first
  File.open(jar_path, "w") { |jar| jar.puts contents }
  contents
end

#post(url, post_data, ref = nil, count = 5, header = " --header \"Content-Type: application/x-www-form-urlencoded\" ") ⇒ Object

Format of the POST data: data = { "subm" => "1", "sid" => cap.split("=").last, "country" => "1" }



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/curl.rb', line 155

# Sends a url-encoded POST request with curl.
#
# url       - target URL
# post_data - field name => value pairs, e.g. { "subm" => "1", "country" => "1" };
#             entries with a falsy key or the key 'IDontAgreeBtn' are skipped
# ref       - text inserted verbatim into the command line before the URL
# count     - maximum number of attempts while the response is blank
#             (at least one attempt is always made)
# header    - extra curl option text; defaults to a form Content-Type header
#
# Returns the output of the last attempt.
def post(url,post_data, ref = nil,count=5, header = " --header \"Content-Type: application/x-www-form-urlencoded\" "  )
  pairs = []
  post_data.each do |key, val|
    next if !key || key == 'IDontAgreeBtn'
    # CGI.escape leaves "." untouched; it is additionally encoded as %2E.
    # This reproduces URI.escape(..., '.'), which no longer exists on Ruby 3.0+.
    pairs << "#{key}=#{CGI.escape(val.to_s).gsub('.', '%2E')}"
  end
  post_q = "--data \"#{pairs.join('&')}\""

  cmd = "curl #{cookies_store} #{browser_type} #{post_q} #{header} #{@setup_params} #{ref}  \"#{url}\"  "
  puts cmd.red if @debug

  # Retry with the very same command so that ref and header are kept;
  # the former recursive retry reset both to their defaults.
  result = open_pipe(cmd)
  while result.to_s.strip.empty?
    puts "empty result, left #{count} try".yellow if @debug
    count -= 1
    break unless count > 0
    result = open_pipe(cmd)
  end
  result
end

#proxy(proxy_uri) ⇒ Object



61
62
63
64
65
66
# File 'lib/curl.rb', line 61

# Appends HTTP proxy options to @setup_params.
#
# proxy_uri - a URI object, or a "[user:password@]host:port" string, which is
#             parsed after prefixing "http://".
#
# Adds --proxy "host:port" and, when the URI carries a user, --proxy-user
# "user:password".
#
# Returns the updated @setup_params when credentials were added, nil otherwise.
def proxy(proxy_uri)
  # Trace file with the most recent proxy. Interpolation (rather than String#+)
  # so that a URI argument does not raise TypeError here.
  # NOTE(review): fixed, shared path under /tmp — looks like leftover debugging.
  File.open("/tmp/aaaaaaaa.aaa", "w") { |file| file.puts "#{Time.now}---#{proxy_uri}" }
  proxy = proxy_uri.is_a?(URI) ? proxy_uri : URI.parse("http://#{proxy_uri}")
  @setup_params = "#{@setup_params} --proxy \"#{proxy.host}:#{proxy.port}\" "
  @setup_params = "#{@setup_params} --proxy-user \"#{proxy.user}:#{proxy.password}\" " if proxy.user
end

#save(url, path = "/tmp/curl/curl_#{rand}_#{rand}.jpg") ⇒ Object



221
222
223
224
225
226
227
# File 'lib/curl.rb', line 221

# Downloads +url+ into +path+ with curl, using this instance's cookie and
# setup options, and creates the destination directory first.
#
# path defaults to a randomly named .jpg under /tmp/curl/.
#
# Returns +path+; the exit status of curl is not inspected.
def save(url,path="/tmp/curl/curl_#{rand}_#{rand}.jpg")
  destination_dir = File.dirname(path)
  FileUtils.mkdir_p(destination_dir)

  command = "curl #{cookies_store} #{browser_type} #{@setup_params}  \"#{url}\" --output \"#{path}\"  "
  puts command.red if @debug
  system(command)

  path
end

#save!(url, path = "/tmp/curl/curl_#{rand}_#{rand}.jpg") ⇒ Object



229
230
231
232
233
234
235
# File 'lib/curl.rb', line 229

# Downloads +url+ into +path+ with curl using only the browser options and a
# fixed "--location --compressed --silent" set; the instance's cookie and
# setup options are not included. Creates the destination directory first.
#
# path defaults to a randomly named .jpg under /tmp/curl/.
#
# Returns +path+; the exit status of curl is not inspected.
def save!(url,path="/tmp/curl/curl_#{rand}_#{rand}.jpg")
  destination_dir = File.dirname(path)
  FileUtils.mkdir_p(destination_dir)

  command = "curl  #{browser_type}   --location --compressed --silent  \"#{url}\" --output \"#{path}\"  "
  puts command.red if @debug
  system(command)

  path
end

#send(url, post_data, ref = nil, count = 5) ⇒ Object

Format of the POST data: data = { "subm" => "1", "sid" => cap.split("=").last, "country" => "1" }



185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# File 'lib/curl.rb', line 185

# Sends a multipart form POST by giving curl one -F option per field.
#
# url       - target URL
# post_data - field name => value pairs; entries with a falsy key are skipped.
#             A field whose name contains "file", "photo" or "@" gets its value
#             prefixed with "@"; every "@" is stripped from the field name.
# ref       - text inserted verbatim into the command line before the URL
# count     - accepted but not used
#
# Returns whatever open_pipe yields for the command.
#
# NOTE(review): this method name shadows Object#send for instances of the class.
def send(url,post_data, ref = nil,count=5 )
  form_args = ''
  post_data.each do |key, val|
    next unless key

    field = key.to_s
    is_upload = field.include?("file") || field.include?("photo") || field.include?("@")
    marker = is_upload ? "@" : ""
    field = field.delete("@")
    # Double quotes are backslash-escaped because the value is wrapped in
    # double quotes on the command line.
    escaped = val.to_s.gsub('"','\"')
    form_args += " -F \"#{field}\"=#{marker}\"#{escaped}\" "
  end

  cmd = "curl   #{cookies_store} #{browser_type} #{form_args}  #{@setup_params} #{ref}  \"#{url}\" "
  puts cmd.red if @debug

  open_pipe(cmd)
end

#socks(socks_uri) ⇒ Object



68
69
70
71
72
73
74
# File 'lib/curl.rb', line 68

# Appends SOCKS5 proxy options to @setup_params.
#
# socks_uri - a URI object, or a "[user:password@]host:port" string, which is
#             parsed after prefixing "http://".
#
# The option is --socks5-hostname when @socks_hostname is truthy and --socks5
# otherwise; --proxy-user "user:password" follows when the URI carries a user.
#
# Returns the updated @setup_params.
def socks(socks_uri)
  endpoint = socks_uri.is_a?(URI) ? socks_uri : URI.parse("http://#{socks_uri}")

  option =
    if @socks_hostname
      "--socks5-hostname"
    else
      "--socks5"
    end

  @setup_params = "#{@setup_params} #{option} \"#{endpoint.host}:#{endpoint.port}\" "
  if endpoint.user
    @setup_params = "#{@setup_params} --proxy-user \"#{endpoint.user}:#{endpoint.password}\" "
  end
  @setup_params
end

#user_agent_alias=(al) ⇒ Object



53
54
55
# File 'lib/curl.rb', line 53

# Sets the user agent to the AGENT_ALIASES entry named +al+.
#
# Raises RuntimeError ("unknown agent alias") when +al+ is not a known alias;
# the current user agent is left untouched in that case.
def user_agent_alias=(al)
  agent = AGENT_ALIASES[al]
  raise("unknown agent alias") unless agent
  self.user_agent = agent
end