Class: Baidu
- Inherits:
-
SearchEngine
- Object
- SearchEngine
- Baidu
- Defined in:
- lib/baidu.rb
Constant Summary collapse
- BaseUri =
'http://www.baidu.com/s?'
- PerPage =
100
Instance Method Summary collapse
-
#how_many_links(uri) ⇒ Object
domain:xxx.yyy.com/path/file.html.
-
#how_many_pages(host) ⇒ Object
site:xxx.yyy.com.
-
#how_many_pages_with(host, string) ⇒ Object
site:xxx.yyy.com inurl:zzz.
-
#initialize ⇒ Baidu
constructor
A new instance of Baidu.
-
#popular?(wd) ⇒ Boolean
def extend(words,level=3,sleeptime=1) level = level.to_i - 1 words = [words] unless words.respond_to? 'each' extensions = Array.new words.each do |word| self.query(word) extensions += related_keywords extensions += suggestions(word) sleep sleeptime end extensions.uniq! return extensions if level < 1 return extensions + extend(extensions,level) end.
- #query(wd) ⇒ Object
- #suggestions(wd) ⇒ Object
-
#url(id) ⇒ Object
Finds the real URL behind a redirect link like ‘www.baidu.com/link?url=7yoYGJqjJ4zBBpC8yDF8xDhctimd_UkfF8AVaJRPKduy2ypxVG18aRB5L6D558y3MjT_Ko0nqFgkMoS’.
Methods inherited from SearchEngine
Constructor Details
#initialize ⇒ Baidu
Returns a new instance of Baidu.
217 218 219 220 221 222 |
# File 'lib/baidu.rb', line 217
# Creates the Mechanize agent stored in @a and resets the cached page.
#
# The agent is given the 'Linux Mozilla' user-agent alias, an idle_timeout of 2
# and a max_history of 1. @page starts out as nil.
def initialize
  @page = nil
  @a = Mechanize.new do |agent|
    agent.user_agent_alias = 'Linux Mozilla'
  end
  @a.max_history = 1
  @a.idle_timeout = 2
end
Instance Method Details
#how_many_links(uri) ⇒ Object
domain:xxx.yyy.com/path/file.html
290 291 292 |
# File 'lib/baidu.rb', line 290
# Runs a `domain:"<uri>"` query and returns the `how_many` value of its result.
#
# @param uri [String] e.g. xxx.yyy.com/path/file.html
# @return [Object] whatever the query result's `how_many` returns
def how_many_links(uri)
  # NOTE(review): `query` returns false on its timeout path, in which case
  # the `how_many` call below raises NoMethodError.
  result = query("domain:\"#{uri}\"")
  result.how_many
end
#how_many_pages(host) ⇒ Object
site:xxx.yyy.com
285 286 287 |
# File 'lib/baidu.rb', line 285
# Runs a `site:<host>` query and returns the `how_many` value of its result.
#
# @param host [String] e.g. xxx.yyy.com
# @return [Object] whatever the query result's `how_many` returns
def how_many_pages(host)
  # NOTE(review): `query` returns false on its timeout path, in which case
  # the `how_many` call below raises NoMethodError.
  result = query("site:#{host}")
  result.how_many
end
#how_many_pages_with(host, string) ⇒ Object
site:xxx.yyy.com inurl:zzz
295 296 297 |
# File 'lib/baidu.rb', line 295
# Runs a `site:<host> inurl:<string>` query and returns the `how_many` value of
# its result.
#
# @param host [String] e.g. xxx.yyy.com
# @param string [String] fragment placed after the `inurl:` operator
# @return [Object] whatever the query result's `how_many` returns
def how_many_pages_with(host, string)
  # NOTE(review): `query` returns false on its timeout path, in which case
  # the `how_many` call below raises NoMethodError.
  result = query("site:#{host} inurl:#{string}")
  result.how_many
end
#popular?(wd) ⇒ Boolean
def extend(words,level=3,sleeptime=1)
level = level.to_i - 1
words = [words] unless words.respond_to? 'each'
extensions = Array.new
words.each do |word|
self.query(word)
extensions += related_keywords
extensions += suggestions(word)
sleep sleeptime
end
extensions.uniq!
return extensions if level < 1
return extensions + extend(extensions,level)
end
254 255 256 |
# File 'lib/baidu.rb', line 254
# Reports whether the Baidu Index page for +wd+ contains the "boxFlash" marker.
#
# @param wd [String] keyword; it is transcoded to GBK before being escaped
# @return [Boolean] true when the response body includes "boxFlash"
def popular?(wd)
  # URI.encode was removed in Ruby 3.0. encode_www_form_component
  # percent-escapes the GBK bytes and, unlike URI.encode, also escapes
  # reserved characters such as "&" and "=" that would otherwise break the
  # query string.
  word = URI.encode_www_form_component(wd.encode('GBK'))
  @a.get("http://index.baidu.com/main/word.php?word=#{word}").body.include?('boxFlash')
end
#query(wd) ⇒ Object
258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 |
# File 'lib/baidu.rb', line 258
# Runs a Baidu web search for +wd+ and wraps the response page.
#
# The fetched page is also stored in @page.
#
# @param wd [String] search terms (callers in this class pass operators such
#   as site:, domain: and inurl:)
# @return [BaiduResult, false] the wrapped page, or false when the request
#   raised Net::HTTP::Persistent::Error (a warning is printed in that case)
def query(wd)
  # URI.encode was removed in Ruby 3.0. encode_www_form escapes each value on
  # its own, so "&", "=" or "#" inside +wd+ can no longer corrupt the query
  # string. An earlier, disabled variant transcoded the whole URL to GBK.
  uri = BaseUri + URI.encode_www_form([['wd', wd.to_s], ['rn', PerPage]])
  begin
    @page = @a.get(uri)
    BaiduResult.new(@page)
  rescue Net::HTTP::Persistent::Error
    warn "[timeout] #{uri}"
    false
  end
  # Legacy implementation, kept disabled for reference:
  #   query = "#{query}"
  #   @uri = BaseUri+URI.encode(query.encode('GBK'))
  #   @page = @a.get @uri
  #   self.clean
  #   @number = self.how_many
  #   @maxpage = (@number / @perpage.to_f).round
  #   @maxpage =10 if @maxpage>10
  #   @currpage =0
end
#suggestions(wd) ⇒ Object
224 225 226 227 228 |
# File 'lib/baidu.rb', line 224
# Fetches Baidu's autocomplete suggestions for +wd+.
#
# The response body is treated as GBK and converted to UTF-8; the first
# [...] section found in it is then parsed as a JSON array.
#
# @param wd [String] keyword to complete
# @return [Array] the parsed suggestions; empty when the response contains no
#   [...] section
def suggestions(wd)
  # URI.encode was removed in Ruby 3.0; encode_www_form_component also escapes
  # "&" and "=" so the keyword cannot leak into the other query parameters.
  escaped = URI.encode_www_form_component(wd)
  body = HTTParty.get("http://suggestion.baidu.com/su?wd=#{escaped}&cb=callback").body
  json = body.force_encoding('GBK').encode('UTF-8')
  # String#[] with a regexp returns the whole match, or nil when nothing matched.
  list = json[/\[([^\]]*)\]/]
  list ? JSON.parse(list) : []
end
#url(id) ⇒ Object
Finds the real URL behind a redirect link like ‘www.baidu.com/link?url=7yoYGJqjJ4zBBpC8yDF8xDhctimd_UkfF8AVaJRPKduy2ypxVG18aRB5L6D558y3MjT_Ko0nqFgkMoS’
230 231 232 233 234 |
# File 'lib/baidu.rb', line 230
# Looks up the target of a www.baidu.com/link?url=<id> redirect.
#
# A fresh Mechanize agent with redirect_ok disabled issues a HEAD request, and
# the response's 'location' header is returned.
#
# @param id [String] the value of the `url` parameter of the Baidu link
# @return [Object] the 'location' header value of the response
def url(id)
  agent = Mechanize.new
  agent.redirect_ok = false
  response = agent.head("http://www.baidu.com/link?url=#{id}")
  response.header['location']
end