Class: Searcher::ChinaSearcher

Inherits:
Object
  • Object
show all
Defined in:
lib/searcher/china_searcher.rb

Constant Summary collapse

PAGE_NUM =
2

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(name, url, page_no = '1', page_name = 'pn') ⇒ ChinaSearcher

Returns a new instance of ChinaSearcher.



6
7
8
9
10
11
12
13
# File 'lib/searcher/china_searcher.rb', line 6

# Stores the settings that describe one search engine.
#
# @param name [String] engine label; compared against 'baidu' when paging
#   URLs are built and attached to every link returned by #get_list
# @param url [String] URL prefix that the encoded keyword is appended to
# @param page_no [String, Integer] paging step; converted with to_i when
#   paging URLs are built
# @param page_name [String] name of the paging query parameter
def initialize(name, url, page_no = '1', page_name = 'pn')
  @name = name
  @url = url
  @page_no = page_no
  @page_name = page_name
end

Instance Attribute Details

#name ⇒ Object

Returns the value of attribute name.



3
4
5
# File 'lib/searcher/china_searcher.rb', line 3

# Reader for the engine name stored by the constructor.
#
# @return [Object] the current value of @name
def name
  @name
end

#page_name ⇒ Object

Returns the value of attribute page_name.



3
4
5
# File 'lib/searcher/china_searcher.rb', line 3

# Reader for the paging query-parameter name stored by the constructor
# (defaults to 'pn').
#
# @return [Object] the current value of @page_name
def page_name
  @page_name
end

#page_no ⇒ Object

Returns the value of attribute page_no.



3
4
5
# File 'lib/searcher/china_searcher.rb', line 3

# Reader for the paging step stored by the constructor (defaults to '1').
# The value is kept as given; callers convert it with to_i when needed.
#
# @return [Object] the current value of @page_no
def page_no
  @page_no
end

#url ⇒ Object

Returns the value of attribute url.



3
4
5
# File 'lib/searcher/china_searcher.rb', line 3

# Reader for the search URL prefix stored by the constructor.
#
# @return [Object] the current value of @url
def url
  @url
end

Class Method Details

.keyword_urls(names, keyword, page = PAGE_NUM) ⇒ Object



61
62
63
64
65
66
67
68
69
# File 'lib/searcher/china_searcher.rb', line 61

# Collects the paging URLs of several searchers into one flat list.
#
# @param names [Enumerable] searcher objects; each must respond to
#   keyword_urls(keyword, page) and return an Array of URLs
# @param keyword [String] search term handed to every searcher
# @param page [Integer] page count handed to every searcher
# @return [Array] every searcher's URLs, in the order of +names+
def keyword_urls(names, keyword, page = PAGE_NUM)
  names.flat_map { |searcher| searcher.keyword_urls(keyword, page) }
end

Instance Method Details

#get_list(keyword, page = PAGE_NUM) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/searcher/china_searcher.rb', line 37

# Searches for +keyword+ and extracts the highlighted result links.
#
# The HTML returned by #search_keywords is scanned for single-line anchor
# tags. An anchor is kept only when it contains an <em> tag and a
# double-quoted string starting with "http"; the first such string (quotes
# removed) is passed to Global.html_get_web_url, and every non-nil result is
# recorded together with @name.
#
# NOTE(review): Global.html_get_web_url is defined elsewhere; presumably it
# resolves the engine's redirect link to the target URL - confirm.
#
# @param keyword [String] search term
# @param page [Integer] page count forwarded to #search_keywords
# @return [Array<Array>] [resolved_url, @name] pairs in document order
def get_list(keyword, page = PAGE_NUM)
  html = search_keywords(keyword, page)
  # Global.save_to_file(html, @name + '.html', '/htmls')

  html.scan(/<a.*?href.*?<\/a>/).each_with_object([]) do |anchor, links|
    next unless anchor.include?('<em>')

    quoted = /"http.*?"/.match(anchor)
    next unless quoted

    target = Global.html_get_web_url(quoted.to_s.delete('"'))
    # Global.save_link_info(target, @name)
    links << [target, @name] unless target.nil?
  end
end

#keyword_urls(keyword, page = PAGE_NUM) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
# File 'lib/searcher/china_searcher.rb', line 24

# Builds the result-page URLs to request for +keyword+.
#
# Every URL is @url, followed by the form-encoded keyword and a
# "&<@page_name>=<offset>" paging parameter. The offset starts at 0 when
# @name is 'baidu' and at 1 otherwise, and grows by @page_no.to_i for each
# URL; generation stops once the next offset would exceed
# page * @page_no.to_i. At least one URL is always returned.
#
# The keyword is escaped with URI.encode_www_form_component (URI.encode no
# longer exists in Ruby 3), so reserved characters such as "&" or "=" inside
# the keyword cannot break the query string; spaces are encoded as "+".
#
# NOTE(review): because the 'baidu' sequence starts at 0 it yields one more
# URL than other names for the same positive +page+ - confirm this is
# intended.
#
# @param keyword [String] search term
# @param page [Integer] multiplier for the upper offset bound
# @return [Array<String>] URLs in ascending offset order
def keyword_urls(keyword, page = PAGE_NUM)
  step = @page_no.to_i
  offset = @name == 'baidu' ? 0 : 1
  upper = page * step
  prefix = "#{@url}#{URI.encode_www_form_component(keyword)}&#{@page_name}="

  sites = []
  loop do
    sites << "#{prefix}#{offset}"
    # A zero step never moves the offset past the bound; stop after the
    # first URL instead of looping forever.
    break if step.zero?

    offset += step
    break if offset > upper
  end
  sites
end

#search_keywords(keyword, page = PAGE_NUM) ⇒ Object



15
16
17
18
19
20
21
# File 'lib/searcher/china_searcher.rb', line 15

# Downloads every result page for +keyword+ and returns the combined HTML.
#
# Each URL produced by #keyword_urls is fetched with a plain GET and the
# response bodies are concatenated in request order. Status codes are not
# inspected and redirects are not followed.
#
# @param keyword [String] search term
# @param page [Integer] page count forwarded to #keyword_urls
# @return [String] all response bodies joined without a separator
def search_keywords(keyword, page = PAGE_NUM)
  keyword_urls(keyword, page).each_with_object(''.dup) do |url, html|
    # body can be nil when a response carries no entity; to_s turns that
    # into an empty string instead of raising TypeError on the append.
    html << Net::HTTP.get_response(URI.parse(url)).body.to_s
  end
end