Class: YahooAnswersScraper::Query

Inherits:
Object
  • Object
show all
Defined in:
lib/yahoo_answers_scraper/query.rb

Constant Summary collapse

BASE_URL =
"https://answers.yahoo.com"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(query, options = {}) ⇒ Query

Returns a new instance of Query.



7
8
9
10
11
12
# File 'lib/yahoo_answers_scraper/query.rb', line 7

# Prepares a search for +query+; no request is made until #fetch is called.
#
# @param query [Object] search terms, stored as given (later URL-escaped by #search_url)
# @param options [Hash] optional settings
# @option options [Object] :mode value for the +sort+ URL parameter; "rel" when absent or nil/false
# @option options [Object] :offset last page considered already fetched; 0 when absent or nil/false,
#   so the first #fetch requests page offset + 1
def initialize(query, options = {})
  sort_mode = options[:mode]
  start_page = options[:offset]

  @query = query
  @mode = sort_mode || "rel"
  @current_page = start_page || 0
  @questions = []
end

Instance Attribute Details

#current_page ⇒ Object (readonly)

Returns the value of attribute current_page.



5
6
7
# File 'lib/yahoo_answers_scraper/query.rb', line 5

# Reader for the page counter.
#
# The value starts as options[:offset] (or 0) in #initialize and is set to the
# page number just processed each time #fetch finishes a page.
#
# @return [Object] the current value of @current_page
def current_page
  @current_page
end

#query ⇒ Object (readonly)

Returns the value of attribute query.



5
6
7
# File 'lib/yahoo_answers_scraper/query.rb', line 5

# Reader for the search terms.
#
# This is the +query+ argument given to #initialize, unmodified; #search_url
# escapes it when building the request URL.
#
# @return [Object] the current value of @query
def query
  @query
end

#questions ⇒ Object (readonly)

Returns the value of attribute questions.



5
6
7
# File 'lib/yahoo_answers_scraper/query.rb', line 5

# Reader for the questions accumulated so far.
#
# The array starts empty in #initialize and #fetch appends one
# YahooAnswersScraper::Question per result link. The internal array itself is
# returned (not a copy), so mutating it changes this query's state.
#
# @return [Array] the current value of @questions
def questions
  @questions
end

Instance Method Details

#fetch(pages = 1) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/yahoo_answers_scraper/query.rb', line 14

# Downloads +pages+ consecutive search-result pages, starting at the page after
# @current_page, and appends a YahooAnswersScraper::Question (built from the
# link text and absolute URL) to @questions for every "h3.question-title > a"
# element found.
#
# @param pages [Integer] how many result pages to download; 0 performs no request
# @return [self] so calls can be chained
def fetch(pages = 1)
  pages.times do
    page = @current_page + 1

    # URI.open (from open-uri) is used because Kernel#open stopped accepting
    # URLs in Ruby 3.0. The block form closes the response stream (possibly a
    # Tempfile) as soon as the document has been parsed.
    doc = URI.open(search_url(page)) { |io| Nokogiri::HTML.parse(io) }

    doc.css("h3.question-title > a").each do |link_el|
      @questions << YahooAnswersScraper::Question.new(
        question: link_el.text.strip,
        link: BASE_URL + link_el["href"]
      )
    end

    # Advance the counter only once the page was processed, so a failed
    # request leaves @current_page at the last page that succeeded.
    @current_page = page
  end

  self
end

#fetch_questions ⇒ Object



34
35
36
37
# File 'lib/yahoo_answers_scraper/query.rb', line 34

# Calls +fetch+ on every question currently held in @questions, in order.
#
# @return [self] so calls can be chained
def fetch_questions
  @questions.each do |question|
    question.fetch
  end

  self
end

#inspect ⇒ Object



44
45
46
# File 'lib/yahoo_answers_scraper/query.rb', line 44

# Compact debugging representation showing only the search terms and the page
# counter (the accumulated questions and sort mode are left out).
#
# @return [String]
def inspect
  format(
    "#<YahooAnswersScraper::Query query: %s, current_page: %s>",
    @query.inspect,
    @current_page.inspect
  )
end

#search_url(page = 1) ⇒ Object



39
40
41
42
# File 'lib/yahoo_answers_scraper/query.rb', line 39

# Builds the search-results URL for the given page.
#
# The parameters are encoded with URI.encode_www_form instead of URI.escape:
# URI.escape was removed in Ruby 3.0 and left reserved characters such as "&",
# "=" and "#" untouched, so a query containing them corrupted the query string.
# Spaces are now encoded as "+" (form encoding) rather than "%20".
#
# @param page [Integer] page number sent as the +s+ parameter
# @return [String] absolute URL carrying @query as +p+ and @mode as +sort+
def search_url(page = 1)
  params = URI.encode_www_form(p: @query, s: page, sort: @mode)
  "https://answers.yahoo.com/search/search_result?#{params}"
end