Class: GScraper::Search::AJAXQuery

Inherits:
Query
  • Object
show all
Includes:
HasPages
Defined in:
lib/gscraper/search/ajax_query.rb

Overview

Represents a Query through the Google AJAX search API.

Constant Summary collapse

RESULTS_PER_PAGE =

Maximum results per-page

8
PATH =

AJAX API Path

'/uds/GwebSearch'
QUERY =

AJAX API Query string

'callback=google.search.WebSearch.RawCompletion&context=0&lstkp=0&rsz=large'
DEFAULT_SIG =

Default signature

'582c1116317355adf613a6a843f19ece'
DEFAULT_KEY =

Default key

'notsupplied'
DEFAULT_VERSION =

Default version

'1.0'

Constants inherited from Query

Query::DEFAULT_HOST, Query::SUB_DOMAIN

Instance Attribute Summary collapse

Attributes inherited from Query

#allintext, #allintitle, #allinurl, #define, #exact_phrase, #filetype, #info, #intext, #intitle, #inurl, #language, #link, #numeric_range, #query, #related, #search_host, #site, #with_words, #without_words

Class Method Summary collapse

Instance Method Summary collapse

Methods included from HasPages

#[], #each, #each_on_page, #each_on_pages, #each_page, #first_page, #page_cache, #page_index_of, #pages, #result_index_of, #result_offset_of

Methods inherited from Query

#expression, #format_modifier, #format_options

Constructor Details

#initialize(options = {}) {|query| ... } ⇒ AJAXQuery

Creates a new AJAX query.

Parameters:

  • options (Hash) (defaults to: {})

    Query options.

Options Hash (options):

  • :search_host (String) — default: www.google.com

    The host to submit queries to.

  • :language (String, Symbol) — default: Languages.native

    The search language.

  • :sig (String) — default: '582c1116317355adf613a6a843f19ece'

    The search signature.

  • :key (String, Symbol) — default: 'notsupplied'

    The search key.

  • :version (String, Float) — default: '1.0'

    The desired API version.

Yields:

  • (query)

    If a block is given, the new AJAX query will be passed to it.

Yield Parameters:

  • query (AJAXQuery)

    The new AJAX query.


94
95
96
97
98
99
100
101
102
# File 'lib/gscraper/search/ajax_query.rb', line 94

# Creates a new AJAX query.
#
# @param [Hash] options
#   Query options. The whole Hash is also handed to
#   GScraper.web_agent and to the superclass constructor.
#
# @option options [String] :sig (DEFAULT_SIG)
#   The search signature.
#
# @option options [String, Symbol] :key (DEFAULT_KEY)
#   The search key.
#
# @option options [String] :version (DEFAULT_VERSION)
#   The desired API version.
#
# @yield [query]
#   The block, if any, is forwarded to the superclass constructor.
def initialize(options={},&block)
  @agent = GScraper.web_agent(options)

  # A default is used only when the option key is absent altogether;
  # a key that is present (even with a nil value) is stored as given.
  @sig     = options.fetch(:sig) { DEFAULT_SIG }
  @key     = options.fetch(:key) { DEFAULT_KEY }
  @version = options.fetch(:version) { DEFAULT_VERSION }

  super(options,&block)
end

Instance Attribute Details

#key ⇒ Object

The search key



62
63
64
# File 'lib/gscraper/search/ajax_query.rb', line 62

# The search key.
#
# @return [Object]
#   The value stored in @key; #search_url sends it as the `key`
#   query parameter.
def key
  return @key
end

#sig ⇒ Object

The search signature



59
60
61
# File 'lib/gscraper/search/ajax_query.rb', line 59

# The search signature.
#
# @return [Object]
#   The value stored in @sig; #search_url sends it as the `sig`
#   query parameter.
def sig
  return @sig
end

#version ⇒ Object

The API version



65
66
67
# File 'lib/gscraper/search/ajax_query.rb', line 65

# The API version.
#
# @return [Object]
#   The value stored in @version; #search_url sends it as the `v`
#   query parameter.
def version
  return @version
end

Class Method Details

.from_url(url, options = {}) {|query| ... } ⇒ AJAXQuery

Creates a new AJAX query from the specified URL.

Parameters:

  • url (URI::HTTP, String)

    The URL to create the query from.

  • options (Hash) (defaults to: {})

    Additional query options.

Yields:

  • (query)

    If a block is given, it will be passed the new AJAX query.

Yield Parameters:

  • query (AJAXQuery)

    The new AJAX query.

Returns:

  • (AJAXQuery)

    The new AJAX query.


See Also:

  • AJAXQuery.new


124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/gscraper/search/ajax_query.rb', line 124

# Creates a new AJAX query from the specified URL.
#
# Query parameters found in the URL take precedence over the matching
# entries in +options+. Parameters the URL does not carry leave the
# corresponding option untouched, so AJAXQuery.new can still apply its
# own defaults instead of receiving an explicit nil.
#
# @param [URI::HTTP, String] url
#   The URL to create the query from.
#
# @param [Hash] options
#   Additional query options. The Hash is not modified.
#
# @yield [query]
#   If a block is given, it is forwarded to AJAXQuery.new.
#
# @return [AJAXQuery]
#   The new AJAX query.
#
# @see AJAXQuery.new
def AJAXQuery.from_url(url,options={},&block)
  url    = URI(url.to_s)
  params = url.query_params

  # Work on a shallow copy so the caller's Hash is never mutated.
  new_options = options.dup

  # option key => name of the URL query parameter it is read from
  {
    :language => 'hl',
    :query    => 'q',
    :sig      => 'sig',
    :key      => 'key',
    :version  => 'v'
  }.each do |option,param|
    value = params[param]

    # Skip absent parameters: storing nil would make the key "present"
    # and defeat the Hash#fetch defaults used by #initialize.
    new_options[option] = value unless value.nil?
  end

  return AJAXQuery.new(new_options,&block)
end

Instance Method Details

#page(page_index) ⇒ Page<Result>

A page containing results at the specified page index.

Parameters:

  • page_index (Integer)

    The index of the page.

Returns:

  • (Page<Result>)

    A page containing results at the specified page index.



200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# File 'lib/gscraper/search/ajax_query.rb', line 200

# Requests the results page at the given index and parses it.
#
# @param [Integer] page_index
#   The index of the page.
#
# @return [Page<Result>]
#   A page containing the results at the page index. The page stays
#   empty when the response holds no JSON object or the object has no
#   `results` entry.
def page(page_index)
  Page.new do |new_page|
    body = @agent.get(page_url(page_index)).body

    # Extract the first brace-delimited span from the response body
    # (presumably the JSON argument of the callback named in QUERY).
    # A body without such a span now yields an empty page instead of
    # raising from JSON.parse(nil).
    json = body[/\{.*\}/]
    hash = JSON.parse(json) if json

    rank_offset = result_offset_of(page_index)

    if (hash.kind_of?(Hash) && hash['results'])
      hash['results'].each_with_index do |result,index|
        rank  = rank_offset + (index + 1)
        title = Nokogiri::HTML(result['title']).inner_text

        # URI.escape was removed in Ruby 3.0; the default parser offers
        # the same escaping on both old and new Rubies.
        url = URI(URI::DEFAULT_PARSER.escape(result['unescapedUrl']))

        # A missing content field is treated like an empty one.
        content = result['content'].to_s
        summary = if content.empty?
                    ''
                  else
                    Nokogiri::HTML(content).inner_text
                  end

        cached_url = URI(result['cacheUrl'])

        new_page << Result.new(rank,title,url,summary,cached_url)
      end
    end
  end
end

#page_url(page_index) ⇒ URI::HTTP

The URL that represents the query at a specific page index.

Parameters:

  • page_index (Integer)

    The page index to create the URL for.

Returns:

  • (URI::HTTP)

    The query URL for the given page index.



181
182
183
184
185
186
187
188
189
# File 'lib/gscraper/search/ajax_query.rb', line 181

# The URL that represents the query at a specific page index.
#
# @param [Integer] page_index
#   The page index to create the URL for.
#
# @return [URI::HTTP]
#   The search URL; for page indexes above 1 it additionally carries a
#   `start` query parameter holding the result offset of that page.
def page_url(page_index)
  search_url.tap do |page_uri|
    # The first page needs no offset, so `start` is only added later on.
    page_uri.query_params['start'] = result_offset_of(page_index) if page_index > 1
  end
end

#results_per_page ⇒ Integer

The results per page.

Returns:

  • (Integer)

    The number of results per page.

See Also:



145
146
147
# File 'lib/gscraper/search/ajax_query.rb', line 145

# The results per page.
#
# @return [Integer]
#   The number of results per page, always RESULTS_PER_PAGE.
def results_per_page
  return RESULTS_PER_PAGE
end

#search_url ⇒ URI::HTTP

The URL that represents the query.

Returns:

  • (URI::HTTP)

    The URL for the query.



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/gscraper/search/ajax_query.rb', line 155

# The URL that represents the query.
#
# @return [URI::HTTP]
#   A URL built from the search host, PATH and QUERY, with the
#   language, expression, signature, key and version added as query
#   parameters.
def search_url
  uri = URI::HTTP.build(:host => search_host, :path => PATH, :query => QUERY)

  # Parameter name / value pairs, assigned in this order.
  [
    ['hl',  @language],
    ['gss', '.com'],
    ['q',   expression],
    ['sig', @sig],
    ['key', @key],
    ['v',   @version]
  ].each do |name,value|
    uri.query_params[name] = value
  end

  return uri
end