Class: GScraper::Search::AJAXQuery
- Includes:
- HasPages
- Defined in:
- lib/gscraper/search/ajax_query.rb
Overview
Represents a Query through the Google AJAX search API.
Constant Summary collapse
- RESULTS_PER_PAGE =
Maximum number of results per page
8
- PATH =
AJAX API Path
'/uds/GwebSearch'
- QUERY =
AJAX API Query string
'callback=google.search.WebSearch.RawCompletion&context=0&lstkp=0&rsz=large'
- DEFAULT_SIG =
Default signature
'582c1116317355adf613a6a843f19ece'
- DEFAULT_KEY =
Default key
'notsupplied'
- DEFAULT_VERSION =
Default version
'1.0'
Constants inherited from Query
Query::DEFAULT_HOST, Query::SUB_DOMAIN
Instance Attribute Summary collapse
-
#key ⇒ Object
The search key.
-
#sig ⇒ Object
The search signature.
-
#version ⇒ Object
The API version.
Attributes inherited from Query
#allintext, #allintitle, #allinurl, #define, #exact_phrase, #filetype, #info, #intext, #intitle, #inurl, #language, #link, #numeric_range, #query, #related, #search_host, #site, #with_words, #without_words
Class Method Summary collapse
-
.from_url(url, options = {}) {|query| ... } ⇒ AJAXQuery
Creates a new AJAX query from the specified URL.
Instance Method Summary collapse
-
#initialize(options = {}) {|query| ... } ⇒ AJAXQuery
constructor
Creates a new AJAX query.
-
#page(page_index) ⇒ Page<Result>
A page containing results at the specified page index.
-
#page_url(page_index) ⇒ URI::HTTP
The URL that represents the query at a specific page index.
-
#results_per_page ⇒ Integer
The results per page.
-
#search_url ⇒ URI::HTTP
The URL that represents the query.
Methods included from HasPages
#[], #each, #each_on_page, #each_on_pages, #each_page, #first_page, #page_cache, #page_index_of, #pages, #result_index_of, #result_offset_of
Methods inherited from Query
#expression, #format_modifier, #format_options
Constructor Details
#initialize(options = {}) {|query| ... } ⇒ AJAXQuery
Creates a new AJAX query.
94 95 96 97 98 99 100 101 102 |
# File 'lib/gscraper/search/ajax_query.rb', line 94
#
# Creates a new AJAX query.
#
# @param [Hash] options
#   Query options. `:sig`, `:key` and `:version` are read here; the whole
#   hash is then handed to the superclass constructor.
#
# @option options [Object] :sig (DEFAULT_SIG)
#   The search signature.
#
# @option options [Object] :key (DEFAULT_KEY)
#   The search key.
#
# @option options [Object] :version (DEFAULT_VERSION)
#   The API version.
#
# @yield [query]
#   The block, if given, is forwarded unchanged to the superclass
#   constructor.
def initialize(options={},&block)
  @agent = GScraper.web_agent()

  # `fetch` only falls back to the default when the key is absent; an
  # explicit `nil` value is kept as-is.
  @sig     = options.fetch(:sig,DEFAULT_SIG)
  @key     = options.fetch(:key,DEFAULT_KEY)
  @version = options.fetch(:version,DEFAULT_VERSION)

  super(options,&block)
end
Instance Attribute Details
#key ⇒ Object
The search key
62 63 64 |
# File 'lib/gscraper/search/ajax_query.rb', line 62
#
# The search key.
#
# @return [Object] the current value of `@key`.
def key
  @key
end
#sig ⇒ Object
The search signature
59 60 61 |
# File 'lib/gscraper/search/ajax_query.rb', line 59
#
# The search signature.
#
# @return [Object] the current value of `@sig`.
def sig
  @sig
end
#version ⇒ Object
The API version
65 66 67 |
# File 'lib/gscraper/search/ajax_query.rb', line 65
#
# The API version.
#
# @return [Object] the current value of `@version`.
def version
  @version
end
Class Method Details
.from_url(url, options = {}) {|query| ... } ⇒ AJAXQuery
Creates a new AJAX query from the specified URL.
124 125 126 127 128 129 130 131 132 133 134 135 |
# File 'lib/gscraper/search/ajax_query.rb', line 124
#
# Creates a new AJAX query from the specified URL.
#
# The `hl`, `q`, `sig`, `key` and `v` query parameters of the URL are mapped
# to the `:language`, `:query`, `:sig`, `:key` and `:version` options.
# Parameters that are missing from the URL are left out instead of being
# stored as `nil`, so values already present in `options` are preserved.
# The caller's `options` hash is not modified.
#
# @param [URI::HTTP, String] url
#   The URL to build the query from; it is converted with `URI(url.to_s)`.
#
# @param [Hash] options
#   Additional query options. Values found in the URL take precedence.
#
# @yield [query]
#   The block, if given, is forwarded to `AJAXQuery.new`.
#
# @return [AJAXQuery]
#   The new AJAX query.
def self.from_url(url,options={},&block)
  url    = URI(url.to_s)
  params = url.query_params

  url_options = {
    :language => params['hl'],
    :query    => params['q'],
    :sig      => params['sig'],
    :key      => params['key'],
    :version  => params['v']
  }.reject { |_name,value| value.nil? }

  # merge into a copy so the hash passed in by the caller stays untouched
  AJAXQuery.new(options.merge(url_options),&block)
end
Instance Method Details
#page(page_index) ⇒ Page<Result>
A page containing results at the specified page index.
200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
# File 'lib/gscraper/search/ajax_query.rb', line 200
#
# A page containing results at the specified page index.
#
# Fetches `page_url(page_index)` through `@agent`, extracts the first
# `{...}` span from the response body, parses it as JSON and turns every
# entry of its `'results'` array into a `Result`.
#
# @param [Integer] page_index
#   The index of the page to fetch.
#
# @return [Page<Result>]
#   The page of results. It is empty when the parsed JSON is not a Hash or
#   has no `'results'` entry.
def page(page_index)
  Page.new do |new_page|
    body = @agent.get(page_url(page_index)).body
    hash = JSON.parse(body.scan(/\{.*\}/).first)

    rank_offset = result_offset_of(page_index)

    if (hash.kind_of?(Hash) && hash['results'])
      hash['results'].each_with_index do |result,index|
        rank  = rank_offset + (index + 1)
        title = Nokogiri::HTML(result['title']).inner_text

        # URI.escape was removed in Ruby 3.0; the default parser's #escape
        # is the method URI.escape used to delegate to.
        url = URI(URI::DEFAULT_PARSER.escape(result['unescapedUrl']))

        content = result['content']
        summary = content.empty? ? '' : Nokogiri::HTML(content).inner_text

        cached_url = URI(result['cacheUrl'])

        new_page << Result.new(rank,title,url,summary,cached_url)
      end
    end
  end
end
#page_url(page_index) ⇒ URI::HTTP
The URL that represents the query at a specific page index.
181 182 183 184 185 186 187 188 189 |
# File 'lib/gscraper/search/ajax_query.rb', line 181
#
# The URL that represents the query at a specific page index.
#
# @param [Integer] page_index
#   The index of the page.
#
# @return [URI::HTTP]
#   The URL returned by `search_url`; for page indexes greater than 1 its
#   `start` query parameter is set to `result_offset_of(page_index)`.
def page_url(page_index)
  url = search_url

  # the first page carries no `start` parameter
  url.query_params['start'] = result_offset_of(page_index) if page_index > 1

  url
end
#results_per_page ⇒ Integer
The results per page.
145 146 147 |
# File 'lib/gscraper/search/ajax_query.rb', line 145
#
# The results per page.
#
# @return [Integer]
#   Always `RESULTS_PER_PAGE`.
def results_per_page
  RESULTS_PER_PAGE
end
#search_url ⇒ URI::HTTP
The URL that represents the query.
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
# File 'lib/gscraper/search/ajax_query.rb', line 155
#
# The URL that represents the query.
#
# The URL is built from `search_host`, `PATH` and `QUERY`, after which the
# `hl`, `gss`, `q`, `sig`, `key` and `v` query parameters are set.
#
# @return [URI::HTTP]
#   The URL for the query.
def search_url
  url = URI::HTTP.build(
    :host  => search_host,
    :path  => PATH,
    :query => QUERY
  )

  params = url.query_params
  params['hl']  = @language
  params['gss'] = '.com'
  params['q']   = expression
  params['sig'] = @sig
  params['key'] = @key
  params['v']   = @version

  url
end