Class: Google::Scholar::Scraper
- Inherits: Object
- Inheritance chain: Object → Google::Scholar::Scraper
- Defined in:
- lib/google/scholar/scraper.rb
Instance Attribute Summary collapse
-
#documents ⇒ Object
Returns the value of attribute documents.
Class Method Summary collapse
- .class_lookup(url = "") ⇒ Object
- .load_url(url) ⇒ Object
Instance Method Summary collapse
- #has_more_pages? ⇒ Boolean
-
#initialize(url, initial_document = nil) ⇒ Scraper
constructor
A new instance of Scraper.
- #load_next_page ⇒ Object
- #valid? ⇒ Boolean
Constructor Details
#initialize(url, initial_document = nil) ⇒ Scraper
Returns a new instance of Scraper.
7 8 9 10 11 12 |
# File 'lib/google/scholar/scraper.rb', line 7
#
# Sets up the scraper's document list.
#
# The list starts with +initial_document+ (when one is given) and is then
# extended with whatever the class-level +load_url+ returns for +url+
# (when +url+ is given).
#
# @param url [String, nil] address passed to +self.class.load_url+; skipped when nil/false
# @param initial_document [Object, nil] document stored first, skipped when nil/false
# @return [Scraper] the receiver
def initialize(url, initial_document = nil)
  seeded = []
  seeded.push(initial_document) if initial_document
  @documents = seeded
  @documents.push(self.class.load_url(url)) if url
  self
end
Instance Attribute Details
#documents ⇒ Object
Returns the value of attribute documents.
6 7 8 |
# File 'lib/google/scholar/scraper.rb', line 6
#
# Reader for the list of documents held by this scraper.
#
# @return [Array] the +@documents+ instance variable, returned as-is (not copied)
def documents
  @documents
end
Class Method Details
.class_lookup(url = "") ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/google/scholar/scraper.rb', line 13
#
# Chooses the document class used to wrap the page at +url+, based on the
# URL's query parameters.
#
# Only the query string (the text after "?" and before any "#") is examined,
# so a path segment that happens to contain "user=" cannot influence the
# result. The "user" check matches on the parameter name, so parameters such
# as "superuser=1" are not mistaken for it.
#
# @param url [String] the URL to classify
# @return [Class] Google::Scholar::AuthorsDocument when the query contains
#   "view_op=search_authors"; otherwise Google::Scholar::AuthorsProfileDocument
#   when it has a "user" parameter; otherwise Google::Scholar::Document
def self.class_lookup(url = "")
  # Capture everything between the first "?" and an optional "#fragment".
  query = url[/\?([^#]*)/, 1]
  params = query ? query.split("&") : []

  # The author-search check comes first so it wins when both markers appear.
  return Google::Scholar::AuthorsDocument if params.include?("view_op=search_authors")
  return Google::Scholar::AuthorsProfileDocument if params.any? { |param| param.start_with?("user=") }

  Google::Scholar::Document
end
.load_url(url) ⇒ Object
34 35 36 37 38 |
# File 'lib/google/scholar/scraper.rb', line 34
#
# Fetches +url+ over HTTP(S), parses the response with Nokogiri and wraps it
# in the document class chosen by +class_lookup+.
#
# The page is fetched through the parsed URI's own +open+ (provided by
# open-uri) rather than a bare +open(url)+: Kernel#open no longer opens URLs
# on Ruby 3.0+, and the block form closes the downloaded IO once parsing is
# done.
#
# @param url [String] an http or https URL
# @return [Object] a new instance of the class returned by +class_lookup(url)+
# @raise [RuntimeError] when +url+ has no scheme or a scheme other than http/https
def self.load_url(url)
  uri = URI(url)
  raise "Invalid scheme for #{url}" unless %w[http https].include?(uri.scheme)

  page = uri.open { |io| Nokogiri::HTML(io) }
  class_lookup(url).new(page)
end
Instance Method Details
#has_more_pages? ⇒ Boolean
39 40 41 |
# File 'lib/google/scholar/scraper.rb', line 39
#
# Tells whether the most recently stored document reports a following page.
#
# The constructor leaves the document list empty when it is given neither a
# url nor an initial document; in that case there is nothing to page through,
# so this answers false instead of calling a method on nil.
#
# @return [Boolean] false when no documents are loaded; otherwise whatever the
#   last document's +has_next_page?+ returns
def has_more_pages?
  return false if @documents.empty?

  @documents.last.has_next_page?
end
#load_next_page ⇒ Object
30 31 32 33 |
# File 'lib/google/scholar/scraper.rb', line 30
#
# Loads the page that follows the last stored document and appends it to the
# document list. Does nothing when +has_more_pages?+ is false.
#
# @return [Array, nil] the document list after appending, or nil when there is
#   no further page
def load_next_page
  if has_more_pages?
    next_url = @documents.last.next_page_url
    @documents << self.class.load_url(next_url)
  end
end
#valid? ⇒ Boolean
24 25 26 27 28 29 |
# File 'lib/google/scholar/scraper.rb', line 24
#
# Checks every stored document, stopping at the first one that is not valid.
#
# @return [Boolean] true when each document's +valid?+ is truthy (also true for
#   an empty list), false otherwise
def valid?
  @documents.all? { |doc| doc.valid? }
end