Class: Google::Scholar::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/google/scholar/scraper.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, initial_document = nil) ⇒ Scraper

Returns a new instance of Scraper.



7
8
9
10
11
12
# File 'lib/google/scholar/scraper.rb', line 7

# Builds a scraper, optionally seeded with an already-parsed document and/or
# a URL to fetch immediately.
#
# @param url [String, nil] page to load via the class-level load_url; skipped
#   when nil/false
# @param initial_document [Object, nil] document stored ahead of the fetched
#   page; skipped when nil/false
def initialize(url, initial_document = nil)
  @documents = initial_document ? [initial_document] : []
  @documents.push(self.class.load_url(url)) if url
  self
end

Instance Attribute Details

#documents ⇒ Object

Returns the value of attribute documents.



6
7
8
# File 'lib/google/scholar/scraper.rb', line 6

# Reader for the scraper's accumulated documents.
#
# @return [Array] the @documents array that initialize creates and that
#   load_next_page appends to
def documents
  @documents
end

Class Method Details

.class_lookup(url = "") ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
# File 'lib/google/scholar/scraper.rb', line 13

# Chooses the document class for a URL by inspecting its query arguments.
#
# The text after the first "?" is split on "&". When the URL has no "?",
# the pieces produced by the "?" split are inspected instead.
#
# @param url [String] URL (or query fragment) to classify
# @return [Class] AuthorsDocument when an argument equals
#   "view_op=search_authors", AuthorsProfileDocument when any argument
#   contains "user=", otherwise Document
def self.class_lookup(url = "")
  pieces = url.split("?")
  params = pieces.length > 1 ? pieces[1].split("&") : pieces

  if params.include?("view_op=search_authors")
    Google::Scholar::AuthorsDocument
  elsif params.any? { |param| param.include?("user=") }
    Google::Scholar::AuthorsProfileDocument
  else
    Google::Scholar::Document
  end
end

.load_url(url) ⇒ Object



34
35
36
37
38
# File 'lib/google/scholar/scraper.rb', line 34

# Fetches +url+ over HTTP(S), parses the response with Nokogiri and wraps it
# in the document class chosen by class_lookup.
#
# @param url [String] absolute http or https URL to fetch
# @return [Object] a new instance of the class returned by class_lookup(url)
# @raise [RuntimeError] if the URL has no scheme or a scheme other than
#   http/https
def self.load_url(url)
  uri = URI(url)
  raise "Invalid scheme for #{url}" unless %w[http https].include?(uri.scheme)

  # Open through the parsed URI rather than Kernel#open: open-uri's
  # Kernel#open override for URLs was deprecated in Ruby 2.7 and removed in
  # 3.0. The block form closes the response IO once Nokogiri has read it.
  document = uri.open { |io| Nokogiri::HTML(io) }
  class_lookup(url).new(document)
end

Instance Method Details

#has_more_pages? ⇒ Boolean

Returns:

  • (Boolean)


39
40
41
# File 'lib/google/scholar/scraper.rb', line 39

# Reports whether the most recently loaded document advertises a next page.
#
# @return [Boolean] false when no documents have been loaded; otherwise the
#   result of has_next_page? on the last document
def has_more_pages?
  # A scraper built with neither a url nor an initial document has no last
  # document to ask, so answer false instead of calling a method on nil.
  !@documents.empty? && @documents.last.has_next_page?
end

#load_next_page ⇒ Object



30
31
32
33
# File 'lib/google/scholar/scraper.rb', line 30

# Loads the page that follows the last loaded document and appends it to
# the documents list. Does nothing when has_more_pages? is false.
#
# @return [Array, nil] the documents array after appending, or nil when
#   there was no further page to load
def load_next_page
  return unless has_more_pages?

  next_url = @documents.last.next_page_url
  @documents.push(self.class.load_url(next_url))
end

#valid? ⇒ Boolean

Returns:

  • (Boolean)


24
25
26
27
28
29
# File 'lib/google/scholar/scraper.rb', line 24

# Checks every loaded document, stopping at the first one that is not valid.
#
# @return [Boolean] true when all documents respond truthily to valid?
#   (including when there are no documents), false otherwise
def valid?
  @documents.all? { |page| page.valid? }
end