Class: GitTrend::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/git_trend/scraper.rb

Constant Summary collapse

# Scheme and host that every scraped URL is built on.
BASE_HOST = 'https://github.com'
# Trending page URL, derived from BASE_HOST.
BASE_URL = "#{BASE_HOST}/trending"

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Scraper

Returns a new instance of Scraper.



10
11
12
13
14
15
# File 'lib/git_trend/scraper.rb', line 10

# Builds the Mechanize agent used for every request and tags it with a
# "git-trend <VERSION>" user agent. When the http_proxy environment variable
# is set, its host, port, user and password are handed to the agent.
def initialize
  @agent = Mechanize.new
  @agent.user_agent = "git-trend #{VERSION}"

  proxy_setting = ENV['http_proxy']
  return unless proxy_setting

  proxy_uri = URI.parse(proxy_setting)
  @agent.set_proxy(proxy_uri.host, proxy_uri.port, proxy_uri.user, proxy_uri.password)
end

Instance Method Details

#get(language = nil, since = nil, number = nil) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/git_trend/scraper.rb', line 17

# Fetches the page at generate_url_for_get(language, since) and builds one
# Project per '.repo-list-item' element found on it.
#
# @param language [Object, nil] forwarded to generate_url_for_get
# @param since [Object, nil] forwarded to generate_url_for_get
# @param number [Integer, nil] when given, only projects[0...number] is returned
# @return [Array<Project>] the scraped projects, in page order
# @raise [ScrapeException] when the page yields no '.repo-list-item' elements
def get(language = nil, since = nil, number = nil)
  page = @agent.get(generate_url_for_get(language, since))

  projects = page.search('.repo-list-item').map do |content|
    # NOTE(review): the name of this local and the helper's argument were
    # missing from this copy of the source; it is assumed that
    # extract_lang_and_star_from_meta takes the meta text — confirm.
    meta_text = content.search('.repo-list-meta').text

    Project.new.tap do |project|
      project.lang, project.star_count = extract_lang_and_star_from_meta(meta_text)
      # split.join removes all whitespace inside the name link text.
      project.name        = content.search('.repo-list-name a').text.split.join
      project.description = content.search('.repo-list-description').text.gsub("\n", '').strip
    end
  end

  raise ScrapeException if projects.empty?
  number ? projects[0...number] : projects
end

#languages ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
# File 'lib/git_trend/scraper.rb', line 33

# Fetches BASE_URL and collects the language names linked from its
# 'div.select-menu-item a' anchors.
#
# @return [Array<String>] URL-decoded language names in page order; empty
#   when no anchor href matches the trending "?l=" pattern
def languages
  page = @agent.get(BASE_URL)

  page.search('div.select-menu-item a').each_with_object([]) do |anchor, names|
    href = anchor.attributes['href']
    next unless href # an anchor without an href cannot name a language

    # The language is URL-encoded in the query string,
    # e.g. "objective-c%2B%2B" decodes to "objective-c++".
    # The dot is escaped so that only the literal host "github.com" matches.
    encoded = href.value[%r{github\.com/trending\?l=(.+)}, 1]
    names << CGI.unescape(encoded) if encoded
  end
end