Class: Copycasts::Crawling

Inherits:
Object
  • Object
show all
Defined in:
lib/copycasts.rb

Constant Summary collapse

TARGET_URL =
'http://railscasts.com'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Crawling

Returns a new instance of Crawling.



11
12
13
# File 'lib/copycasts.rb', line 11

# Builds a crawler limited to a given number of listing pages.
#
# @param options [Hash] optional settings
# @option options [Object] :page number of listing pages to crawl (used as
#   the upper bound of a range by #get_links); when absent or falsy the
#   count is obtained from #maximum_page
def initialize(options = {})
  # Keep @page in step with @pages: the #page reader returns @page, while
  # #get_links iterates up to @pages. Previously only @pages was assigned,
  # so #page always returned nil.
  @page = @pages = options[:page] || maximum_page
end

Instance Attribute Details

#page ⇒ Object

Returns the value of attribute page.



9
10
11
# File 'lib/copycasts.rb', line 9

# Reader for the +page+ attribute.
#
# @return [Object, nil] the current value of the @page instance variable
#   (nil if it has never been assigned)
def page
  @page
end

Instance Method Details

#download_videos ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/copycasts.rb', line 53

# Downloads every link returned by #mp4_video_links into the current
# working directory.
#
# Each file is named after the last segment of the URL path (any query
# string is ignored). The response body is streamed to disk chunk by chunk
# rather than buffered in memory. Redirects are not followed.
#
# @return [Array<String>] the list of links that was iterated
def download_videos
  mp4_video_links.each do |video_link|
    uri = URI.parse(video_link)
    file_name = File.basename(uri.path)

    # Pass the port and scheme explicitly so non-default ports and https
    # URLs are reached correctly.
    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
      puts "Start downloading #{file_name}..."
      http.request_get(uri.request_uri) do |response|
        # Do not save an error page under a video file name.
        unless response.is_a?(Net::HTTPSuccess)
          warn "Failed to download #{file_name}: #{response.code} #{response.message}"
          next
        end

        # File.open (not Kernel#open) so a URL-derived name is never
        # interpreted as anything but a path.
        File.open(file_name, 'wb') do |file|
          response.read_body { |chunk| file.write(chunk) }
        end
        puts "Downloaded successfully!"
      end
    end
  end
end


#get_links ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/copycasts.rb', line 15

# Crawls the free-episode listing pages 1..@pages and collects the first
# link inside each `.watch` element, with the `?autoplay=true` suffix
# removed.
#
# Progress messages are written to standard output.
#
# @return [Array<String>] href values in page order
def get_links
  puts "Start crawling..."
  casts_list = (1..@pages).flat_map do |index|
    puts "Page :#{index}"
    listing_url = "#{TARGET_URL}/?type=free&page=#{index}"
    # URI#open (from open-uri) instead of Kernel#open: the latter no longer
    # accepts URLs on Ruby 3.0+. The block form closes the handle once the
    # document has been parsed.
    target_page = URI.parse(listing_url).open { |io| Nokogiri::HTML(io) }
    target_page.css('.watch a:first').map do |link|
      link['href'].to_s.sub('?autoplay=true', '')
    end
  end
  puts "Finish crawling."
  casts_list
end

#maximum_page ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/copycasts.rb', line 30

# Determines how many listing pages exist by reading the pagination links
# on the first free-episode listing page.
#
# Only links whose text is numeric are considered; the largest positive
# value wins. Non-numeric links are ignored.
#
# @return [Integer] the highest page number found, or 0 when there is none
def maximum_page
  # URI#open (from open-uri) instead of Kernel#open, which no longer accepts
  # URLs on Ruby 3.0+; the block form closes the handle after parsing.
  target_page = URI.parse("#{TARGET_URL}/?type=free").open { |io| Nokogiri::HTML(io) }
  numeric_labels = target_page.css('.pagination a').map(&:content).select do |label|
    label =~ /\A[+-]?\d+?(\.\d+)?\Z/
  end
  numeric_labels.map(&:to_i).select { |number| number > 0 }.max || 0
end


#mp4_video_links ⇒ Object



43
44
45
46
47
48
49
50
51
# File 'lib/copycasts.rb', line 43

# Visits every episode page returned by #get_links and extracts the href of
# the first anchor matched by `.downloads li[3] a`.
#
# Episodes whose page has no such anchor (or whose anchor has no href) are
# skipped instead of raising NoMethodError.
#
# @return [Array<String>] download URLs, in the order of #get_links
def mp4_video_links
  mp4_links = get_links.map do |video_link|
    # URI.join copes with both "/episodes/1" and "episodes/1" without
    # producing a double slash.
    episode_uri = URI.join(TARGET_URL, video_link)
    # URI#open (from open-uri) instead of Kernel#open, which no longer
    # accepts URLs on Ruby 3.0+; the block form closes the handle.
    video_page = episode_uri.open { |io| Nokogiri::HTML(io) }
    link = video_page.css('.downloads li[3] a').first
    # Read href explicitly rather than relying on it being the first
    # attribute of the anchor.
    link && link['href']
  end
  mp4_links.compact
end