Class: TorrentCrawler::Crawlers::Mininova
- Inherits:
-
Base
- Object
- Base
- TorrentCrawler::Crawlers::Mininova
show all
- Defined in:
- lib/crawlers/mininova.rb
Instance Attribute Summary
Attributes inherited from Base
#results
Instance Method Summary
collapse
Methods inherited from Base
#headers, #initialize, #result, #tracker_key
Instance Method Details
#detail(tracker_id) ⇒ Object
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
# File 'lib/crawlers/mininova.rb', line 41
# Fetches the Mininova detail page for one torrent and copies its fields
# onto the record yielded by the inherited #result helper.
#
# @param tracker_id [Object] identifier interpolated into the detail URL
# @return [Object] the value returned by the #result call
# @raise [NoMethodError] if an expected element is missing from the page
def detail(tracker_id)
  # NOTE(review): the second argument to open was blank in this listing; the
  # inherited #headers helper is assumed to be what belongs here - confirm.
  doc = Nokogiri::HTML(open(detail_url(tracker_id), headers))

  # Stripped text of the first node matching +selector+.
  text_of = ->(selector) { doc.css(selector).first.text.strip }

  result do |torrent|
    # One paragraph carries both values, e.g. "700 MB in 12 files".
    size_and_files = text_of.call('#torrentdetails p:nth-child(6)')

    torrent.tracker_id = tracker_id
    torrent.hash = text_of.call('#torrentdetails p:nth-child(2)').gsub(/Info hash:\s*(.*)/, '\1')
    torrent.title = doc.css('h1').first.text.gsub(/Details of (.*)/, '\1')
    torrent.size = size_and_files.gsub(/(.*) in \d+ files?/, '\1')
    # The \b stops the greedy prefix from swallowing the leading digits of
    # the count (without it "12 files" was reduced to "2").
    torrent.files = size_and_files.gsub(/.*\b(\d+) files?$/m, '\1')
    torrent.uploaded_at = DateTime.parse(
      text_of.call('#torrentdetails p:nth-child(3)').gsub(/Added on:\s*(.*)/, '\1')
    )
    torrent
  end
end
|
#detail_url(tracker_id) ⇒ Object
7
8
9
|
# File 'lib/crawlers/mininova.rb', line 7
# Builds the address of a torrent's detail page.
#
# @param tracker_id [#to_s] identifier appended to the detail path
# @return [String] the detail page URL
def detail_url(tracker_id)
  id_segment = tracker_id.to_s
  "http://www.mininova.org/det/#{id_segment}"
end
|
#index(last_seen = nil) ⇒ Object
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
# File 'lib/crawlers/mininova.rb', line 11
# Scrapes the Mininova front page, appending one record per torrent row to
# the inherited #results collection.
#
# @param last_seen [Object, nil] tracker id at which to stop; the row with
#   this id and every row after it are not collected. Compared by string
#   form, so an Integer id matches the scraped String id.
# @return [Object] the #results collection
# @raise [NoMethodError] if a torrent row lacks one of the expected cells
def index(last_seen = nil)
  # NOTE(review): the second argument to open was truncated to "self." in
  # this listing; the inherited #headers helper is assumed - confirm.
  doc = Nokogiri::HTML(open(index_url, headers))

  doc.css('table.maintable:nth-child(2) tr').each do |row|
    # Skip rows without the link that the id is read from, so the guard and
    # the extraction below can never disagree.
    link = row.css('td:nth-child(2) a').first
    next if link.nil?

    result do |torrent|
      torrent.tracker_id = link['href'].gsub(%r{^.*/get/(\d+)/?$}, '\1')
      # Returns from #index itself: everything from here on was already seen.
      return results if !last_seen.nil? && torrent.tracker_id.to_s == last_seen.to_s

      torrent.title = row.css('td:nth-child(2) a:nth-child(2)').first.text.strip
      torrent.size = row.css('td:nth-child(3)').first.text.strip
      torrent.seeders = row.css('td:nth-child(4)').first.text.strip
      torrent.leechers = row.css('td:nth-child(5)').first.text.strip
      # The listing row's own date is not read; the crawl time is recorded.
      torrent.uploaded_at = Time.now
      torrent.tags << row.css('td:nth-child(1)').first.text.strip
      torrent.tags << row.css('td:nth-child(2) small strong').first.text.strip
      results << torrent
    end
  end

  results
end
|
#index_url ⇒ Object
3
4
5
|
# File 'lib/crawlers/mininova.rb', line 3
# Address of the Mininova front page that #index scrapes.
#
# @return [String] the site root URL
def index_url
  'http://www.mininova.org/'
end
|