23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
# File 'lib/eg_nijigazou_sokuhou.rb', line 23
# Downloads every image linked from the article page at +src_url+, then
# follows the page's rel="prev" / rel="next" links for as long as the link
# text still contains this article's title.
#
# @param src_url [String] URL of the article page to scrape.
# @param dist [Integer] direction already being walked: a negative value
#   follows only "prev" links, a positive value only "next" links, and 0
#   (the default) follows both.
# @return [Hash] the :prev and :next link nodes found on this page (a value
#   is nil when the page has no such link).
# @raise [RuntimeError] if an image request does not answer with status 200.
def fetch_url(src_url, dist = 0)
  # Read through URI instead of Kernel#open: Kernel#open runs a command when
  # the string starts with "|" (src_url comes from scraped hrefs on recursive
  # calls) and never closed the handle it returned. URI#read is provided by
  # open-uri, which opening a URL with Kernel#open already depended on.
  document = Nokogiri::HTML(URI.parse(src_url).read)
  title_full = document.title
  # Keep the text after the last ':' and drop a trailing "その..." suffix
  # (presumably a part marker such as "その2" -- confirm against real titles).
  # NOTE(review): the gsub below replaces '&' with '&' and therefore changes
  # nothing as written; it may have been meant to decode an entity -- confirm.
  title = title_full.split(/:/).last.match(/(.+?)(その.+)?$/)[1].strip.gsub(/&/, '&')

  # Only anchors that wrap an <img> and whose href ends in an image extension.
  # The alternation is grouped so the end anchor applies to every extension,
  # not just "gif".
  image_urls = document.css(".article-body-more > a > img").map { |img|
    img.parent[:href]
  }.compact.select { |href| href =~ /\.(?:jpe?g|png|gif)\z/ }

  connection = {
    :prev => document.xpath("//a[@rel='prev']").first,
    :next => document.xpath("//a[@rel='next']").first
  }

  # Skip the download step when nothing matched, so dirname is never handed nil.
  unless image_urls.empty?
    dir = dirname(image_urls.first)
    image_urls.each do |image_url|
      # The article URL is sent as the referer of each image request.
      response = http_client.get(image_url, :header => {:referer => src_url})
      unless response.status == 200
        raise "Failed to download #{image_url}: HTTP status #{response.status}"
      end
      File.open(File.join(dir, File.basename(image_url)), "wb") { |f| f.write response.body }
    end
  end

  connection.each do |type, link|
    # Follow a neighbour only if it exists and its link text contains the title.
    next unless link && link.text.include?(title)

    case type
    when :prev
      # Keep walking backwards only; never bounce back towards "next".
      fetch_url(link[:href], -1) if dist <= 0
    when :next
      # Keep walking forwards only; never bounce back towards "prev".
      fetch_url(link[:href], 1) if dist >= 0
    end
  end
end
|