Class: SakamichiScraper::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/sakamichi_scraper/base.rb

Direct Known Subclasses

Hinatazaka, Sakurazaka

Instance Method Summary collapse

Constructor Details

#initialize(group_name) ⇒ Base

Returns a new instance of Base.



5
6
7
8
# File 'lib/sakamichi_scraper/base.rb', line 5

# Builds a scraper bound to a single group.
#
# @param group_name [String] group identifier; stored as-is and also
#   interpolated into the home page URL ("https://<group_name>46.com")
def initialize(group_name)
  @group_name = group_name
  # The home page host is derived purely from the group name.
  @home_page = "https://#{group_name}46.com"
end

Instance Method Details

#blog_list_page ⇒ Object



23
24
25
# File 'lib/sakamichi_scraper/base.rb', line 23

# Fetches the page registered under the "blog_list_page" key for this
# instance's group.
#
# @return [Object] whatever init_url_from_yml returns for that key
def blog_list_page
  yml_key = "blog_list_page"
  init_url_from_yml(@group_name, yml_key)
end

#blog_top_page ⇒ Object



19
20
21
# File 'lib/sakamichi_scraper/base.rb', line 19

# Fetches the page registered under the "blog_top_page" key for this
# instance's group.
#
# @return [Object] whatever init_url_from_yml returns for that key
def blog_top_page
  yml_key = "blog_top_page"
  init_url_from_yml(@group_name, yml_key)
end

#download_images_from_url_list(image_urls) ⇒ Object



65
66
67
68
69
70
71
72
73
74
# File 'lib/sakamichi_scraper/base.rb', line 65

# Downloads every URL in +image_urls+ into the directory returned by
# image_file_path, naming each file after the last path segment of its URL.
#
# The remote resource is opened first and the bytes are then written with
# File.binwrite, so image data is never passed through a text-mode handle
# and a failed download does not leave an empty file behind.
#
# @param image_urls [Array<String>] URLs to fetch
# @return [Array<String>] the +image_urls+ argument itself
def download_images_from_url_list(image_urls)
  image_urls.each do |image_url|
    # Everything after the final "/" becomes the local file name.
    dest_image_path = "#{image_file_path}/#{image_url[%r([^/]+$)]}"
    URI.parse(image_url).open do |remote|
      File.binwrite(dest_image_path, remote.read)
    end
  end
end

#exclude_img_path(group_name) ⇒ Object



43
44
45
# File 'lib/sakamichi_scraper/base.rb', line 43

# Looks up the "exclude_img_path" entry for a group in config/url.yml.
#
# The file path is relative, so it is resolved against the current working
# directory each time this method is called.
#
# @param group_name [#to_s] top-level key in the YAML file
# @return [Object] the value stored under "exclude_img_path" for that group
def exclude_img_path(group_name)
  url_config = YAML.load_file("config/url.yml")
  group_config = url_config[group_name.to_s]
  group_config["exclude_img_path"]
end

#exec_date ⇒ Object



35
36
37
# File 'lib/sakamichi_scraper/base.rb', line 35

# Returns the current date as a "YYYYMMDD" string.
#
# The value is computed on first use and cached in @exec_date, so later
# calls on the same object return the same string.
#
# @return [String] e.g. "20240131"
def exec_date
  return @exec_date if @exec_date

  @exec_date = Time.now.strftime("%Y%m%d")
end

#format_content(content) ⇒ Object



27
28
29
# File 'lib/sakamichi_scraper/base.rb', line 27

# Strips every whitespace character matched by the pattern below
# (carriage returns, newlines and anything \s covers) from +content+.
#
# @param content [String] text to compact
# @return [String] a new string with the matched characters removed
def format_content(content)
  whitespace = /[\r\n\s]/
  content.gsub(whitespace, "")
end

#format_timestamp(datetime) ⇒ Object



31
32
33
# File 'lib/sakamichi_scraper/base.rb', line 31

# Parses a date/time string and re-renders it as
# "YEAR-MONTH-DAY HOUR:MINUTE" with no zero padding on month, day, hour
# or minute (the "%-" flags suppress padding).
#
# @param datetime [String] any string DateTime.parse accepts
# @return [String] e.g. "2021-3-5 9:7"
def format_timestamp(datetime)
  parsed = DateTime.parse(datetime)
  parsed.strftime("%Y-%-m-%-d %-H:%-M")
end

#get_content(url) ⇒ Object



15
16
17
# File 'lib/sakamichi_scraper/base.rb', line 15

# Opens +url+ via URI.open, sending a fixed "User-Agent" header, and
# returns everything read from the yielded handle.
#
# @param url [String, #open] target to open
# @return [String] the body that was read
def get_content(url)
  URI.open(url, "User-Agent" => "Ruby/2.7.1") do |response|
    response.read
  end
end

#image_file_path ⇒ Object



47
48
49
# File 'lib/sakamichi_scraper/base.rb', line 47

# Relative directory used for this run's images:
# "img/<group name>/<exec_date>".
#
# @return [String] the directory path (not created by this method)
def image_file_path
  date_dir = exec_date
  "img/#{@group_name}/#{date_dir}"
end

#image_urls_from_article_url(article_html, class_name) ⇒ Object



51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/sakamichi_scraper/base.rb', line 51

# Collects the +src+ of every <img> found under +class_name+ in the given
# article markup.
#
# <img> elements without a +src+ attribute are skipped instead of raising.
# When the group is "sakurazaka" each value is prefixed with @home_page
# (presumably because that site emits site-relative paths — confirm against
# real markup); for every other group the +src+ is returned unchanged.
#
# @param article_html [String] HTML markup of the article (despite the
#   method name, this is passed straight to Nokogiri.parse, not fetched)
# @param class_name [String] CSS selector for the container; " img" is
#   appended to it
# @return [Array<String>] image URLs in document order
def image_urls_from_article_url(article_html, class_name)
  images = Nokogiri.parse(article_html, nil, nil).css("#{class_name} img")
  # Node#[] yields nil for a missing attribute; drop those entries.
  sources = images.map { |img| img["src"] }.compact
  return sources unless @group_name == "sakurazaka"

  sources.map { |src| "#{@home_page}#{src}" }
end

#init_url_from_yml(group_name, yml_key) ⇒ Object



10
11
12
13
# File 'lib/sakamichi_scraper/base.rb', line 10

# Reads a URL from config/url.yml (resolved against the current working
# directory) and returns the result of get_content for it.
#
# @param group_name [#to_s] top-level key in the YAML file
# @param yml_key [#to_s] key under that group whose value is the URL
# @return [Object] whatever get_content returns for the configured URL
def init_url_from_yml(group_name, yml_key)
  url_config = YAML.load_file("config/url.yml")
  page_url = url_config[group_name.to_s][yml_key.to_s]
  get_content(page_url)
end

#mkdir_today_file_path ⇒ Object



39
40
41
# File 'lib/sakamichi_scraper/base.rb', line 39

# Creates the directory returned by image_file_path, including any missing
# parent directories. Does nothing harmful if it already exists.
#
# @return [Object] the return value of FileUtils.mkdir_p
def mkdir_today_file_path
  target_dir = image_file_path
  FileUtils.mkdir_p(target_dir)
end