Class: SakamichiScraper::Hinatazaka
- Inherits:
-
Base
- Object
- Base
- SakamichiScraper::Hinatazaka
show all
- Defined in:
- lib/sakamichi_scraper/hinatazaka.rb
Instance Method Summary
collapse
Methods inherited from Base
#blog_list_page, #download_images_from_url_list, #exclude_img_path, #exec_date, #format_content, #format_timestamp, #get_content, #image_file_path, #image_urls_from_article_url, #init_url_from_yml, #mkdir_today_file_path
Constructor Details
Returns a new instance of Hinatazaka.
5
6
7
|
# File 'lib/sakamichi_scraper/hinatazaka.rb', line 5
# Builds a Hinatazaka scraper by handing the fixed group key to the
# Base constructor.
def initialize
  group_key = "hinatazaka"
  super(group_key)
end
|
Instance Method Details
#blog_top_page_title ⇒ Object
9
10
11
|
# File 'lib/sakamichi_scraper/hinatazaka.rb', line 9
# Parses the blog top page as HTML and returns the document's title.
#
# @return [Object] whatever Nokogiri reports as the parsed document's title
def blog_top_page_title
  document = Nokogiri::HTML.parse(blog_top_page, nil, nil)
  document.title
end
|
#newest_blog_title ⇒ Object
13
14
15
16
17
18
|
# File 'lib/sakamichi_scraper/hinatazaka.rb', line 13
# Reads the title of the newest blog entry from the blog top page.
#
# The text of the first element matching
# ".p-blog-main__head > .c-blog-main__title" is passed through
# format_content before being returned.
#
# @return [Object] the result of format_content for the scraped title text
def newest_blog_title
  top_page = Nokogiri.parse(blog_top_page, nil, nil)
  heading = top_page.at_css(".p-blog-main__head > .c-blog-main__title")
  format_content(heading.content)
end
|
#picture_in_newest_article ⇒ Object
33
34
35
36
37
38
39
40
|
# File 'lib/sakamichi_scraper/hinatazaka.rb', line 33
# Downloads the images found in the newest blog article.
#
# Takes the first article URL listed on the blog top page, fetches that
# article, collects image URLs from inside "div.c-blog-article__text",
# creates today's image directory when image_file_path does not exist yet,
# and then hands the URL list to download_images_from_url_list.
#
# @return [Object] whatever download_images_from_url_list returns
def picture_in_newest_article
  latest_url = article_urls_from_list_page(blog_top_page).first
  urls = image_urls_from_article_url(get_content(latest_url), "div.c-blog-article__text")

  unless Dir.exist?(image_file_path)
    mkdir_today_file_path
  end

  download_images_from_url_list(urls)
end
|
#recent_blog_info ⇒ Object
20
21
22
23
24
25
26
27
28
29
30
31
|
# File 'lib/sakamichi_scraper/hinatazaka.rb', line 20
# Collects member, title and timestamp for every entry listed on the blog
# top page.
#
# Each ".p-blog-top__list > li" element's text is split into lines; blank
# lines are dropped and the first three remaining lines are read as member,
# title and timestamp, in that order.
#
# @return [Array<Hash>] one hash per list item with the keys :member,
#   :title (leading whitespace removed) and :timestamp (the result of
#   format_timestamp on the left-stripped raw text)
# @raise [NoMethodError] if a list item has fewer than three non-blank lines
def recent_blog_info
  # `map` returns the built hashes; the previous `each` loop filled a local
  # array but returned the iterated node collection instead of that array.
  Nokogiri.parse(blog_top_page, nil, nil).css(".p-blog-top__list > li").map do |item|
    member, title, timestamp = item.content.strip.split("\n").reject(&:blank?)
    {
      member: member,
      title: title.lstrip,
      timestamp: format_timestamp(timestamp.lstrip)
    }
  end
end
|