Class: SakamichiScraper::Hinatazaka

Inherits:
Base
  • Object
show all
Defined in:
lib/sakamichi_scraper/hinatazaka.rb

Instance Method Summary collapse

Methods inherited from Base

#blog_list_page, #download_images_from_url_list, #exclude_img_path, #exec_date, #format_content, #format_timestamp, #get_content, #image_file_path, #image_urls_from_article_url, #init_url_from_yml, #mkdir_today_file_path

Constructor Details

#initialize ⇒ Hinatazaka

Returns a new instance of Hinatazaka.



5
6
7
# File 'lib/sakamichi_scraper/hinatazaka.rb', line 5

# Creates the scraper by handing the fixed string "hinatazaka" to
# Base#initialize.
# NOTE(review): presumably Base uses this string to select group-specific
# settings (see #init_url_from_yml) — confirm against Base.
def initialize
  group_key = "hinatazaka"
  super(group_key)
end

Instance Method Details

#blog_top_page_title ⇒ Object



9
10
11
# File 'lib/sakamichi_scraper/hinatazaka.rb', line 9

# Parses the value of #blog_top_page as HTML and returns the document title.
#
# @return [Object] whatever the parsed document's #title yields
def blog_top_page_title
  document = Nokogiri::HTML.parse(blog_top_page, nil, nil)
  document.title
end

#newest_blog_title ⇒ Object



13
14
15
16
17
18
# File 'lib/sakamichi_scraper/hinatazaka.rb', line 13

# Reads the text of the first element matching
# ".p-blog-main__head > .c-blog-main__title" on the blog top page and
# returns it after passing it through #format_content.
#
# @return [Object] the result of #format_content for the scraped text
def newest_blog_title
  document = Nokogiri.parse(blog_top_page, nil, nil)
  title_node = document.at_css(".p-blog-main__head > .c-blog-main__title")
  format_content(title_node.content)
end

#picture_in_newest_article ⇒ Object



33
34
35
36
37
38
39
40
# File 'lib/sakamichi_scraper/hinatazaka.rb', line 33

# Downloads the images found in the first article listed on the blog top page.
#
# Takes the first URL returned by #article_urls_from_list_page, fetches it
# with #get_content, and collects image URLs scoped to
# "div.c-blog-article__text". #mkdir_today_file_path is called only when
# #image_file_path does not already exist as a directory.
#
# @return [Object] the result of #download_images_from_url_list
def picture_in_newest_article
  latest_url = article_urls_from_list_page(blog_top_page).first
  image_urls = image_urls_from_article_url(get_content(latest_url), "div.c-blog-article__text")

  target_dir_present = Dir.exist?(image_file_path)
  mkdir_today_file_path unless target_dir_present
  download_images_from_url_list(image_urls)
end

#recent_blog_info ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/sakamichi_scraper/hinatazaka.rb', line 20

# Collects the entries matched by ".p-blog-top__list > li" on the blog top page.
#
# Each list item's text is split into lines; after discarding blank lines,
# the first three are read as member, title and timestamp, in that order.
#
# @return [Array<Hash>] one hash per list item with :member, :title and
#   :timestamp keys (the timestamp is passed through #format_timestamp)
# @raise [NoMethodError] if an item has fewer than three non-blank lines
def recent_blog_info
  # `map` instead of `each` plus an accumulator: the previous version ended on
  # the `each` call, so it returned the iterated collection rather than the
  # hashes it had built.
  Nokogiri.parse(blog_top_page, nil, nil).css(".p-blog-top__list > li").map do |item|
    member, title, timestamp = item.content.strip.split("\n").reject(&:blank?)
    {
      member: member,
      title: title.lstrip,
      timestamp: format_timestamp(timestamp.lstrip)
    }
  end
end