Class: SakamichiScraper::Base
- Inherits: Object
- Inheritance hierarchy: Object → SakamichiScraper::Base
- Defined in:
- lib/sakamichi_scraper/base.rb
Direct Known Subclasses
Instance Method Summary collapse
- #blog_list_page ⇒ Object
- #blog_top_page ⇒ Object
- #download_images_from_url_list(image_urls) ⇒ Object
- #exclude_img_path(group_name) ⇒ Object
- #exec_date ⇒ Object
- #format_content(content) ⇒ Object
- #format_timestamp(datetime) ⇒ Object
- #get_content(url) ⇒ Object
- #image_file_path ⇒ Object
- #image_urls_from_article_url(article_html, class_name) ⇒ Object
- #init_url_from_yml(group_name, yml_key) ⇒ Object
- #initialize(group_name) ⇒ Base (constructor): Returns a new instance of Base.
- #mkdir_today_file_path ⇒ Object
Constructor Details
#initialize(group_name) ⇒ Base
Returns a new instance of Base.
5 6 7 8 |
# File 'lib/sakamichi_scraper/base.rb', line 5
#
# Builds a scraper bound to a single group.
#
# @param group_name [String] group identifier; stored as-is and also
#   interpolated into the home page URL ("https://<group_name>46.com")
def initialize(group_name)
  @group_name = group_name
  @home_page = "https://#{group_name}46.com"
end
Instance Method Details
#blog_list_page ⇒ Object
23 24 25 |
# File 'lib/sakamichi_scraper/base.rb', line 23
#
# Looks up the "blog_list_page" entry for this instance's group via
# #init_url_from_yml and returns whatever that method returns.
def blog_list_page
  yml_key = "blog_list_page"
  init_url_from_yml(@group_name, yml_key)
end
#blog_top_page ⇒ Object
19 20 21 |
# File 'lib/sakamichi_scraper/base.rb', line 19
#
# Looks up the "blog_top_page" entry for this instance's group via
# #init_url_from_yml and returns whatever that method returns.
def blog_top_page
  yml_key = "blog_top_page"
  init_url_from_yml(@group_name, yml_key)
end
#download_images_from_url_list(image_urls) ⇒ Object
65 66 67 68 69 70 71 72 73 74 |
# File 'lib/sakamichi_scraper/base.rb', line 65
#
# Downloads every URL in +image_urls+ into the directory named by
# #image_file_path. Each file is named after the last "/"-separated segment
# of its URL. This method does not create the target directory itself.
#
# @param image_urls [Array<String>] image URLs to fetch
def download_images_from_url_list(image_urls)
  image_urls.each do |image_url|
    # Everything after the final "/" of the URL becomes the local file name.
    dest_image_path = "#{image_file_path}/#{image_url[%r([^/]+$)]}"
    # Image payloads are binary, so open the destination in binary mode
    # ("wb"); plain "w" is text mode and may translate line endings on some
    # platforms, corrupting the file.
    File.open(dest_image_path, "wb") do |file|
      URI.parse(image_url).open do |img|
        file.write(img.read)
      end
    end
  end
end
#exclude_img_path(group_name) ⇒ Object
43 44 45 |
# File 'lib/sakamichi_scraper/base.rb', line 43
#
# Reads "config/url.yml" (relative to the current working directory) and
# returns the "exclude_img_path" value stored under the given group.
#
# @param group_name [#to_s] top-level key of the YAML file
# @return [Object] the value found under <group_name>/exclude_img_path
def exclude_img_path(group_name)
  url_config = YAML.load_file("config/url.yml")
  group_config = url_config[group_name.to_s]
  group_config["exclude_img_path"]
end
#exec_date ⇒ Object
35 36 37 |
# File 'lib/sakamichi_scraper/base.rb', line 35
#
# Returns the date of the first call formatted as "YYYYMMDD". The value is
# cached in @exec_date, so later calls return the same string.
#
# @return [String]
def exec_date
  return @exec_date if @exec_date

  @exec_date = Time.now.strftime("%Y%m%d")
end
#format_content(content) ⇒ Object
27 28 29 |
# File 'lib/sakamichi_scraper/base.rb', line 27
#
# Returns a copy of +content+ with every carriage return, newline and other
# whitespace character (as matched by \s) removed.
#
# @param content [String]
# @return [String]
def format_content(content)
  whitespace = /[\r\n\s]/
  content.gsub(whitespace, "")
end
#format_timestamp(datetime) ⇒ Object
31 32 33 |
# File 'lib/sakamichi_scraper/base.rb', line 31
#
# Parses +datetime+ with DateTime.parse and re-renders it as
# "YYYY-M-D H:M"; the "%-" flags suppress zero padding on month, day, hour
# and minute.
#
# @param datetime [String] any timestamp text DateTime.parse accepts
# @return [String] e.g. "2021-1-2 3:4" for "2021-01-02 03:04"
# @raise [Date::Error] (ArgumentError on older Rubies) when the text cannot be parsed
def format_timestamp(datetime)
  DateTime.parse(datetime).strftime("%Y-%-m-%-d %-H:%-M")
end
#get_content(url) ⇒ Object
15 16 17 |
# File 'lib/sakamichi_scraper/base.rb', line 15
#
# Opens +url+ with URI.open, sending a fixed "User-Agent" header, and
# returns everything read from the response.
#
# @param url [String] address to fetch
# @return [String] the body read from the opened resource
def get_content(url)
  URI.open(url, "User-Agent" => "Ruby/2.7.1") { |response| response.read }
end
#image_file_path ⇒ Object
47 48 49 |
# File 'lib/sakamichi_scraper/base.rb', line 47
#
# Builds the relative directory path "img/<group_name>/<exec_date>" used for
# storing images.
#
# @return [String]
def image_file_path
  ["img", @group_name, exec_date].join("/")
end
#image_urls_from_article_url(article_html, class_name) ⇒ Object
51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/sakamichi_scraper/base.rb', line 51
#
# Collects the "src" of every <img> found beneath the CSS selector
# +class_name+ in +article_html+.
#
# @param article_html [String] HTML handed to Nokogiri.parse
# @param class_name [String] CSS selector prefix; " img" is appended to it
# @return [Array<String>] one URL per matched <img>
def image_urls_from_article_url(article_html, class_name)
  images = Nokogiri.parse(article_html, nil, nil).css("#{class_name} img")
  images.map do |img|
    src = img.attribute("src").value
    # For the "sakurazaka" group the src is prefixed with the home page URL;
    # every other group's src is returned unchanged.
    @group_name == "sakurazaka" ? "#{@home_page}#{src}" : src
  end
end
#init_url_from_yml(group_name, yml_key) ⇒ Object
10 11 12 13 |
# File 'lib/sakamichi_scraper/base.rb', line 10
#
# Reads the URL stored under <group_name>/<yml_key> in "config/url.yml"
# (relative to the current working directory) and fetches it with
# #get_content.
#
# @param group_name [#to_s] top-level key of the YAML file
# @param yml_key [#to_s] key inside the group's section
# @return [Object] whatever #get_content returns for that URL
def init_url_from_yml(group_name, yml_key)
  group_config = YAML.load_file("config/url.yml")[group_name.to_s]
  get_content(group_config[yml_key.to_s])
end
#mkdir_today_file_path ⇒ Object
39 40 41 |
# File 'lib/sakamichi_scraper/base.rb', line 39
#
# Creates the directory returned by #image_file_path, including any missing
# parent directories (FileUtils.mkdir_p).
def mkdir_today_file_path
  target_dir = image_file_path
  FileUtils.mkdir_p(target_dir)
end