Class: DManga::MangaHostParser

Inherits:

Object
SiteParserBase
DManga::MangaHostParser

Defined in:: lib/dmanga/mangahost_parser.rb

Constant Summary collapse

SEARCH_URL = url used to search in the site

"https://mangahosted.com/find/"

SEARCH_LINK_REGEX = regex to extract url of found mangas

/entry-title">\s*<a\s*href="(.*)?"\s*title="(.*)"/

CHAPTER_LINK_REGEX = regexes to extract chapters’ urls from the manga page. Manga Host has two different manga page layouts: one for short/medium mangas and one for big mangas

[
  /capitulo.*?Ler\s+Online\s+-\s+(.*?)['"]\s+href=['"](.*?)['"]/, # for short/medium mangas
  /<a\s+href=['"](.*?)['"]\s+title=['"]Ler\s+Online\s+-\s+(.*?)\s+\[\]/ # for big mangas
]

IMG_LINK_REGEX = regex to extract images’ url from chapter page

[/img_\d+['"]\s+src=['"](.*?)['"]/,
/url['"]:['"](.*?)['"]\}/]

Constants inherited from SiteParserBase

SiteParserBase::USER_AGENT

Instance Attribute Summary

Attributes inherited from SiteParserBase

#chapters, #manga_name, #manga_url, #verbose

Instance Method Summary collapse

#download ⇒ Object

Methods inherited from SiteParserBase

#create_dir, #download_dir, #get_progressbar, #imgs_download, #initialize, #parse, #remove_invalid_simbols, #search, #select_chapters, #select_manga

Constructor Details

This class inherits a constructor from DManga::SiteParserBase

Instance Method Details

#download ⇒ `Object`

# File 'lib/dmanga/mangahost_parser.rb', line 25

# Searches the site for the requested manga, lets the user pick chapters
# and downloads the images of every selected chapter into
# "<manga name>/<chapter name>".
#
# Reads +@options.manga+ and writes +@options.site+ and +@chapters+.
# +@manga_url+ and +@manga_name+ are read after the +search+ call;
# presumably +search+ / +select_manga+ set them (defined in
# SiteParserBase, not visible here).
#
# @return [Object] whatever +@chapters.reverse_each+ returns; not meant
#   to be used by callers
def download
  # host part of SEARCH_URL, e.g. "mangahosted.com"
  @options.site = SEARCH_URL.match(%r{.*://(.*)/find/})[1]

  # white space is not allowed in the search url:
  # replace every whitespace character by '+'
  guess_manga_name = encode_manga_name(@options.manga.gsub(/\s/, '+'))

  search("#{SEARCH_URL}#{guess_manga_name}", SEARCH_LINK_REGEX)

  # Due to the organization of the chapters page the chapters are
  # extracted in reverse order
  @chapters = parse(@manga_url, CHAPTER_LINK_REGEX[0]) do |resul, page|
    # fall back to the big-manga regex when the short/medium-manga
    # regex found nothing
    if resul.empty?
      # The big-manga regex captures [url, name]; rotate each match to
      # [name, url] to conform with the result of CHAPTER_LINK_REGEX[0]
      page.scan(CHAPTER_LINK_REGEX[1]) { |chapter| resul << chapter.rotate }
    end
    resul
  end

  # correct utf-8 errors
  correct_chapters_name

  # prompt user to select chapters to download
  select_chapters

  # remove symbols that cannot be used in a folder's name on windows
  # (called once; NOTE(review): assumes the helper cleans the string
  # in place and is idempotent, so the former duplicate call was a no-op)
  remove_invalid_simbols(@manga_name)
  # create manga directory
  create_dir(@manga_name)

  # download selected chapters
  @chapters.reverse_each do |name, url|
    imgs_url = parse(url, IMG_LINK_REGEX[0]) do |resul, page|
      # use the second pattern if the first returns an empty array
      if resul.empty?
        page.scan(IMG_LINK_REGEX[1]) { |img| resul << img[0] }
      end

      resul.each do |img|
        # some image urls are incorrect and need to be corrected. For example:
        # img.mangahost.net/br/images/img.png.webp => img.mangahost.net/br/mangas_files/img.png
        img.sub!(/images/, "mangas_files")
        img.sub!(/\.webp/, "")

        # correct "créditos" img problem
        correct_image_uri(img)

        # drop backslashes left in the url
        img.gsub!(%r{\\}, "")
      end
      resul
    end

    # Work on a copy of the chapter name so the entry stored in
    # @chapters is left untouched by the clean-up below.
    chapter_name = "#{name}"
    # remove symbols that cannot be used in a folder's name on windows
    remove_invalid_simbols chapter_name
    # chapter directory is relative to the manga directory
    chapter_dir = "#{@manga_name}/#{chapter_name}"
    create_dir(chapter_dir)

    DManga::display_feedback "\nBaixando #{chapter_name}"
    imgs_download(chapter_dir, imgs_url)
  end
end