Module: Downloader

Extended by:
Loggable
Defined in:
lib/downloader.rb,
lib/downloader/cli.rb,
lib/downloader/errors.rb,
lib/downloader/version.rb,
lib/downloader/url_helper.rb,
lib/downloader/filename_utils.rb

Defined Under Namespace

Classes: CLI, Error, FilenameError, FilenameUtils, UriError, UrlHelper

Constant Summary collapse

VERSION =
"0.3.2"

Class Method Summary collapse

Methods included from Loggable

logger, logger

Class Method Details

.batch(input_file, dest, options = nil) ⇒ Object

Downloads the files pointed to by the URLs in input_file to the path specified by dest

Example:

Downloader.batch("urls.txt", ".", {})
# => downloads the files from the URLs in urls.txt to the current directory

99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/downloader.rb', line 99

# Downloads every URL listed in +input_file+ into the directory +dest+.
#
# All requests go through one persistent HTTP client created for the scheme
# and host that get_host_with_scheme returns for the first URL; each URL is
# then requested by its relative reference.
#
# Example:
#
#   Downloader.batch("urls.txt", ".", {})
#   # => downloads the files from the URLs in urls.txt to the current directory
#
# @param input_file [String] path to the file of URLs (read by read_input_file)
# @param dest [String] directory the downloaded files are written to
# @param options [Hash, nil] string-keyed options; this method reads
#   'numbered_filenames' and passes the hash on to get_host_with_scheme
# @return [Object] not meaningful; callers should ignore it
def self.batch(input_file, dest, options = nil)
  logger.debug("Options: #{options}")

  urls = read_input_file(input_file)
  host_with_scheme = get_host_with_scheme(urls.first, options)

  logger.info("Connecting to #{host_with_scheme}")

  http = HTTP.persistent(host_with_scheme)

  begin
    urls.each_with_index do |url, i|
      relative_ref = UrlHelper.extract_relative_ref(url)

      # safe navigation + Hash#dig: options may be nil
      filename = UrlHelper.create_filename(url, options&.dig('numbered_filenames'), i + 1)
      logger.info("Downloading #{relative_ref} - filename: #{filename}")

      # The body is fetched before the file is created, so a failed request
      # leaves no empty file behind. Binary mode writes the bytes unchanged
      # (text mode would translate line endings on Windows).
      File.binwrite(File.join(dest, filename), do_get(http, relative_ref))
    end
  ensure
    # Release the persistent connection even when a download or write fails.
    http.close
  end
end

.do_get(http, ref) ⇒ String

Makes an HTTP GET request for ref using http, follows the redirect if the first response is one, and returns the response body as a string


74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/downloader.rb', line 74

# Performs a GET for +ref+ on +http+ and returns the response body.
#
# When the first response's status code is one of
# HTTP::Redirector::REDIRECT_CODES, the same +ref+ is requested again through
# http.follow and the body of that second response is returned instead.
#
# NOTE(review): the redirect request goes through the object returned by
# http.follow — confirm it shares (and releases) the persistent connection.
#
# @param http [#get, #follow] HTTP client used for the request(s)
# @param ref [String] reference to request
# @return [String] the fully read response body
def self.do_get(http, ref)
  initial = http.get(ref)
  logger.debug(initial.status)

  # to_s consumes the body so the next request on the persistent
  # connection starts from a clean state
  return initial.body.to_s unless HTTP::Redirector::REDIRECT_CODES.include?(initial.status.code)

  followed = http.follow.get(ref)
  logger.debug("Followed redirect, new response status: #{followed.status}")
  followed.body.to_s
end

.download(url) ⇒ String

Downloads the file at url to the current directory and returns the name of the file it was written to


129
130
131
132
133
134
135
136
137
# File 'lib/downloader.rb', line 129

# Downloads the resource at +url+ into the current working directory.
#
# The local filename comes from UrlHelper.extract_filename(url). The body is
# written in binary mode so the bytes reach the disk unchanged (text mode
# would translate line endings on Windows and corrupt binary files).
#
# @param url [String] URL of the file to download
# @return [String] the filename the response body was written to
def self.download(url)
  filename = UrlHelper.extract_filename(url)

  # to_s reads the whole response body; doing it before the write means a
  # failed request does not leave an empty file behind.
  File.binwrite(filename, HTTP.get(url).to_s)

  filename
end

.get_host_with_scheme(url, options) ⇒ String

Returns the value of scheme_host in options if it exists, otherwise extracts the scheme and host as one string from url

Exits with a nonzero value (1) and an error message with troubleshooting tips when UrlHelper raises a UriError

Example:

get_host_with_scheme("https://example.com/cats", options_hash)
# => "https://example.com"

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/downloader.rb', line 48

# Resolves the scheme and host (as one string) to connect to.
#
# The "scheme_host" entry of +options+ wins when it is set; otherwise the
# value is extracted from +url+ by UrlHelper.extract_host_with_scheme, which
# also receives the "scheme" entry of +options+ (nil when absent).
#
# When a UriError is raised, the error and a list of troubleshooting tips are
# logged and the process exits with status 1.
#
# Example:
#
#   get_host_with_scheme("https://example.com/cats", options_hash)
#   # => "https://example.com"
#
# @param url [String] URL to extract the scheme and host from
# @param options [Hash, nil] string-keyed options ("scheme_host", "scheme")
# @return [String] scheme and host
def self.get_host_with_scheme(url, options)
  # safe navigation: options may be nil
  explicit = options&.dig("scheme_host")
  return explicit if explicit

  UrlHelper.extract_host_with_scheme(url, options&.dig("scheme"))
rescue UriError => e
  logger.error("Error while parsing URL: #{e}")
  troubleshooting = %q(
Possible solutions:
- Check your input file. If the URLs are relative, use the
--scheme-host option to provide the scheme and host.
- If using the --scheme-host option, check if it's correct.
- If the URLs are missing a scheme but not the host, use the
--scheme option to provide the scheme.
- If the URLs are absolute, check if the scheme and host are
correct.
      )
  logger.error(troubleshooting)
  exit(1)
end

.read_input_file(file) ⇒ Array

Returns the contents of file as an array of lines after removing empty lines and newlines

Exits with a nonzero value (1) when file can't be loaded

Example:

read_input_file("in.txt")

22
23
24
25
26
27
28
29
30
31
# File 'lib/downloader.rb', line 22

# Reads +file+ and returns its non-empty lines.
#
# Each line has its trailing line terminator removed; lines that are empty
# afterwards are dropped (whitespace-only lines are kept).
#
# Logs an error and exits the process with status 1 when the file cannot be
# opened or read (any SystemCallError, e.g. Errno::ENOENT).
#
# Example:
#
#   read_input_file("in.txt")
#   # => ["https://example.com/a", "https://example.com/b"]
#
# @param file [String] path to the input file
# @return [Array<String>] the non-empty lines of the file
def self.read_input_file(file)
  # File.readlines opens, reads and closes the file itself, so no file
  # handle is left open after the call.
  File.readlines(file, chomp: true).reject(&:empty?)
rescue SystemCallError
  logger.error("Could not load input file: #{file}")
  exit(1)
end