Module: TorchAudio::Datasets::Utils

Defined in:
lib/torchaudio/datasets/utils.rb

Class Method Summary collapse

Class Method Details

.download_url(url, download_folder, filename: nil, hash_value: nil, hash_type: "sha256") ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
# File 'lib/torchaudio/datasets/utils.rb', line 5

# Downloads +url+ into +download_folder+ and returns whatever
# download_url_to_file returns for the resulting path.
#
# @param url [String] address of the file to fetch
# @param download_folder [String] directory the file is placed in
# @param filename [String, nil] name of the local file; defaults to the
#   basename of +url+
# @param hash_value [String, nil] expected digest, forwarded unchanged
# @param hash_type [String] digest algorithm name, forwarded unchanged
# @raise [RuntimeError] if the target path already exists
def download_url(url, download_folder, filename: nil, hash_value: nil, hash_type: "sha256")
  target = File.join(download_folder, filename || File.basename(url))

  # Never overwrite an existing file: the caller must remove it explicitly.
  raise "#{target} already exists. Delete the file manually and retry." if File.exist?(target)

  puts "Downloading #{url}..."
  download_url_to_file(url, target, hash_value, hash_type)
end

.download_url_to_file(url, dst, hash_value, hash_type, redirects = 0) ⇒ Object

Downloads url to dst, following at most 10 HTTP redirects.



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/torchaudio/datasets/utils.rb', line 18

# Downloads +url+ to +dst+, following at most 10 HTTP redirects.
#
# The response body is streamed to a temporary file, optionally verified
# against +hash_value+, and only then moved to +dst+. The temporary file is
# removed if anything fails before the move.
#
# @param url [String] address to fetch
# @param dst [String] final path of the downloaded file
# @param hash_value [String, nil] expected hex digest; verification is
#   skipped when nil
# @param hash_type [String] digest algorithm used for verification: "md5",
#   "sha1", "sha256", "sha384" or "sha512" (case-insensitive)
# @param redirects [Integer] number of redirects followed so far
# @return [String] +dst+
# @raise [RuntimeError] after more than 10 redirects, or on a hash mismatch
# @raise [ArgumentError] if +hash_value+ is given with an unsupported +hash_type+
# @raise [Error] on a response that is neither a success nor a usable
#   redirect (Error is resolved from the enclosing namespace)
def download_url_to_file(url, dst, hash_value, hash_type, redirects = 0)
  raise "Too many redirects" if redirects > 10

  # Resolve the digest before any network I/O so a bad hash_type fails fast.
  digest_class = nil
  unless hash_value.nil?
    digest_class =
      case hash_type.to_s.downcase
      when "md5" then Digest::MD5
      when "sha1" then Digest::SHA1
      when "sha256" then Digest::SHA256
      when "sha384" then Digest::SHA384
      when "sha512" then Digest::SHA512
      else raise ArgumentError, "Unsupported hash type: #{hash_type.inspect}"
      end
  end

  uri = URI(url)
  tmp = nil
  location = nil

  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
    request = Net::HTTP::Get.new(uri)

    http.request(request) do |response|
      case response
      when Net::HTTPRedirection
        location = response["location"]
        # A redirect without a Location header cannot be followed.
        raise Error, "Bad response" unless location
      when Net::HTTPSuccess
        # pid + timestamp + random suffix keeps concurrent downloads apart.
        tmp = File.join(Dir.tmpdir, "torchaudio-#{Process.pid}-#{Time.now.to_f}-#{rand(1_000_000)}")
        File.open(tmp, "wb") do |f|
          response.read_body { |chunk| f.write(chunk) }
        end
      else
        raise Error, "Bad response"
      end
    end
  end

  if location
    # Location may be relative to the current URL; resolve it first.
    return download_url_to_file(URI.join(url, location).to_s, dst, hash_value, hash_type, redirects + 1)
  end

  if digest_class && digest_class.file(tmp).hexdigest != hash_value
    raise "The hash of #{dst} does not match. Delete the file manually and retry."
  end

  FileUtils.mv(tmp, dst)
  dst
ensure
  # After a successful move tmp no longer exists, so this only cleans up failures.
  File.delete(tmp) if tmp && File.exist?(tmp)
end

.extract_archive(from_path, to_path: nil, overwrite: nil) ⇒ Object

Returns to_path rather than a list of extracted files, because extract_tar_gz does not report the files it extracts.



60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/torchaudio/datasets/utils.rb', line 60

# Unpacks a gzipped tar archive and returns the directory it was unpacked into.
#
# @param from_path [String] path to a ".tar.gz" or ".tgz" archive
# @param to_path [String, nil] destination directory; defaults to the
#   directory containing +from_path+
# @param overwrite [Object] accepted but not used by this method
# @return [String] the destination directory
# @raise [RuntimeError] if +from_path+ has any other extension
def extract_archive(from_path, to_path: nil, overwrite: nil)
  destination = to_path || File.dirname(from_path)

  unless from_path.end_with?(".tar.gz", ".tgz")
    raise "We currently only support tar.gz and tgz archives."
  end

  # Gem::Package is used only for its tar.gz extraction helper.
  File.open(from_path, "rb") do |archive|
    Gem::Package.new("").extract_tar_gz(archive, destination)
  end

  destination
end

.walk_files(root, suffix, prefix: false, remove_suffix: false) ⇒ Object



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/torchaudio/datasets/utils.rb', line 73

# Yields every entry under +root+ (searched recursively, in sorted order)
# whose path ends with +suffix+. Returns an Enumerator when no block is given.
#
# @param root [String] directory to search
# @param suffix [String] required ending of the path, e.g. ".wav"
# @param prefix [Boolean] when true, yield the path joined onto +root+
#   instead of the path relative to +root+
# @param remove_suffix [Boolean] when true, strip +suffix+ from each
#   yielded path
# @yieldparam path [String] matching path
def walk_files(root, suffix, prefix: false, remove_suffix: false)
  return enum_for(:walk_files, root, suffix, prefix: prefix, remove_suffix: remove_suffix) unless block_given?

  Dir.glob("**/*", base: root).sort.each do |entry|
    next unless entry.end_with?(suffix)

    path = remove_suffix ? entry.delete_suffix(suffix) : entry
    # Glob results are relative to root, so joining root gives the prefixed path.
    path = File.join(root, path) if prefix

    yield path
  end
end