Module: Transformers::Utils::Hub

Defined in:
lib/transformers/utils/hub.rb

Constant Summary

# Copy of HfHub::HF_HUB_OFFLINE, captured when this file is loaded.
IS_OFFLINE_MODE = HfHub::HF_HUB_OFFLINE

# Cache directory lookup chain: each constant reads its own environment
# variable and falls back to the constant defined just before it, ending at
# HfHub::HF_HUB_CACHE.
PYTORCH_PRETRAINED_BERT_CACHE = ENV.fetch("PYTORCH_PRETRAINED_BERT_CACHE", HfHub::HF_HUB_CACHE)
PYTORCH_TRANSFORMERS_CACHE = ENV.fetch("PYTORCH_TRANSFORMERS_CACHE", PYTORCH_PRETRAINED_BERT_CACHE)
TRANSFORMERS_CACHE = ENV.fetch("TRANSFORMERS_CACHE", PYTORCH_TRANSFORMERS_CACHE)

# Endpoint used unless HF_ENDPOINT is set in the environment.
DEFAULT_ENDPOINT = "https://huggingface.co"
HUGGINGFACE_CO_RESOLVE_ENDPOINT = ENV.fetch("HF_ENDPOINT", DEFAULT_ENDPOINT)

Class Method Summary

Class Method Details

.cached_file(path_or_repo_id, filename, cache_dir: nil, force_download: false, resume_download: false, proxies: nil, token: nil, revision: nil, local_files_only: false, subfolder: "", repo_type: nil, user_agent: nil, _raise_exceptions_for_gated_repo: true, _raise_exceptions_for_missing_entries: true, _raise_exceptions_for_connection_errors: true, _commit_hash: nil, **deprecated_kwargs) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/transformers/utils/hub.rb', line 58

# Resolves +filename+ of the repo +path_or_repo_id+ to a local path, first
# consulting the cache (when a commit hash is known) and otherwise delegating
# to HfHub.hf_hub_download.
#
# @param path_or_repo_id [#to_s] repo id; an existing local directory is not
#   supported yet and raises Todo
# @param filename [String] file name inside the repo (relative to +subfolder+)
# @param cache_dir [String, Pathname, nil] cache location; defaults to TRANSFORMERS_CACHE
# @param force_download [Boolean] skips the commit-hash cache shortcut and is
#   forwarded to HfHub.hf_hub_download
# @param resume_download [Boolean] forwarded to HfHub.hf_hub_download
# @param proxies [Object, nil] forwarded to HfHub.hf_hub_download
# @param token [Object, nil] forwarded to HfHub.hf_hub_download
# @param revision [String, nil] forwarded to HfHub.hf_hub_download
# @param local_files_only [Boolean] forced to true when offline mode is on
# @param subfolder [#to_s, nil] folder inside the repo; nil and "" mean the repo root
# @param repo_type [String, nil] forwarded to the HfHub calls
# @param user_agent [Hash, String, nil] extra user-agent data, passed through http_user_agent
# @param _raise_exceptions_for_gated_repo [Boolean] accepted but not used by this method
# @param _raise_exceptions_for_missing_entries [Boolean] when false, a known-missing
#   cache entry or ANY download error yields nil instead of an exception
# @param _raise_exceptions_for_connection_errors [Boolean] accepted but not used by this method
# @param _commit_hash [String, nil] when given (and not forcing a download) the
#   cache is checked for that exact revision first
# @param deprecated_kwargs [Hash] accepted and ignored
# @return [Object, nil] whatever the cache lookup / download returns, or nil
#   when errors are suppressed
# @raise [Todo] if +path_or_repo_id+ is an existing local directory
# @raise [EnvironmentError] if the cache records the file as non-existent and
#   +_raise_exceptions_for_missing_entries+ is true
def cached_file(
  path_or_repo_id,
  filename,
  cache_dir: nil,
  force_download: false,
  resume_download: false,
  proxies: nil,
  token: nil,
  revision: nil,
  local_files_only: false,
  subfolder: "",
  repo_type: nil,
  user_agent: nil,
  _raise_exceptions_for_gated_repo: true,
  _raise_exceptions_for_missing_entries: true,
  _raise_exceptions_for_connection_errors: true,
  _commit_hash: nil,
  **deprecated_kwargs
)
  if is_offline_mode && !local_files_only
    Transformers.logger.info "Offline mode: forcing local_files_only: true"
    local_files_only = true
  end

  # nil -> "", Pathname -> String, String unchanged.
  subfolder = subfolder.to_s

  path_or_repo_id = path_or_repo_id.to_s
  # File.join("", name) returns "/name" in Ruby, so only join when a subfolder
  # was actually given; otherwise the cache lookup and the error message would
  # carry a bogus leading slash.
  full_filename = subfolder.empty? ? filename : File.join(subfolder, filename)
  raise Todo if Dir.exist?(path_or_repo_id)

  cache_dir = TRANSFORMERS_CACHE if cache_dir.nil?
  cache_dir = cache_dir.to_s if cache_dir.is_a?(Pathname)

  if !_commit_hash.nil? && !force_download
    # If the file is cached under that commit hash, we return it directly.
    resolved_file =
      HfHub.try_to_load_from_cache(
        path_or_repo_id, full_filename, cache_dir: cache_dir, revision: _commit_hash, repo_type: repo_type
      )
    unless resolved_file.nil?
      return resolved_file if resolved_file != HfHub::CACHED_NO_EXIST
      return nil unless _raise_exceptions_for_missing_entries

      raise EnvironmentError, "Could not locate #{full_filename} inside #{path_or_repo_id}."
    end
  end

  user_agent = http_user_agent(user_agent)

  begin
    HfHub.hf_hub_download(
      path_or_repo_id,
      filename,
      subfolder: subfolder.empty? ? nil : subfolder,
      repo_type: repo_type,
      revision: revision,
      cache_dir: cache_dir,
      user_agent: user_agent,
      force_download: force_download,
      proxies: proxies,
      resume_download: resume_download,
      token: token,
      local_files_only: local_files_only
    )
  rescue
    # Deliberate best-effort: with the flag off, every StandardError raised by
    # the download is swallowed and the caller receives nil.
    raise if _raise_exceptions_for_missing_entries
    nil
  end
end

.extract_commit_hash(resolved_file, commit_hash) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
# File 'lib/transformers/utils/hub.rb', line 46

# Pulls the commit hash out of a resolved cache path of the form
# ".../snapshots/<hash>/...".
#
# @param resolved_file [String, Pathname, nil] path returned by a cache lookup or download
# @param commit_hash [String, nil] an already-known hash; returned untouched when present
# @return [String, nil] +commit_hash+ if it was given or +resolved_file+ is nil;
#   otherwise the path segment following "snapshots/" when it matches
#   HfHub::REGEX_COMMIT_HASH, else nil
def extract_commit_hash(resolved_file, commit_hash)
  return commit_hash if resolved_file.nil? || !commit_hash.nil?

  # Regexp#match refuses a Pathname (no implicit String conversion), so
  # normalize to a String first; a String is returned unchanged by to_s.
  snapshot = %r{snapshots/([^/]+)}.match(resolved_file.to_s)
  return nil if snapshot.nil?

  candidate = snapshot[1]
  HfHub::REGEX_COMMIT_HASH.match(candidate) ? candidate : nil
end

.has_file(path_or_repo, filename, revision: nil, proxies: nil, token: nil, local_files_only: false, cache_dir: nil, repo_type: nil, **deprecated_kwargs) ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/transformers/utils/hub.rb', line 140

# Tells whether +filename+ is present in a local directory or in a Hub repo.
#
# For a repo, the local cache is consulted first and, unless
# +local_files_only+ is set, a HEAD request is issued to confirm.
#
# @param path_or_repo [String] local directory or repo id
# @param filename [String] file to look for
# @param revision [String, nil] forwarded to the cache lookup and URL builder
# @param proxies [Object, nil] forwarded to the HEAD request
# @param token [Object, nil] used to build the request headers
# @param local_files_only [Boolean] when true, answer from the cache only
# @param cache_dir [String, nil] forwarded to the cache lookup
# @param repo_type [String, nil] forwarded to the cache lookup and URL builder
# @param deprecated_kwargs [Hash] accepted and ignored
# @return [Boolean]
# @raise [EnvironmentError] for gated repos, unknown repos and unknown revisions
def has_file(
  path_or_repo,
  filename,
  revision: nil,
  proxies: nil,
  token: nil,
  local_files_only: false,
  cache_dir: nil,
  repo_type: nil,
  **deprecated_kwargs
)
  # A local directory is answered straight from the filesystem.
  return File.exist?(File.join(path_or_repo, filename)) if Dir.exist?(path_or_repo)

  # Otherwise it is a repo: look in the local cache first. The cache may be
  # stale, so a HEAD call follows whenever network access is allowed.
  cache_entry =
    HfHub.try_to_load_from_cache(
      path_or_repo, filename, revision: revision, repo_type: repo_type, cache_dir: cache_dir
    )
  found_in_cache = cache_entry.is_a?(String)

  # Cache-only mode: never touch the network.
  return found_in_cache if local_files_only

  begin
    url = HfHub.hf_hub_url(path_or_repo, filename, revision: revision, repo_type: repo_type)
    headers = HfHub.build_hf_headers(token: token, user_agent: http_user_agent)
    HfHub._request_wrapper(
      "HEAD", url,
      headers: headers, allow_redirects: false, proxies: proxies, timeout: 10
    )
    true
  rescue HfHub::OfflineModeIsEnabled
    # No network available: fall back to what the cache says.
    found_in_cache
  rescue HfHub::GatedRepoError => gated
    Transformers.logger.error(gated)
    raise EnvironmentError,
      "#{path_or_repo} is a gated repository. Make sure to request access at " \
      "https://huggingface.co/#{path_or_repo} and pass a token having permission to this repo either by " \
      "logging in with `huggingface-cli login` or by passing `token=<your_token>`."
  rescue HfHub::RepositoryNotFoundError => missing_repo
    Transformers.logger.error(missing_repo)
    raise EnvironmentError,
      "#{path_or_repo} is not a local folder or a valid repository name on 'https://hf.co'."
  rescue HfHub::RevisionNotFoundError => missing_revision
    Transformers.logger.error(missing_revision)
    raise EnvironmentError,
      "#{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for this " \
      "model name. Check the model page at 'https://huggingface.co/#{path_or_repo}' for available revisions."
  rescue HfHub::EntryNotFoundError
    false # the repo exists but the file does not
  end
end

.http_user_agent(user_agent = nil) ⇒ Object



36
37
38
39
40
41
42
43
44
# File 'lib/transformers/utils/hub.rb', line 36

# Builds the user-agent string sent with Hub requests.
#
# The base is "transformers.rb/<gem version>; ruby/<major.minor>"; extra data
# is appended after "; ".
#
# @param user_agent [Hash, String, nil] a Hash is rendered as "key/value"
#   pairs joined by "; ", a String is appended verbatim, anything else is ignored
# @return [String]
def http_user_agent(user_agent = nil)
  # RUBY_VERSION.to_f would turn "3.10.2" into 3.1, so take "major.minor"
  # textually instead of going through a Float.
  ruby_version = RUBY_VERSION[/\A\d+\.\d+/]
  ua = "transformers.rb/#{Transformers::VERSION}; ruby/#{ruby_version}"
  case user_agent
  when Hash
    ua += "; " + user_agent.map { |k, v| "#{k}/#{v}" }.join("; ")
  when String
    ua += "; " + user_agent
  end
  ua
end

.is_offline_mode ⇒ Object



28
29
30
# File 'lib/transformers/utils/hub.rb', line 28

# Accessor for the IS_OFFLINE_MODE constant.
#
# @return [Object] the value of IS_OFFLINE_MODE
def is_offline_mode
  IS_OFFLINE_MODE
end

.is_remote_url(url_or_filename) ⇒ Object



32
33
34
# File 'lib/transformers/utils/hub.rb', line 32

# Tells whether the argument is a URI object.
#
# Only the object's type is inspected: a String is never considered remote,
# even when its content looks like a URL.
#
# @param url_or_filename [Object] value to classify
# @return [Boolean] true when +url_or_filename+ is a kind of URI
def is_remote_url(url_or_filename)
  url_or_filename.kind_of?(URI)
end