Class: RightScraper::Retrievers::Download

Inherits:
Base
  • Object
show all
Defined in:
lib/right_scraper/retrievers/download.rb

Overview

A retriever for resources stored in archives on a web server somewhere. Uses command line curl and command line tar.

Defined Under Namespace

Classes: DownloadError

Constant Summary collapse

@@available =
false

Instance Attribute Summary

Attributes inherited from Base

#logger, #max_bytes, #max_seconds, #repo_dir, #repository

Instance Method Summary collapse

Methods inherited from Base

#ignorable_paths, #initialize, repo_dir

Constructor Details

This class inherits a constructor from RightScraper::Retrievers::Base

Instance Method Details

#available? ⇒ Boolean

Determines if downloader is available.

Returns:

  • (Boolean)


44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/right_scraper/retrievers/download.rb', line 44

# Determines if downloader is available by running "curl --version" once.
# A successful probe is cached in @@available, so later calls skip the
# command; a failed probe is reported through @logger and retried on the
# next call.
#
# === Return
# true:: if curl ran and exited successfully (now or on an earlier call)
# false:: otherwise
def available?
  return @@available if @@available

  # FIX: we might want to parse the result and require a minimum curl
  # version.
  probe = "curl --version"
  begin
    `#{probe}`
    status = $?
    unless status.success?
      raise RetrieverError, "\"#{probe}\" exited with #{status.exitstatus}"
    end
    @@available = true
  rescue
    # Covers both the non-zero exit raised above and a failure to launch
    # the command at all; either way the problem is only logged.
    @logger.note_error($!, :available, "download retriever is unavailable")
  end
  @@available
end

#exit_download(status) ⇒ Object



168
169
170
171
172
173
174
175
# File 'lib/right_scraper/retrievers/download.rb', line 168

# Exit handler registered with popen3_sync by #retrieve for both the curl
# and the tar invocation.
#
# === Parameters
# status:: exit status object; must respond to success? and exitstatus
#
# === Return
# true:: when the command exited successfully
#
# === Raise
# DownloadError:: when the command failed; the message carries the buffered
#                 command output followed by the exit code
def exit_download(status)
  if status.success?
    @logger.note_phase(:commit, :running_command, @cmd.first)
    return true
  end

  # Append the exit code to the captured output so it shows up in the error.
  @output.safe_buffer_data("Exit code = #{status.exitstatus}")
  raise DownloadError, "Downloader failed: #{@output.display_text}"
end

#note_tag(file) ⇒ Object

Amend @repository with the tag information from the downloaded file.

Parameters

file(String)

file that was downloaded



183
184
185
186
187
188
189
# File 'lib/right_scraper/retrievers/download.rb', line 183

# Amend @repository with the tag information from the downloaded file.
# The tag is the SHA1 hex digest of the file's complete contents.
# @repository is replaced by a tagged clone; the object it previously
# referenced is left untouched.
#
# NOTE: earlier revisions digested only the first 4096 bytes, so tags for
# larger files differ from those computed before this change.
#
# === Parameters
# file(String):: file that was downloaded
#
# === Return
# the tagged clone now stored in @repository
def note_tag(file)
  digest = Digest::SHA1.new
  # Read in binary mode and feed every chunk to the digest. A sized read
  # returns nil at end of file, which ends the loop and also makes an empty
  # file hash cleanly instead of passing nil to the digest.
  File.open(file, 'rb') do |f|
    while (chunk = f.read(4096))
      digest << chunk
    end
  end
  repo = @repository.clone
  repo.tag = digest.hexdigest
  @repository = repo
end

#output_download(data) ⇒ Object



156
157
158
# File 'lib/right_scraper/retrievers/download.rb', line 156

# Output handler registered by #retrieve as both the :stdout_handler and the
# :stderr_handler of popen3_sync; appends the received data to @output.
#
# === Parameters
# data:: chunk of command output (presumably a String -- confirm against
#        RightPopen)
#
# === Return
# whatever @output.safe_buffer_data returns
def output_download(data)
  @output.safe_buffer_data(data)
end

#pid_download(pid) ⇒ Object



151
152
153
154
# File 'lib/right_scraper/retrievers/download.rb', line 151

# Pid handler registered by #retrieve with popen3_sync. Logs the :begin
# phase for the command currently held in @cmd.
#
# === Parameters
# pid:: value supplied to the pid handler (presumably the child process id);
#       not used here
#
# === Return
# true:: always
def pid_download(pid)
  @logger.note_phase(:begin, :running_command, @cmd.first)
  true
end

#retrieve ⇒ Object

Download tarball and unpack it

Raises:

  • (RetrieverError)

69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/right_scraper/retrievers/download.rb', line 69

# Download tarball and unpack it.
#
# @repo_dir and the download work directory are removed and recreated first.
# The archive is fetched with curl into "<workdir>/package", tagged via
# note_tag, and then extracted with tar from inside @repo_dir. Both commands
# run through RightPopen.popen3_sync with this object's *_download methods
# as handlers, limited by @max_bytes and @max_seconds.
#
# NOTE(review): credentials are passed to curl with "-u user:password" on
# the command line, which can be visible to other local users in the
# process list.
#
# === Return
# true:: always, when no exception is raised
#
# === Raise
# RetrieverError:: if the download retriever (curl) is unavailable
# DownloadError:: raised by the exit, timeout and size limit handlers
#                 (presumably propagated through popen3_sync -- confirm)
def retrieve
  raise RetrieverError, "download retriever is unavailable" unless available?
  # File.exist? is used because the File.exists? alias was deprecated and
  # then removed in Ruby 3.2, where it raises NoMethodError.
  ::FileUtils.remove_entry_secure @repo_dir if ::File.exist?(@repo_dir)
  ::FileUtils.remove_entry_secure workdir if ::File.exist?(workdir)
  ::FileUtils.mkdir_p @repo_dir
  ::FileUtils.mkdir_p workdir
  file = ::File.join(workdir, "package")

  # TEAL FIX: we have to always-download the tarball before we can
  # determine if contents have changed, but afterward we can compare the
  # previous download against the latest downloaded and short-circuit the
  # remaining flow for the no-difference case.
  @logger.operation(:downloading) do
    # Only send credentials when both parts are present.
    credential_command = if @repository.first_credential && @repository.second_credential
      ['-u', "#{@repository.first_credential}:#{@repository.second_credential}"]
    else
      []
    end
    @output = ::RightScale::RightPopen::SafeOutputBuffer.new
    @cmd = [
      'curl',
      '--silent', '--show-error', '--location', '--fail',
      '--location-trusted', '-o', file, credential_command,
      @repository.url
    ].flatten
    begin
      ::RightScale::RightPopen.popen3_sync(
        @cmd,
        :target             => self,
        :pid_handler        => :pid_download,
        :timeout_handler    => :timeout_download,
        :size_limit_handler => :size_limit_download,
        :exit_handler       => :exit_download,
        :stderr_handler     => :output_download,
        :stdout_handler     => :output_download,
        :inherit_io         => true,  # avoid killing any rails connection
        :watch_directory    => workdir,
        :size_limit_bytes   => @max_bytes,
        :timeout_seconds    => @max_seconds)
    rescue Exception => e
      # Deliberately broad: the abort is only logged and the original
      # exception is re-raised unchanged.
      @logger.note_phase(:abort, :running_command, 'curl', e)
      raise
    end
  end

  note_tag(file)

  @logger.operation(:unpacking) do
    # Pick the tar flags from the extension of the repository URL's path.
    extraction = case @repository.to_url.path
                 when /\.gz$/ then "xzf"
                 when /\.bz2$/ then "xjf"
                 else "xf"
                 end
    Dir.chdir(@repo_dir) do
      @output = ::RightScale::RightPopen::SafeOutputBuffer.new
      @cmd = ['tar', extraction, file]
      begin
        ::RightScale::RightPopen.popen3_sync(
          @cmd,
          :target             => self,
          :pid_handler        => :pid_download,
          :timeout_handler    => :timeout_download,
          :size_limit_handler => :size_limit_download,
          :exit_handler       => :exit_download,
          :stderr_handler     => :output_download,
          :stdout_handler     => :output_download,
          :inherit_io         => true,  # avoid killing any rails connection
          :watch_directory    => @repo_dir,
          :size_limit_bytes   => @max_bytes,
          :timeout_seconds    => @max_seconds)
      rescue Exception => e
        # Same log-and-re-raise handling as the curl step above.
        @logger.note_phase(:abort, :running_command, @cmd.first, e)
        raise
      end
    end
  end
  true
end

#size_limit_download ⇒ Object

Raises:

  • (DownloadError)

164
165
166
# File 'lib/right_scraper/retrievers/download.rb', line 164

# Size limit handler registered by #retrieve with popen3_sync (presumably
# invoked when the watched directory grows past :size_limit_bytes --
# confirm against RightPopen).
#
# === Raise
# DownloadError:: always
def size_limit_download
  raise DownloadError, "Downloader exceeded size limit"
end

#timeout_download ⇒ Object

Raises:

  • (DownloadError)

160
161
162
# File 'lib/right_scraper/retrievers/download.rb', line 160

# Timeout handler registered by #retrieve with popen3_sync (presumably
# invoked when a command runs longer than :timeout_seconds -- confirm
# against RightPopen).
#
# === Raise
# DownloadError:: always
def timeout_download
  raise DownloadError, "Downloader timed out"
end

#workdir ⇒ Object

Directory used to download tarballs



64
65
66
# File 'lib/right_scraper/retrievers/download.rb', line 64

# Directory used to download tarballs: a "download" directory placed next to
# @repo_dir (same parent). The path is computed on first use and cached in
# @workdir.
#
# === Return
# String:: path of the download directory
def workdir
  return @workdir if @workdir

  parent_dir = ::File.dirname(@repo_dir)
  @workdir = ::File.join(parent_dir, 'download')
end