Class: Remi::Extractor::SftpFile

Inherits:
FileSystem show all
Includes:
DataSubject::SftpFile
Defined in:
lib/remi/data_subjects/sftp_file.rb

Overview

SFTP file extractor. Used to extract files from an SFTP server.

# Example: define a job whose source downloads the most recent matching CSV
# file from an SFTP server and parses it.
class MyJob < Remi::Job
  source :some_file do
    extractor Remi::Extractor::SftpFile.new(
      credentials: {
        host: 'coolserver.com',
        username: 'myself',
        password: 'secret'
      },
      remote_path: '/',
      # "some_file_" followed by exactly 14 digits and a literal ".csv"
      pattern: /^some_file_\d{14}\.csv/,
      most_recent_only: true
    )

    parser Remi::Parser::CsvFile.new(
      csv_options: {
        headers: true,
        col_sep: ','
      }
    )
  end
end

job = MyJob.new
job.some_file.df
# =>#
#      id   name
#  0    1   Albert
#  1    2   Betsy
#  2    3   Camu

Instance Attribute Summary collapse

Attributes included from DataSubject::SftpFile

#sftp_session

Attributes inherited from FileSystem

#created_within, #group_by, #local_path, #most_recent_by, #most_recent_only, #pattern, #remote_path

Attributes inherited from Remi::Extractor

#logger

Instance Method Summary collapse

Methods included from DataSubject::SftpFile

#begin_connection, #end_connection, #sftp_retry

Methods inherited from FileSystem

#entries, #get_created_within, #matching_entries, #most_recent_matching_entry, #most_recent_matching_entry_in_group

Constructor Details

#initialize(*args, **kargs, &block) ⇒ SftpFile

Returns a new instance of SftpFile.

Parameters:

  • credentials (Hash)

    Options hash containing login credentials

  • credentials[:host] (String)

    SFTP host (e.g., coolserver.com)

  • credentials[:username] (String)

    SFTP username

  • credentials[:password] (String)

    SFTP password

  • credentials[:port] (String)

    SFTP port (default: 22)

  • retries (Integer)

    Number of times a connection or operation will be retried (default: 3)

  • timeout (Integer)

    Number of seconds to wait for establishing/closing a connection (default: 30)



87
88
89
90
# File 'lib/remi/data_subjects/sftp_file.rb', line 87

# Builds a new SftpFile extractor.
#
# The bare +super+ forwards the same positional arguments, keyword arguments
# and block to the parent initializer before the SFTP-specific setup runs.
#
# @param args [Array] positional arguments, passed through unchanged
# @param kargs [Hash] keyword arguments, passed through unchanged
# @param block [Proc] optional block; forwarded to the parent initializer only
def initialize(*args, **kargs, &block)
  super
  # SFTP-specific setup. init_sftp_extractor is defined outside this view;
  # NOTE(review): presumably it consumes the credentials/retries/timeout
  # options described for this constructor — confirm against its definition.
  init_sftp_extractor(*args, **kargs)
end

Instance Attribute Details

#host ⇒ Object (readonly)

Returns the value of attribute host.



92
93
94
# File 'lib/remi/data_subjects/sftp_file.rb', line 92

# Reader for the @host instance variable.
# NOTE(review): presumably the SFTP host taken from the credentials hash; the
# assignment is not visible here — confirm.
#
# @return [Object] the current value of @host
def host
  @host
end

#password ⇒ Object (readonly)

Returns the value of attribute password.



94
95
96
# File 'lib/remi/data_subjects/sftp_file.rb', line 94

# Reader for the @password instance variable.
# NOTE(review): presumably the SFTP password taken from the credentials hash;
# the assignment is not visible here — confirm. If so, this reader exposes the
# secret to any caller holding the extractor, so avoid logging its value.
#
# @return [Object] the current value of @password
def password
  @password
end

#port ⇒ Object (readonly)

Returns the value of attribute port.



95
96
97
# File 'lib/remi/data_subjects/sftp_file.rb', line 95

# Reader for the @port instance variable.
# NOTE(review): presumably the SFTP port taken from the credentials hash; the
# assignment is not visible here — confirm.
#
# @return [Object] the current value of @port
def port
  @port
end

#username ⇒ Object (readonly)

Returns the value of attribute username.



93
94
95
# File 'lib/remi/data_subjects/sftp_file.rb', line 93

# Reader for the @username instance variable.
# NOTE(review): presumably the SFTP username taken from the credentials hash;
# the assignment is not visible here — confirm.
#
# @return [Object] the current value of @username
def username
  @username
end

Instance Method Details

#all_entries ⇒ Array<Extractor::FileSystemEntry>

Returns the (memoized) list of entries found in the remote path.

Returns:

  • (Array<Extractor::FileSystemEntry>)



113
114
115
# File 'lib/remi/data_subjects/sftp_file.rb', line 113

# Cached view of the remote directory listing.
#
# The listing is fetched through #all_entries! the first time it is needed
# and the result is reused for as long as @all_entries holds a truthy value.
#
# @return [Array<Extractor::FileSystemEntry>] the cached listing
def all_entries
  return @all_entries if @all_entries

  @all_entries = all_entries!
end

#all_entries! ⇒ Array<Extractor::FileSystemEntry>

Returns the list of entries found in the remote path, always re-reading it from the server (not memoized).

Returns:

  • (Array<Extractor::FileSystemEntry>)



118
119
120
121
122
123
124
125
126
127
# File 'lib/remi/data_subjects/sftp_file.rb', line 118

# Lists the entries of the remote directory, querying the SFTP session on
# every call (nothing is cached here; see #all_entries for the cached form).
#
# Each item returned by the directory listing of @remote_path is wrapped in
# a FileSystemEntry whose pathname is @remote_path joined with the entry name.
#
# @return [Array<Extractor::FileSystemEntry>] one entry per listed item
def all_entries!
  sftp_session.dir.entries(@remote_path).map do |entry|
    attributes = entry.attributes
    modified_time = attributes.mtime

    # Early versions of the protocol don't support create time, and an
    # attributes object that does expose it may still leave it unset. In both
    # cases fake it with the modified time rather than passing nil along.
    create_time = attributes.createtime if attributes.respond_to?(:createtime)

    FileSystemEntry.new(
      pathname: File.join(@remote_path, entry.name),
      create_time: create_time || modified_time,
      modified_time: modified_time
    )
  end
end

#extract ⇒ Array<String>

Called to extract files from the source filesystem.

Returns:

  • (Array<String>)

    An array of paths to a local copy of the files extracted



99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/remi/data_subjects/sftp_file.rb', line 99

# Downloads every entry returned by #entries into @local_path.
#
# A connection is opened first and is always closed again through the
# +ensure+ clause, whether or not a download raises. Each download is logged
# and then run inside #sftp_retry.
#
# @return [Array<String>] local paths of the downloaded files, in the order
#   the entries were processed
def extract
  begin_connection

  entries.map do |entry|
    filename = entry.name
    remote_file = File.join(@remote_path, filename)

    File.join(@local_path, filename).tap do |local_file|
      logger.info "Downloading #{filename} to #{local_file}"
      sftp_retry { sftp_session.download!(remote_file, local_file) }
    end
  end
ensure
  end_connection
end