Class: Swineherd::S3FileSystem

Inherits:
Object
  • Object
show all
Includes:
BaseFileSystem
Defined in:
lib/swineherd/filesystem/s3filesystem.rb

Overview

Methods for interacting with Amazon’s Simple Storage Service (S3).

Defined Under Namespace

Classes: S3File

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from BaseFileSystem

#check_paths

Constructor Details

#initialize(aws_access_key_id, aws_secret_access_key) ⇒ S3FileSystem

Initialize a new S3 file system. Requires the AWS access key ID and the AWS secret access key themselves (not a path to them).



16
17
18
19
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 16

# Build a file system object backed by a RightAws::S3 client.
#
# @param aws_access_key_id AWS access key id, passed straight to RightAws::S3.new
# @param aws_secret_access_key AWS secret access key, passed straight to RightAws::S3.new
def initialize(aws_access_key_id, aws_secret_access_key)
  # Required here rather than at file load, so right_aws is only needed
  # once an instance is actually constructed.
  require 'right_aws'
  credentials = [aws_access_key_id, aws_secret_access_key]
  @s3 = RightAws::S3.new(*credentials)
end

Instance Attribute Details

#s3 ⇒ Object

Returns the value of attribute s3.



11
12
13
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 11

# Reader for the client stored in @s3 by #initialize.
#
# @return [Object] the value of @s3 (nil if it has not been set)
def s3
  instance_variable_get(:@s3)
end

Instance Method Details

#bucket(path) ⇒ Object



64
65
66
67
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 64

# Extract the bucket name from +path+: the first non-empty '/'-separated
# segment of the path component that URI.parse reports.
#
# @param path [String] a path such as "bucket/some/key"
# @return [String, nil] the bucket segment, or nil when there are no segments
def bucket(path)
  segments = URI.parse(path).path.split('/')
  segments.find { |segment| !segment.empty? }
end

#close(*args) ⇒ Object



219
220
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 219

# No-op: accepts any arguments, does nothing, and returns nil.
#
# @return [nil]
def close(*args)
  nil
end

#common_directory(paths) ⇒ Object

Ick. Returns the leading directory components shared by all of the given paths.



201
202
203
204
205
206
207
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 201

# Find the leading directory shared by every path in +paths+.
#
# Paths are split on '/', truncated to the length of the shortest one, and
# compared component by component; the components before the first
# disagreement are joined back together with '/'.
#
# When no component disagrees (a single path, duplicate paths, or one path
# being a prefix of another) the last compared component is treated as an
# entry name rather than a directory, so the parent of the shortest path is
# returned. An empty list yields ''.
#
# @param paths [Array<String>] '/'-separated paths
# @return [String] the shared leading directory, '' when nothing is shared
def common_directory(paths)
  dirs = paths.map { |path| path.split('/') }
  return '' if dirs.empty?

  min_size = dirs.map { |splits| splits.size }.min
  columns  = dirs.map { |splits| splits[0...min_size] }.transpose

  # Index of the first component on which the paths disagree, if any.
  uncommon_idx = columns.index { |dirnames| dirnames.uniq.length > 1 }
  # No disagreement: fall back to the shortest path's parent. Clamp at 0 so
  # a zero-length path cannot turn the range below into 0...-1.
  uncommon_idx ||= [min_size - 1, 0].max

  dirs.first[0...uncommon_idx].join('/')
end

#copy_to_local(srcpath, dstpath) ⇒ Object

right now this only works on single files



151
152
153
154
155
156
157
158
159
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 151

# Download a single S3 object to a local file.
#
# The object is fetched in chunks through @s3.interface.get and each chunk
# is written to +dstpath+ as it arrives. The local file is opened in binary
# mode so the bytes are written untouched, and in block form so the handle
# is flushed and closed even if the download raises part-way through.
#
# @param srcpath [String] source path ("bucket/key...")
# @param dstpath [String] local file to create or overwrite
# @return [nil]
def copy_to_local(srcpath, dstpath)
  src_bucket   = bucket(srcpath)
  src_key_path = key_path(srcpath)
  File.open(dstpath, 'wb') do |dstfile|
    @s3.interface.get(src_bucket, src_key_path) do |chunk|
      dstfile.write(chunk)
    end
  end
  nil
end

#cp(srcpath, dstpath) ⇒ Object



131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 131

# Copy a file, or every file under a directory, from +srcpath+ to +dstpath+.
#
# The destination bucket is prepared first via #mkpath. For a directory
# source, every path returned by #lr is copied to the destination key plus
# that path's remainder after the directory common to all listed paths.
# A source whose #type is neither "directory" nor "file" copies nothing.
#
# @param srcpath [String] source path ("bucket/key...")
# @param dstpath [String] destination path ("bucket/key...")
# @return [Object] the listed paths for a directory source, the result of
#   the underlying copy call for a file source, nil otherwise
def cp(srcpath, dstpath)
  src_bucket   = bucket(srcpath)
  dst_bucket   = bucket(dstpath)
  dst_key_path = key_path(dstpath)
  mkpath(dstpath)
  case type(srcpath)
  when "directory"
    paths_to_copy = lr(srcpath)
    common_dir    = common_directory(paths_to_copy)
    paths_to_copy.each do |path|
      # Strip common_dir only where it leads the path; a global substitution
      # would also remove later occurrences of the same text inside the key.
      relative = path.start_with?(common_dir) ? path[common_dir.length..-1] : path
      dst_key  = File.join(dst_key_path, relative)
      @s3.interface.copy(src_bucket, key_path(path), dst_bucket, dst_key)
    end
  when "file"
    @s3.interface.copy(src_bucket, key_path(srcpath), dst_bucket, dst_key_path)
  end
end

#entries(dirpath) ⇒ Object



183
184
185
186
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 183

# List the immediate contents of +dirpath+.
#
# @param dirpath [String] path to inspect
# @return [Array<String>, nil] the result of #full_contents when #type
#   reports "directory", nil for anything else
def entries(dirpath)
  if type(dirpath) == "directory"
    full_contents(dirpath)
  else
    nil
  end
end

#exists?(path) ⇒ Boolean

Returns:

  • (Boolean)


92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 92

# Report whether +path+ names something that exists.
#
# A bare bucket name (File.dirname is '.') counts as existing when listing
# it returns at least one entry. Any other path exists when its basename
# appears among the basenames of the entries listed for its parent.
#
# A RightAws::AwsError whose message matches /nosuchbucket/i is treated as
# "does not exist" for both kinds of path, since nothing can exist inside a
# missing bucket; every other error is re-raised.
#
# @param path [String] bucket name or "bucket/key..." path
# @return [Boolean]
# @raise [RightAws::AwsError] for any failure other than a missing bucket
def exists?(path)
  object     = File.basename(path)
  search_dir = File.dirname(path)
  if search_dir == '.' # only a bucket was passed in
    !full_contents(object).empty?
  else
    full_contents(search_dir).any? { |entry| File.basename(entry).delete('/') == object }
  end
rescue RightAws::AwsError => e
  raise unless e.message =~ /nosuchbucket/i
  false
end

#file_size(path) ⇒ Object



37
38
39
40
41
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 37

# Size of a single object, read from the 'content-length' entry of the
# headers returned by a HEAD request on its bucket and key.
#
# @param path [String] object path ("bucket/key...")
# @return [Integer] the 'content-length' value converted with to_i
def file_size(path)
  bkt, key = bucket(path), key_path(path)
  headers  = @s3.interface.head(bkt, key)
  headers['content-length'].to_i
end

#full_contents(path) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 80

# List everything directly under +path+, one level deep.
#
# The key portion of +path+ (with a trailing '/' appended when
# #needs_trailing_slash says so) is used as the listing prefix and '/' as
# the delimiter. Each page yielded by the listing contributes its common
# prefixes first, then its object keys, every one joined onto the bucket
# name.
#
# @param path [String] bucket name or "bucket/key..." path
# @return [Array<String>] "bucket/..." paths in the order they were listed
def full_contents(path)
  bkt    = bucket(path)
  prefix = key_path(path)
  prefix = prefix + '/' if needs_trailing_slash(prefix)
  listing_options = { 'prefix' => prefix, 'delimiter' => '/' }

  found = []
  s3.interface.incrementally_list_bucket(bkt, listing_options) do |page|
    page[:common_prefixes].each { |dir| found << File.join(bkt, dir) }
    page[:contents].each { |object| found << File.join(bkt, object[:key]) }
  end
  found
end

#key_path(path) ⇒ Object



69
70
71
72
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 69

# Extract the key portion of +path+: every non-empty '/'-separated segment
# after the first (the first being the bucket), re-joined with File.join.
#
# @param path [String] a path such as "bucket/some/key"
# @return [String] the key, or '' when the path holds at most a bucket name
def key_path(path)
  segments = URI.parse(path).path.split('/').reject { |segment| segment.empty? }
  # drop(1) gives [] when there are no segments at all; slicing with [1..-1]
  # gives nil in that case, and File.join(nil) raises TypeError.
  File.join(segments.drop(1))
end

#lr(path) ⇒ Object

Recursively list paths



189
190
191
192
193
194
195
196
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 189

# Recursively list every path beneath +path+.
#
# @param path [String] starting path
# @return [Array<String>, String] a flat array of the paths found by
#   descending through #entries; +path+ itself (not wrapped in an array)
#   when #entries returns nil for it
def lr(path)
  children = entries(path)
  return path unless children

  children.map { |child| lr(child) }.flatten
end

#mkpath(path) ⇒ Object

This is a bit funny: there’s actually no need to create a ‘path’, since S3 is nothing more than a glorified key-value store. When you create a ‘file’ (key), the ‘path’ is created for you. All we do here is make sure the bucket exists: it is created unconditionally when only a bucket is given, and otherwise only if it does not already exist.



166
167
168
169
170
171
172
173
174
175
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 166

# Make sure the bucket named in +path+ exists. No key is created.
#
# When +path+ is just a bucket the create call is always issued; when it
# also carries a key, the bucket is created only if #exists? says it is
# missing.
#
# @param path [String] bucket name or "bucket/key..." path
# @return [String] +path+, unchanged
def mkpath(path)
  bkt         = bucket(path)
  bucket_only = key_path(path).empty?
  # Short-circuit keeps the existence check off the bucket-only route.
  @s3.interface.create_bucket(bkt) if bucket_only || !exists?(bkt)
  path
end

#mv(srcpath, dstpath) ⇒ Object



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 112

# Move a file, or every file under a directory, from +srcpath+ to +dstpath+.
#
# The destination bucket is prepared first via #mkpath. For a directory
# source, every path returned by #lr is moved to the destination key plus
# that path's remainder after the directory common to all listed paths.
# A source whose #type is neither "directory" nor "file" moves nothing.
#
# @param srcpath [String] source path ("bucket/key...")
# @param dstpath [String] destination path ("bucket/key...")
# @return [Object] the listed paths for a directory source, the result of
#   the underlying move call for a file source, nil otherwise
def mv(srcpath, dstpath)
  src_bucket   = bucket(srcpath)
  dst_bucket   = bucket(dstpath)
  dst_key_path = key_path(dstpath)
  mkpath(dstpath)
  case type(srcpath)
  when "directory"
    paths_to_move = lr(srcpath)
    common_dir    = common_directory(paths_to_move)
    paths_to_move.each do |path|
      # Strip common_dir only where it leads the path; a global substitution
      # would also remove later occurrences of the same text inside the key.
      relative = path.start_with?(common_dir) ? path[common_dir.length..-1] : path
      dst_key  = File.join(dst_key_path, relative)
      @s3.interface.move(src_bucket, key_path(path), dst_bucket, dst_key)
    end
  when "file"
    @s3.interface.move(src_bucket, key_path(srcpath), dst_bucket, dst_key_path)
  end
end

#needs_trailing_slash(pre) ⇒ Object



74
75
76
77
78
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 74

# Decide whether a listing prefix still needs a '/' appended.
#
# @param pre [String] key prefix
# @return [Boolean] true only when +pre+ is non-empty and does not already
#   end with '/'
def needs_trailing_slash(pre)
  return false if pre.empty?

  !pre.end_with?('/')
end

#open(path, mode = "r", &blk) ⇒ Object



21
22
23
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 21

# Build an S3File for +path+, handing it the mode, this file system and any
# block that was given.
#
# @param path [String] path of the object to open
# @param mode [String] mode string, "r" by default
# @return [S3File]
def open(path, mode = "r", &blk)
  file_args = [path, mode, self]
  S3File.new(*file_args, &blk)
end

#put(srcpath, destpath) ⇒ Object



209
210
211
212
213
214
215
216
217
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 209

# Upload a local file into the bucket named by +destpath+.
#
# The local file is opened in binary mode and in block form, so its bytes
# are sent untouched and the handle is closed once the upload call returns
# or raises. Directory uploads are not implemented and are rejected up
# front instead of being attempted with a nil key.
#
# NOTE(review): the object key is the local +srcpath+, not the key portion
# of +destpath+ (only the bucket of +destpath+ is used) — confirm intended.
#
# @param srcpath [String] local file to upload
# @param destpath [String] destination path; only its bucket is used
# @return [Object] whatever the underlying put call returns
# @raise [Errno::EISDIR] when +srcpath+ is a directory
def put(srcpath, destpath)
  dest_bucket = bucket(destpath)
  raise Errno::EISDIR, srcpath if File.directory?(srcpath)

  key = srcpath
  File.open(srcpath, 'rb') do |srcfile|
    @s3.interface.put(dest_bucket, key, srcfile)
  end
end

#rm(path) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 43

# Remove a bucket, a directory's worth of keys, or a single key.
#
# @param path [String] bucket name or "bucket/key..." path
# @return [Object] the result of force_delete_bucket when only a bucket is
#   given; the array of removed paths for a directory or a file; nil when
#   #type reports anything else
def rm(path)
  bkt = bucket(path)
  key = key_path(path)
  # Only the bucket was passed in: delete it outright.
  return @s3.interface.force_delete_bucket(bkt) if key.empty?

  kind = type(path)
  if kind == "directory"
    doomed = lr(path)
    doomed.each { |entry| @s3.interface.delete(bkt, key_path(entry)) }
    doomed
  elsif kind == "file"
    @s3.interface.delete(bkt, key)
    [path]
  end
end

#size(path) ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 25

# Total size of +path+: the sum of #file_size over everything #lr finds
# when #type reports a directory, otherwise the #file_size of +path+ itself.
#
# @param path [String] path to measure
# @return [Integer] accumulated size, starting from 0
def size(path)
  targets = type(path) == "directory" ? lr(path) : [path]
  targets.inject(0) { |total, entry| total + file_size(entry) }
end

#type(path) ⇒ Object



177
178
179
180
181
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 177

# Classify +path+.
#
# @param path [String] path to classify
# @return [String] "unknown" when #exists? is false, "directory" when
#   #full_contents returns at least one entry, "file" otherwise
def type(path)
  if !exists?(path)
    "unknown"
  elsif full_contents(path).size > 0
    "directory"
  else
    "file"
  end
end