Class: Swineherd::S3FileSystem

Inherits:
Object
  • Object
show all
Includes:
BaseFileSystem
Defined in:
lib/swineherd/filesystem/s3filesystem.rb

Overview

Methods for interacting with Amazon’s Simple Storage Service (S3).

Defined Under Namespace

Classes: S3File

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from BaseFileSystem

#check_paths

Constructor Details

#initialize(aws_access_key_id, aws_secret_access_key) ⇒ S3FileSystem

Initialize a new S3 file system. Requires the AWS access key id and the AWS secret access key (the keys themselves, not a path to them).



16
17
18
19
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 16

# Build a new S3 filesystem object backed by a RightAws::S3 client.
#
# @param aws_access_key_id     AWS access key id, passed straight to RightAws::S3.new
# @param aws_secret_access_key AWS secret access key, passed straight to RightAws::S3.new
def initialize(aws_access_key_id, aws_secret_access_key)
  # The gem is required here rather than at file load time, so it is only
  # needed once a filesystem is actually constructed.
  require 'right_aws'
  @s3 = RightAws::S3.new(aws_access_key_id, aws_secret_access_key)
end

Instance Attribute Details

#s3 ⇒ Object

Returns the value of attribute s3.



11
12
13
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 11

# Reader for the @s3 instance variable (assigned in #initialize to a
# RightAws::S3 instance).
#
# @return [Object] the current value of @s3
def s3
  @s3
end

Instance Method Details

#bucket(path) ⇒ Object



46
47
48
49
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 46

# Extract the bucket name from a path: the first non-empty '/'-separated
# segment of the URI path component.
#
# @param path [String] e.g. "bucket/some/key" or "/bucket/some/key"
# @return [String, nil] the bucket segment, or nil when the path has no
#   non-empty segment
def bucket(path)
  segments = URI.parse(path).path.split('/')
  segments.find { |segment| !segment.empty? }
end

#close(*args) ⇒ Object



181
182
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 181

# No-op: accepts and ignores any arguments.
#
# @return [nil]
def close(*args)
  nil
end

#common_directory(paths) ⇒ Object

Ick. Returns the leading path components shared by every path in `paths`, joined with '/'.



173
174
175
176
177
178
179
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 173

# Find the leading directory shared by every path in +paths+.
#
# Each path is split on '/', all paths are truncated to the length of the
# shortest one, and the components before the first position where the paths
# disagree are joined back together with '/'.
#
# When no position disagrees (a single path, identical paths, or one path
# that is a prefix of the others) the last component of the shortest path is
# treated as a file name and dropped, so the result is that path's parent.
# Previously this case raised NoMethodError on nil.
#
# @param paths [Array<String>] '/'-separated paths
# @return [String] the shared leading directory; '' when +paths+ is empty or
#   nothing is shared
def common_directory(paths)
  return '' if paths.empty?

  dirs     = paths.map { |path| path.split('/') }
  min_size = dirs.map { |splits| splits.size }.min
  # Transposing the truncated rows gives one array per path position.
  columns  = dirs.map { |splits| splits[0...min_size] }.transpose
  uncommon_idx = columns.index { |dirnames| dirnames.uniq.length > 1 }
  # No disagreement found: fall back to the parent of the shortest path.
  uncommon_idx ||= [min_size - 1, 0].max
  dirs.first[0...uncommon_idx].join('/')
end

#cp(srcpath, dstpath) ⇒ Object



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 113

# Copy a file or a directory tree from +srcpath+ to +dstpath+.
#
# mkpath(dstpath) is always called first. The source is then classified with
# type():
# * "directory" - every path returned by lr(srcpath) is copied; its
#   destination key is the destination key path joined with the part of the
#   source path that follows common_directory(paths).
# * "file"      - the single key is copied to the destination key path.
# * anything else - nothing is copied.
#
# @param srcpath [String] source path ("bucket/key...")
# @param dstpath [String] destination path ("bucket/key...")
# @return [Array<String>, Object, nil] the copied source paths for a
#   directory, the result of the copy call for a file, nil otherwise
def cp(srcpath, dstpath)
  src_bucket   = bucket(srcpath)
  dst_bucket   = bucket(dstpath)
  dst_key_path = key_path(dstpath)
  mkpath(dstpath)
  case type(srcpath)
  when "directory"
    paths_to_copy = lr(srcpath)
    common_dir    = common_directory(paths_to_copy)
    paths_to_copy.each do |path|
      # Strip only the leading common directory. gsub would also delete any
      # later repetition of the same text inside the key, corrupting the
      # destination key.
      relative = path.sub(common_dir, '')
      dst_key  = File.join(dst_key_path, relative)
      @s3.interface.copy(src_bucket, key_path(path), dst_bucket, dst_key)
    end
  when "file"
    @s3.interface.copy(src_bucket, key_path(srcpath), dst_bucket, dst_key_path)
  end
end

#entries(dirpath) ⇒ Object



155
156
157
158
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 155

# List the immediate contents of a directory.
#
# @param dirpath [String] path to inspect
# @return [Array<String>, nil] full_contents(dirpath) when type(dirpath) is
#   "directory", otherwise nil
def entries(dirpath)
  full_contents(dirpath) if type(dirpath) == "directory"
end

#exists?(path) ⇒ Boolean

Returns:

  • (Boolean)


74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 74

# Check whether +path+ exists.
#
# * When +path+ has no directory part (File.dirname is '.'), it is treated as
#   a bucket name: the bucket "exists" when listing it yields at least one
#   entry. A RightAws::AwsError whose message matches /nosuchbucket/i is
#   turned into false; any other RightAws::AwsError is re-raised.
# * Otherwise the parent directory is listed and the basename of +path+ is
#   looked up among the basenames of the listed entries.
#
# @param path [String] bucket name or "bucket/key..." path
# @return [Boolean]
# @raise [RightAws::AwsError] for bucket lookups failing for another reason
def exists?(path)
  name   = File.basename(path)
  parent = File.dirname(path)

  unless parent == '.'
    siblings = full_contents(parent).map { |entry| File.basename(entry).delete('/') }
    return siblings.include?(name)
  end

  # only a bucket was passed in
  begin
    full_contents(name).size > 0
  rescue RightAws::AwsError => err
    raise err unless err.message =~ /nosuchbucket/i
    false
  end
end

#full_contents(path) ⇒ Object



62
63
64
65
66
67
68
69
70
71
72
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 62

# List everything directly under +path+, one level deep.
#
# The key part of +path+ is used as the listing prefix (with a trailing '/'
# appended when needs_trailing_slash says so) and '/' as the delimiter. Each
# yielded result's :common_prefixes and the :key of each of its :contents are
# collected, prefixed with the bucket name.
#
# @param path [String] bucket name or "bucket/key..." path
# @return [Array<String>] "bucket/..." paths: for each yielded result, its
#   common prefixes first, then its content keys
def full_contents(path)
  bkt    = bucket(path)
  prefix = key_path(path)
  prefix = "#{prefix}/" if needs_trailing_slash(prefix)
  listing_opts = {'prefix' => prefix, 'delimiter' => '/'}

  found = []
  s3.interface.incrementally_list_bucket(bkt, listing_opts) do |page|
    page[:common_prefixes].each { |dir| found << File.join(bkt, dir) }
    page[:contents].each { |obj| found << File.join(bkt, obj[:key]) }
  end
  found
end

#key_path(path) ⇒ Object



51
52
53
54
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 51

# Extract the key portion of a path: every non-empty '/'-separated segment
# after the first one (the bucket), joined with File.join.
#
# A path with no segments at all ("" or "/") yields ''. Previously slicing
# the empty segment list produced nil and File.join raised TypeError.
#
# @param path [String] e.g. "bucket/some/key"
# @return [String] the key ("some/key"), or '' when there is none
def key_path(path)
  segments = URI.parse(path).path.split('/').reject { |segment| segment.empty? }
  # drop(1) returns [] for an empty array, unlike [1..-1] which returns nil.
  File.join(segments.drop(1))
end

#lr(path) ⇒ Object

Recursively list paths



161
162
163
164
165
166
167
168
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 161

# Recursively list the paths beneath +path+.
#
# @param path [String] path to walk
# @return [Array<String>, String] a flat array of the leaf paths when
#   entries(path) returns a listing; +path+ itself (a String, not an Array)
#   when entries(path) returns nil
def lr(path)
  children = entries(path)
  return path unless children

  children.map { |child| lr(child) }.flatten
end

#mkpath(path) ⇒ Object

This is a bit funny, there’s actually no need to create a ‘path’ since s3 is nothing more than a glorified key-value store. When you create a ‘file’ (key) the ‘path’ will be created for you. All we do here is create the bucket unless it already exists.



138
139
140
141
142
143
144
145
146
147
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 138

# Ensure the bucket named in +path+ exists; no key is ever created.
#
# When +path+ names only a bucket, create_bucket is called unconditionally.
# When it also has a key part, create_bucket is called only if exists?
# reports the bucket as missing.
#
# @param path [String] bucket name or "bucket/key..." path
# @return [String] +path+, unchanged
def mkpath(path)
  bkt         = bucket(path)
  bucket_only = key_path(path).empty?
  # exists? is only consulted when a key part is present
  @s3.interface.create_bucket(bkt) if bucket_only || !exists?(bkt)
  path
end

#mv(srcpath, dstpath) ⇒ Object



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 94

# Move a file or a directory tree from +srcpath+ to +dstpath+.
#
# mkpath(dstpath) is always called first. The source is then classified with
# type():
# * "directory" - every path returned by lr(srcpath) is moved; its
#   destination key is the destination key path joined with the part of the
#   source path that follows common_directory(paths).
# * "file"      - the single key is moved to the destination key path.
# * anything else - nothing is moved.
#
# @param srcpath [String] source path ("bucket/key...")
# @param dstpath [String] destination path ("bucket/key...")
# @return [Array<String>, Object, nil] the moved source paths for a
#   directory, the result of the move call for a file, nil otherwise
def mv(srcpath, dstpath)
  src_bucket   = bucket(srcpath)
  dst_bucket   = bucket(dstpath)
  dst_key_path = key_path(dstpath)
  mkpath(dstpath)
  case type(srcpath)
  when "directory"
    paths_to_move = lr(srcpath)
    common_dir    = common_directory(paths_to_move)
    paths_to_move.each do |path|
      # Strip only the leading common directory. gsub would also delete any
      # later repetition of the same text inside the key, corrupting the
      # destination key.
      relative = path.sub(common_dir, '')
      dst_key  = File.join(dst_key_path, relative)
      @s3.interface.move(src_bucket, key_path(path), dst_bucket, dst_key)
    end
  when "file"
    @s3.interface.move(src_bucket, key_path(srcpath), dst_bucket, dst_key_path)
  end
end

#needs_trailing_slash(pre) ⇒ Object



56
57
58
59
60
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 56

# Decide whether a listing prefix still needs a '/' appended.
#
# @param pre [String] key prefix
# @return [Boolean] true only for a non-empty prefix that does not already
#   end with '/'
def needs_trailing_slash(pre)
  !pre.end_with?('/') && !pre.empty?
end

#open(path, mode = "r", &blk) ⇒ Object



21
22
23
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 21

# Open +path+ by building an S3File bound to this filesystem.
#
# @param path [String] path to open
# @param mode [String] open mode, "r" by default; passed through to S3File
# @param blk  optional block, forwarded to S3File.new
# @return [S3File]
def open(path, mode = "r", &blk)
  S3File.new(path, mode, self, &blk)
end

#rm(path) ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 25

# Remove a bucket, a directory tree, or a single file.
#
# * Bucket only (empty key): force_delete_bucket is called on the bucket.
# * "directory": every path returned by lr(path) is deleted from the bucket
#   of +path+.
# * "file": the single key is deleted.
# * Any other type: nothing is deleted.
#
# @param path [String] bucket name or "bucket/key..." path
# @return [Object, Array<String>, nil] the force_delete_bucket result for a
#   bucket, the list of removed paths for a directory, [path] for a file,
#   nil otherwise
def rm(path)
  bkt = bucket(path)
  key = key_path(path)
  # only the bucket was passed in, delete it
  return @s3.interface.force_delete_bucket(bkt) if key.empty?

  kind = type(path)
  if kind == "directory"
    doomed = lr(path)
    doomed.each { |full_path| @s3.interface.delete(bkt, key_path(full_path)) }
    doomed
  elsif kind == "file"
    @s3.interface.delete(bkt, key)
    [path]
  end
end

#type(path) ⇒ Object



149
150
151
152
153
# File 'lib/swineherd/filesystem/s3filesystem.rb', line 149

# Classify +path+.
#
# @param path [String] path to classify
# @return [String] "unknown" when exists?(path) is false, "directory" when
#   listing the path yields at least one entry, otherwise "file"
def type(path)
  if !exists?(path)
    "unknown"
  elsif full_contents(path).size > 0
    "directory"
  else
    "file"
  end
end