Module: Hdfs

Includes:
Java
Defined in:
lib/hdfs_jruby.rb,
lib/hdfs_jruby/file.rb,
lib/hdfs_jruby/version.rb

Defined Under Namespace

Classes: Configuration, File, FileSystem, FsPermission, Path

Constant Summary collapse

JAR_PATTERN_0_20 =
"hadoop-core-*.jar"
HADOOP_HOME =
VERSION =
"0.0.11"

Class Method Summary collapse

Class Method Details

._conv(stat) ⇒ Object



251
252
253
254
255
256
257
258
259
260
261
# File 'lib/hdfs_jruby.rb', line 251

# Converts a file-status object into a plain Hash of string keys.
#
# Keys produced: 'path', 'length', 'modificationTime', 'owner', 'group',
# 'permission' and 'type' ('DIRECTORY' when stat.isDir is truthy, else 'FILE').
#
# @param stat [Object] status object responding to getPath, getLen,
#   getModificationTime, getOwner, getGroup, getPermission and isDir
# @return [Hash] file information keyed by String
def _conv(stat)
  {
    'path'             => stat.getPath.to_s,
    'length'           => stat.getLen.to_i,
    'modificationTime' => stat.getModificationTime.to_i,
    'owner'            => stat.getOwner.to_s,
    'group'            => stat.getGroup.to_s,
    'permission'       => stat.getPermission.toShort.to_i,
    'type'             => stat.isDir ? 'DIRECTORY' : 'FILE'
  }
end

._path(path) ⇒ Object



243
244
245
246
247
248
# File 'lib/hdfs_jruby.rb', line 243

# Wraps +path+ in a Path object, rejecting nil up front.
#
# @param path [String] path to wrap
# @return [Path] result of Path.new(path)
# @raise [RuntimeError] "path is nil" when path is nil
def _path(path)
  raise "path is nil" if path.nil?

  Path.new(path)
end

.connectAsUser(user) ⇒ Object



56
57
58
59
60
# File 'lib/hdfs_jruby.rb', line 56

# Reconnects the module's filesystem handle (@fs) as the given user.
#
# The default URI is resolved from @conf first; any existing @fs is then
# closed before a new handle is obtained from Hdfs::FileSystem.get.
#
# @param user [String] user name passed through to Hdfs::FileSystem.get
# @return [Object] the new filesystem handle (also stored in @fs)
def connectAsUser(user)
  default_uri = Hdfs::FileSystem.getDefaultUri(@conf)
  # Safe navigation skips the close only when no handle exists yet (nil).
  @fs&.close
  @fs = Hdfs::FileSystem.get(default_uri, @conf, user)
end

.delete(path, r = false) ⇒ Object

delete

Parameters:

  • path (String)
  • r (Boolean) (defaults to: false)

    recursive false or true (default: false)



148
149
150
# File 'lib/hdfs_jruby.rb', line 148

# Deletes +path+ through the filesystem handle.
#
# @param path [String] hdfs path
# @param r [Boolean] recursive flag forwarded to @fs.delete (default: false)
# @return [Object] whatever @fs.delete returns
# @raise [RuntimeError] when path is nil (raised by _path)
def delete(path, r=false)
  target = _path(path)
  @fs.delete(target, r)
end

.directory?(path) ⇒ Boolean

Returns true: directory, false: file.

Returns:

  • (Boolean)

    true: directory, false: file



158
159
160
# File 'lib/hdfs_jruby.rb', line 158

# Asks the filesystem handle whether +path+ is a directory.
#
# @param path [String] hdfs path
# @return [Boolean] result of @fs.isDirectory for the path
# @raise [RuntimeError] when path is nil (raised by _path)
def directory?(path)
  target = _path(path)
  @fs.isDirectory(target)
end

.exists?(path) ⇒ Boolean

Parameters:

  • path (String)

Returns:

  • (Boolean)


134
135
136
# File 'lib/hdfs_jruby.rb', line 134

# Asks the filesystem handle whether +path+ exists.
#
# @param path [String] hdfs path
# @return [Boolean] result of @fs.exists for the path
# @raise [RuntimeError] when path is nil (raised by _path)
def exists?(path)
  target = _path(path)
  @fs.exists(target)
end

.file?(path) ⇒ Boolean

Returns true: file, false: directory.

Returns:

  • (Boolean)

    true: file, false: directory



153
154
155
# File 'lib/hdfs_jruby.rb', line 153

# Asks the filesystem handle whether +path+ is a regular file.
#
# @param path [String] hdfs path
# @return [Boolean] result of @fs.isFile for the path
# @raise [RuntimeError] when path is nil (raised by _path)
def file?(path)
  target = _path(path)
  @fs.isFile(target)
end

.get(remote, local) ⇒ Object

get file or directory from hdfs

Parameters:

  • remote (String)

    source (hdfs path)

  • local (String)

    destination (local path)



183
184
185
# File 'lib/hdfs_jruby.rb', line 183

# Copies a file or directory from hdfs to the local filesystem.
#
# Both arguments are wrapped with Path.new directly, so the nil check
# performed by _path does not apply here.
#
# @param remote [String] source (hdfs path)
# @param local [String] destination (local path)
# @return [Object] whatever @fs.copyToLocalFile returns
def get(remote, local)
  source = Path.new(remote)
  destination = Path.new(local)
  @fs.copyToLocalFile(source, destination)
end

.get_fs ⇒ Object



217
218
219
# File 'lib/hdfs_jruby.rb', line 217

# Returns the module's current filesystem handle.
#
# @return [Object, nil] the value of @fs (assigned by connectAsUser);
#   nil if nothing has assigned it yet
def get_fs
  @fs
end

.get_home_directory ⇒ Object

get home directory



188
189
190
# File 'lib/hdfs_jruby.rb', line 188

# Gets the home directory as reported by the filesystem handle.
#
# @return [Object] whatever @fs.getHomeDirectory returns
def get_home_directory
  @fs.getHomeDirectory
end

.get_working_directory ⇒ Object

get working directory



193
194
195
# File 'lib/hdfs_jruby.rb', line 193

# Gets the working directory as reported by the filesystem handle.
#
# @return [Object] whatever @fs.getWorkingDirectory returns
def get_working_directory
  @fs.getWorkingDirectory
end

.list(path, opts = {}) ⇒ Object



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/hdfs_jruby.rb', line 108

# Lists status entries for +path+, optionally treating it as a glob.
#
# Without a block, returns an Array of Hashes built by _conv. With a block,
# each converted entry is yielded instead and the raw status collection is
# returned (the result of its #each).
#
# @param path [String] hdfs path (or glob pattern when opts[:glob] is set)
# @param opts [Hash] options; a truthy :glob selects @fs.globStatus,
#   otherwise @fs.listStatus is used
# @yieldparam info [Hash] converted file status (see _conv)
# @return [Array] [] when the filesystem returns nil for the lookup
# @raise [RuntimeError] when path is nil (raised by _path)
def list(path, opts={})
  # Read the option before converting the path, matching the original order.
  globbing = opts[:glob] ? true : false
  target = _path(path)

  statuses = globbing ? @fs.globStatus(target) : @fs.listStatus(target)
  return [] if statuses.nil?
  return statuses.map { |entry| _conv(entry) } unless block_given?

  statuses.each { |entry| yield _conv(entry) }
end

.ls(path) ⇒ Array

Note:

file status: path length modificationTime owner group permission type

ls

Examples:

Hdfs.ls("hoge/").each do | stat |
  p stat
end

Parameters:

  • path (String)

Returns:

  • (Array)

    file status array



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/hdfs_jruby.rb', line 78

# Lists file statuses matching +path+ (treated as a glob pattern).
#
# Every match that is a directory is expanded one level via @fs.listStatus;
# other matches are reported as-is. Each status is converted with _conv.
#
# @note file status: path length modificationTime owner group permission type
# @example
#   Hdfs.ls("hoge/").each do | stat |
#     p stat
#   end
# @param path [String] hdfs path or glob pattern
# @yieldparam info [Hash] converted file status, when a block is given
# @return [Array, nil] file status array without a block; nil with a block
#   (but [] in either case when the glob lookup returns nil)
# @raise [RuntimeError] when path is nil (raised by _path)
def ls(path)
  matches = @fs.globStatus(_path(path))
  return [] if matches.nil?

  collected = []
  matches.each do |entry|
    # A directory contributes its children; anything else contributes itself.
    children = entry.isDir ? @fs.listStatus(entry.getPath) : [entry]
    next if children.nil?

    children.each do |child|
      info = _conv(child)
      if block_given?
        yield info
      else
        collected << info
      end
    end
  end

  block_given? ? nil : collected
end

.mkdir(path) ⇒ Object

make directory

Parameters:

  • path (String)


169
170
171
# File 'lib/hdfs_jruby.rb', line 169

# Makes a directory by calling @fs.mkdirs on the converted path.
#
# @param path [String] hdfs path
# @return [Object] whatever @fs.mkdirs returns
# @raise [RuntimeError] when path is nil (raised by _path)
def mkdir(path)
  target = _path(path)
  @fs.mkdirs(target)
end

.move(src, dst) ⇒ Object

Parameters:

  • src (String)

    hdfs source path

  • dst (String)

    hdfs destination path



140
141
142
# File 'lib/hdfs_jruby.rb', line 140

# Moves (renames) an hdfs path via @fs.rename.
#
# Both arguments are wrapped with Path.new directly, so the nil check
# performed by _path does not apply here.
#
# @param src [String] hdfs source path
# @param dst [String] hdfs destination path
# @return [Object] whatever @fs.rename returns
def move(src, dst)
  from = Path.new(src)
  to = Path.new(dst)
  @fs.rename(from, to)
end

.put(local, remote) ⇒ Object

put file or directory to hdfs

Parameters:

  • local (String)

    source (local path)

  • remote (String)

    destination (hdfs path)



176
177
178
# File 'lib/hdfs_jruby.rb', line 176

# Copies a file or directory from the local filesystem to hdfs.
#
# Both arguments are wrapped with Path.new directly, so the nil check
# performed by _path does not apply here.
#
# @param local [String] source (local path)
# @param remote [String] destination (hdfs path)
# @return [Object] whatever @fs.copyFromLocalFile returns
def put(local, remote)
  source = Path.new(local)
  destination = Path.new(remote)
  @fs.copyFromLocalFile(source, destination)
end

.set_owner(path, owner, group) ⇒ Object

set owner & group

Parameters:

  • path (String)
  • owner (String)
  • group (String)


213
214
215
# File 'lib/hdfs_jruby.rb', line 213

# Sets owner and group of +path+ via @fs.setOwner.
#
# @param path [String] hdfs path
# @param owner [String] owner name, forwarded unchanged
# @param group [String] group name, forwarded unchanged
# @return [Object] whatever @fs.setOwner returns
# @raise [RuntimeError] when path is nil (raised by _path)
def set_owner(path, owner, group)
  target = _path(path)
  @fs.setOwner(target, owner, group)
end

.set_permission(path, perm) ⇒ Object

set permission

Parameters:

  • path (String)
  • perm (Integer)

    permission



205
206
207
# File 'lib/hdfs_jruby.rb', line 205

# Sets the permission of +path+ via @fs.setPermission.
#
# @param path [String] hdfs path
# @param perm [Integer] permission value handed to FsPermission.new
# @return [Object] whatever @fs.setPermission returns
# @raise [RuntimeError] when path is nil (raised by _path)
def set_permission(path, perm)
  # Convert the path first so a nil path fails before the permission is built.
  target = _path(path)
  permission = org.apache.hadoop.fs.permission.FsPermission.new(perm)
  @fs.setPermission(target, permission)
end

.set_working_directory(path) ⇒ Object

set working directory



198
199
200
# File 'lib/hdfs_jruby.rb', line 198

# Sets the working directory of the filesystem handle.
#
# @param path [String] hdfs path to use as the working directory
# @return [Object] whatever @fs.setWorkingDirectory returns
# @raise [RuntimeError] when path is nil (raised by _path)
def set_working_directory(path)
  # Pass +path+ through to _path; calling _path with no argument raised
  # ArgumentError on every invocation and ignored the requested directory.
  @fs.setWorkingDirectory(_path(path))
end

.size(path) ⇒ Integer

Returns file size.

Returns:

  • (Integer)

    file size



163
164
165
# File 'lib/hdfs_jruby.rb', line 163

# Returns the file size reported by the status of +path+.
#
# @param path [String] hdfs path
# @return [Integer] value of getLen on the status from @fs.getFileStatus
# @raise [RuntimeError] when path is nil (raised by _path)
def size(path)
  status = @fs.getFileStatus(_path(path))
  status.getLen
end