Module: Spark

Includes:
Helper::System
Defined in:
lib/spark/sampler.rb,
lib/spark.rb,
lib/spark/cli.rb,
lib/spark/rdd.rb,
lib/spark/sort.rb,
lib/spark/sort.rb,
lib/spark/build.rb,
lib/spark/error.rb,
lib/spark/mllib.rb,
lib/spark/config.rb,
lib/spark/ext/io.rb,
lib/spark/helper.rb,
lib/spark/logger.rb,
lib/spark/command.rb,
lib/spark/context.rb,
lib/spark/sampler.rb,
lib/spark/version.rb,
lib/spark/constant.rb,
lib/spark/ext/hash.rb,
lib/spark/broadcast.rb,
lib/spark/ext/module.rb,
lib/spark/ext/object.rb,
lib/spark/ext/string.rb,
lib/spark/serializer.rb,
lib/spark/accumulator.rb,
lib/spark/accumulator.rb,
lib/spark/ext/integer.rb,
lib/spark/java_bridge.rb,
lib/spark/mllib/matrix.rb,
lib/spark/mllib/matrix.rb,
lib/spark/mllib/matrix.rb,
lib/spark/mllib/matrix.rb,
lib/spark/mllib/vector.rb,
lib/spark/mllib/vector.rb,
lib/spark/mllib/vector.rb,
lib/spark/mllib/vector.rb,
lib/spark/stat_counter.rb,
lib/spark/ext/ip_socket.rb,
lib/spark/helper/logger.rb,
lib/spark/helper/parser.rb,
lib/spark/helper/system.rb,
lib/spark/serializer/oj.rb,
lib/spark/storage_level.rb,
lib/spark/command_builder.rb,
lib/spark/java_bridge/rjb.rb,
lib/spark/serializer/base.rb,
lib/spark/serializer/pair.rb,
lib/spark/serializer/text.rb,
lib/spark/helper/serialize.rb,
lib/spark/helper/statistic.rb,
lib/spark/java_bridge/base.rb,
lib/spark/command_validator.rb,
lib/spark/java_bridge/jruby.rb,
lib/spark/serializer/batched.rb,
lib/spark/serializer/marshal.rb,
lib/spark/serializer/cartesian.rb,
lib/spark/serializer/compressed.rb,
lib/spark/mllib/regression/lasso.rb,
lib/spark/mllib/regression/ridge.rb,
lib/spark/mllib/clustering/kmeans.rb,
lib/spark/mllib/clustering/kmeans.rb,
lib/spark/mllib/regression/common.rb,
lib/spark/mllib/regression/common.rb,
lib/spark/mllib/regression/linear.rb,
lib/spark/serializer/auto_batched.rb,
lib/spark/serializer/message_pack.rb,
lib/spark/mllib/classification/svm.rb,
lib/spark/mllib/classification/svm.rb,
lib/spark/mllib/classification/common.rb,
lib/spark/mllib/classification/common.rb,
lib/spark/mllib/regression/labeled_point.rb,
lib/spark/mllib/classification/naive_bayes.rb,
lib/spark/mllib/classification/naive_bayes.rb,
lib/spark/mllib/ruby_matrix/matrix_adapter.rb,
lib/spark/mllib/ruby_matrix/vector_adapter.rb,
lib/spark/mllib/clustering/gaussian_mixture.rb,
lib/spark/mllib/clustering/gaussian_mixture.rb,
lib/spark/mllib/classification/logistic_regression.rb,
lib/spark/mllib/classification/logistic_regression.rb,
lib/spark/mllib/classification/logistic_regression.rb,
ext/ruby_c/ruby-spark.c

Overview

Spark::JavaBridge::Base

Parent class for all of the Ruby–Java bridge adapters.

Defined Under Namespace

Modules: Build, CommandValidator, Constant, CoreExtension, Digest, Helper, InternalSorter, JavaBridge, Mllib, RandomGenerator, Sampler, Serializer Classes: Accumulator, AccumulatorError, Broadcast, BroadcastError, BuildError, CLI, Command, CommandBuilder, CommandError, Config, ConfigurationError, Context, ContextError, ExternalSorter, JavaBridgeError, Logger, MllibError, NotImplemented, ParseError, PipelinedRDD, RDD, RDDError, SerializeError, StatCounter, StorageLevel

Constant Summary collapse

DEFAULT_CONFIG_FILE =
File.join(Dir.home, '.ruby-spark.conf')
VERSION =
'1.2.1'

Class Method Summary collapse

Methods included from Helper::System

included

Class Method Details

.clear_configObject

Destroy the current configuration. This can be useful for resetting the configuration in order to set new values. It has no effect if a context is already started.



75
76
77
# File 'lib/spark.rb', line 75

# Drop the cached configuration so a fresh one can be built on next access.
# Has no effect on a context that is already running.
def self.clear_config
  @config = nil
  nil
end

.config(&block) ⇒ Object

Returns current configuration. Configurations can be changed until context is initialized. In this case config is locked only for reading.

Configuration can be changed:

Spark.config.set('spark.app.name', 'RubySpark')

Spark.config['spark.app.name'] = 'RubySpark'

Spark.config do
  set 'spark.app.name', 'RubySpark'
end


63
64
65
66
67
68
69
70
71
# File 'lib/spark.rb', line 63

# Access the global configuration object (created lazily on first use).
#
# With a block, the block is evaluated in the config's own context
# (so `set key, value` works directly); without one, the config
# object itself is returned.
def self.config(&block)
  @config ||= Spark::Config.new
  block_given? ? @config.instance_eval(&block) : @config
end

.contextObject Also known as: sc

Return a current active context or nil.

TODO: Run `start` if context is nil?



83
84
85
# File 'lib/spark.rb', line 83

class << self
  # Currently active Spark context, or nil when none has been started.
  attr_reader :context
end

.java_bridgeObject Also known as: jb



218
219
220
# File 'lib/spark.rb', line 218

class << self
  # The initialized Ruby–Java bridge adapter (nil until load_lib runs).
  attr_reader :java_bridge
end

.load_defaultsObject

Load the default configuration for Spark and RubySpark. By default the values are stored at ~/.ruby-spark.conf; the file is created automatically if it does not exist.



120
121
122
123
124
125
126
# File 'lib/spark.rb', line 120

# Load the default configuration for Spark and RubySpark.
#
# Values live at DEFAULT_CONFIG_FILE (~/.ruby-spark.conf); the file is
# written with defaults first if it does not yet exist, then parsed.
def self.load_defaults
  # File.exists? was deprecated and removed in Ruby 3.2; File.exist? is
  # the supported spelling and behaves identically.
  unless File.exist?(DEFAULT_CONFIG_FILE)
    save_defaults_to(DEFAULT_CONFIG_FILE)
  end

  load_defaults_from(DEFAULT_CONFIG_FILE)
end

.load_defaults_from(file_path) ⇒ Object

Clear prev setting and load new from file



129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/spark.rb', line 129

# Clear the previously derived settings and load new ones from a file.
#
# Only lines beginning with "gem." are honoured; each such line is split
# into a key and the remainder of the line as its value.
#
# @param file_path [String] path to a ruby-spark config file
def self.load_defaults_from(file_path)
  # Collect "gem.*" entries into a key => value hash
  values = {}
  File.readlines(file_path).each do |line|
    line = line.strip
    next unless line.start_with?('gem.')
    key, value = line.split(nil, 2)
    values[key] = value
  end

  # Forget paths derived from the previous configuration
  @target_dir = nil
  @ruby_spark_jar = nil
  @spark_home = nil

  # Apply the freshly parsed values
  @target_dir = values['gem.target']
end

.load_lib(target = nil) ⇒ Object

Load dependent libraries; can be used only once. Cannot be loaded before CLI::install.

Parameters:

target

path to the directory containing Spark's .jar files, or to a single Spark jar



208
209
210
211
212
213
214
215
216
# File 'lib/spark.rb', line 208

# Load the dependent Java libraries. Idempotent: once a bridge exists
# this is a no-op. Cannot be called before CLI::install.
#
# @param target [String, nil] directory with Spark's .jar files (or a
#   single Spark jar); defaults to Spark.target_dir
def self.load_lib(target=nil)
  return if @java_bridge

  @java_bridge = JavaBridge.init(target || Spark.target_dir)
  @java_bridge.import_all
  nil
end

.loggerObject

Global settings and variables



170
171
172
# File 'lib/spark.rb', line 170

# Global logger instance, created lazily on first access.
def self.logger
  @logger = Spark::Logger.new unless @logger
  @logger
end


35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/spark.rb', line 35

# Print the RubySpark ASCII-art logo together with an optional message.
# (The method name was lost in extraction — `def self.(message=nil)` is
# not valid Ruby; restored as the gem's logo-printing method.)
#
# @param message [String, nil] extra text shown below the logo
def self.print_logo(message=nil)
  puts <<-STRING

  Welcome to
                __           ____              __
      ______ __/ /  __ __   / __/__  ___ _____/ /__
     / __/ // / _ \\/ // /  _\\ \\/ _ \\/ _ `/ __/  '_/
    /_/  \\_,_/_.__/\\_, /  /___/ .__/\\_,_/_/ /_/\\_\\   version #{Spark::VERSION}
                  /___/      /_/

  #{message}

  STRING
end

.rootObject Also known as: home

Root of the gem



175
176
177
# File 'lib/spark.rb', line 175

# Absolute path to the root of the gem (memoized).
def self.root
  @root ||= File.expand_path('../..', __FILE__)
end

.ruby_spark_jarObject



189
190
191
# File 'lib/spark.rb', line 189

# Path to the bundled ruby-spark.jar inside the target directory (memoized).
def self.ruby_spark_jar
  @ruby_spark_jar = File.join(target_dir, 'ruby-spark.jar') unless @ruby_spark_jar
  @ruby_spark_jar
end

.save_defaults_to(file_path) ⇒ Object

Create target dir and new config file



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/spark.rb', line 147

# Create the target directory and write a fresh default config file.
#
# A unique (uuid-suffixed) directory under the user's home is created to
# hold Spark; if the generated name already exists we retry with a new uuid.
#
# @param file_path [String] where the config file will be written
def self.save_defaults_to(file_path)
  dir = File.join(Dir.home, ".ruby-spark.#{SecureRandom.uuid}")

  if Dir.exist?(dir)
    # Extremely unlikely uuid collision — retry with a fresh name
    save_defaults_to(file_path)
  else
    Dir.mkdir(dir, 0700)
    # Block form guarantees the handle is closed even if a write raises
    # (the original leaked the File object on any puts failure)
    File.open(file_path, 'w') do |file|
      file.puts "# Directory where will be Spark saved"
      file.puts "gem.target   #{dir}"
      file.puts ""
      file.puts "# You can also defined spark properties"
      file.puts "# spark.master                       spark://master:7077"
      file.puts "# spark.ruby.serializer              marshal"
      file.puts "# spark.ruby.serializer.batch_size   2048"
    end
  end
end

.spark_ext_dirObject



193
194
195
# File 'lib/spark.rb', line 193

# Directory holding the gem's Spark (Java) extension sources (memoized).
def self.spark_ext_dir
  @spark_ext_dir = File.join(root, 'ext', 'spark') unless @spark_ext_dir
  @spark_ext_dir
end

.startObject

Initialize the Spark context if it is not started already. Config will be loaded automatically in the constructor. From that point `config` will use the configuration of the running Spark and will be locked for reading only.



90
91
92
93
94
95
96
# File 'lib/spark.rb', line 90

# Create the global context unless one is already running.
# The Context constructor loads the configuration, which then
# becomes read-only.
def self.start
  return if started?

  @context ||= Spark::Context.new
end

.started?Boolean

Returns:

  • (Boolean)


109
110
111
# File 'lib/spark.rb', line 109

# Whether a context has been created via Spark.start.
#
# @return [Boolean]
def self.started?
  @context ? true : false
end

.stopObject



98
99
100
101
102
103
104
105
106
107
# File 'lib/spark.rb', line 98

# Shut down the context and its workers. Any shutdown error is
# swallowed; the global state is always reset afterwards.
def self.stop
  begin
    @context.stop
    RubyWorker.stopServer
    logger.info('Workers were stopped')
  rescue
    nil
  end
ensure
  @context = nil
  clear_config
end

.target_dirObject

Default directory for java extensions



180
181
182
# File 'lib/spark.rb', line 180

# Default directory for the java extensions ("target" under the gem
# root), unless overridden by a loaded configuration (memoized).
def self.target_dir
  @target_dir = File.join(root, 'target') unless @target_dir
  @target_dir
end

.worker_dirObject

Directory where is worker.rb



185
186
187
# File 'lib/spark.rb', line 185

# Directory containing worker.rb (lib/spark/worker under the gem root),
# memoized after first access.
def self.worker_dir
  @worker_dir = File.join(root, 'lib', 'spark', 'worker') unless @worker_dir
  @worker_dir
end