Module: Spark
- Includes:
- Helper::System
- Defined in:
- lib/spark/sampler.rb,
lib/spark.rb,
lib/spark/cli.rb,
lib/spark/rdd.rb,
lib/spark/sort.rb,
lib/spark/build.rb,
lib/spark/error.rb,
lib/spark/mllib.rb,
lib/spark/config.rb,
lib/spark/ext/io.rb,
lib/spark/helper.rb,
lib/spark/logger.rb,
lib/spark/command.rb,
lib/spark/context.rb,
lib/spark/version.rb,
lib/spark/constant.rb,
lib/spark/ext/hash.rb,
lib/spark/broadcast.rb,
lib/spark/ext/module.rb,
lib/spark/ext/object.rb,
lib/spark/ext/string.rb,
lib/spark/serializer.rb,
lib/spark/accumulator.rb,
lib/spark/ext/integer.rb,
lib/spark/java_bridge.rb,
lib/spark/mllib/matrix.rb,
lib/spark/mllib/vector.rb,
lib/spark/stat_counter.rb,
lib/spark/ext/ip_socket.rb,
lib/spark/helper/logger.rb,
lib/spark/helper/parser.rb,
lib/spark/helper/system.rb,
lib/spark/serializer/oj.rb,
lib/spark/storage_level.rb,
lib/spark/command_builder.rb,
lib/spark/java_bridge/rjb.rb,
lib/spark/serializer/base.rb,
lib/spark/serializer/pair.rb,
lib/spark/serializer/text.rb,
lib/spark/helper/serialize.rb,
lib/spark/helper/statistic.rb,
lib/spark/java_bridge/base.rb,
lib/spark/command_validator.rb,
lib/spark/java_bridge/jruby.rb,
lib/spark/serializer/batched.rb,
lib/spark/serializer/marshal.rb,
lib/spark/serializer/cartesian.rb,
lib/spark/serializer/compressed.rb,
lib/spark/mllib/regression/lasso.rb,
lib/spark/mllib/regression/ridge.rb,
lib/spark/mllib/clustering/kmeans.rb,
lib/spark/mllib/regression/common.rb,
lib/spark/mllib/regression/linear.rb,
lib/spark/serializer/auto_batched.rb,
lib/spark/serializer/message_pack.rb,
lib/spark/mllib/classification/svm.rb,
lib/spark/mllib/classification/common.rb,
lib/spark/mllib/regression/labeled_point.rb,
lib/spark/mllib/classification/naive_bayes.rb,
lib/spark/mllib/ruby_matrix/matrix_adapter.rb,
lib/spark/mllib/ruby_matrix/vector_adapter.rb,
lib/spark/mllib/clustering/gaussian_mixture.rb,
lib/spark/mllib/classification/logistic_regression.rb,
ext/ruby_c/ruby-spark.c
Overview
Spark::JavaBridge::Base is the parent class for all Ruby-Java bridge adapters.
Defined Under Namespace
Modules: Build, CommandValidator, Constant, CoreExtension, Digest, Helper, InternalSorter, JavaBridge, Mllib, RandomGenerator, Sampler, Serializer
Classes: Accumulator, AccumulatorError, Broadcast, BroadcastError, BuildError, CLI, Command, CommandBuilder, CommandError, Config, ConfigurationError, Context, ContextError, ExternalSorter, JavaBridgeError, Logger, MllibError, NotImplemented, ParseError, PipelinedRDD, RDD, RDDError, SerializeError, StatCounter, StorageLevel
Constant Summary
- DEFAULT_CONFIG_FILE = File.join(Dir.home, '.ruby-spark.conf')
- VERSION = '1.2.1'
Class Method Summary
- .clear_config ⇒ Object
  Destroy the current configuration.
- .config(&block) ⇒ Object
  Returns the current configuration.
- .context ⇒ Object (also: sc)
  Return the current active context, or nil.
- .java_bridge ⇒ Object (also: jb)
- .load_defaults ⇒ Object
  Load the default configuration for Spark and RubySpark. Values are stored in ~/.ruby-spark.conf by default; the file is created automatically.
- .load_defaults_from(file_path) ⇒ Object
  Clear previous settings and load new ones from a file.
- .load_lib(target = nil) ⇒ Object
  Load dependent libraries; can be used only once. Cannot be called before CLI::install.
- .logger ⇒ Object
  Global logger instance.
- .print_logo(message = nil) ⇒ Object
- .root ⇒ Object (also: home)
  Root of the gem.
- .ruby_spark_jar ⇒ Object
- .save_defaults_to(file_path) ⇒ Object
  Create the target directory and a new config file.
- .spark_ext_dir ⇒ Object
- .start ⇒ Object
  Initialize the Spark context if one is not already running.
- .started? ⇒ Boolean
- .stop ⇒ Object
- .target_dir ⇒ Object
  Default directory for Java extensions.
- .worker_dir ⇒ Object
  Directory containing worker.rb.
Methods included from Helper::System
Class Method Details
.clear_config ⇒ Object
Destroy the current configuration. This is useful for resetting the config before setting new values. It has no effect if a context has already been started.
# File 'lib/spark.rb', line 75

def self.clear_config
  @config = nil
end
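A minimal sketch of resetting the configuration before any context exists, using only calls documented on this page:

  Spark.config.set('spark.app.name', 'FirstApp')
  Spark.clear_config                               # throw the config away
  Spark.config.set('spark.app.name', 'SecondApp')  # start fresh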
.config(&block) ⇒ Object
Returns the current configuration. The configuration can be changed until the context is initialized; after that, it is locked and read-only.
Configuration can be changed:
Spark.config.set('spark.app.name', 'RubySpark')
Spark.config['spark.app.name'] = 'RubySpark'
Spark.config do
  set 'spark.app.name', 'RubySpark'
end
# File 'lib/spark.rb', line 63

def self.config(&block)
  @config ||= Spark::Config.new

  if block_given?
    @config.instance_eval(&block)
  else
    @config
  end
end
.context ⇒ Object Also known as: sc
Return the current active context, or nil.
TODO: Run `start` if context is nil?
# File 'lib/spark.rb', line 83

def self.context
  @context
end
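A brief usage sketch, assuming a context has already been created with Spark.start:

  Spark.start
  Spark.context   # => the active Spark::Context
  Spark.sc        # => the same object, via the alias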
.java_bridge ⇒ Object Also known as: jb
# File 'lib/spark.rb', line 218

def self.java_bridge
  @java_bridge
end
.load_defaults ⇒ Object
Load the default configuration for Spark and RubySpark. Values are stored in ~/.ruby-spark.conf by default; the file is created automatically.
# File 'lib/spark.rb', line 120

def self.load_defaults
  unless File.exists?(DEFAULT_CONFIG_FILE)
    save_defaults_to(DEFAULT_CONFIG_FILE)
  end

  load_defaults_from(DEFAULT_CONFIG_FILE)
end
.load_defaults_from(file_path) ⇒ Object
Clear previous settings and load new ones from a file.
# File 'lib/spark.rb', line 129

def self.load_defaults_from(file_path)
  # Parse values
  values = File.readlines(file_path)
  values.map!(&:strip)
  values.select!{|value| value.start_with?('gem.')}
  values.map!{|value| value.split(nil, 2)}
  values = Hash[values]

  # Clear previous values
  @target_dir = nil
  @ruby_spark_jar = nil
  @spark_home = nil

  # Load new values
  @target_dir = values['gem.target']
end
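For illustration, a hypothetical ~/.ruby-spark.conf that this parser accepts. Only lines prefixed with 'gem.' are read here; each is split on whitespace into key and value:

  # Directory where Spark will be saved
  gem.target   /home/user/.ruby-spark.example

  # Lines without the gem. prefix (e.g. spark.master) are skipped by this method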
.load_lib(target = nil) ⇒ Object
Load dependent libraries; can be used only once. Cannot be called before CLI::install.
Parameters:
- target: path to a directory containing the Spark .jar files, or a single Spark jar
# File 'lib/spark.rb', line 208

def self.load_lib(target=nil)
  return if @java_bridge

  target ||= Spark.target_dir

  @java_bridge = JavaBridge.init(target)
  @java_bridge.import_all
  nil
end
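A hedged usage sketch; the directory below is hypothetical and must hold the Spark .jar files (or point to a single Spark jar):

  # Normally the target is implied by Spark.target_dir
  Spark.load_lib('/opt/spark/jars')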
.logger ⇒ Object
Global logger instance.
# File 'lib/spark.rb', line 170

def self.logger
  @logger ||= Spark::Logger.new
end
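A short usage sketch; .stop below uses the logger the same way:

  Spark.logger.info('Something happened')  # lazily creates a Spark::Logger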
.print_logo(message = nil) ⇒ Object
# File 'lib/spark.rb', line 35

def self.print_logo(message=nil)
  puts <<-STRING

    Welcome to
                  __           ____              __
       ______ __ / /  __ __   / __/__  ___ _____/ /__
      / __/ // / _ \\/ // /  _\\ \\/ _ \\/ _ `/ __/  '_/
     /_/  \\_,_/_.__/\\_, /  /___/ .__/\\_,_/_/ /_/\\_\\   version #{Spark::VERSION}
                   /___/      /_/

    #{message}

  STRING
end
.root ⇒ Object Also known as: home
Root of the gem
# File 'lib/spark.rb', line 175

def self.root
  @root ||= File.expand_path('..', File.dirname(__FILE__))
end
.ruby_spark_jar ⇒ Object
# File 'lib/spark.rb', line 189

def self.ruby_spark_jar
  @ruby_spark_jar ||= File.join(target_dir, 'ruby-spark.jar')
end
.save_defaults_to(file_path) ⇒ Object
Create the target directory and a new config file.
# File 'lib/spark.rb', line 147

def self.save_defaults_to(file_path)
  dir = File.join(Dir.home, ".ruby-spark.#{SecureRandom.uuid}")

  if Dir.exist?(dir)
    # Extremely unlikely UUID collision: try again
    save_defaults_to(file_path)
  else
    Dir.mkdir(dir, 0700)

    file = File.open(file_path, 'w')
    file.puts "# Directory where Spark will be saved"
    file.puts "gem.target   #{dir}"
    file.puts ""
    file.puts "# You can also define Spark properties"
    file.puts "# spark.master                     spark://master:7077"
    file.puts "# spark.ruby.serializer            marshal"
    file.puts "# spark.ruby.serializer.batch_size 2048"
    file.close
  end
end
.spark_ext_dir ⇒ Object
# File 'lib/spark.rb', line 193

def self.spark_ext_dir
  @spark_ext_dir ||= File.join(root, 'ext', 'spark')
end
.start ⇒ Object
Initialize the Spark context if one is not already running. The config is loaded automatically in the constructor; from that point on, `config` uses the configuration of the running Spark and is locked and read-only.
# File 'lib/spark.rb', line 90

def self.start
  if started?
    # Already started
  else
    @context ||= Spark::Context.new
  end
end
.started? ⇒ Boolean
# File 'lib/spark.rb', line 109

def self.started?
  !!@context
end
.stop ⇒ Object
# File 'lib/spark.rb', line 98

def self.stop
  @context.stop
  RubyWorker.stopServer
  logger.info('Workers were stopped')
rescue
  nil
ensure
  @context = nil
  clear_config
end
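Taken together with .start and .started?, a minimal lifecycle sketch:

  Spark.start
  Spark.started?   # => true

  # ... run jobs through Spark.sc ...

  Spark.stop
  Spark.started?   # => false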
.target_dir ⇒ Object
Default directory for Java extensions.
# File 'lib/spark.rb', line 180

def self.target_dir
  @target_dir ||= File.join(root, 'target')
end
.worker_dir ⇒ Object
Directory containing worker.rb.
# File 'lib/spark.rb', line 185

def self.worker_dir
  @worker_dir ||= File.join(root, 'lib', 'spark', 'worker')
end
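For orientation, a sketch of how the path helpers relate to each other; the gem root shown is illustrative, not real output:

  Spark.root            # => "/path/to/ruby-spark"
  Spark.target_dir      # => "/path/to/ruby-spark/target"
  Spark.ruby_spark_jar  # => "/path/to/ruby-spark/target/ruby-spark.jar"
  Spark.worker_dir      # => "/path/to/ruby-spark/lib/spark/worker"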