Class: Spark::PipelinedRDD

Inherits:
RDD
  • Object
show all
Defined in:
lib/spark/rdd.rb

Overview

Pipelined Resilient Distributed Dataset: operations are pipelined and sent to the worker.

RDD
`-- map
    `-- map
        `-- map

Code is executed from top to bottom

Instance Attribute Summary collapse

Attributes inherited from RDD

#context

Instance Method Summary collapse

Methods inherited from RDD

#+, #add_command, #add_library, #aggregate, #aggregate_by_key, #bind, #cache, #cached?, #cartesian, #checkpointed?, #coalesce, #cogroup, #collect, #collect_as_hash, #collect_from_file, #combine_by_key, #compact, #config, #count, #default_reduce_partitions, #distinct, #filter, #first, #flat_map, #flat_map_values, #fold, #fold_by_key, #foreach, #foreach_partition, #glom, #group_by, #group_by_key, #group_with, #histogram, #id, #inspect, #intersection, #key_by, #keys, #lookup, #map, #map_partitions, #map_partitions_with_index, #map_values, #max, #mean, #min, #name, #name=, #new_rdd_from_command, #partition_by, #partitions_size, #persist, #pipe, #reduce, #reduce_by_key, #reserialize, #sample, #sample_stdev, #sample_variance, #set_name, #shuffle, #sort_by, #sort_by_key, #sort_by_value, #stats, #stdev, #subtract, #subtract_by_key, #sum, #take, #take_sample, #to_java, #union, #unpersist, #values, #variance

Methods included from Helper::Statistic

#bisect_right, #compute_fraction, #determine_bounds, #upper_binomial_bound, #upper_poisson_bound

Methods included from Helper::Parser

included

Methods included from Helper::Logger

included

Constructor Details

#initialize(prev, command) ⇒ PipelinedRDD

Returns a new instance of PipelinedRDD.



1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
# File 'lib/spark/rdd.rb', line 1369

# Builds a pipelined RDD on top of +prev+.
#
# @param prev [Object] parent dataset; must respond to +jrdd+ and +context+
# @param command [Object] stored unchanged and exposed through #command
def initialize(prev, command)
  # A parent that is itself a PipelinedRDD and still reports pipelinable?
  # lets this stage join its pipeline (second and later stages), so its
  # source jrdd is reused. Any other parent marks the first stage and
  # contributes its own jrdd.
  joins_pipeline = prev.is_a?(PipelinedRDD) && prev.pipelinable?
  @prev_jrdd = joins_pipeline ? prev.prev_jrdd : prev.jrdd

  # A freshly built stage starts out neither cached nor checkpointed.
  @cached = @checkpointed = false

  @context = prev.context
  @command = command
end

Instance Attribute Details

#command ⇒ Object (readonly)

Returns the value of attribute command.



1367
1368
1369
# File 'lib/spark/rdd.rb', line 1367

# Read-only accessor for the @command instance variable.
#
# @return [Object] the current value of @command
def command
  @command
end

#prev_jrdd ⇒ Object (readonly)

Returns the value of attribute prev_jrdd.



1367
1368
1369
# File 'lib/spark/rdd.rb', line 1367

# Read-only accessor for the @prev_jrdd instance variable.
#
# @return [Object] the current value of @prev_jrdd
def prev_jrdd
  @prev_jrdd
end

Instance Method Details

#jrdd ⇒ Object

Serializes everything that is needed and sends it to RubyRDD (the Scala extension).



1391
1392
1393
# File 'lib/spark/rdd.rb', line 1391

# Returns the result of +_jrdd+, memoized in @jrdd.
#
# +_jrdd+ is only invoked while @jrdd is nil or false; once a truthy value
# has been stored, it is returned directly on every later call.
#
# @return [Object] the memoized value produced by +_jrdd+
def jrdd
  return @jrdd if @jrdd

  @jrdd = _jrdd
end

#pipelinable? ⇒ Boolean

Returns:

  • (Boolean)


1386
1387
1388
# File 'lib/spark/rdd.rb', line 1386

# Tells whether this RDD is neither cached nor checkpointed.
#
# @return [Boolean] true only when both cached? and checkpointed? are falsy
def pipelinable?
  !cached? && !checkpointed?
end