Class: UniqueIndexedCountReduce

Inherits:
ReduceBase show all
Defined in:
lib/mrtoolkit.rb

Overview

Reducer works on groups where the first field is the same. Counts the number of occurrences of each distinct value of the second field.

Instance Attribute Summary

Attributes inherited from Stage

#errors, #in_fields, #in_sep, #out_fields, #out_sep

Instance Method Summary collapse

Methods inherited from ReduceBase

#process, #process_begin, #process_end, #process_end_internal, #process_internal, #run

Methods inherited from Stage

#catch_errors, #copy_struct, #emit, #emit_separator, #field, #field_separator, #initialize, #new_input, #new_output, #prepare, #process_step, #write_out

Constructor Details

This class inherits a constructor from Stage

Instance Method Details

#declare ⇒ Object



495
496
497
498
499
500
501
502
# File 'lib/mrtoolkit.rb', line 495

# Declares the record layout for this reducer: two input fields
# (:unique, :index) followed by three output fields
# (:unique, :index, :value), registered in that order.
def declare
  [:unique, :index].each { |name| field(name) }

  # `.last` keeps the method's return value equal to the result of the
  # final `emit` call.
  [:unique, :index, :value].map { |name| emit(name) }.last
end

#process_each(input, output) ⇒ Object



507
508
509
510
511
512
# File 'lib/mrtoolkit.rb', line 507

# Tallies one more occurrence of the incoming record's index value.
#
# input  - record responding to #index; its value is the tally key.
# output - not used by this method.
#
# Always returns nil; the counts accumulate in @sum.
def process_each(input, output)
  key = input.index
  # A key seen for the first time starts from zero.
  @sum[key] = @sum.fetch(key, 0) + 1
  nil
end

#process_init(input, output) ⇒ Object



503
504
505
506
# File 'lib/mrtoolkit.rb', line 503

# Resets the per-index tally to an empty hash.
#
# Neither argument is used. Always returns nil.
def process_init(input, output)
  @sum = Hash.new
  nil
end

#process_term(dummy, output) ⇒ Object



513
514
515
516
517
518
519
520
521
522
523
# File 'lib/mrtoolkit.rb', line 513

# Builds one output record per tallied index value.
#
# Each record carries @last as its `unique` field, the tallied key as
# `index`, and the accumulated count as `value`.
# NOTE(review): @last is not assigned in this method; presumably the
# base class keeps it set to the current group's key -- confirm.
#
# dummy  - not used.
# output - not used; the records are returned instead.
#
# Returns an Array of records created with new_output (empty when
# nothing was tallied).
def process_term(dummy, output)
  @sum.map do |index, value|
    record = new_output
    record.unique = @last
    record.index = index
    record.value = value
    record
  end
end