Module: Rubadoop::MapReduce::Mappable

Extended by:
ActiveSupport::Concern
Included in:
Mapper
Defined in:
lib/rubadoop/map_reduce/mappable.rb

Instance Method Summary collapse

Instance Method Details

#mapper ⇒ Object

Process Hadoop input as a Mapper. Yields values line-by-line to the supplied block. If no block is supplied, returns an array of the full lines instead; that mode is intended for testing only.



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/rubadoop/map_reduce/mappable.rb', line 12

def mapper

  lines = []
  MapReduce.io_in.each_line do |line|
    line.chomp!
    if @input_ignore_key
      key, line = key_value_split(line)
      line = key if line.nil? && !key.nil?
    end
    if block_given?
      yield line
    else
      lines << line
    end
  end
  lines unless block_given?
end

#mapper_batched(batch_size = 50) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/rubadoop/map_reduce/mappable.rb', line 30

def mapper_batched(batch_size = 50)
  batch = []
  batches = []

  mapper do |line|
    batch << line
    if batch.size >= batch_size
      if block_given?
        yield batch
      else
        batches << batch
      end
      batch = []
    end
  end

  if batch.size > 0
    if block_given?
      yield batch
    else
      batches << batch
    end
  end

  batches unless block_given?
end