Class: Embulk::Formatter::JsonlFormatterPlugin

Inherits:
FormatterPlugin
  • Object
show all
Defined in:
lib/embulk/formatter/jsonl.rb

Constant Summary collapse

# Encoding names accepted for the `encoding` option. The configured value is
# upcased before being checked against this list. Covers UTF-8 plus both byte
# orders of UTF-16 and UTF-32.
VALID_ENCODINGS =
%w(UTF-8 UTF-16LE UTF-16BE UTF-32LE UTF-32BE).freeze
# Maps the (upcased) `newline` option name to the terminator string appended
# after each serialized record. Frozen because it is a shared lookup table.
NEWLINES =
{
  'CRLF' => "\r\n",
  'LF' => "\n",
  'CR' => "\r",
  # The following are not valid JSON Lines terminators, but are useful in
  # some cases.
  'NUL' => "\0",
  'NO' => '',
}.freeze

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.join_texts((*inits, last), opt = {}) ⇒ Object



19
20
21
22
23
# File 'lib/embulk/formatter/jsonl.rb', line 19

# Joins a list of texts into a human-readable enumeration such as
# "a, b or c".
#
# The first argument is a single array that is destructured into its leading
# items (+inits+) and its final item (+last+).
#
# @param opt [Hash] optional settings:
#   :delimiter      - separator between the leading items (default ', ')
#   :last_delimiter - separator placed before the final item (default ' or ')
# @return [String] the joined text; a single item is returned on its own and
#   an empty list yields an empty string.
def self.join_texts((*inits, last), opt = {})
  delim = opt[:delimiter] || ', '
  last_delim = opt[:last_delimiter] || ' or '

  # With fewer than two items there is nothing to join. Without this guard
  # the result would begin with a dangling last delimiter (e.g. " or a").
  return last.to_s if inits.empty?

  [inits.join(delim), last].join(last_delim)
end

.transaction(config, schema) {|task| ... } ⇒ Object

Yields:

  • (task)


25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/embulk/formatter/jsonl.rb', line 25

# Reads the formatter options from +config+, validates them, and yields the
# resulting task hash to the given block.
#
# @param config [#param] configuration source; each option is read with
#   config.param(name, :string, default: ...)
# @param schema not used by this method
# @yield [task] Hash with the keys 'encoding', 'newline', 'date_format' and
#   'timezone', holding the values exactly as read from +config+
# @return whatever the block returns
# @raise [RuntimeError] if the upcased encoding is not in VALID_ENCODINGS or
#   the upcased newline is not a key of NEWLINES
def self.transaction(config, schema, &control)
  encoding_name = config.param('encoding', :string, default: 'UTF-8')
  newline_name = config.param('newline', :string, default: 'LF')
  date_format = config.param('date_format', :string, default: nil)
  timezone = config.param('timezone', :string, default: nil)

  task = {
    'encoding' => encoding_name,
    'newline' => newline_name,
    'date_format' => date_format,
    'timezone' => timezone
  }

  # Validation is case-insensitive; the task keeps the values as configured.
  unless VALID_ENCODINGS.include?(encoding_name.upcase)
    raise "encoding must be one of #{join_texts(VALID_ENCODINGS)}"
  end

  unless NEWLINES.key?(newline_name.upcase)
    raise "newline must be one of #{join_texts(NEWLINES.keys)}"
  end

  yield(task)
end

Instance Method Details

#add(page) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/embulk/formatter/jsonl.rb', line 61

# Serializes every record of +page+ as one JSON document followed by the
# configured newline, encoded with @encoding, and writes it to the current
# output file.
#
# @param page [#each] yields records that are indexable by column index
# @return the result of page.each
def add(page)
  page.each do |record|
    # Request a file from file_output when none is open yet or the size
    # threshold (32 KiB) is exceeded.
    # NOTE(review): @current_file_size is never incremented in this method,
    # so the size check only matters if it is updated elsewhere - confirm.
    needs_new_file = @current_file.nil? || @current_file_size > 32 * 1024
    if needs_new_file
      @current_file = file_output.next_file
      @current_file_size = 0
    end

    # Build a column-name => value mapping in schema order.
    row = {}
    @schema.each { |column| row[column.name] = record[column.index] }

    line = "#{JrJackson::Json.dump(row, @opts)}#{@newline}"
    @current_file.write(line.encode(@encoding))
  end
end

#close ⇒ Object



58
59
# File 'lib/embulk/formatter/jsonl.rb', line 58

# Intentionally a no-op: this method has nothing to release and returns nil.
def close; end

#finish ⇒ Object



76
77
78
# File 'lib/embulk/formatter/jsonl.rb', line 76

# Signals completion by delegating to file_output.finish.
#
# @return whatever file_output.finish returns
def finish
  output = file_output
  output.finish
end

#init ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/embulk/formatter/jsonl.rb', line 43

# Prepares per-instance formatter state from the task hash.
#
# Sets:
#   @encoding          - upcased 'encoding' task value
#   @newline           - terminator string looked up in NEWLINES by the
#                        upcased 'newline' task value
#   @current_file      - nil (no output file requested yet)
#   @current_file_size - 0
#   @opts              - options hash later passed to JrJackson::Json.dump;
#                        always contains :mode => :compat, plus :date_format
#                        and :timezone when the task provides them
def init
  @encoding = task['encoding'].upcase
  @newline = NEWLINES[task['newline'].upcase]

  # Explicitly reset the output state. This must be an assignment; a `==`
  # comparison here would be a no-op and leave any previous value in place.
  @current_file = nil
  @current_file_size = 0

  @opts = { :mode => :compat }
  date_format = task['date_format']
  timezone = task['timezone']
  # Only forward the optional settings when they were actually configured.
  @opts[:date_format] = date_format if date_format
  @opts[:timezone] = timezone if timezone
end