Module: Embulk::Guess::SchemaGuess

Defined in:
lib/embulk/guess/schema_guess.rb

Defined Under Namespace

Classes: TimestampTypeMatch

Class Method Summary collapse

Class Method Details

.from_array_records(column_names, samples) ⇒ Object



26
27
28
29
30
31
32
33
34
# File 'lib/embulk/guess/schema_guess.rb', line 26

# Builds an Embulk::Schema from column names and array-shaped sample records.
#
# The samples are handed to types_from_array_records; each resulting type is
# paired positionally with the entry of column_names at the same index.
#
# @param column_names [Array] column names, in the same order as the values
#   inside each sample record
# @param samples [Array<Array>] sample records, each an array of column values
# @return [Embulk::Schema] schema holding one Embulk::Column per guessed type
def from_array_records(column_names, samples)
  typed_pairs = types_from_array_records(samples).zip(column_names)

  columns = typed_pairs.map do |guessed, label|
    attributes = { name: label, type: guessed.to_sym }
    # A TimestampTypeMatch also supplies a format, which is passed on to the column.
    attributes[:format] = guessed.format if guessed.is_a?(TimestampTypeMatch)
    Embulk::Column.new(attributes)
  end

  Embulk::Schema.new(columns)
end

.from_hash_records(array_of_hash) ⇒ Object



16
17
18
19
20
21
22
23
24
# File 'lib/embulk/guess/schema_guess.rb', line 16

# Builds a schema from hash-shaped records by converting them to array-shaped
# records and delegating to from_array_records.
#
# Column names are the keys of the first record only. Every record is then
# read through those keys, so a key missing from a later record produces
# whatever that record's #[] returns for it (nil for a plain Hash), and keys
# that appear only in later records are not included.
#
# @param array_of_hash [Array<Hash>, nil] sample records; coerced with Array()
# @return [Object] whatever from_array_records returns for the derived
#   column names and samples
# @raise [RuntimeError] if there are no records to inspect
def from_hash_records(array_of_hash)
  records = Array(array_of_hash)
  raise "SchemaGuess Can't guess schema from no records" if records.empty?

  column_names = records.first.keys
  rows = records.map do |record|
    column_names.map { |key| record[key] }
  end

  from_array_records(column_names, rows)
end

.types_from_array_records(samples) ⇒ Object

TODO: This method will become private once guess/csv is refactored.



37
38
39
40
41
42
43
# File 'lib/embulk/guess/schema_guess.rb', line 37

# Guesses one type per column position from array-shaped sample records.
#
# Each value is converted with to_s and passed to guess_type; the guesses are
# grouped by the value's index within its record. Records may differ in
# length: a column simply collects guesses from the records that reach it.
# Each column's list of guesses is then passed to merge_types.
#
# @param samples [Array<Array>] sample records, each an array of column values
# @return [Array] one merge_types result per column position, in column order
def types_from_array_records(samples)
  guesses_by_column = []

  samples.each do |row|
    row.each_with_index do |cell, position|
      # Create the bucket for this column the first time the position is seen.
      bucket = (guesses_by_column[position] ||= [])
      bucket << guess_type(cell.to_s)
    end
  end

  guesses_by_column.map { |guesses| merge_types(guesses) }
end