Module: Embulk::Guess::SchemaGuess
- Defined in:
- lib/embulk/guess/schema_guess.rb
Defined Under Namespace
Classes: TimestampTypeMatch
Class Method Summary collapse
- .from_array_records(column_names, samples) ⇒ Object
- .from_hash_records(array_of_hash) ⇒ Object
- .types_from_array_records(samples) ⇒ Object
  TODO: this method will be made private once guess/csv is refactored.
Class Method Details
.from_array_records(column_names, samples) ⇒ Object
26 27 28 29 30 31 32 33 34 |
# File 'lib/embulk/guess/schema_guess.rb', line 26
#
# Builds an Embulk::Schema from row-oriented sample values.
#
# One type is obtained per column from types_from_array_records(samples);
# those types are then paired positionally (via zip) with column_names.
# Every column gets :name and :type attributes; a type that is a
# TimestampTypeMatch additionally contributes its #format as :format.
#
# @param column_names [Array] column names, in the same order as the values
#   inside each sample row
# @param samples [Array<Array>] sample rows (one inner collection per record)
# @return [Embulk::Schema] schema built from one Embulk::Column per guessed type
def from_array_records(column_names, samples)
  guessed_types = types_from_array_records(samples)

  columns = guessed_types.zip(column_names).map do |guessed, label|
    attributes = { name: label, type: guessed.to_sym }
    if guessed.is_a?(TimestampTypeMatch)
      # Only timestamp matches carry a format string.
      attributes[:format] = guessed.format
    end
    # Passed as a single positional Hash, exactly like the original call.
    Embulk::Column.new(attributes)
  end

  Embulk::Schema.new(columns)
end
.from_hash_records(array_of_hash) ⇒ Object
16 17 18 19 20 21 22 23 24 |
# File 'lib/embulk/guess/schema_guess.rb', line 16
#
# Guesses a schema from records given as Hashes.
#
# Column names are the keys of the FIRST record only: keys that appear only
# in later records are ignored, and each record is read with Hash#[] for
# every column name. The resulting rows are handed to from_array_records.
#
# @param array_of_hash [Array<Hash>, Hash, nil] the sample records; a single
#   Hash is treated as one record, nil as no records
# @return [Object] whatever from_array_records returns for the extracted
#   column names and rows
# @raise [RuntimeError] when there are no records to guess from
def from_hash_records(array_of_hash)
  # Kernel#Array would explode a lone Hash into [key, value] pairs (which then
  # fails on `.first.keys`), so a single record is wrapped explicitly.
  records = array_of_hash.is_a?(Hash) ? [array_of_hash] : Array(array_of_hash)
  raise "SchemaGuess Can't guess schema from no records" if records.empty?

  column_names = records.first.keys
  samples = records.map { |record| column_names.map { |name| record[name] } }
  from_array_records(column_names, samples)
end
.types_from_array_records(samples) ⇒ Object
TODO: this method will be made private once guess/csv is refactored.
37 38 39 40 41 42 43 |
# File 'lib/embulk/guess/schema_guess.rb', line 37
#
# Produces one merged type per column position from row-oriented samples.
#
# Every value is converted with #to_s and passed to guess_type; the results
# are grouped by the value's index within its row, and each group is then
# reduced with merge_types. (guess_type and merge_types are defined elsewhere
# in this module; their return types are not visible here.)
#
# TODO: this method will be made private once guess/csv is refactored.
#
# @param samples [Array<Array>] sample rows (one inner collection per record)
# @return [Array] the merge_types result for each column position
def types_from_array_records(samples)
  buckets = []

  samples.each do |row|
    row.each_with_index do |cell, position|
      # Lazily create the per-column bucket the first time a position is seen.
      bucket = (buckets[position] ||= [])
      bucket << guess_type(cell.to_s)
    end
  end

  buckets.map { |candidates| merge_types(candidates) }
end