Class: RBHive::TCLISchemaDefinition
- Inherits: Object
- Inheritance chain: Object → RBHive::TCLISchemaDefinition
- Defined in:
- lib/rbhive/t_c_l_i_schema_definition.rb
Constant Summary collapse
# Float "not a number"; computed by division when Float::NAN is not defined.
NAN = defined?(Float::NAN) ? Float::NAN : 0.0 / 0.0

# Positive infinity; computed by division when Float::INFINITY is not defined.
INFINITY = defined?(Float::INFINITY) ? Float::INFINITY : 1.0 / 0.0

# Column type => name of the conversion method sent to a raw value of that type.
TYPES = {
  :boolean  => :to_s,
  :string   => :to_s,
  :float    => :to_f,
  :double   => :to_f,
  :int      => :to_i,
  :bigint   => :to_i,
  :smallint => :to_i,
  :tinyint  => :to_i
}
Instance Attribute Summary collapse
- #schema ⇒ Object (readonly): Returns the value of attribute schema.
Instance Method Summary collapse
- #coerce_column(column_name, value) ⇒ Object
- #coerce_complex_value(value) ⇒ Object
- #coerce_row(row) ⇒ Object
- #coerce_row_to_array(row) ⇒ Object
- #column_names ⇒ Object
- #column_type_map ⇒ Object
- #initialize(schema, example_row) ⇒ TCLISchemaDefinition (constructor): A new instance of TCLISchemaDefinition.
Constructor Details
#initialize(schema, example_row) ⇒ TCLISchemaDefinition
Returns a new instance of TCLISchemaDefinition.
20 21 22 23 |
# File 'lib/rbhive/t_c_l_i_schema_definition.rb', line 20
#
# Stores the schema and the column values of an example row.
#
# @param schema [Object] kept as-is in @schema
# @param example_row [Object, nil] when truthy, its +colVals+ are kept in
#   @example_row; otherwise an empty Array is kept
def initialize(schema, example_row)
  @schema = schema
  @example_row =
    if example_row
      example_row.colVals
    else
      []
    end
end
Instance Attribute Details
#schema ⇒ Object (readonly)
Returns the value of attribute schema.
5 6 7 |
# File 'lib/rbhive/t_c_l_i_schema_definition.rb', line 5
#
# Read-only accessor for the schema given to the constructor.
#
# @return [Object] the current value of @schema
def schema
  @schema
end
Instance Method Details
#coerce_column(column_name, value) ⇒ Object
67 68 69 70 71 72 73 74 |
# File 'lib/rbhive/t_c_l_i_schema_definition.rb', line 67
#
# Converts one raw column value according to the column's type as reported
# by #column_type_map.
#
# For every type except :string, the literals "Infinity", "-Infinity" and
# "NaN" become the matching Float special values. Types whose name starts
# with "array" are handed to #coerce_complex_value. Any other type is
# converted by sending the method listed for it in TYPES; a type missing
# from TYPES leaves the value untouched.
#
# NOTE(review): a nil value in a numeric column is still converted with
# nil.to_i / nil.to_f and therefore comes back as 0 / 0.0 -- confirm that
# this is the intended handling of NULLs.
#
# @param column_name [Symbol] key into #column_type_map
# @param value [Object] raw value of that column
# @return [Object] the coerced value
def coerce_column(column_name, value)
  type = column_type_map[column_name]

  if type != :string
    return INFINITY if value == "Infinity"
    # String#to_f does not understand "-Infinity" (it yields 0.0), so the
    # negative case has to be mapped explicitly as well.
    return -INFINITY if value == "-Infinity"
    return NAN if value == "NaN"
  end

  return coerce_complex_value(value) if type.to_s.start_with?("array")

  conversion_method = TYPES[type]
  conversion_method ? value.send(conversion_method) : value
end
#coerce_complex_value(value) ⇒ Object
80 81 82 83 84 85 |
# File 'lib/rbhive/t_c_l_i_schema_definition.rb', line 80
#
# Parses the JSON text of a complex column value.
#
# @param value [String, nil] JSON text
# @return [Object, nil] the parsed structure, or nil when +value+ is nil,
#   empty, or the literal 'null'
def coerce_complex_value(value)
  absent = value.nil? || value.length == 0 || value == 'null'
  absent ? nil : JSON.parse(value)
end
#coerce_row(row) ⇒ Object
60 61 62 63 64 65 |
# File 'lib/rbhive/t_c_l_i_schema_definition.rb', line 60
#
# Builds a Hash of column name => coerced value for one row.
#
# Each entry of +row.colVals+ is unwrapped with +get_value.value+, paired
# positionally with #column_names and passed through #coerce_column.
#
# @param row [Object] responds to +colVals+
# @return [Hash] column name => coerced value
def coerce_row(row)
  names = column_names
  raw_values = row.colVals.map { |cell| cell.get_value.value }

  names.zip(raw_values).each_with_object({}) do |(name, raw), coerced|
    coerced[name] = coerce_column(name, raw)
  end
end
#coerce_row_to_array(row) ⇒ Object
76 77 78 |
# File 'lib/rbhive/t_c_l_i_schema_definition.rb', line 76
#
# Lists the values of +row+ in #column_names order.
#
# @param row [#[]] looked up once per column name
# @return [Array] one entry per column name
def coerce_row_to_array(row)
  column_names.map do |name|
    row[name]
  end
end
#column_names ⇒ Object
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# File 'lib/rbhive/t_c_l_i_schema_definition.rb', line 25
#
# Returns the memoized list of column names, as Symbols, for this result set.
#
# @return [Array<Symbol>] one name per schema column, followed by one
#   placeholder for every example-row value that has no schema column
def column_names
  @column_names ||= begin
    raw_names = @schema.columns.map { |column| column.columnName }

    # In rare cases Hive can return two identical column names, e.g.
    #   SELECT a.foo, b.foo ...
    # yields two columns called foo with no disambiguation. As a (far from
    # ideal) solution every occurrence of a repeated name gets a numeric
    # suffix (a.foo => :foo_1, b.foo => :foo_2); otherwise one column would
    # trample the other during Hash mapping.
    totals = Hash.new(0)
    raw_names.each { |name| totals[name] += 1 }

    seen = Hash.new(0)
    names = raw_names.map do |name|
      if totals[name] > 1
        seen[name] += 1
        :"#{name}_#{seen[name]}"
      else
        name.to_sym
      end
    end

    # Hive doesn't return schema data for partitions on SELECT * queries, so
    # the example row can hold more values than the schema has columns.
    # Call the extras :_p1, :_p2, etc. to avoid collisions.
    missing = @example_row.length - names.length
    1.upto(missing) { |index| names.push(:"_p#{index}") }

    names
  end
end
#column_type_map ⇒ Object
50 51 52 53 54 55 56 57 58 |
# File 'lib/rbhive/t_c_l_i_schema_definition.rb', line 50
#
# Returns the memoized Hash of column name => column type Symbol.
#
# Names are paired with schema columns by position rather than by name, so
# columns that #column_names had to rename still find their definition.
# The type is the downcased TYPE_NAMES entry for the column's first
# primitive type entry.
#
# @return [Hash{Symbol => Symbol}] type per column; :string whenever the
#   type cannot be determined
def column_type_map
  @column_type_map ||= column_names.zip(@schema.columns).each_with_object({}) do |(name, definition), map|
    # Names beyond the schema (eg partitions in SELECT * queries) have no
    # definition; any failure while reading the type is likewise treated as
    # "unknown" instead of aborting the whole map.
    type_name =
      begin
        definition && TYPE_NAMES[definition.typeDesc.types.first.primitiveEntry.type].downcase
      rescue StandardError
        nil
      end

    map[name] = type_name ? type_name.to_sym : :string
  end
end