Class: TableSchema::Infer
- Inherits:
-
Object
- Object
- TableSchema::Infer
show all
- Includes:
- Helpers
- Defined in:
- lib/tableschema/infer.rb
Instance Attribute Summary collapse
Instance Method Summary
collapse
Methods included from Helpers
#deep_symbolize_keys, #get_class_for_type, #type_class_lookup
Constructor Details
#initialize(headers, rows, explicit: false, primary_key: nil, row_limit: nil) ⇒ Infer
Returns a new instance of Infer.
11
12
13
14
15
16
17
18
19
20
21
22
23
|
# File 'lib/tableschema/infer.rb', line 11
# Builds an inferrer for the given tabular data and immediately runs inference.
#
# After construction, #schema holds the TableSchema::Schema produced by #infer!.
#
# @param headers [Array] column names; one field descriptor is created per entry
# @param rows [Enumerable] data rows (Arrays or CSV::Row objects) to sample
# @param explicit [Boolean] when true, every field carries explicit
#   `required` / `unique` constraints (see #fields)
# @param primary_key [Object, nil] header treated as the primary key; stored
#   under :primaryKey in the schema descriptor when present
# @param row_limit [Integer, nil] caps how many rows #infer! samples; nil means all
# @return [Infer] a new instance of Infer
def initialize(headers, rows, explicit: false, primary_key: nil, row_limit: nil)
  @headers = headers
  @rows = rows
  @explicit = explicit
  @primary_key = primary_key
  @row_limit = row_limit
  # #fields reads @headers, @explicit and @primary_key, so it must run after
  # those assignments.
  @schema = { fields: fields }
  @schema[:primaryKey] = @primary_key if @primary_key
  infer!
end
|
Instance Attribute Details
#schema ⇒ Object
Returns the value of attribute schema.
9
10
11
|
# File 'lib/tableschema/infer.rb', line 9
# Returns the schema held by this inferrer.
#
# The constructor seeds it with a Hash of field descriptors and #infer! then
# replaces that Hash with a TableSchema::Schema built from it.
#
# @return [Object] the current value of @schema
def schema
  @schema
end
|
Instance Method Details
#available_types ⇒ Object
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
|
# File 'lib/tableschema/infer.rb', line 128
# Lists the type names that #guess_type can try.
#
# #guess_type walks this list in reverse, so entries nearer the end are tried
# first and 'any' is tried last.
#
# @return [Array<String>] a fresh array of type names on every call
def available_types
  %w[
    any
    string
    boolean
    number
    integer
    date
    time
    datetime
    array
    object
    geopoint
    geojson
  ]
end
|
#fields ⇒ Object
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
# File 'lib/tableschema/infer.rb', line 25
# Builds one TableSchema::Field per header.
#
# Each descriptor starts with the header as its name and empty title and
# description. Constraints are attached as follows:
# * explicit mode (@explicit is true): both :required (true) and :unique
#   (whether the header equals @primary_key) are always present;
# * otherwise: only constraints whose value is not false are kept, so the
#   :constraints key appears only on the primary-key field.
#
# @return [Array<TableSchema::Field>] fields in header order
def fields
  explicit = @explicit == true
  @headers.map do |header|
    descriptor = {
      name: header,
      title: '',
      description: ''
    }
    constraints = {
      required: explicit,
      unique: header == @primary_key
    }
    # Outside explicit mode, false constraints are noise and are dropped.
    constraints.delete_if { |_name, value| value == false } unless explicit
    descriptor[:constraints] = constraints unless constraints.empty?
    TableSchema::Field.new(descriptor)
  end
end
|
#guess_format(converter, col) ⇒ Object
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
# File 'lib/tableschema/infer.rb', line 89
# Finds the first non-default format whose cast method accepts the value.
#
# Every public instance method of the converter whose name matches /cast_/ is
# a candidate; the format name is the method name with the first 'cast_'
# removed. A candidate is accepted when calling it with +col+ does not raise
# TableSchema::Exception.
#
# @param converter [Object] type object exposing cast_* methods
# @param col [Object] the cell value to probe
# @return [String] the accepted format name, or TableSchema::DEFAULTS[:format]
#   when no non-default cast succeeds
def guess_format(converter, col)
  default_format = TableSchema::DEFAULTS[:format]
  cast_methods = converter.class.instance_methods.grep(/cast_/)

  cast_methods.each do |cast_method|
    candidate = cast_method.to_s.sub('cast_', '')
    next if candidate == default_format

    begin
      converter.send(cast_method, col)
    rescue TableSchema::Exception
      # This format rejected the value; try the next one.
      next
    end
    return candidate
  end

  default_format
end
|
#guess_type(col, index) ⇒ Object
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
# File 'lib/tableschema/infer.rb', line 69
# Guesses the type and format of a single cell.
#
# Types from #available_types are tried last-to-first. For each one a
# converter is instantiated with the field at +index+, and the first converter
# whose #test returns true decides the type; its format comes from
# #guess_format. When nothing matches, the TableSchema::DEFAULTS values are
# returned.
#
# @param col [Object] the cell value
# @param index [Integer] position of the cell's column in @schema[:fields]
# @return [Hash] a Hash with :type and :format keys
def guess_type(col, index)
  guess = {
    type: TableSchema::DEFAULTS[:type],
    format: TableSchema::DEFAULTS[:format]
  }

  available_types.reverse_each do |candidate|
    class_name = get_class_for_type(candidate)
    caster = Kernel.const_get(class_name).new(@schema[:fields][index])
    next unless caster.test(col) === true

    guess = { type: candidate, format: guess_format(caster, col) }
    break
  end

  guess
end
|
#infer! ⇒ Object
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
# File 'lib/tableschema/infer.rb', line 42
# Samples the rows, guesses a type for every cell, resolves one type per
# column, and replaces @schema with a TableSchema::Schema built from it.
#
# Each row is normalised to the header width first: surplus cells are dropped
# and missing cells are padded with empty strings. Rows are never mutated.
#
# @return [TableSchema::Schema] the schema now stored in @schema
def infer!
  type_matches = []
  headers_length = @headers.count

  @rows.each_with_index do |row, index|
    # NOTE(review): `index > @row_limit` lets row_limit + 1 rows through before
    # stopping. Kept as-is; confirm whether that is the intended meaning.
    break if @row_limit && index > @row_limit

    row = row.fields if row.is_a?(CSV::Row)
    row_length = row.count

    if row_length > headers_length
      # Keep exactly one cell per header. An inclusive 0..headers_length range
      # would keep one cell too many and index past the last field.
      row = row.first(headers_length)
    elsif row_length < headers_length
      # Pad into a new array so the caller's row is left untouched.
      row += [''] * (headers_length - row_length)
    end

    row.each_with_index do |col, idx|
      (type_matches[idx] ||= []) << guess_type(col, idx)
    end
  end

  resolve_types(type_matches)
  @schema = TableSchema::Schema.new(@schema)
end
|
#resolve_types(results) ⇒ Object
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
# File 'lib/tableschema/infer.rb', line 106
# Picks one type/format guess per column and merges it into the matching
# field of @schema[:fields].
#
# The winner is the guess that occurs most often in the column; ties go to
# the guess seen first. Counting happens on the raw guesses — de-duplicating
# first would flatten every count to 1 and make the vote meaningless.
# The +results+ arrays are not modified, and columns without any guesses are
# skipped.
#
# @param results [Array<Array<Hash>>] per-column lists of guesses as produced
#   by #guess_type, indexed like @schema[:fields]
# @return [Array<Array<Hash>>] +results+, unchanged
def resolve_types(results)
  results.each_with_index do |guesses, field_index|
    next if guesses.nil? || guesses.empty?

    counts = guesses.each_with_object(Hash.new(0)) do |guess, tally|
      tally[guess] += 1
    end
    # Hash iteration follows insertion order and max_by keeps the first
    # maximum, so ties resolve to the earliest guess.
    winner, _votes = counts.max_by { |_guess, votes| votes }

    @schema[:fields][field_index].merge!(winner)
  end
end
|