Class: DataPackage::Interpreter
- Inherits:
-
Object
- Object
- DataPackage::Interpreter
- Defined in:
- lib/datapackage/interpreter.rb
Constant Summary collapse
- INFER_THRESHOLD =
10
- INFER_CONFIDENCE =
0.75
- YEAR_PATTERN =
/[12]\d{3}/
- DATE_PATTERN =
/(\d{1,2}[-\/]\d{1,2}[-\/]\d{2,4})|(\d{4}[-\/]\d{1,2}[-\/]\d{1,2})/
- DATETIME_PATTERN =
/(\d{1,2}[-\/]\d{1,2}[-\/]\d{2,4}|\d{4}[-\/]\d{1,2}[-\/]\d{1,2}).\d{1,2}:\d{2}/
- TIME_PATTERN =
/^\d{1,2}((:\d{1,2})|(am|pm|AM|PM))$/
- INTEGER_PATTERN =
/^\d+$/
- DEFAULT_TYPE_FORMAT =
{'type' => 'any', 'format' => 'default'}
Instance Attribute Summary collapse
-
#csv ⇒ Object
readonly
Returns the value of attribute csv.
-
#threshold ⇒ Object
readonly
Returns the value of attribute threshold.
Instance Method Summary collapse
-
#initialize(csv) ⇒ Interpreter
constructor
A new instance of Interpreter.
- #inspect_value(value) ⇒ Object
- #type_and_format_at(header) ⇒ Object
Constructor Details
#initialize(csv) ⇒ Interpreter
Returns a new instance of Interpreter.
14 15 16 17 |
# File 'lib/datapackage/interpreter.rb', line 14
#
# Builds an interpreter around +csv+.
#
# The inference threshold is the smaller of the number of entries reported by
# +csv.length+ and INFER_THRESHOLD, so short inputs never demand more samples
# than they contain.
#
# @param csv an object responding to +length+; stored as-is in @csv
def initialize(csv)
  @csv = csv
  row_count = csv.length
  @threshold = [row_count, INFER_THRESHOLD].min
end
Instance Attribute Details
#csv ⇒ Object (readonly)
Returns the value of attribute csv.
12 13 14 |
# File 'lib/datapackage/interpreter.rb', line 12
#
# Reader for the @csv instance variable.
#
# @return [Object] the current value of @csv (nil if it was never assigned)
def csv
  @csv
end
#threshold ⇒ Object (readonly)
Returns the value of attribute threshold.
12 13 14 |
# File 'lib/datapackage/interpreter.rb', line 12
#
# Reader for the @threshold instance variable.
#
# @return [Object] the current value of @threshold (nil if it was never assigned)
def threshold
  @threshold
end
Instance Method Details
#inspect_value(value) ⇒ Object
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/datapackage/interpreter.rb', line 40
#
# Classifies a single cell value into a type/format hash.
#
# Non-String values yield DEFAULT_TYPE_FORMAT. Strings are tested in a fixed
# order -- year, datetime, date, time, integer -- and the first match wins;
# anything else falls back to DEFAULT_TYPE_FORMAT.
#
# @param value [Object] the raw cell value
# @return [Hash] a hash with 'type' and 'format' keys
def inspect_value(value)
  return DEFAULT_TYPE_FORMAT unless value.is_a?(String)

  detected =
    if value.length == 4 && YEAR_PATTERN =~ value
      # The length guard restricts the year pattern to exactly four characters.
      'year'
    else
      # Order matters: a datetime string also satisfies DATE_PATTERN.
      case value
      when DATETIME_PATTERN then 'datetime'
      when DATE_PATTERN     then 'date'
      when TIME_PATTERN     then 'time'
      when INTEGER_PATTERN  then 'integer'
      end
    end

  detected ? { 'type' => detected, 'format' => 'default' } : DEFAULT_TYPE_FORMAT
end
#type_and_format_at(header) ⇒ Object
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/datapackage/interpreter.rb', line 19
#
# Infers the type/format hash for the column selected by +header+.
#
# Values are inspected in order with #inspect_value and tallied per result.
# Once at least +threshold+ values have been seen, the first value whose
# inspection result makes up at least INFER_CONFIDENCE of everything inspected
# so far decides the column's type.
#
# @param header the argument handed to +csv.values_at+ to select the column
# @return [Hash] the winning inspection result, or DEFAULT_TYPE_FORMAT when no
#   result reaches the required confidence
def type_and_format_at(header)
  tally = Hash.new(0)

  csv.values_at(header).flatten.each.with_index(1) do |value, seen|
    inspection = inspect_value(value)
    tally[inspection] += 1
    next if seen < threshold

    # fdiv forces floating-point division. Integer#/ truncates, which made any
    # ratio below 1 collapse to 0 and turned the confidence check into
    # "every value so far must agree".
    return inspection if tally[inspection].fdiv(seen) >= INFER_CONFIDENCE
  end

  DEFAULT_TYPE_FORMAT
end