Class: DataKit::CSV::SchemaAnalyzer
- Inherits:
-
Object
- Object
- DataKit::CSV::SchemaAnalyzer
- Defined in:
- lib/data_kit/csv/schema_analyzer.rb
Instance Attribute Summary collapse
-
#csv ⇒ Object
Returns the value of attribute csv.
-
#keys ⇒ Object
Returns the value of attribute keys.
-
#sampling_rate ⇒ Object
Returns the value of attribute sampling_rate.
-
#use_type_hints ⇒ Object
Returns the value of attribute use_type_hints.
Class Method Summary collapse
Instance Method Summary collapse
- #execute ⇒ Object
-
#initialize(csv, options = {}) ⇒ SchemaAnalyzer
constructor
A new instance of SchemaAnalyzer.
Constructor Details
#initialize(csv, options = {}) ⇒ SchemaAnalyzer
Returns a new instance of SchemaAnalyzer.
9 10 11 12 13 14 15 16 17 18 19 |
# File 'lib/data_kit/csv/schema_analyzer.rb', line 9
# Builds an analyzer around +csv+ and records the analysis settings.
#
# @param csv [Object] the CSV source; stored unchanged in @csv
# @param options [Hash] optional settings (defaults to an empty hash)
# @option options [Object] :keys stored in @keys; an empty array when
#   missing or nil (NOTE(review): presumably column names - confirm)
# @option options [Numeric] :sampling_rate stored in @sampling_rate;
#   0.1 when missing or nil
# @option options [Boolean, nil] :use_type_hints enabled when missing,
#   nil, or exactly true; any other value disables it
# @return [SchemaAnalyzer] a new instance of SchemaAnalyzer
def initialize(csv, options = {})
  @csv = csv
  @keys = options[:keys] || []
  @sampling_rate = options[:sampling_rate] || 0.1

  # Only an omitted/nil option or a literal true turns hints on; every
  # other value (false, strings, numbers, ...) turns them off.
  hints = options[:use_type_hints]
  @use_type_hints = hints.nil? || hints == true
end
Instance Attribute Details
#csv ⇒ Object
Returns the value of attribute csv.
4 5 6 |
# File 'lib/data_kit/csv/schema_analyzer.rb', line 4
# Reader for the csv attribute.
#
# @return [Object] the current value of @csv
def csv
  @csv
end
#keys ⇒ Object
Returns the value of attribute keys.
5 6 7 |
# File 'lib/data_kit/csv/schema_analyzer.rb', line 5
# Reader for the keys attribute.
#
# @return [Object] the current value of @keys
def keys
  @keys
end
#sampling_rate ⇒ Object
Returns the value of attribute sampling_rate.
6 7 8 |
# File 'lib/data_kit/csv/schema_analyzer.rb', line 6
# Reader for the sampling_rate attribute.
#
# @return [Object] the current value of @sampling_rate
def sampling_rate
  @sampling_rate
end
#use_type_hints ⇒ Object
Returns the value of attribute use_type_hints.
7 8 9 |
# File 'lib/data_kit/csv/schema_analyzer.rb', line 7
# Reader for the use_type_hints attribute.
#
# @return [Object] the current value of @use_type_hints
def use_type_hints
  @use_type_hints
end
Class Method Details
.analyze(csv, options = {}) ⇒ Object
45 46 47 48 49 50 51 52 53 |
# File 'lib/data_kit/csv/schema_analyzer.rb', line 45
# Convenience entry point: builds an analyzer for +csv+ and runs it.
#
# Only the :keys, :sampling_rate and :use_type_hints entries of +options+
# are forwarded to the constructor; any other entries are ignored.
#
# @param csv [Object] the CSV source handed to the constructor
# @param options [Hash] optional settings (defaults to an empty hash)
# @return [Object] whatever #execute returns
def analyze(csv, options = {})
  analyzer = new(csv,
    :keys => options[:keys],
    :sampling_rate => options[:sampling_rate],
    :use_type_hints => options[:use_type_hints]
  )

  analyzer.execute
end
.sampling_rate(file_size) ⇒ Object
55 56 57 58 59 60 61 62 |
# File 'lib/data_kit/csv/schema_analyzer.rb', line 55
# Suggests a sampling rate for an input of the given size.
#
# Sizes below 1024 * 1024 get a rate of 1.0. Larger sizes get
# 500 / sqrt(file_size), rounded to four decimal places, so the rate
# shrinks as the size grows.
#
# @param file_size [Numeric] input size (NOTE(review): presumably bytes -
#   confirm against callers)
# @return [Float] the suggested sampling rate
def sampling_rate(file_size)
  return 1.0 if file_size < (1024 * 1024)

  scale_factor = 500
  (scale_factor / Math.sqrt(file_size)).round(4)
end
Instance Method Details
#execute ⇒ Object
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/data_kit/csv/schema_analyzer.rb', line 21
# Walks every row of +csv+ and feeds a random sample of them into a
# SchemaAnalysis.
#
# Every row increments the analysis total. A row is sampled when a fresh
# random number in [0, 1) is <= sampling_rate; each value of a sampled
# row is inserted under the stringified header at the same index.
#
# @return [SchemaAnalysis, nil] the populated analysis, or nil when
#   each_row yields no rows
def execute
  analysis = nil
  random = Random.new

  csv.each_row do |row|
    # Built lazily on the first row, so headers are only read once rows
    # are being yielded; with no rows the result stays nil.
    analysis ||= SchemaAnalysis.new(csv.headers, :use_type_hints => use_type_hints)
    analysis.increment_total

    next unless random.rand <= sampling_rate

    analysis.increment_sample
    row.each_with_index do |value, index|
      analysis.insert(csv.headers[index].to_s, value)
    end
  end

  analysis
end