Class: Masticate::Sniffer
Constant Summary collapse
- CandidateDelimiters =
[',', '|', "\t", "~"]
Instance Attribute Summary collapse
-
#col_sep ⇒ Object
readonly
Returns the value of attribute col_sep.
-
#delimstats ⇒ Object
readonly
Returns the value of attribute delimstats.
-
#quote_char ⇒ Object
readonly
Returns the value of attribute quote_char.
-
#stats ⇒ Object
readonly
Returns the value of attribute stats.
Attributes inherited from Base
#csv_options, #filename, #input, #input_count, #output, #output_count
Instance Method Summary collapse
- #consider_delim(line, delim) ⇒ Object
- #count_fields(line, delim) ⇒ Object
- #find_col_sep ⇒ Object
-
#initialize(filename) ⇒ Sniffer
constructor
A new instance of Sniffer.
- #sniff(opts) ⇒ Object
Methods inherited from Base
#emit, #execute, #get, #standard_options, #with_input
Constructor Details
#initialize(filename) ⇒ Sniffer
Returns a new instance of Sniffer.
9 10 11 |
# File 'lib/masticate/sniffer.rb', line 9
#
# Creates a Sniffer bound to +filename+. Only the name is recorded here;
# nothing is opened or read by the constructor itself.
#
# @param filename [Object] value stored verbatim in @filename
def initialize(filename)
  @filename = filename
end
Instance Attribute Details
#col_sep ⇒ Object (readonly)
Returns the value of attribute col_sep.
4 5 6 |
# File 'lib/masticate/sniffer.rb', line 4
#
# Read-only accessor for the col_sep attribute.
#
# @return [Object] whatever is currently held in @col_sep
def col_sep
  @col_sep
end
#delimstats ⇒ Object (readonly)
Returns the value of attribute delimstats.
5 6 7 |
# File 'lib/masticate/sniffer.rb', line 5
#
# Read-only accessor for the delimstats attribute.
#
# @return [Object] whatever is currently held in @delimstats
def delimstats
  @delimstats
end
#quote_char ⇒ Object (readonly)
Returns the value of attribute quote_char.
4 5 6 |
# File 'lib/masticate/sniffer.rb', line 4
#
# Read-only accessor for the quote_char attribute.
#
# @return [Object] whatever is currently held in @quote_char
def quote_char
  @quote_char
end
#stats ⇒ Object (readonly)
Returns the value of attribute stats.
4 5 6 |
# File 'lib/masticate/sniffer.rb', line 4
#
# Read-only accessor for the stats attribute.
#
# @return [Object] whatever is currently held in @stats
def stats
  @stats
end
Instance Method Details
#consider_delim(line, delim) ⇒ Object
44 45 46 47 48 |
# File 'lib/masticate/sniffer.rb', line 44
#
# Evaluates one candidate delimiter against one line.
#
# @quote_char is cleared before counting so that the second element of the
# result reflects only what #count_fields assigned for this line/delimiter.
#
# @param line [String] the line to inspect
# @param delim [String] the candidate delimiter
# @return [Array] two elements: the field count from #count_fields, then the
#   value of @quote_char after that call
def consider_delim(line, delim)
  @quote_char = nil
  # Array elements are evaluated left to right, so @quote_char is read only
  # after count_fields has had the chance to assign it.
  [count_fields(line, delim), @quote_char]
end
#count_fields(line, delim) ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/masticate/sniffer.rb', line 50
#
# Counts how many fields +line+ would split into when +delim+ is the
# separator.
#
# For every delimiter the raw count is "occurrences of delim, plus one".
# For a comma the line is additionally parsed with CSV; when honouring
# double-quote quoting produces fewer fields than the raw count, the parsed
# count is returned and @quote_char is set to '"'.
#
# @param line [String] the line to inspect
# @param delim [String] the candidate delimiter
# @return [Integer] the number of fields; 0 when delim is ',' and the CSV
#   parser rejects the line
def count_fields(line, delim)
  straight_count = line.count(delim) + 1
  return straight_count unless delim == ','

  count_with_quoting =
    begin
      parsed = CSV.parse_line(line)
      # CSV.parse_line returns nil for an empty string; treat that as
      # "quoting changes nothing" rather than calling #count on nil.
      parsed ? parsed.count : straight_count
    rescue CSV::MalformedCSVError
      # this is not valid CSV, e.g. has incorrectly embedded quotes
      0
    end

  return straight_count unless count_with_quoting < straight_count

  # NOTE(review): the 0 sentinel for a malformed line is always below the raw
  # count, so this branch also records a quote character for lines the parser
  # rejected. Kept as-is for compatibility -- confirm whether that is intended.
  @quote_char = '"'
  count_with_quoting
end
#find_col_sep ⇒ Object
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/masticate/sniffer.rb', line 25
#
# Picks the most likely column separator by sampling the input.
#
# Up to ten lines are pulled with #get inside #with_input. For each line and
# each entry of CandidateDelimiters, #consider_delim supplies a field count
# and a quote character; these are accumulated in @delimstats as
#   delim => { :counts => Set of field counts, :quote_char => first non-nil }
# The first sampled line is remembered in @line1 (only if @line1 is not
# already set). The delimiter whose largest observed field count is highest
# wins.
#
# Every candidate is seeded in @delimstats before sampling, so input that
# yields no lines gives each candidate a score of 0 instead of raising on an
# empty table. Ties are resolved in favour of the candidate listed earliest
# in CandidateDelimiters.
#
# @return [String] the winning delimiter
def find_col_sep
  tallies = CandidateDelimiters.each_with_object({}) do |delim, table|
    table[delim] = { :counts => Set.new, :quote_char => nil }
  end
  @delimstats = tallies

  with_input do |_input|
    sample = 10.times.map { get }.compact
    sample.each do |line|
      @line1 ||= line
      CandidateDelimiters.each do |delim|
        entry = tallies[delim]
        fieldcount, quote_char = consider_delim(line, delim)
        entry[:counts] << fieldcount
        entry[:quote_char] ||= quote_char
      end
    end
  end

  # Rank by [best field count, -position] so equal counts fall back to the
  # candidate order explicitly rather than to the sort's internal ordering.
  (winner, _tally), _rank = tallies.each_with_index.max_by do |(_delim, tally), rank|
    [tally[:counts].max || 0, -rank]
  end
  winner
end
#sniff(opts) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/masticate/sniffer.rb', line 13
#
# Detects the column separator and quote character of the input and reports
# them together with the header names taken from the first sampled line.
#
# Headers are parsed with CSV when a quote character was detected, so a
# quoted header containing the separator stays one field and loses its
# surrounding quotes. If that parse is rejected, or no quoting was detected,
# the line is split on the separator. A missing first line yields no headers.
# Each header is stripped of surrounding whitespace.
#
# @param opts [Hash] options; only :stats is consulted here
# @return [Hash] with keys :col_sep, :quote_char, :field_counts and :headers
def sniff(opts = {})
  @col_sep = find_col_sep
  @quote_char = delimstats[@col_sep][:quote_char]
  # NOTE(review): in the code visible here `stats` is the attribute reader, so
  # this assigns @stats to itself -- confirm whether a computation is missing.
  @stats = stats if opts[:stats]

  header_line = @line1.to_s
  raw_headers =
    if @quote_char
      begin
        CSV.parse_line(header_line, :col_sep => @col_sep, :quote_char => @quote_char) || []
      rescue CSV::MalformedCSVError
        # The header itself is not valid quoted CSV; fall back to a plain split.
        header_line.split(@col_sep)
      end
    else
      header_line.split(@col_sep)
    end

  {
    :col_sep => @col_sep,
    :quote_char => @quote_char,
    :field_counts => @stats,
    # CSV yields nil for empty unquoted fields, hence to_s before strip.
    :headers => raw_headers.map { |name| name.to_s.strip }
  }
end