Class: Csv_lazy

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/csv_lazy.rb

Overview

A simple library for parsing CSV files through IO objects. It automatically copes with common formatting problems, such as files that contain several spaces after a column, and more.

Instance Method Summary collapse

Constructor Details

#initialize(args = {}, &blk) ⇒ Csv_lazy

Examples

File.open("csvfile.csv", "r") do |fp|
  Csv_lazy.new(:io => fp, :quote_char => '"', :col_sep => ";", :row_sep => "\n", :encode => "utf-8") do |row_array|
    puts "Row: #{row_array}"
  end
end


16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/csv_lazy.rb', line 16

def initialize(args = {}, &blk)
  @args = {
    :quote_char => '"',
    :row_sep => "\n",
    :col_sep => ";",
    :headers => false,
    :buffer_length => 4096
  }.merge(args)
  
  @io = @args[:io]
  @eof = false
  @buffer = ""
  @debug = @args[:debug]
  @encode = @args[:encode]
  @mutex = Mutex.new
  @buffer_length = @args[:buffer_length]
  @escape_char = "\\"
  @escaped_quote = "#{@escape_char}#{@args[:quote_char]}"
  @escaped_quote_double = "#{@escape_char}#{@escape_char}#{@args[:quote_char]}"
  
  #@debug = true
  
  accepted = [:encode, :quote_char, :row_sep, :col_sep, :io, :debug, :headers, :buffer_length]
  @args.each do |key, val|
    if accepted.index(key) == nil
      raise "Unknown argument: '#{key}'."
    end
  end
  
  raise "No ':quote_char' was given." if @args[:quote_char].to_s.strip.empty?
  raise "No ':col_sep' was given." if @args[:col_sep].to_s.empty?
  raise "No ':row_sep' was given." if @args[:row_sep].to_s.empty?
  raise "No ':io' was given." if !@args[:io]
  
  @regex_begin_quote_char = /\A\s*#{Regexp.escape(@args[:quote_char])}/
  
  @regex_row_end = /\A\s*?#{Regexp.escape(@args[:row_sep])}/
  @regex_colsep_next = /\A#{Regexp.escape(@args[:col_sep])}/
  
  @regex_read_until_quote_char = /\A(.*?)#{Regexp.escape(@args[:quote_char])}/
  @regex_read_until_col_sep = /\A(.*?)#{Regexp.escape(@args[:col_sep])}/
  @regex_read_until_row_sep = /\A(.+?)#{Regexp.escape(@args[:row_sep])}/
  @regex_read_until_end = /\A(.+?)\Z/
  
  if @args[:headers]
    headers = []
    read_row.each do |key|
      headers << key.to_sym
    end
    
    @headers = headers
  end
  
  self.each(&blk) if blk
end

Instance Method Details

#each ⇒ Object

Yields each row as an array.



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/csv_lazy.rb', line 73

# Iterates over the remaining rows, calling read_row until it returns a
# falsy value.
#
# With a block, each row is yielded to it while @mutex is held, so
# concurrent iterations over the same reader do not interleave.
#
# Without a block, an Enumerator over the same rows is returned. Nothing is
# read until the enumerator is consumed.
def each
  if block_given?
    @mutex.synchronize do
      while (row = read_row)
        yield(row)
      end
    end
  else
    # Enumerable is a module and cannot be instantiated; Enumerator is the
    # class that wraps a block-driven iteration.
    Enumerator.new do |yielder|
      each { |row| yielder << row }
    end
  end
end

#read_row ⇒ Object

Returns the next row.



92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/csv_lazy.rb', line 92

# Reads the next row from the input.
#
# Columns are gathered by calling read_next_col (defined elsewhere in this
# class; it is expected to fill @row) until it returns a falsy value or
# until the input is exhausted and the buffer is empty.
#
# Returns false when no columns were collected, a Hash keyed by the stored
# headers when headers are in use, and otherwise the Array of columns.
def read_row
  @row = []
  until @eof && @buffer.empty?
    break unless read_next_col
  end

  # Hand the collected columns over and clear the working slot.
  columns, @row = @row, nil

  puts "csv_lazy: Row: #{columns}\n\n" if @debug

  return false if columns.empty?
  return columns unless @headers

  # Pair each value with the header at the same position.
  columns.each_with_index.each_with_object({}) do |(value, position), record|
    record[@headers[position]] = value
  end
end