Class: CsvHashReader

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Includes:
Enumerable
Defined in:
lib/csvreader/base.rb,
lib/csvreader/builder.rb,
lib/csvreader/reader_hash.rb

Defined Under Namespace

Classes: Builder

Constant Summary collapse

DEFAULT =

Pre-defined (built-in) CsvReader formats/dialects

Builder.new( Parser::DEFAULT )
NUMERIC =
Builder.new( Parser::NUMERIC )
STRICT =
Builder.new( Parser::STRICT )
RFC4180 =
Builder.new( Parser::RFC4180 )
EXCEL =
Builder.new( Parser::EXCEL )
MYSQL =
Builder.new( Parser::MYSQL )
POSTGRES =
POSTGRESQL           = Builder.new( Parser::POSTGRESQL )
POSTGRES_TEXT =
POSTGRESQL_TEXT = Builder.new( Parser::POSTGRESQL_TEXT )
TAB =
Builder.new( Parser::TAB )
TABLE =
Builder.new( Parser::TABLE )
FIXED =
Builder.new( Parser::FIXED )
JSON =
Builder.new( Parser::JSON )
YAML =
Builder.new( Parser::YAML )
Parser =

Convenience shortcuts/aliases for the CsvReader support classes

CsvReader::Parser
ParserFixed =
CsvReader::ParserFixed
ParserJson =
CsvReader::ParserJson
ParserYaml =
CsvReader::ParserYaml
Converter =
CsvReader::Converter

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(str_or_readable, headers: nil, sep: nil, converters: nil, header_converters: nil, parser: nil, **kwargs) ⇒ CsvHashReader

Returns a new instance of CsvHashReader.

Raises:

  • (ArgumentError)


109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/csvreader/reader_hash.rb', line 109

def initialize( str_or_readable, headers: nil, sep: nil,
                      converters: nil,
                      header_converters: nil,
                      parser: nil,
                      **kwargs )
      raise ArgumentError.new( "Cannot parse nil as CSV" )  if str_or_readable.nil?
      ## todo: use (why? why not) - raise ArgumentError, "Cannot parse nil as CSV"     if data.nil?

      # create the IO object we will read from
      @io = str_or_readable.is_a?(String) ? StringIO.new(str_or_readable) : str_or_readable

      ## pass in headers as array e.g. ['A', 'B', 'C']
      ##  double check: run header_converters on passed in headers?
      ##    for now - do NOT auto-convert passed in headers - keep them as-is (1:1)
      @names = headers ? headers : nil

      @sep    = sep
      @kwargs = kwargs

      @converters        = Converter.create_converters( converters )
      @header_converters = Converter.create_header_converters( header_converters )

      @parser = parser.nil? ? Parser::DEFAULT : parser
end

Class Method Details

.defaultObject

alternative alias for DEFAULT



190
# File 'lib/csvreader/base.rb', line 190

## alternative alias for DEFAULT
def self.default
  DEFAULT
end

.excelObject

alternative alias for EXCEL



196
# File 'lib/csvreader/base.rb', line 196

## alternative alias for EXCEL
def self.excel
  EXCEL
end

.fObject



207
# File 'lib/csvreader/base.rb', line 207

## shortest alias - hands back whatever the fixed class method returns
def self.f
  fixed
end

.fixObject



206
# File 'lib/csvreader/base.rb', line 206

## short alias - hands back whatever the fixed class method returns
def self.fix
  fixed
end

.fixedObject



205
# File 'lib/csvreader/base.rb', line 205

## alternative alias for FIXED
def self.fixed
  FIXED
end

.foreach(path, headers: nil, sep: nil, converters: nil, header_converters: nil, parser: nil, **kwargs, &block) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/csvreader/reader_hash.rb', line 58

## Open the file at path and walk through its records.
##
##  with a block:    every record gets passed to the block;
##                     the reader gets closed afterwards (even if the block raises)
##  without a block: returns an enumerator on the opened reader
##                     note: caller (responsible) must close file!!!
##
##  all keyword arguments get passed along (1:1) to open
def self.foreach( path, headers: nil,
                        sep: nil,
                        converters: nil,
                        header_converters: nil,
                        parser: nil, **kwargs, &block )
  reader = open( path,
                 headers:           headers,
                 sep:               sep,
                 converters:        converters,
                 header_converters: header_converters,
                 parser:            parser,
                 **kwargs )

  ## todo: remove version without block given - why? why not?
  ##   use Csv.open().to_enum  or Csv.open().each
  ##   or Csv.new( File.new() ).to_enum or Csv.new( File.new() ).each ???
  return reader.to_enum   unless block_given?

  begin
    reader.each( &block )
  ensure
    reader.close
  end
end

.jObject



210
# File 'lib/csvreader/base.rb', line 210

## shortest alias - hands back whatever the json class method returns
def self.j
  json
end

.jsonObject



209
# File 'lib/csvreader/base.rb', line 209

## alternative alias for JSON
def self.json
  JSON
end

.mysqlObject



197
# File 'lib/csvreader/base.rb', line 197

## alternative alias for MYSQL
def self.mysql
  MYSQL
end

.nObject



193
# File 'lib/csvreader/base.rb', line 193

## shortest alias - hands back whatever the numeric class method returns
def self.n
  numeric
end

.numObject



192
# File 'lib/csvreader/base.rb', line 192

## short alias - hands back whatever the numeric class method returns
def self.num
  numeric
end

.numericObject



191
# File 'lib/csvreader/base.rb', line 191

## alternative alias for NUMERIC
def self.numeric
  NUMERIC
end

.open(path, mode = nil, headers: nil, sep: nil, converters: nil, header_converters: nil, parser: nil, **kwargs, &block) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/csvreader/reader_hash.rb', line 14

## Open the file at path and wrap it in a new reader.
##
##   path - file to open (passed on to File.open)
##   mode - File.open mode; defaults to 'r:bom|utf-8' if nil/not passed in
##   headers, sep, converters, header_converters, parser, kwargs
##        - get passed along (1:1) to new
##
##  with a block:    calls the block with the reader, closes the reader
##                     afterwards (even if the block raises) and
##                     returns the result of the block
##  without a block: returns the reader - caller (responsible) must close it!!!
##
##  note: if creating the reader fails the just opened file gets closed
##          before the error gets re-raised (no leaked file handle)
def self.open( path, mode=nil,
               headers: nil,
               sep: nil,
               converters: nil,
               header_converters: nil,
               parser: nil, **kwargs, &block )   ## rename path to filename or name - why? why not?

    ## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
    f = File.open( path, mode || 'r:bom|utf-8' )

    begin
      csv = new(f, headers: headers,
                   sep: sep,
                   converters: converters,
                   header_converters: header_converters,
                   parser: parser, **kwargs )
    rescue StandardError
      ## nobody else holds a reference to the file yet - close it here or leak it
      f.close
      raise
    end

    # handle blocks like Ruby's open(), not like the (old old) CSV library
    return csv   unless block_given?

    begin
      block.call( csv )
    ensure
      csv.close
    end
end

.parse(str_or_readable, headers: nil, sep: nil, converters: nil, header_converters: nil, parser: nil, **kwargs, &block) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/csvreader/reader_hash.rb', line 86

## Parse a string or readable with a new reader.
##
##  with a block:    every record gets passed to the block (via each)
##  without a block: slurps the contents and returns all records (via read)
##
##  note: caller (responsible) must close file!!! - add autoclose - why? why not?
##
##  all keyword arguments get passed along (1:1) to new
def self.parse( str_or_readable, headers: nil,
                      sep: nil,
                      converters: nil,
                      header_converters: nil,
                      parser: nil, **kwargs, &block )
  reader = new( str_or_readable,
                headers:           headers,
                sep:               sep,
                converters:        converters,
                header_converters: header_converters,
                parser:            parser, **kwargs )

  return reader.read   unless block_given?   # slurp contents, if no block is given

  reader.each( &block )
end

.postgresObject



199
# File 'lib/csvreader/base.rb', line 199

## short alias - hands back whatever the postgresql class method returns
def self.postgres
  postgresql
end

.postgres_textObject



201
# File 'lib/csvreader/base.rb', line 201

## short alias - hands back whatever the postgresql_text class method returns
def self.postgres_text
  postgresql_text
end

.postgresqlObject



198
# File 'lib/csvreader/base.rb', line 198

## alternative alias for POSTGRESQL
def self.postgresql
  POSTGRESQL
end

.postgresql_textObject



200
# File 'lib/csvreader/base.rb', line 200

## alternative alias for POSTGRESQL_TEXT
def self.postgresql_text
  POSTGRESQL_TEXT
end

.read(path, headers: nil, sep: nil, converters: nil, header_converters: nil, parser: nil, **kwargs) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/csvreader/reader_hash.rb', line 42

## Open the file at path, slurp all records (via read on the reader)
##  and return them; uses the block version of open.
##
##  all keyword arguments get passed along (1:1) to open
def self.read( path, headers: nil,
                     sep: nil,
                     converters: nil,
                     header_converters: nil,
                     parser: nil,
                     **kwargs )
  options = { headers:           headers,
              sep:               sep,
              converters:        converters,
              header_converters: header_converters,
              parser:            parser }

  open( path, **options, **kwargs ) do |reader|
    reader.read
  end
end

.rfc4180Object

alternative alias for RFC4180



195
# File 'lib/csvreader/base.rb', line 195

## alternative alias for RFC4180
def self.rfc4180
  RFC4180
end

.strictObject

alternative alias for STRICT



194
# File 'lib/csvreader/base.rb', line 194

## alternative alias for STRICT
def self.strict
  STRICT
end

.tabObject



203
# File 'lib/csvreader/base.rb', line 203

## alternative alias for TAB
def self.tab
  TAB
end

.tableObject



204
# File 'lib/csvreader/base.rb', line 204

## alternative alias for TABLE
def self.table
  TABLE
end

.yObject



212
# File 'lib/csvreader/base.rb', line 212

## shortest alias - hands back whatever the yaml class method returns
def self.y
  yaml
end

.yamlObject



211
# File 'lib/csvreader/base.rb', line 211

## alternative alias for YAML
def self.yaml
  YAML
end

Instance Method Details

#each(&block) ⇒ Object



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# File 'lib/csvreader/reader_hash.rb', line 153

## Walk through all records; every record gets passed to the block as a hash
##  (header name => value). Returns an enumerator if no block is given.
##
##  If no header names are known yet, the first row handed over by the parser
##   is used as the header row (run through the header converters, if any)
##   and is NOT passed to the block.
##
## todo/fix:
##   add case for headers/names.size != values.size
##   - add rest option? for if less headers than values (see python csv.DictReader - why? why not?)
##
##   handle case with duplicate and empty header names etc.
def each( &block )
  return to_enum   unless block_given?

  parse_opts = {}
  ## note: only add separator if present/defined (not nil)
  ##         and if the parser has a sep= writer
  ##  todo/fix: change sep keyword to "known" classes!!!!
  parse_opts[:sep]   = @sep              if @sep && @parser.respond_to?( :'sep=' )
  ## note: width only gets passed along to the fixed (width) parser
  parse_opts[:width] = @kwargs[:width]   if @parser.is_a?( ParserFixed )

  @parser.parse( @io, **parse_opts ) do |row|
    if @names.nil?    ## no names yet - this is the (first) headers row
      @names = if @header_converters.empty?
                 row     ## store header row as-is / a.k.a. field/column names
               else
                 row.each_with_index.map do |name, idx|
                   @header_converters.convert( name, idx )
                 end
               end
    else              ## "regular" record
      ## note: zip pads missing values with nil and drops values without a name
      ## todo/fix: check for more values than names/headers!!!
      pairs = @names.zip( row ).to_h
      if @converters.empty?
        block.call( pairs )
      else
        ## "post"-processing with converters pipeline
        ##   that is, convert all strings to integer, float, date, ... if wanted
        converted = pairs.each_with_object( {} ) do |(name, value), rec|
          rec[ name ] = @converters.convert( value, name )
        end
        block.call( converted )
      end
    end
  end
end

#readObject

Slurps all records into an array (via to_a).



202
# File 'lib/csvreader/reader_hash.rb', line 202

## slurp all records - returns the result of to_a
def read
  to_a
end