Class: CsvReader

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Includes:
Enumerable
Defined in:
lib/csvreader/version.rb,
lib/csvreader/base.rb,
lib/csvreader/base.rb,
lib/csvreader/buffer.rb,
lib/csvreader/parser.rb,
lib/csvreader/reader.rb,
lib/csvreader/builder.rb,
lib/csvreader/converter.rb,
lib/csvreader/parser_std.rb,
lib/csvreader/parser_tab.rb,
lib/csvreader/parser_json.rb,
lib/csvreader/parser_yaml.rb,
lib/csvreader/parser_fixed.rb,
lib/csvreader/parser_table.rb,
lib/csvreader/parser_strict.rb

Overview

Note: CsvReader is a class for now. Open question: change it to a module - why? why not?

Defined Under Namespace

Modules: Version Classes: Buffer, Builder, Converter, Error, ParseError, Parser, ParserFixed, ParserJson, ParserStd, ParserStrict, ParserTab, ParserTable, ParserYaml

Constant Summary collapse

DEFAULT =

Pre-defined (built-in) CsvReader formats/dialects.

Builder.new( Parser::DEFAULT )
NUMERIC =
Builder.new( Parser::NUMERIC )
HUMAN =
Builder.new( Parser::HUMAN )
STRICT =
Builder.new( Parser::STRICT )
RFC4180 =
Builder.new( Parser::RFC4180 )
EXCEL =
Builder.new( Parser::EXCEL )
MYSQL =
Builder.new( Parser::MYSQL )
POSTGRES =
POSTGRESQL           = Builder.new( Parser::POSTGRESQL )
POSTGRES_TEXT =
POSTGRESQL_TEXT = Builder.new( Parser::POSTGRESQL_TEXT )
TAB =
Builder.new( Parser::TAB )
TABLE =
Builder.new( Parser::TABLE )
FIXED =
Builder.new( Parser::FIXED )
JSON =
Builder.new( Parser::JSON )
YAML =
Builder.new( Parser::YAML )
VERSION =
[Version::MAJOR,
Version::MINOR,
Version::PATCH].join('.')

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(str_or_readable, sep: nil, converters: nil, parser: nil, **kwargs) ⇒ CsvReader

Returns a new instance of CsvReader.

Raises:

  • (ArgumentError)


109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/csvreader/reader.rb', line 109

def initialize( str_or_readable, sep: nil, converters: nil, parser: nil, **kwargs )
      raise ArgumentError.new( "Cannot parse nil as CSV" )  if str_or_readable.nil?
      ## todo: use (why? why not) - raise ArgumentError, "Cannot parse nil as CSV"     if data.nil?

      # create the IO object we will read from
      @io = str_or_readable.is_a?(String) ? StringIO.new(str_or_readable) : str_or_readable

      @sep    = sep           # (optional) for ParserStd, ParserStrict
      @kwargs = kwargs        # e.g.  (optional) :width for ParserFixed

      @converters  = Converter.create_converters( converters )

      @parser = parser.nil? ? Parser::DEFAULT : parser
end

Class Method Details



22
23
24
# File 'lib/csvreader/version.rb', line 22

# Builds a one-line banner with the library version, the running Ruby
# (version, release date, platform) and the value returned by root.
#
# @return [String]
def self.banner
  ruby_info = "Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
  "csvreader/#{VERSION} on #{ruby_info} in (#{root})"
end

.default ⇒ Object

alternative alias for DEFAULT



137
# File 'lib/csvreader/base.rb', line 137

# Method-style accessor for the DEFAULT constant.
def self.default
  DEFAULT
end

.excel ⇒ Object

alternative alias for EXCEL



147
# File 'lib/csvreader/base.rb', line 147

# Method-style accessor for the EXCEL constant.
def self.excel
  EXCEL
end

.f ⇒ Object



158
# File 'lib/csvreader/base.rb', line 158

# Shortest alias - simply delegates to fixed.
def self.f
  fixed
end

.fix ⇒ Object



157
# File 'lib/csvreader/base.rb', line 157

# Short alias - simply delegates to fixed.
def self.fix
  fixed
end

.fixed ⇒ Object



156
# File 'lib/csvreader/base.rb', line 156

# Method-style accessor for the FIXED constant.
def self.fixed
  FIXED
end

.foreach(path, sep: nil, converters: nil, parser: nil, **kwargs, &block) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/csvreader/reader.rb', line 58

# Opens the file at path and iterates over its records.
#
# With a block: passes every record to the block and always closes the
# reader afterwards (even if the block raises).
# Without a block: returns an enumerator over the (still open) reader.
#
# @param path [String] file to open (passed on to open)
# @param sep, converters, parser, kwargs passed on to open unchanged
# @return [Object] result of each (block given) or an Enumerator (no block)
def self.foreach(path, sep: nil, converters: nil, parser: nil, **kwargs, &block)
  reader = open(path, sep: sep, converters: converters, parser: parser, **kwargs)

  ## note: caller (responsible) must close file!!!
  ## remove version without block given - why? why not?
  ## use Csv.open().to_enum  or Csv.open().each
  ##   or Csv.new( File.new() ).to_enum or Csv.new( File.new() ).each ???
  return reader.to_enum unless block_given?

  begin
    reader.each(&block)
  ensure
    reader.close
  end
end

.header(path, sep: nil, parser: nil, **kwargs) ⇒ Object

Open question: name this method header or headers - or provide both (with an alias)?



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/csvreader/reader.rb', line 39

# Reads only the first record of the file at path and returns it.
# (open question: use header or headers - or use both (with alias)?)
#
# @param path [String] file to open (passed on to open)
# @param sep, parser, kwargs passed on to open unchanged
# @return [Object, nil] the first record, or nil if there are no records
def self.header(path, sep: nil, parser: nil, **kwargs)
  first_record = nil

  # let the csv library itself parse the header line
  open(path, sep: sep, parser: parser, **kwargs) do |csv|
    csv.each do |record|
      first_record = record
      break   ## stop after the very first record
    end
  end

  ## returns nil for empty (for now) - why? why not?
  ##  return empty record e.g. [] instead - why? why not?
  first_record
end

.hum ⇒ Object



142
# File 'lib/csvreader/base.rb', line 142

# Short alias - simply delegates to human.
def self.hum
  human
end

.human ⇒ Object



141
# File 'lib/csvreader/base.rb', line 141

# Method-style accessor for the HUMAN constant.
def self.human
  HUMAN
end

.hxl ⇒ Object

HXL (Humanitarian Exchange Language)



143
# File 'lib/csvreader/base.rb', line 143

# HXL alias - simply delegates to human.
def self.hxl
  human
end

.j ⇒ Object



161
# File 'lib/csvreader/base.rb', line 161

# Short alias - simply delegates to json.
def self.j
  json
end

.json ⇒ Object



160
# File 'lib/csvreader/base.rb', line 160

# Method-style accessor for the JSON constant.
def self.json
  JSON
end

.mysql ⇒ Object



148
# File 'lib/csvreader/base.rb', line 148

# Method-style accessor for the MYSQL constant.
def self.mysql
  MYSQL
end

.n ⇒ Object



140
# File 'lib/csvreader/base.rb', line 140

# Shortest alias - simply delegates to numeric.
def self.n
  numeric
end

.num ⇒ Object



139
# File 'lib/csvreader/base.rb', line 139

# Short alias - simply delegates to numeric.
def self.num
  numeric
end

.numeric ⇒ Object



138
# File 'lib/csvreader/base.rb', line 138

# Method-style accessor for the NUMERIC constant.
def self.numeric
  NUMERIC
end

.open(path, mode = nil, sep: nil, converters: nil, parser: nil, **kwargs, &block) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/csvreader/reader.rb', line 4

# Opens the file at path and wraps it in a new reader.
#
# Blocks are handled like Ruby's open(), not like the (old old) CSV library:
# with a block the reader is passed to the block and closed afterwards, and
# the block's result is returned; without a block the open reader is
# returned and the caller has to close it.
#
# @param path [String] file to open   ## rename path to filename or name - why? why not?
# @param mode [String, nil] mode passed to File.open; 'r:bom|utf-8' if nil/not passed in
# @param sep, converters, parser, kwargs passed on to new unchanged
# @return [Object] the block's result (block given) or the reader (no block)
def self.open(path, mode = nil, sep: nil, converters: nil, parser: nil, **kwargs, &block)
  f = File.open(path, mode || 'r:bom|utf-8')

  begin
    csv = new(f, sep: sep, converters: converters, parser: parser, **kwargs)
  rescue StandardError
    # building the reader failed - nobody else holds the file handle,
    #  so close it here before passing the error on (avoids a leak)
    f.close
    raise
  end

  return csv unless block_given?

  begin
    block.call(csv)
  ensure
    csv.close
  end
end

.parse(str_or_readable, sep: nil, converters: nil, parser: nil, **kwargs, &block) ⇒ Object



77
78
79
80
81
82
83
84
85
86
87
# File 'lib/csvreader/reader.rb', line 77

# Parses a string or readable object with a freshly created reader.
#
# With a block: every record is passed to the block (via each).
# Without a block: all records are slurped and returned (via read).
#
# note: caller (responsible) must close file!!! - add autoclose - why? why not?
#
# @param str_or_readable, sep, converters, parser, kwargs passed on to new unchanged
# @return [Object] result of each (block given) or of read (no block)
def self.parse(str_or_readable, sep: nil, converters: nil, parser: nil, **kwargs, &block)
  reader = new(str_or_readable, sep: sep, converters: converters, parser: parser, **kwargs)

  # slurp contents, if no block is given
  return reader.read unless block_given?

  reader.each(&block)
end

.parse_line(str_or_readable, sep: nil, converters: nil, **kwargs) ⇒ Object

Note: the parse_line convenience helper is only provided for the default parser.

Always use parse (do NOT use parse_line) - why? why not?
Todo/fix: remove parse_line.


96
97
98
99
100
101
102
103
104
# File 'lib/csvreader/reader.rb', line 96

# Parses only the first record of a string or readable object (via parse).
#
# @param str_or_readable, sep, converters, kwargs passed on to parse unchanged
# @return [Object, nil] the first record, or nil if there are no records
def self.parse_line(str_or_readable, sep: nil, converters: nil, **kwargs)
  first_record = nil

  parse(str_or_readable, sep: sep, converters: converters, **kwargs) do |record|
    first_record = record
    break   # stop after the very first record
  end

  first_record
end

.postgres ⇒ Object



150
# File 'lib/csvreader/base.rb', line 150

# Short alias - simply delegates to postgresql.
def self.postgres
  postgresql
end

.postgres_text ⇒ Object



152
# File 'lib/csvreader/base.rb', line 152

# Short alias - simply delegates to postgresql_text.
def self.postgres_text
  postgresql_text
end

.postgresql ⇒ Object



149
# File 'lib/csvreader/base.rb', line 149

# Method-style accessor for the POSTGRESQL constant.
def self.postgresql
  POSTGRESQL
end

.postgresql_text ⇒ Object



151
# File 'lib/csvreader/base.rb', line 151

# Method-style accessor for the POSTGRESQL_TEXT constant.
def self.postgresql_text
  POSTGRESQL_TEXT
end

.read(path, sep: nil, converters: nil, parser: nil, **kwargs) ⇒ Object



28
29
30
31
32
33
34
35
36
# File 'lib/csvreader/reader.rb', line 28

# Opens the file at path, reads all of its records and returns them;
# the block form of open takes care of closing the reader.
#
# @param path [String] file to open (passed on to open)
# @param sep, converters, parser, kwargs passed on to open unchanged
# @return [Object] whatever the reader's read returns
def self.read(path, sep: nil, converters: nil, parser: nil, **kwargs)
  open(path, sep: sep, converters: converters, parser: parser, **kwargs) do |csv|
    csv.read
  end
end

.rfc4180 ⇒ Object

alternative alias for RFC4180



146
# File 'lib/csvreader/base.rb', line 146

# Method-style accessor for the RFC4180 constant.
def self.rfc4180
  RFC4180
end

.root ⇒ Object



26
27
28
# File 'lib/csvreader/version.rb', line 26

# Returns the absolute path of the directory three levels up from this
# source file (dirname applied three times to __FILE__, then expanded).
#
# @return [String]
def self.root
  dir         = File.dirname(__FILE__)
  parent      = File.dirname(dir)
  grandparent = File.dirname(parent)
  File.expand_path(grandparent)
end

.strict ⇒ Object

alternative alias for STRICT



145
# File 'lib/csvreader/base.rb', line 145

# Method-style accessor for the STRICT constant.
def self.strict
  STRICT
end

.tab ⇒ Object



154
# File 'lib/csvreader/base.rb', line 154

# Method-style accessor for the TAB constant.
def self.tab
  TAB
end

.table ⇒ Object



155
# File 'lib/csvreader/base.rb', line 155

# Method-style accessor for the TABLE constant.
def self.table
  TABLE
end

.version ⇒ Object

keep (as an alternative to VERSION) - why? why not?



16
17
18
# File 'lib/csvreader/version.rb', line 16

# Method-style accessor for the VERSION constant.
## open question: keep (as an alternative to VERSION) - why? why not?
def self.version() VERSION; end

.y ⇒ Object



163
# File 'lib/csvreader/base.rb', line 163

# Short alias - simply delegates to yaml.
def self.y
  yaml
end

.yaml ⇒ Object



162
# File 'lib/csvreader/base.rb', line 162

# Method-style accessor for the YAML constant.
def self.yaml
  YAML
end

Instance Method Details

#each(&block) ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/csvreader/reader.rb', line 142

# Passes every record produced by the parser to the block.
#
# Options handed to the parser:
#   :sep   - only if a separator was set and the parser responds to sep=
#   :width - only if the parser is a ParserFixed (taken from the extra kwargs)
#
# If the converters pipeline is not empty, every value of a record gets
# converted (together with its column index) before the record is passed on.
#
# @return [Enumerator] if no block is given; otherwise the parser's result
def each(&block)
  return to_enum unless block_given?

  options = {}
  ## note: only add separator if present/defined (not nil)
  ##  todo/fix: change sep keyword to "known" classes!!!!
  options[:sep] = @sep if @sep && @parser.respond_to?(:'sep=')
  options[:width] = @kwargs[:width] if @parser.is_a?(ParserFixed)

  ##  todo/fix: print warning about unused / unknown kwargs!!!!!

  ## no converters (size=0 e.g. is []): hand the raw records straight to the block
  return @parser.parse(@io, **options, &block) if @converters.empty?

  ## add "post"-processing with converters pipeline
  ##   that is, convert all strings to integer, float, date, ... if wanted
  @parser.parse(@io, **options) do |raw_record|
    converted = raw_record.each_with_index.map do |value, index|
      @converters.convert(value, index)
    end
    block.call(converted)
  end
end

#read ⇒ Object

Reads all records and returns them as an array (via to_a).



174
# File 'lib/csvreader/reader.rb', line 174

# Slurps all records - simply delegates to to_a.
def read
  to_a
end