Class: Rcsv
- Inherits:
-
Object
- Object
- Rcsv
- Defined in:
- lib/rcsv.rb,
lib/rcsv/version.rb,
ext/rcsv/rcsv.c
Defined Under Namespace
Classes: ParseError
Constant Summary collapse
- BOOLEAN_FALSE =
[nil, false, 0, 'f', 'false']
- VERSION =
"0.3.1"
Instance Attribute Summary collapse
-
#write_options ⇒ Object
readonly
Returns the value of attribute write_options.
Class Method Summary collapse
- .parse(csv_data, options = {}, &block) ⇒ Object
-
.raw_parse(*args) ⇒ Object
The main method that handles parsing.
Instance Method Summary collapse
- #generate_header ⇒ Object
- #generate_row(row) ⇒ Object
-
#initialize(write_options = {}) ⇒ Rcsv
constructor
A new instance of Rcsv.
- #write(io, &block) ⇒ Object
Constructor Details
#initialize(write_options = {}) ⇒ Rcsv
Returns a new instance of Rcsv.
140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
# File 'lib/rcsv.rb', line 140
# Creates a writer configured by +write_options+.
#
# The hash passed in is stored as-is (not copied) and missing entries are
# filled in place:
#   :column_separator  defaults to ','
#   :newline_delimiter defaults to $INPUT_RECORD_SEPARATOR
#   :header            defaults to false
#
# Also precomputes the quoting helpers used when generating rows: the quote
# character, its escaped (doubled) form, and a regexp matching any character
# that forces a field to be quoted (separator, newline delimiter, quote).
#
# NOTE(review): $INPUT_RECORD_SEPARATOR is only defined once the 'English'
# library has been required; presumably the file does so at the top -- confirm.
#
# @param write_options [Hash] writer settings; mutated to hold the defaults
def initialize(write_options = {})
  @write_options = write_options

  @write_options[:column_separator] ||= ','
  @write_options[:newline_delimiter] ||= $INPUT_RECORD_SEPARATOR
  @write_options[:header] ||= false

  @quote = '"'
  @escaped_quote = @quote * 2

  # Character class of everything that requires the field to be quoted.
  @quotable_chars = Regexp.new('[%s%s%s]' % [
    Regexp.escape(@write_options[:column_separator]),
    Regexp.escape(@write_options[:newline_delimiter]),
    Regexp.escape(@quote)
  ])
end
Instance Attribute Details
#write_options ⇒ Object (readonly)
Returns the value of attribute write_options.
9 10 11 |
# File 'lib/rcsv.rb', line 9
# Read-only accessor for the writer settings.
#
# @return [Object] the value of the @write_options instance variable
def write_options
  @write_options
end
Class Method Details
.parse(csv_data, options = {}, &block) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
# File 'lib/rcsv.rb', line 13
# Translates user-facing +options+ into the option hash understood by
# raw_parse and then delegates the actual parsing to it.
#
# Options read here:
#   :header              :use (default), :skip or :none. With :use the first
#                        line is parsed and used as the list of column
#                        headers; with :skip / :none the headers are integer
#                        positions. :use and :skip add 1 to the row offset.
#   :column_separator    only its first character is used; default ','
#   :quote_char          only its first character is used; default '"'
#   :offset_rows         default 0
#   :nostrict            passed through unchanged
#   :parse_empty_fields_as  passed through unchanged
#   :buffer_size         default 1 MiB
#   :row_as_hash         passed through unchanged
#   :columns             Hash keyed by header value; each entry may carry
#                        :alias, :type (:int, :float, :string, :bool),
#                        :default, :match and :not_match
#   :only_listed_columns columns without an entry in :columns get a ' '
#                        conversion and nil name instead of 's' / the header
#
# Note that +options+ itself is mutated: a missing :header is set to :use.
#
# @param csv_data [String, #each_line & #read] CSV text or an IO-like object;
#   a String is wrapped in a StringIO
# @param options [Hash] see above
# @param block forwarded to raw_parse
# @return whatever raw_parse returns for the translated options
# @raise [ParseError] if csv_data is neither a String nor IO-like
# @raise [RuntimeError] if a column :type is not one of the known types
def self.parse(csv_data, options = {}, &block)
  #options = {
    #:column_separator => "\t",
    #:only_listed_columns => true,
    #:header => :use, # :skip, :none
    #:offset_rows => 10,
    #:columns => {
      #'a' => { # can be 0, 1, 2, ... -- column position
        #:alias => :a, # only for hashes
        #:type => :int,
        #:default => 100,
        #:match => '10'
      #},
      #...
    #}
  #}

  options[:header] ||= :use
  raw_options = {}

  raw_options[:col_sep] = options[:column_separator] && options[:column_separator][0] || ','
  raw_options[:quote_char] = options[:quote_char] && options[:quote_char][0] || '"'
  raw_options[:offset_rows] = options[:offset_rows] || 0
  raw_options[:nostrict] = options[:nostrict]
  raw_options[:parse_empty_fields_as] = options[:parse_empty_fields_as]
  raw_options[:buffer_size] = options[:buffer_size] || 1024 * 1024 # 1 MiB

  if csv_data.is_a?(String)
    csv_data = StringIO.new(csv_data)
  elsif !(csv_data.respond_to?(:each_line) && csv_data.respond_to?(:read))
    inspected_csv_data = csv_data.inspect
    raise ParseError.new("Supplied CSV object #{inspected_csv_data[0..127]}#{inspected_csv_data.size > 128 ? '...' : ''} is neither String nor looks like IO object.")
  end

  if csv_data.respond_to?(:external_encoding)
    raw_options[:output_encoding] = csv_data.external_encoding.to_s
  end

  # Reading the first line below moves the stream; remember where we started
  # so the full parse can begin from the same position.
  initial_position = csv_data.pos

  # NOTE(review): for :skip / :none the inclusive range 0..count yields one
  # more position than split returned fields -- confirm whether the extra
  # trailing entry is intentional before changing it.
  case options[:header]
  when :use
    header = raw_parse(StringIO.new(csv_data.each_line.first), raw_options).first
    raw_options[:offset_rows] += 1
  when :skip
    header = (0..(csv_data.each_line.first.split(raw_options[:col_sep]).count)).to_a
    raw_options[:offset_rows] += 1
  when :none
    header = (0..(csv_data.each_line.first.split(raw_options[:col_sep]).count)).to_a
  end

  raw_options[:row_as_hash] = options[:row_as_hash] # Setting after header parsing

  if options[:columns]
    only_rows = []
    except_rows = []
    row_defaults = []
    column_names = []
    row_conversions = ''

    header.each do |column_header|
      column_options = options[:columns][column_header]

      if column_options
        if options[:row_as_hash]
          column_names << (column_options[:alias] || column_header)
        end

        row_defaults << column_options[:default]

        # A single :match / :not_match value is normalised to a one-element
        # array; nil means "no filter for this column".
        only_rows << case column_options[:match]
        when Array
          column_options[:match]
        when nil
          nil
        else
          [column_options[:match]]
        end

        except_rows << case column_options[:not_match]
        when Array
          column_options[:not_match]
        when nil
          nil
        else
          [column_options[:not_match]]
        end

        # One conversion character per column.
        row_conversions << case column_options[:type]
        when :int
          'i'
        when :float
          'f'
        when :string
          's'
        when :bool
          'b'
        when nil
          's' # strings by default
        else
          fail "Unknown column type #{column_options[:type].inspect}."
        end
      elsif options[:only_listed_columns]
        column_names << nil
        row_defaults << nil
        only_rows << nil
        except_rows << nil
        row_conversions << ' '
      else
        column_names << column_header
        row_defaults << nil
        only_rows << nil
        except_rows << nil
        row_conversions << 's'
      end
    end

    raw_options[:column_names] = column_names if options[:row_as_hash]
    raw_options[:only_rows] = only_rows unless only_rows.compact.empty?
    raw_options[:except_rows] = except_rows unless except_rows.compact.empty?
    raw_options[:row_defaults] = row_defaults unless row_defaults.compact.empty?
    raw_options[:row_conversions] = row_conversions
  end

  csv_data.pos = initial_position
  raw_parse(csv_data, raw_options, &block)
end
.raw_parse(*args) ⇒ Object
The main method that handles parsing.
455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 |
# File 'ext/rcsv/rcsv.c', line 455
/*
 * Implementation of the raw_parse class method: rb_rcsv_raw_parse(csvio [, options]).
 *
 * Accepts one required argument (csvio) and one optional argument (options);
 * a nil or omitted options is replaced by an empty Hash. Only :nostrict and
 * :parse_empty_fields_as are read from options in this function; everything
 * else is handled by rcsv_raw_parse(), which is run through rb_ensure() with
 * rcsv_free_memory() as the ensure callback.
 *
 * Returns nil when a block is given, otherwise the array held in meta.result
 * (with a trailing empty row removed, if present).
 * Raises rcsv_parse_error when :parse_empty_fields_as has an unsupported value
 * or when csv_init() reports failure.
 */
static VALUE rb_rcsv_raw_parse(int argc, VALUE * argv, VALUE self) {
struct rcsv_metadata meta;
VALUE csvio, options, option;
VALUE ensure_container = rb_ary_new(); /* [] */
struct csv_parser cp;
unsigned char csv_options = CSV_STRICT_FINI | CSV_APPEND_NULL;
/* Setting up some sane defaults */
meta.row_as_hash = false;
meta.empty_field_is_nil = false;
meta.skip_current_row = false;
meta.encoding_index = -1;
meta.num_columns = 0;
meta.current_col = 0;
meta.current_row = 0;
meta.offset_rows = 0;
meta.num_only_rows = 0;
meta.num_except_rows = 0;
meta.num_row_defaults = 0;
meta.num_row_conversions = 0;
meta.only_rows = NULL;
meta.except_rows = NULL;
meta.row_defaults = NULL;
meta.row_conversions = NULL;
meta.column_names = NULL;
/* result points at a one-element array holding a fresh Ruby Array; it is read back as *(meta.result) below */
meta.result = (VALUE[]){rb_ary_new()}; /* [] */
/* csvio is required, options is optional (pun intended) */
rb_scan_args(argc, argv, "11", &csvio, &options);
/* options ||= {} */
if (NIL_P(options)) {
options = rb_hash_new();
}
/* First of all, we parse libcsv-related params so that it fails early if something is wrong with them */
/* By default, parsing is strict */
option = rb_hash_aref(options, ID2SYM(rb_intern("nostrict")));
/* A missing, nil or false :nostrict keeps strict mode on */
if (!option || (option == Qnil)) {
csv_options |= CSV_STRICT;
}
/* By default, empty strings are treated as Nils and quoted empty strings are treated as empty Ruby strings */
option = rb_hash_aref(options, ID2SYM(rb_intern("parse_empty_fields_as")));
if ((option == Qnil) || (option == ID2SYM(rb_intern("nil_or_string")))) {
csv_options |= CSV_EMPTY_IS_NULL;
} else if (option == ID2SYM(rb_intern("nil"))) {
meta.empty_field_is_nil = true;
} else if (option == ID2SYM(rb_intern("string"))) {
meta.empty_field_is_nil = false;
} else {
rb_raise(rcsv_parse_error, "The only valid options for :parse_empty_fields_as are :nil, :string and :nil_or_string, but %s was supplied.", RSTRING_PTR(rb_inspect(option)));
}
/* rb_ensure() only expects callback functions to accept and return VALUEs */
/* This ugly hack converts C pointers into Ruby Fixnums in order to pass them in Array */
/* NOTE(review): the (long) casts assume a pointer fits in a long, which does not hold on LLP64 targets such as 64-bit Windows; any change must match how the callbacks decode these entries -- confirm */
rb_ary_push(ensure_container, options); /* [options] */
rb_ary_push(ensure_container, csvio); /* [options, csvio] */
rb_ary_push(ensure_container, LONG2NUM((long)&meta)); /* [options, csvio, &meta] */
rb_ary_push(ensure_container, LONG2NUM((long)&cp)); /* [options, csvio, &meta, &cp] */
/* Try to initialize libcsv */
if (csv_init(&cp, csv_options) == -1) {
rb_raise(rcsv_parse_error, "Couldn't initialize libcsv");
}
/* From now on, cp handles allocated data and should be free'd on exit or exception */
rb_ensure(rcsv_raw_parse, ensure_container, rcsv_free_memory, ensure_container);
/* Remove the last row if it's empty. That happens if CSV file ends with a newline. */
if (RARRAY_LEN(*(meta.result)) && /* meta.result.size != 0 */
RARRAY_LEN(rb_ary_entry(*(meta.result), -1)) == 0) {
rb_ary_pop(*(meta.result));
}
if (rb_block_given_p()) {
return Qnil; /* STREAMING */
} else {
return *(meta.result); /* Return accumulated result */
}
}
|
Instance Method Details
#generate_header ⇒ Object
162 163 164 165 166 |
# File 'lib/rcsv.rb', line 162
# Builds the CSV header line from the :name of each entry in
# @write_options[:columns].
#
# Each name is converted with #to_s. A name that contains the column
# separator, the newline delimiter or a quote is wrapped in double quotes with
# embedded quotes doubled -- the same rule #generate_row applies to fields --
# so such names no longer produce a malformed header line.
#
# @return [String] the header line, terminated by the newline delimiter
def generate_header
  names = @write_options[:columns].map do |column|
    name = column[:name].to_s
    name.match(@quotable_chars) ? "\"#{name.gsub(@quote, @escaped_quote)}\"" : name
  end

  names.join(@write_options[:column_separator]) << @write_options[:newline_delimiter]
end
#generate_row(row) ⇒ Object
168 169 170 171 172 173 174 175 176 177 178 179 180 |
# File 'lib/rcsv.rb', line 168
# Serialises one row into a single CSV line.
#
# Every field is first passed through #process together with the matching
# entry of @write_options[:columns] (or nil when no columns are configured).
# A processed field that matches @quotable_chars is wrapped in double quotes
# with embedded quotes doubled; other fields are written verbatim. Fields are
# joined by the column separator and the line ends with the newline delimiter.
#
# @param row [#size, #each_with_index] the fields of the row
# @return [String] the CSV line
def generate_row(row)
  separator = @write_options[:column_separator]
  last_position = row.size - 1
  line = ''

  row.each_with_index do |value, position|
    text = process(value, @write_options[:columns] && @write_options[:columns][position])

    if text.match(@quotable_chars)
      line << "\"#{text.gsub(@quote, @escaped_quote)}\""
    else
      line << text
    end

    line << separator unless position == last_position
  end

  return line << @write_options[:newline_delimiter]
end
#write(io, &block) ⇒ Object
155 156 157 158 159 160 |
# File 'lib/rcsv.rb', line 155
# Streams CSV output to +io+.
#
# Writes the header line first when @write_options[:header] is truthy, then
# repeatedly yields to the block: every truthy value it returns is written as
# a row, and the first nil/false return value ends the loop.
#
# @param io [#write] destination for the generated lines
# @yieldreturn the next row, or nil/false when there are no more rows
# @return [nil]
def write(io, &block)
  if @write_options[:header]
    io.write(generate_header)
  end

  row = yield
  while row
    io.write(generate_row(row))
    row = yield
  end
end