Method: Polars::IO#read_csv_batched
- Defined in:
- lib/polars/io/csv.rb
#read_csv_batched(source, has_header: true, columns: nil, new_columns: nil, separator: ",", comment_prefix: nil, quote_char: '"', skip_rows: 0, skip_lines: 0, schema_overrides: nil, null_values: nil, missing_utf8_is_empty_string: false, ignore_errors: false, try_parse_dates: false, n_threads: nil, infer_schema_length: N_INFER_DEFAULT, batch_size: 50_000, n_rows: nil, encoding: "utf8", low_memory: false, rechunk: false, skip_rows_after_header: 0, row_index_name: nil, row_index_offset: 0, eol_char: "\n", raise_if_empty: true, truncate_ragged_lines: false, decimal_comma: false) ⇒ BatchedCsvReader
Deprecated.
Use scan_csv().collect_batches instead.
Read a CSV file in batches.
Upon creation of the BatchedCsvReader,
polars will gather statistics and determine the
file chunks. After that, work will only be done
when next_batches is called.
456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 |
# File 'lib/polars/io/csv.rb', line 456

# Read a CSV file in batches.
#
# Builds a BatchedCsvReader for `source` and forwards the parsing options
# to it.
#
# @deprecated Use `scan_csv().collect_batches` instead.
#
# @param source [Object]
#   CSV input; handed to BatchedCsvReader unchanged.
# @param has_header [Boolean]
#   When false, every requested column name must start with "column_".
# @param columns [Object]
#   Column selection; split by Utils.handle_projection_columns into a
#   `projection` and a `columns` value. The reader receives `columns`
#   when it is set, otherwise `projection`.
# @param encoding [String]
#   Only "utf8-lossy" is passed through as given; every other value is
#   forwarded to the reader as "utf8".
#
# All remaining keyword arguments are forwarded to BatchedCsvReader as given.
#
# @return [BatchedCsvReader]
#
# @raise [ArgumentError]
#   If column names are requested without a header and one of them does not
#   start with "column_".
def read_csv_batched(
  source,
  has_header: true,
  columns: nil,
  new_columns: nil,
  separator: ",",
  comment_prefix: nil,
  quote_char: '"',
  skip_rows: 0,
  skip_lines: 0,
  schema_overrides: nil,
  null_values: nil,
  missing_utf8_is_empty_string: false,
  ignore_errors: false,
  try_parse_dates: false,
  n_threads: nil,
  infer_schema_length: N_INFER_DEFAULT,
  batch_size: 50_000,
  n_rows: nil,
  encoding: "utf8",
  low_memory: false,
  rechunk: false,
  skip_rows_after_header: 0,
  row_index_name: nil,
  row_index_offset: 0,
  eol_char: "\n",
  raise_if_empty: true,
  truncate_ragged_lines: false,
  decimal_comma: false
)
  projection, columns = Utils.handle_projection_columns(columns)

  # Without a header row only the autogenerated "column_*" names can be
  # selected by name.
  if columns && !has_header
    autogenerated = columns.all? { |name| name.start_with?("column_") }
    unless autogenerated
      raise ArgumentError, "Specified column names do not start with \"column_\", but autogenerated header names were requested."
    end
  end

  # Anything other than "utf8-lossy" is handed to the reader as plain "utf8".
  reader_encoding = encoding == "utf8-lossy" ? encoding : "utf8"

  reader_options = {
    has_header: has_header,
    columns: columns || projection,
    separator: separator,
    comment_prefix: comment_prefix,
    quote_char: quote_char,
    skip_rows: skip_rows,
    skip_lines: skip_lines,
    schema_overrides: schema_overrides,
    null_values: null_values,
    missing_utf8_is_empty_string: missing_utf8_is_empty_string,
    ignore_errors: ignore_errors,
    try_parse_dates: try_parse_dates,
    n_threads: n_threads,
    infer_schema_length: infer_schema_length,
    batch_size: batch_size,
    n_rows: n_rows,
    encoding: reader_encoding,
    low_memory: low_memory,
    rechunk: rechunk,
    skip_rows_after_header: skip_rows_after_header,
    row_index_name: row_index_name,
    row_index_offset: row_index_offset,
    eol_char: eol_char,
    new_columns: new_columns,
    raise_if_empty: raise_if_empty,
    truncate_ragged_lines: truncate_ragged_lines,
    decimal_comma: decimal_comma
  }

  BatchedCsvReader.new(source, **reader_options)
end