Class: Google::Cloud::Bigquery::External::CsvSource

Inherits:
DataSource
  • Object
show all
Defined in:
lib/google/cloud/bigquery/external/csv_source.rb

Overview

CsvSource

CsvSource is a subclass of DataSource and represents a CSV external data source, such as a file in Google Cloud Storage or Google Drive, that can be queried directly even though the data is not stored in BigQuery. Instead of loading or streaming the data, this object references the external data source.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.autodetect = true
  csv.skip_leading_rows = 1
end

data = bigquery.query "SELECT * FROM my_ext_table",
                      external: { my_ext_table: csv_table }

# Iterate over the first page of results
data.each do |row|
  puts row[:name]
end
# Retrieve the next page of results
data = data.next if data.next?

Instance Method Summary collapse

Methods inherited from DataSource

#autodetect, #autodetect=, #avro?, #backup?, #bigtable?, #compression, #compression=, #csv?, #format, #hive_partitioning?, #hive_partitioning_mode, #hive_partitioning_mode=, #hive_partitioning_require_partition_filter=, #hive_partitioning_require_partition_filter?, #hive_partitioning_source_uri_prefix, #hive_partitioning_source_uri_prefix=, #ignore_unknown, #ignore_unknown=, #json?, #max_bad_records, #max_bad_records=, #orc?, #parquet?, #sheets?, #urls

Instance Method Details

#delimiter ⇒ String

The separator for fields in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.delimiter = "|"
end

csv_table.delimiter #=> "|"

Returns:

  • (String)


256
257
258
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 256

##
# Reads the field separator from the underlying CSV options.
#
# @return [String] the current `field_delimiter` value stored on
#   `@gapi.csv_options`.
#
def delimiter
  csv_opts = @gapi.csv_options
  csv_opts.field_delimiter
end

#delimiter=(new_delimiter) ⇒ Object

Set the separator for fields in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.delimiter = "|"
end

csv_table.delimiter #=> "|"

Parameters:

  • new_delimiter (String)

    New delimiter value



277
278
279
280
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 277

##
# Stores a new field separator on the underlying CSV options.
#
# `frozen_check!` runs before anything is written; it is defined outside
# this view and presumably rejects changes to a frozen source.
#
# @param [String] new_delimiter New delimiter value.
#
def delimiter= new_delimiter
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.field_delimiter = new_delimiter
end

#encoding ⇒ String

The character encoding of the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.encoding = "UTF-8"
end

csv_table.encoding #=> "UTF-8"

Returns:

  • (String)


167
168
169
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 167

##
# Reads the character encoding from the underlying CSV options.
#
# @return [String] the current `encoding` value stored on
#   `@gapi.csv_options`.
#
def encoding
  csv_opts = @gapi.csv_options
  csv_opts.encoding
end

#encoding=(new_encoding) ⇒ Object

Set the character encoding of the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.encoding = "UTF-8"
end

csv_table.encoding #=> "UTF-8"

Parameters:

  • new_encoding (String)

    New encoding value



188
189
190
191
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 188

##
# Stores a new character encoding on the underlying CSV options.
#
# `frozen_check!` runs before anything is written; it is defined outside
# this view and presumably rejects changes to a frozen source.
#
# @param [String] new_encoding New encoding value.
#
def encoding= new_encoding
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.encoding = new_encoding
end

#fields ⇒ Array<Schema::Field>

The fields of the schema.

Returns:

  • (Array<Schema::Field>)

439
440
441
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 439

##
# Convenience accessor that delegates to the `fields` of {#schema}.
#
# @return [Array<Schema::Field>] whatever `schema.fields` returns.
#
def fields
  current_schema = schema
  current_schema.fields
end

#headers ⇒ Array<Symbol>

The names of the columns in the schema.

Returns:

  • (Array<Symbol>)

    An array of column names.



448
449
450
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 448

##
# Convenience accessor that delegates to the `headers` of {#schema}.
#
# @return [Array<Symbol>] An array of column names, as returned by
#   `schema.headers`.
#
def headers
  current_schema = schema
  current_schema.headers
end

#iso8859_1? ⇒ Boolean

Checks if the character encoding of the data is "ISO-8859-1".

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.encoding = "ISO-8859-1"
end

csv_table.encoding #=> "ISO-8859-1"
csv_table.iso8859_1? #=> true

Returns:

  • (Boolean)


235
236
237
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 235

##
# Tells whether the configured encoding is exactly "ISO-8859-1".
#
# @return [Boolean] the result of comparing {#encoding} with
#   "ISO-8859-1".
#
def iso8859_1?
  current_encoding = encoding
  current_encoding == "ISO-8859-1"
end

#jagged_rows ⇒ Boolean

Indicates if BigQuery should accept rows that are missing trailing optional columns.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.jagged_rows = true
end

csv_table.jagged_rows #=> true

Returns:

  • (Boolean)


78
79
80
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 78

##
# Reads the jagged-rows flag from the underlying CSV options.
#
# @return [Boolean] the current `allow_jagged_rows` value stored on
#   `@gapi.csv_options`.
#
def jagged_rows
  csv_opts = @gapi.csv_options
  csv_opts.allow_jagged_rows
end

#jagged_rows=(new_jagged_rows) ⇒ Object

Set whether BigQuery should accept rows that are missing trailing optional columns.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.jagged_rows = true
end

csv_table.jagged_rows #=> true

Parameters:

  • new_jagged_rows (Boolean)

    New jagged_rows value



100
101
102
103
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 100

##
# Stores a new jagged-rows flag on the underlying CSV options.
#
# `frozen_check!` runs before anything is written; it is defined outside
# this view and presumably rejects changes to a frozen source.
#
# @param [Boolean] new_jagged_rows New jagged_rows value.
#
def jagged_rows= new_jagged_rows
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.allow_jagged_rows = new_jagged_rows
end

#param_types ⇒ Hash

The types of the fields in the data in the schema, using the same format as the optional query parameter types.

Returns:

  • (Hash)

    A hash with field names as keys, and types as values.



458
459
460
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 458

##
# Convenience accessor that delegates to the `param_types` of {#schema}.
#
# @return [Hash] A hash with field names as keys, and types as values,
#   as returned by `schema.param_types`.
#
def param_types
  current_schema = schema
  current_schema.param_types
end

#quote ⇒ String

The value that is used to quote data sections in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.quote = "'"
end

csv_table.quote #=> "'"

Returns:

  • (String)


299
300
301
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 299

##
# Reads the quote character from the underlying CSV options.
#
# @return [String] the current `quote` value stored on
#   `@gapi.csv_options`.
#
def quote
  csv_opts = @gapi.csv_options
  csv_opts.quote
end

#quote=(new_quote) ⇒ Object

Set the value that is used to quote data sections in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.quote = "'"
end

csv_table.quote #=> "'"

Parameters:

  • new_quote (String)

    New quote value



320
321
322
323
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 320

##
# Stores a new quote character on the underlying CSV options.
#
# `frozen_check!` runs before anything is written; it is defined outside
# this view and presumably rejects changes to a frozen source.
#
# @param [String] new_quote New quote value.
#
def quote= new_quote
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.quote = new_quote
end

#quoted_newlines ⇒ Boolean

Indicates if BigQuery should allow quoted data sections that contain newline characters in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.quoted_newlines = true
end

csv_table.quoted_newlines #=> true

Returns:

  • (Boolean)


123
124
125
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 123

##
# Reads the quoted-newlines flag from the underlying CSV options.
#
# @return [Boolean] the current `allow_quoted_newlines` value stored on
#   `@gapi.csv_options`.
#
def quoted_newlines
  csv_opts = @gapi.csv_options
  csv_opts.allow_quoted_newlines
end

#quoted_newlines=(new_quoted_newlines) ⇒ Object

Set whether BigQuery should allow quoted data sections that contain newline characters in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.quoted_newlines = true
end

csv_table.quoted_newlines #=> true

Parameters:

  • new_quoted_newlines (Boolean)

    New quoted_newlines value



145
146
147
148
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 145

##
# Stores a new quoted-newlines flag on the underlying CSV options.
#
# `frozen_check!` runs before anything is written; it is defined outside
# this view and presumably rejects changes to a frozen source.
#
# @param [Boolean] new_quoted_newlines New quoted_newlines value.
#
def quoted_newlines= new_quoted_newlines
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.allow_quoted_newlines = new_quoted_newlines
end

#schema(replace: false) {|schema| ... } ⇒ Google::Cloud::Bigquery::Schema

The schema for the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.schema do |schema|
    schema.string "name", mode: :required
    schema.string "email", mode: :required
    schema.integer "age", mode: :required
    schema.boolean "active", mode: :required
  end
end

Parameters:

  • replace (Boolean) (defaults to: false)

    Whether to replace the existing schema with the new schema. If true, the fields will replace the existing schema. If false, the fields will be added to the existing schema. The default value is false.

Yields:

  • (schema)

    a block for setting the schema

Yield Parameters:

  • schema (Schema)

    the object accepting the schema

Returns:

  • (Google::Cloud::Bigquery::Schema)

397
398
399
400
401
402
403
404
405
406
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 397

##
# Returns the schema for the data, building it on first access.
#
# The schema is memoized in `@schema`. The first call wraps
# `@gapi.schema` via `Schema.from_gapi`. When `replace` is true, the
# memoized schema is swapped for a fresh `Schema.from_gapi` built with no
# argument, after `frozen_check!` has run. If this object is frozen, the
# schema is frozen as well before it is yielded or returned.
#
# @param [Boolean] replace Whether to discard the existing schema and
#   start from a new one. Defaults to `false`, which keeps the existing
#   schema.
#
# @yield [schema] a block for setting the schema
# @yieldparam [Schema] schema the object accepting the schema
#
# @return [Google::Cloud::Bigquery::Schema] the value of `@schema` after
#   the optional block has run.
#
def schema replace: false
  # Memoize before handling `replace` so the order of side effects is
  # the same whether or not `frozen_check!` raises.
  @schema ||= Schema.from_gapi @gapi.schema

  if replace
    frozen_check!
    @schema = Schema.from_gapi
  end

  @schema.freeze if frozen?
  return @schema unless block_given?

  yield @schema
  # Re-read the ivar: the block may have assigned a different schema.
  @schema
end

#schema=(new_schema) ⇒ Object

Set the schema for the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

# Build a standalone schema first, then attach it to the external source.
csv_schema = bigquery.schema do |schema|
  schema.string "name", mode: :required
  schema.string "email", mode: :required
  schema.integer "age", mode: :required
  schema.boolean "active", mode: :required
end

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url
csv_table.schema = csv_schema

Parameters:

  • new_schema (Schema)

    The schema object.



429
430
431
432
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 429

##
# Replaces the memoized schema with the given object.
#
# `frozen_check!` runs before the assignment; it is defined outside this
# view and presumably rejects changes to a frozen source.
#
# @param [Schema] new_schema The schema object.
#
def schema= new_schema
  frozen_check!
  @schema = new_schema
  # Explicit result for callers that invoke the writer through `send`.
  new_schema
end

#skip_leading_rows ⇒ Integer

The number of rows at the top of a CSV file that BigQuery will skip when reading the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.skip_leading_rows = 1
end

csv_table.skip_leading_rows #=> 1

Returns:

  • (Integer)


343
344
345
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 343

##
# Reads the number of leading rows to skip from the underlying CSV
# options.
#
# @return [Integer] the current `skip_leading_rows` value stored on
#   `@gapi.csv_options`.
#
def skip_leading_rows
  csv_opts = @gapi.csv_options
  csv_opts.skip_leading_rows
end

#skip_leading_rows=(row_count) ⇒ Object

Set the number of rows at the top of a CSV file that BigQuery will skip when reading the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.skip_leading_rows = 1
end

csv_table.skip_leading_rows #=> 1

Parameters:

  • row_count (Integer)

    New skip_leading_rows value



365
366
367
368
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 365

##
# Stores a new leading-row skip count on the underlying CSV options.
#
# `frozen_check!` runs before anything is written; it is defined outside
# this view and presumably rejects changes to a frozen source.
#
# @param [Integer] row_count New skip_leading_rows value.
#
def skip_leading_rows= row_count
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.skip_leading_rows = row_count
end

#utf8? ⇒ Boolean

Checks if the character encoding of the data is "UTF-8". This is the default.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.encoding = "UTF-8"
end

csv_table.encoding #=> "UTF-8"
csv_table.utf8? #=> true

Returns:

  • (Boolean)


212
213
214
215
# File 'lib/google/cloud/bigquery/external/csv_source.rb', line 212

##
# Tells whether the data is treated as "UTF-8".
#
# An unset (`nil`) encoding also counts as UTF-8.
#
# @return [Boolean] `true` when {#encoding} is `nil` or "UTF-8",
#   otherwise `false`.
#
def utf8?
  encoding.nil? || encoding == "UTF-8"
end