Class: Charty::VectorAdapters::ArrowAdapter

Inherits:
BaseAdapter
  • Object
show all
Includes:
IndexSupport, NameSupport, Enumerable
Defined in:
lib/charty/vector_adapters/arrow_adapter.rb

Instance Attribute Summary

Attributes included from IndexSupport

#index

Attributes included from NameSupport

#name

Attributes inherited from BaseAdapter

#data

Class Method Summary collapse

Instance Method Summary collapse

Methods included from IndexSupport

#[], #[]=

Methods inherited from BaseAdapter

#==, adapter_name, #inverse_log_scale, #log_scale, #percentile, #values_at, #where_in_array

Constructor Details

#initialize(data) ⇒ ArrowAdapter

Returns a new instance of ArrowAdapter.



15
16
17
18
# File 'lib/charty/vector_adapters/arrow_adapter.rb', line 15

# Builds the adapter around +data+.
#
# The value returned by +check_data(data)+ is what gets stored in @data.
# The index is then (re)assigned: an already-present truthy index is kept,
# otherwise a RangeIndex covering 0...length is installed.
#
# @param data the object to wrap (validated/normalized by +check_data+)
def initialize(data)
  @data = check_data(data)
  self.index = index || RangeIndex.new(0 ... length)
end

Class Method Details

.supported?(data) ⇒ Boolean

Returns:

  • (Boolean)


10
11
12
13
# File 'lib/charty/vector_adapters/arrow_adapter.rb', line 10

# Tells whether +data+ can be wrapped by this adapter.
#
# The Arrow constants are probed with +defined?+ first, so the check is safe
# to run when the Arrow library has not been loaded.
#
# @param data [Object] candidate object
# @return [Boolean, nil] true for an Arrow::Array or Arrow::ChunkedArray;
#   otherwise a falsy value (false or nil)
def self.supported?(data)
  return true if defined?(Arrow::Array) && data.is_a?(Arrow::Array)

  defined?(Arrow::ChunkedArray) && data.is_a?(Arrow::ChunkedArray)
end

Instance Method Details

#boolean? ⇒ Boolean

Returns:

  • (Boolean)


44
45
46
47
48
49
50
51
52
53
# File 'lib/charty/vector_adapters/arrow_adapter.rb', line 44

# Reports whether the wrapped data holds boolean values.
#
# @return [Boolean] true for an Arrow::BooleanArray, or for an
#   Arrow::ChunkedArray whose value data type is an Arrow::BooleanDataType;
#   false for anything else
def boolean?
  return true if @data.is_a?(Arrow::BooleanArray)
  return false unless @data.is_a?(Arrow::ChunkedArray)

  @data.value_data_type.is_a?(Arrow::BooleanDataType)
end

#categorical? ⇒ Boolean

Returns:

  • (Boolean)


66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/charty/vector_adapters/arrow_adapter.rb', line 66

# Reports whether the wrapped data should be treated as categorical.
#
# Plain string and dictionary arrays are categorical. For a chunked array
# the decision is made from its value data type, which is a data type
# object rather than an array, so it has to be compared against the
# *DataType classes.
#
# @return [Boolean] true for string/dictionary data, false otherwise
def categorical?
  case @data
  when Arrow::StringArray, Arrow::DictionaryArray
    true
  when Arrow::ChunkedArray
    case @data.value_data_type
    # Arrow::StringArray can never match a data type object; the string
    # counterpart of Arrow::DictionaryDataType is Arrow::StringDataType.
    when Arrow::StringDataType, Arrow::DictionaryDataType
      true
    else
      false
    end
  else
    false
  end
end

#categories ⇒ Object



82
83
84
85
86
87
88
89
# File 'lib/charty/vector_adapters/arrow_adapter.rb', line 82

# Lists the category values of the wrapped data.
#
# When the data already exposes a +dictionary+ it is used as is; otherwise
# the data is dictionary-encoded first and the resulting dictionary is used.
#
# @return [Array] the dictionary converted with +to_a+
def categories
  encoded = @data.respond_to?(:dictionary) ? @data : @data.dictionary_encode
  encoded.dictionary.to_a
end

#drop_na ⇒ Object



114
115
116
117
118
119
120
121
122
# File 'lib/charty/vector_adapters/arrow_adapter.rb', line 114

# Returns a Vector without null entries.
#
# With no nulls present the data is re-wrapped together with the current
# index and name. Otherwise the "drop_null" Arrow function is applied and
# the result is wrapped without passing an index or a name.
#
# @return [Vector]
def drop_na
  return Vector.new(@data, index: index, name: name) if @data.n_nulls.zero?

  drop_null = Arrow::Function.find("drop_null")
  Vector.new(drop_null.execute([@data]).value)
end

#empty? ⇒ Boolean

Returns:

  • (Boolean)


24
25
26
# File 'lib/charty/vector_adapters/arrow_adapter.rb', line 24

# Reports whether the wrapped data has no elements.
#
# @return [Boolean] true when the data's length is zero
def empty?
  @data.length.zero?
end

#eq(val) ⇒ Object



124
125
126
127
# File 'lib/charty/vector_adapters/arrow_adapter.rb', line 124

# Compares the wrapped data against +val+ using Arrow's "equal" function.
#
# @param val the value passed as the second argument of the comparison
# @return [Vector] the comparison result, carrying this vector's index
#   and name
def eq(val)
  equal = Arrow::Function.find("equal")
  comparison = equal.execute([@data, val])
  Vector.new(comparison.value, index: index, name: name)
end

#group_by(grouper) ⇒ Object



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/charty/vector_adapters/arrow_adapter.rb', line 95

# Splits the wrapped data into groups keyed by the values of +grouper+.
#
# For every unique grouper value a boolean mask over the grouper is built
# and the wrapped data is filtered with it. Non-nil keys use Arrow's
# "equal" function; the nil key uses "is_null" on the grouper, so that the
# nil group holds the data values whose grouper entry is null (rather than
# a run of nils sized by the nulls of the data itself).
#
# @param grouper [Vector, Object] grouping values; anything that is not a
#   Vector is wrapped in one first
# @return [Hash{Object => Vector}] one Vector per unique grouper value
def group_by(grouper)
  grouper = Vector.new(grouper) unless grouper.is_a?(Vector)
  group_keys = grouper.unique_values
  grouper_data = grouper.data
  # The compute functions below need Arrow input; rebuild it from plain
  # values when the grouper is backed by something else.
  unless grouper_data.is_a?(Arrow::Array)
    grouper_data = Arrow::Array.new(grouper.to_a)
  end
  equal = Arrow::Function.find("equal")
  group_keys.map { |key|
    mask =
      if key.nil?
        Arrow::Function.find("is_null").execute([grouper_data]).value
      else
        equal.execute([grouper_data, key]).value
      end
    [key, Vector.new(@data.filter(mask))]
  }.to_h
end

#mean ⇒ Object



141
142
143
# File 'lib/charty/vector_adapters/arrow_adapter.rb', line 141

# Returns whatever +mean+ yields when called on the wrapped data.
def mean
  @data.mean
end

#notnull ⇒ Object



129
130
131
132
133
134
135
136
137
138
139
# File 'lib/charty/vector_adapters/arrow_adapter.rb', line 129

# Builds a boolean mask that is true where the wrapped data is not null.
#
# Without nulls an all-true Arrow::BooleanArray is created directly.
# Otherwise the mask comes from Arrow's "is_valid" compute function instead
# of re-interpreting the validity bitmap by hand: that keeps the method
# usable for data that has no +null_bitmap+ accessor (chunked arrays) and
# avoids assuming the bitmap starts at offset 0.
#
# @return [Vector] the mask, carrying this vector's index and name
def notnull
  if @data.n_nulls.zero?
    mask = Arrow::BooleanArray.new([true] * @data.length)
  else
    mask = Arrow::Function.find("is_valid").execute([@data]).value
  end
  Vector.new(mask, index: index, name: name)
end

#numeric? ⇒ Boolean

Returns:

  • (Boolean)


55
56
57
58
59
60
61
62
63
64
# File 'lib/charty/vector_adapters/arrow_adapter.rb', line 55

# Reports whether the wrapped data holds numeric values.
#
# @return [Boolean] true for an Arrow::NumericArray, or for an
#   Arrow::ChunkedArray whose value data type is an Arrow::NumericDataType;
#   false for anything else
def numeric?
  if @data.is_a?(Arrow::NumericArray)
    true
  elsif @data.is_a?(Arrow::ChunkedArray)
    @data.value_data_type.is_a?(Arrow::NumericDataType)
  else
    false
  end
end

#size ⇒ Object



20
21
22
# File 'lib/charty/vector_adapters/arrow_adapter.rb', line 20

# Returns the length reported by the wrapped data.
def size
  @data.length
end

#stdev(population: false) ⇒ Object



145
146
147
148
149
150
151
152
153
# File 'lib/charty/vector_adapters/arrow_adapter.rb', line 145

# Computes the standard deviation through Arrow's "stddev" function.
#
# @param population [Boolean] when truthy the delta degrees of freedom
#   (ddof) is set to 0, otherwise to 1
# @return [Object] the value unwrapped twice from the function's result
def stdev(population: false)
  variance_options = Arrow::VarianceOptions.new
  variance_options.ddof = population ? 0 : 1
  stddev = Arrow::Function.find("stddev")
  stddev.execute([@data], variance_options).value.value
end

#unique_values ⇒ Object



91
92
93
# File 'lib/charty/vector_adapters/arrow_adapter.rb', line 91

# Returns the result of +unique+ on the wrapped data, converted with +to_a+.
def unique_values
  @data.unique.to_a
end

#where(mask) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/charty/vector_adapters/arrow_adapter.rb', line 28

# Selects the elements for which +mask+ is truthy.
#
# The mask is first passed through +check_mask_vector+. Unless its data is
# already an Arrow::BooleanArray, it is converted into one from the mask's
# plain values (integer masks are mapped through +nonzero?+ when the first
# element is an Integer). The index entries at the selected positions are
# carried over to the result.
#
# @param mask the selection mask
# @return [Vector] filtered data with the matching index entries and this
#   vector's name
def where(mask)
  mask = check_mask_vector(mask)
  flags = mask.data
  unless flags.is_a?(Arrow::BooleanArray)
    raw_flags = mask.to_a
    raw_flags = raw_flags.map(&:nonzero?) if raw_flags[0].is_a?(Integer)
    flags = Arrow::BooleanArray.new(raw_flags)
  end
  selected = @data.filter(flags)
  booleans = flags.to_a
  # Pick positions first so that nil/false index entries are still kept.
  kept_positions = booleans.each_index.select { |i| booleans[i] }
  kept_index = kept_positions.map { |i| index[i] }
  Vector.new(selected, index: kept_index, name: name)
end