Module: Daru::Category
- Defined in:
- lib/daru/category.rb
Overview
rubocop:disable Metrics/ModuleLength
Constant Summary collapse
- UNDEFINED =
Object.new.freeze
- CODING_SCHEMES =
%i[dummy deviation helmert simple].freeze
Instance Attribute Summary collapse
-
#base_category ⇒ Object
Returns the value of attribute base_category.
-
#coding_scheme ⇒ Object
Returns the value of attribute coding_scheme.
-
#index ⇒ Object
Returns the value of attribute index.
-
#name ⇒ Object
Returns the value of attribute name.
Instance Method Summary collapse
-
#==(other) ⇒ Object
Two categorical vectors are equal if their index and corresponding values are the same. Returns true if the two vectors are equal, false otherwise.
-
#[](*indexes) ⇒ Object
Returns vector for indexes/positions specified.
-
#[]=(*indexes, val) ⇒ Object
Modifies values at specified indexes/positions.
-
#add_category(*new_categories) ⇒ Object
Associates a category to the vector.
-
#at(*positions) ⇒ Object
Returns vector for positions specified.
-
#categories ⇒ Array
(also: #order)
Returns all the categories with the inherent order.
-
#categories=(cat_with_order) ⇒ Object
Sets order of the categories.
-
#contrast_code(opts = {}) ⇒ Daru::DataFrame
Contrast code the vector according to the coding scheme set.
-
#count(category = UNDEFINED) ⇒ Object
Returns frequency of given category.
-
#count_values(*values) ⇒ Integer
Count the number of values specified.
-
#describe ⇒ Daru::Vector
Gives the summary of data using following parameters - size: size of the data - categories: total number of categories - max_freq: Max no of times a category occurs - max_category: The category which occurs max no of times - min_freq: Min no of times a category occurs - min_category: The category which occurs min no of times.
-
#dup ⇒ Daru::Vector
Duplicates a vector.
-
#each ⇒ Enumerator
Returns an enumerator that enumerates on categorical data.
-
#frequencies(type = :count) ⇒ Daru::Vector
Returns a vector storing count/frequency of each category.
-
#include_values?(*values) ⇒ true, false
Checks whether any one of the mentioned values occurs in the vector.
-
#indexes(*values) ⇒ Array
Return indexes of values specified.
-
#initialize_category(data, opts = {}) ⇒ Object
Initializes a vector to store categorical data.
-
#max ⇒ object
Returns the maximum category according to the order specified.
-
#min ⇒ object
Returns the minimum category according to the order specified.
-
#ordered=(bool) ⇒ Object
Make categorical data ordered or unordered.
-
#ordered? ⇒ Boolean
Tells whether vector is ordered or not.
- #plotting_library=(lib) ⇒ Object
- #positions(*values) ⇒ Object
-
#reindex!(idx) ⇒ Daru::Vector
Sets new index for vector.
-
#reject_values(*values) ⇒ Daru::Vector
Return a vector with specified values removed.
-
#remove_unused_categories ⇒ Daru::Vector
Removes the unused categories.
-
#rename_categories(old_to_new) ⇒ Object
Rename categories.
-
#reorder!(order) ⇒ Object
Reorder the vector with given positions.
-
#replace_values(old_values, new_value) ⇒ Daru::Vector
Replaces specified values with a new value.
-
#set_at(positions, val) ⇒ Object
Modifies values at specified positions.
-
#size ⇒ Object
Size of categorical data.
- #sort ⇒ Object
-
#sort! ⇒ Daru::Vector
Sorts the vector in the order specified.
-
#to_a ⇒ Array
Returns all categorical data.
-
#to_category ⇒ Daru::Vector
Does nothing since it is already of type category.
-
#to_ints ⇒ Array
Returns integer coding for categorical data in the order starting from 0.
-
#to_non_category ⇒ Daru::Vector
Converts a category type vector to non category type vector.
-
#where(bool_array) ⇒ Daru::Vector
For querying the data.
Instance Attribute Details
#base_category ⇒ Object
Returns the value of attribute base_category.
5 6 7 |
# File 'lib/daru/category.rb', line 5 def base_category @base_category end |
#coding_scheme ⇒ Object
Returns the value of attribute coding_scheme.
6 7 8 |
# File 'lib/daru/category.rb', line 6 def coding_scheme @coding_scheme end |
#index ⇒ Object
Returns the value of attribute index.
6 7 8 |
# File 'lib/daru/category.rb', line 6 def index @index end |
#name ⇒ Object
Returns the value of attribute name.
6 7 8 |
# File 'lib/daru/category.rb', line 6 def name @name end |
Instance Method Details
#==(other) ⇒ Object
Two categorical vectors are equal if their index and corresponding values are the same. Returns true if the two vectors are equal, false otherwise.
505 506 507 508 509 |
# File 'lib/daru/category.rb', line 505 def == other size == other.size && to_a == other.to_a && index == other.index end |
#[](*indexes) ⇒ Object
This method accepts both indexes and positions. In case of a collision, the argument will be treated as an index.
Returns vector for indexes/positions specified
191 192 193 194 195 196 197 198 199 200 201 |
# File 'lib/daru/category.rb', line 191 def [] *indexes positions = @index.pos(*indexes) return category_from_position(positions) if positions.is_a? Integer Daru::Vector.new positions.map { |pos| category_from_position pos }, index: @index.subset(*indexes), name: @name, type: :category, ordered: @ordered, categories: categories end |
#[]=(*indexes, val) ⇒ Object
In order to add a new category you need to associate it via #add_category
Modifies values at specified indexes/positions.
245 246 247 248 249 250 251 252 253 254 |
# File 'lib/daru/category.rb', line 245 def []= *indexes, val positions = @index.pos(*indexes) if positions.is_a? Numeric modify_category_at positions, val else positions.each { |pos| modify_category_at pos, val } end self end |
#add_category(*new_categories) ⇒ Object
Associates a category to the vector.
124 125 126 127 |
# File 'lib/daru/category.rb', line 124 def add_category(*new_categories) new_categories -= categories add_extra_categories new_categories end |
#at(*positions) ⇒ Object
Returns vector for positions specified.
214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
# File 'lib/daru/category.rb', line 214 def at *positions original_positions = positions positions = coerce_positions(*positions) validate_positions(*positions) return category_from_position(positions) if positions.is_a? Integer Daru::Vector.new positions.map { |pos| category_from_position(pos) }, index: @index.at(*original_positions), name: @name, type: :category, ordered: @ordered, categories: categories end |
#categories ⇒ Array Also known as: order
Returns all the categories with the inherent order
315 316 317 |
# File 'lib/daru/category.rb', line 315 def categories @cat_hash.keys end |
#categories=(cat_with_order) ⇒ Object
If extra categories are specified, they get added too.
Sets order of the categories.
329 330 331 332 333 |
# File 'lib/daru/category.rb', line 329 def categories= cat_with_order validate_categories(cat_with_order) add_extra_categories(cat_with_order - categories) order_with cat_with_order end |
#contrast_code(opts = {}) ⇒ Daru::DataFrame
To set the coding scheme use #coding_scheme=
Contrast code the vector according to the coding scheme set.
487 488 489 490 491 492 493 494 |
# File 'lib/daru/category.rb', line 487 def contrast_code opts={} if opts[:user_defined] user_defined_coding(opts[:user_defined]) else # TODO: Make various coding schemes code DRY send("#{coding_scheme}_coding".to_sym, opts[:full] || false) end end |
#count(category = UNDEFINED) ⇒ Object
Returns frequency of given category
138 139 140 141 142 143 144 |
# File 'lib/daru/category.rb', line 138 def count category=UNDEFINED return @cat_hash.values.map(&:size).inject(&:+) if category == UNDEFINED # count all raise ArgumentError, "Invalid category #{category}" unless categories.include?(category) @cat_hash[category].size end |
#count_values(*values) ⇒ Integer
Count the number of values specified
703 704 705 706 707 |
# File 'lib/daru/category.rb', line 703 def count_values(*values) values.map { |v| @cat_hash[v].size if @cat_hash.include? v } .compact .inject(0, :+) end |
#describe ⇒ Daru::Vector
Gives the summary of data using following parameters
-
size: size of the data
-
categories: total number of categories
-
max_freq: Max no of times a category occurs
-
max_category: The category which occurs max no of times
-
min_freq: Min no of times a category occurs
-
min_category: The category which occurs min no of times
626 627 628 629 630 631 632 633 634 635 |
# File 'lib/daru/category.rb', line 626 def describe Daru::Vector.new( size: size, categories: categories.size, max_freq: @cat_hash.values.map(&:size).max, max_category: @cat_hash.keys.max_by { |cat| @cat_hash[cat].size }, min_freq: @cat_hash.values.map(&:size).min, min_category: @cat_hash.keys.min_by { |cat| @cat_hash[cat].size } ) end |
#dup ⇒ Daru::Vector
Duplicates a vector
108 109 110 111 112 113 114 115 |
# File 'lib/daru/category.rb', line 108 def dup Daru::Vector.new to_a.dup, name: @name, index: @index.dup, type: :category, categories: categories, ordered: ordered? end |
#each ⇒ Enumerator
Returns an enumerator that enumerates on categorical data
81 82 83 84 85 |
# File 'lib/daru/category.rb', line 81 def each return enum_for(:each) unless block_given? @array.each { |pos| yield cat_from_int pos } self end |
#frequencies(type = :count) ⇒ Daru::Vector
Returns a vector storing count/frequency of each category
157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
# File 'lib/daru/category.rb', line 157 def frequencies type=:count counts = @cat_hash.values.map(&:size) values = case type when :count counts when :fraction counts.map { |c| c / size.to_f } when :percentage counts.map { |c| c / size.to_f * 100 } else raise ArgumentError, 'Type should be either :count, :fraction or'\ " :percentage. #{type} not supported." end Daru::Vector.new values, index: categories, name: name end |
#include_values?(*values) ⇒ true, false
Checks whether any one of the mentioned values occurs in the vector
673 674 675 |
# File 'lib/daru/category.rb', line 673 def include_values?(*values) values.any? { |v| @cat_hash.include?(v) && !@cat_hash[v].empty? } end |
#indexes(*values) ⇒ Array
Return indexes of values specified
716 717 718 719 |
# File 'lib/daru/category.rb', line 716 def indexes(*values) values &= categories index.to_a.values_at(*values.flat_map { |v| @cat_hash[v] }.sort) end |
#initialize_category(data, opts = {}) ⇒ Object
Base category is set to the first category encountered in the vector.
Initializes a vector to store categorical data.
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/daru/category.rb', line 28 def initialize_category data, opts={} @type = :category initialize_core_attributes data if opts[:categories] validate_categories(opts[:categories]) add_extra_categories(opts[:categories] - categories) order_with opts[:categories] end # Specify if the categories are ordered or not. # By default its unordered @ordered = opts[:ordered] || false # The coding scheme to code with. Default is dummy coding. @coding_scheme = :dummy # Base category which won't be present in the coding @base_category = @cat_hash.keys.first # Stores the name of the vector @name = opts[:name] # Index of the vector @index = coerce_index opts[:index] self end |
#max ⇒ object
This operation will only work if the vector is ordered. To make the vector ordered, do `vector.ordered = true`
Returns the maximum category according to the order specified.
409 410 411 412 |
# File 'lib/daru/category.rb', line 409 def max assert_ordered :max categories.last end |
#min ⇒ object
This operation will only work if the vector is ordered. To make the vector ordered, do `vector.ordered = true`
Returns the minimum category according to the order specified.
395 396 397 398 |
# File 'lib/daru/category.rb', line 395 def min assert_ordered :min categories.first end |
#ordered=(bool) ⇒ Object
Make categorical data ordered or unordered.
303 304 305 |
# File 'lib/daru/category.rb', line 303 def ordered= bool @ordered = bool end |
#ordered? ⇒ Boolean
Tells whether vector is ordered or not.
292 293 294 |
# File 'lib/daru/category.rb', line 292 def ordered? @ordered end |
#plotting_library=(lib) ⇒ Object
62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/daru/category.rb', line 62 def plotting_library= lib case lib when :gruff, :nyaplot @plotting_library = lib if Daru.send("has_#{lib}?".to_sym) extend Module.const_get( "Daru::Plotting::Category::#{lib.to_s.capitalize}Library" ) end else raise ArgumentError, "Plotting library #{lib} not supported. "\ 'Supported libraries are :nyaplot and :gruff' end end |
#positions(*values) ⇒ Object
744 745 746 747 |
# File 'lib/daru/category.rb', line 744 def positions(*values) values &= categories values.flat_map { |v| @cat_hash[v] }.sort end |
#reindex!(idx) ⇒ Daru::Vector
Unlike #reorder! which takes positions as input it takes index as an input to reorder the vector
Sets new index for vector. Preserves index->value correspondence.
558 559 560 561 562 563 564 565 566 567 568 569 |
# File 'lib/daru/category.rb', line 558 def reindex! idx idx = Daru::Index.new idx unless idx.is_a? Daru::Index raise ArgumentError, 'Invalid index specified' unless idx.to_a.sort == index.to_a.sort old_categories = categories data = idx.map { |i| self[i] } initialize_core_attributes data self.categories = old_categories self.index = idx self end |
#reject_values(*values) ⇒ Daru::Vector
Return a vector with specified values removed
686 687 688 689 690 691 692 693 694 |
# File 'lib/daru/category.rb', line 686 def reject_values(*values) resultant_pos = size.times.to_a - values.flat_map { |v| @cat_hash[v] } dv = at(*resultant_pos) unless dv.is_a? Daru::Vector pos = resultant_pos.first dv = at(pos..pos) end dv.remove_unused_categories end |
#remove_unused_categories ⇒ Daru::Vector
If the base category is removed, then the first occurring category in the data is taken as the base category. The order of the undeleted categories is preserved.
Removes the unused categories
376 377 378 379 380 381 382 383 384 |
# File 'lib/daru/category.rb', line 376 def remove_unused_categories old_categories = categories initialize_core_attributes to_a self.categories = old_categories & categories self.base_category = @cat_hash.keys.first unless categories.include? base_category self end |
#rename_categories(old_to_new) ⇒ Object
The order of categories after renaming is preserved but new categories are added at the end in the order. Also the base-category is reassigned to new value if it is renamed
Rename categories.
351 352 353 354 355 356 357 358 359 360 361 362 |
# File 'lib/daru/category.rb', line 351 def rename_categories old_to_new old_categories = categories data = to_a.map do |cat| old_to_new.include?(cat) ? old_to_new[cat] : cat end initialize_core_attributes data self.categories = (old_categories - old_to_new.keys) | old_to_new.values self.base_category = old_to_new[base_category] if old_to_new.include? base_category self end |
#reorder!(order) ⇒ Object
Unlike #reindex! which takes index as input, it takes positions as an input to reorder the vector
Reorder the vector with given positions
536 537 538 539 540 541 542 543 544 |
# File 'lib/daru/category.rb', line 536 def reorder! order raise ArgumentError, 'Invalid order specified' unless order.sort == size.times.to_a # TODO: Room for optimization old_data = to_a new_data = order.map { |i| old_data[i] } initialize_core_attributes new_data self end |
#replace_values(old_values, new_value) ⇒ Daru::Vector
It performs the replace in place.
Replaces specified values with a new value
738 739 740 741 742 |
# File 'lib/daru/category.rb', line 738 def replace_values old_values, new_value old_values = [old_values] unless old_values.is_a? Array rename_hash = old_values.map { |v| [v, new_value] }.to_h rename_categories rename_hash end |
#set_at(positions, val) ⇒ Object
Modifies values at specified positions.
270 271 272 273 274 |
# File 'lib/daru/category.rb', line 270 def set_at positions, val validate_positions(*positions) positions.map { |pos| modify_category_at pos, val } self end |
#size ⇒ Object
Size of categorical data.
282 283 284 |
# File 'lib/daru/category.rb', line 282 def size @array.size end |
#sort ⇒ Object
452 453 454 |
# File 'lib/daru/category.rb', line 452 def sort dup.sort! end |
#sort! ⇒ Daru::Vector
This operation will only work if the vector is ordered. To make the vector ordered, do `vector.ordered = true`
Sorts the vector in the order specified.
429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 |
# File 'lib/daru/category.rb', line 429 def sort! # rubocop:disable Metrics/AbcSize # TODO: Simply the code assert_ordered :sort # Build sorted index old_index = @index.to_a new_index = @cat_hash.values.map do |positions| old_index.values_at(*positions) end.flatten @index = @index.class.new new_index # Build sorted data @cat_hash = categories.inject([{}, 0]) do |acc, cat| hash, count = acc cat_count = @cat_hash[cat].size cat_count.times { |i| @array[count+i] = int_from_cat(cat) } hash[cat] = (count...(cat_count+count)).to_a [hash, count + cat_count] end.first self end |
#to_a ⇒ Array
Returns all categorical data
93 94 95 |
# File 'lib/daru/category.rb', line 93 def to_a each.to_a end |
#to_category ⇒ Daru::Vector
Does nothing since it is already of type category.
639 640 641 |
# File 'lib/daru/category.rb', line 639 def to_category self end |
#to_ints ⇒ Array
Returns integer coding for categorical data in the order starting from 0. For example if order is [:a, :b, :c], then :a, will be coded as 0, :b as 1 and :c as 2
520 521 522 |
# File 'lib/daru/category.rb', line 520 def to_ints @array end |
#to_non_category ⇒ Daru::Vector
Converts a category type vector to non category type vector
645 646 647 |
# File 'lib/daru/category.rb', line 645 def to_non_category Daru::Vector.new to_a, name: name, index: index end |
#where(bool_array) ⇒ Daru::Vector
For querying the data
603 604 605 |
# File 'lib/daru/category.rb', line 603 def where bool_array Daru::Core::Query.vector_where self, bool_array end |