Class: CADataFrame
- Inherits:
-
Object
- Object
- CADataFrame
- Defined in:
- lib/carray-dataframe/join.rb,
lib/carray-dataframe/io.rb,
lib/carray-dataframe/join.rb,
lib/carray-dataframe/group.rb,
lib/carray-dataframe/pivot.rb,
lib/carray-dataframe/to_html.rb,
lib/carray-dataframe/arranger.rb,
lib/carray-dataframe/converter.rb,
lib/carray-dataframe/dataframe.rb,
lib/carray-dataframe/dataframe.rb,
lib/carray-dataframe/dataframe.rb,
lib/carray-dataframe/dataframe.rb,
lib/carray-dataframe/dataframe.rb,
lib/carray-dataframe/dataframe.rb,
lib/carray-dataframe/dataframe.rb,
lib/carray-dataframe/dataframe.rb,
lib/carray-dataframe/reference.rb,
lib/carray-dataframe/loc_accessor.rb,
lib/carray-dataframe/iloc_accessor.rb
Overview
CArray
Defined Under Namespace
Modules: Merge Classes: Arranger, ILocAccessor, LocAccessor, MergeFrame
Instance Attribute Summary collapse
-
#column_data ⇒ Object
readonly
Returns the value of attribute column_data.
-
#column_names ⇒ Object
readonly
Returns the value of attribute column_names.
-
#row_index ⇒ Object
readonly
Returns the value of attribute row_index.
-
#row_number ⇒ Object
readonly
Returns the value of attribute row_number.
Class Method Summary collapse
- .concat(*args) ⇒ Object
- .from_csv(file, sep: ",", rs: $/, quote_char: '"', index: nil, &block) ⇒ Object
- .load(filename) ⇒ Object
- .load_csv(file, sep: ",", rs: $/, encoding: nil, quote_char: '"', index: nil, &block) ⇒ Object
- .load_sqlite3(*args) ⇒ Object
- .merge(*args) ⇒ Object
- .parse_csv(file, sep: ",", rs: $/, quote_char: '"', index: nil, &block) ⇒ Object
- .read_csv(file, sep: ",", rs: $/, quote_char: '"', encoding: nil, index: nil, &block) ⇒ Object
Instance Method Summary collapse
- #-@ ⇒ Object
- #<(other) ⇒ Object
- #<=(other) ⇒ Object
- #>(other) ⇒ Object
- #>=(other) ⇒ Object
- #[](arg, opt = :__dummy__) ⇒ Object
- #[]=(arg, opt = :__dummy__, value) ⇒ Object
- #__methods__ ⇒ Object
- #add_suffix(suf) ⇒ Object
- #append_column(name, new_column = nil, &block) ⇒ Object (also: #append)
-
#arrange(&block) ⇒ Object
Arrange.
- #ascii_table(rowmax = :full, time_format: nil, index: true) ⇒ Object
- #ca(*names) ⇒ Object
- #calculate(label, &block) ⇒ Object
- #classify(name, scale = nil, opt = {}) ⇒ Object
-
#column(spec) ⇒ Object
(also: #col)
Column, Row Access.
- #column_types ⇒ Object
- #columns ⇒ Object
- #columns_to_hash(key_name, value_names) ⇒ Object
- #cross(name1, name2) ⇒ Object
- #delete_masked_rows ⇒ Object
- #delete_rows(&block) ⇒ Object
- #describe ⇒ Object
- #detouch ⇒ Object
- #downcase ⇒ Object
- #drop_column(*columns) ⇒ Object (also: #eliminate_column)
-
#each_column(&block) ⇒ Object
Iterators.
- #each_column_name(&block) ⇒ Object
- #each_row(with: Array, columns: nil, &block) ⇒ Object
- #each_row_index(&block) ⇒ Object
- #each_row_with_row_index(with: Array, &block) ⇒ Object
- #execute(&block) ⇒ Object
- #fill(*names, value) ⇒ Object
- #get_dummies(*names, prefix: nil, prefix_sep: "_") ⇒ Object
- #group_by(*names) ⇒ Object
- #has_column?(name) ⇒ Boolean
- #has_index? ⇒ Boolean
- #head(n = 10) ⇒ Object
- #histogram(name, scale = nil, options = nil) ⇒ Object
- #iloc(&block) ⇒ Object
-
#index ⇒ Object
TO BE FIXED.
-
#initialize(data, index: nil, columns: nil, order: nil, clone: false, &block) ⇒ CADataFrame
constructor
Constructor.
- #insert_column(pos, name, new_column = nil, &block) ⇒ Object
- #inspect ⇒ Object
- #is_finite ⇒ Object
- #is_masked ⇒ Object
- #join(other_df, opts = {}) ⇒ Object
- #loc ⇒ Object
- #matchup(keyname, reference) ⇒ Object
- #mean ⇒ Object
- #merge(*args) ⇒ Object
- #method(hash) ⇒ Object
- #method_missing(name, *args) ⇒ Object
- #objectify ⇒ Object
- #order_by(*names, &block) ⇒ Object
- #pivot(name1, name2) ⇒ Object
- #prepend_column(name, new_column = nil, &block) ⇒ Object (also: #lead)
- #rename(name1, name2) ⇒ Object
-
#reorder(&block) ⇒ Object
Transformation.
- #replace(other) ⇒ Object
- #resample(&block) ⇒ Object
- #reverse ⇒ Object
- #save(filename) ⇒ Object
- #select(*columns, &block) ⇒ Object
- #select_columns(selector = nil) ⇒ Object
- #set_index(index, drop: true, inplace: false) ⇒ Object
- #sum ⇒ Object
- #summary(*names) ⇒ Object
- #tail(n = 10) ⇒ Object
- #to_a(with_index: true) ⇒ Object
- #to_ary ⇒ Object
- #to_ca(*names) ⇒ Object
- #to_csv(io = "", rs: $/, sep: ",", fill: "", with_index: true, time_format: nil, &block) ⇒ Object
- #to_daru ⇒ Object
-
#to_df ⇒ Object
Conversions.
- #to_hash ⇒ Object (also: #to_h)
- #to_html(threshold = 8, time_format: nil, index: true) ⇒ Object
- #to_s ⇒ Object
- #to_sql(tablename) ⇒ Object
- #to_sqlite3(**args) ⇒ Object
- #to_xlsx(filename, sheet_name: 'Sheet1', with_row_index: false, &block) ⇒ Object
- #transpose(columns: nil) ⇒ Object
- #unmask(value = nil) ⇒ Object
-
#unmask!(value = nil) ⇒ Object
Maintenance.
- #vacant_copy ⇒ Object
- #where(mask, *args) ⇒ Object
Constructor Details
#initialize(data, index: nil, columns: nil, order: nil, clone: false, &block) ⇒ CADataFrame
Constructor
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
# File 'lib/carray-dataframe/dataframe.rb', line 13

# Constructor.
#
# Builds the frame from +data+, which may be
# * a Hash           -- converted by columns_to_columns (+columns+ must not be given),
# * a CArray table   -- names come from +columns+, data.column_names or +order+,
# * an Array of Hash -- keys become column names, missing/falsy values become UNDEF,
# * any other Array  -- converted by array_to_columns; needs +columns+ or +order+.
#
# Instance state set here:
#   @column_names  Array holding the column names and their order
#   @column_data   Hash holding the data entities
#   @row_number    Integer number of rows
#   @row_index     row index, installed through set_index
#   @__methods__   Hash of method-name aliases (see #method)
#
# @param data    [Hash, CArray, Array] source data
# @param index   row index handed to set_index (may be nil)
# @param columns column names for CArray / Array data
# @param order   column order; needs one entry per column, each naming an existing column
# @param clone   not implemented; a truthy value raises NotImplementedError
# @yield optional block forwarded to #arrange
# @raise [RuntimeError] for unknown data, missing or mismatching names, size mismatch
def initialize(data, index: nil, columns: nil, order: nil, clone: false, &block)
  # Stores data entity
  case data
  when Hash
    raise "columns option is not needed for hash data" if columns
    @column_data = columns_to_columns(data)
    @column_names = @column_data.keys
  when CArray
    if columns
      @column_names = columns.map(&:to_s)
    elsif data.respond_to?(:column_names)
      if data.column_names.is_a?(Array)
        @column_names = data.column_names.map(&:to_s)
      else
        @column_names = data.dim1.times.map { |i| "c#{i}" }
      end
    elsif order
      @column_names = order.map(&:to_s)
    else
      raise "can't determine column names use columns or order option"
    end
    if @column_names.size != data.dim1
      raise "mismatch between 'column_names' and table columns"
    end
    @column_data = table_to_columns(data)
  when Array
    case data.first
    when Hash
      @column_data = {}
      # Union of all keys, in first-seen order.
      dummy = {}
      data.each do |hash|
        dummy.update(hash)
      end
      @column_names = []
      dummy.each_key do |k|
        list = []
        data.each do |hash|
          list << (hash[k] || UNDEF)
        end
        name = k.to_s
        @column_names << name
        @column_data[name] = list.to_ca
      end
    else
      if columns
        @column_names = columns.map(&:to_s)
      elsif order
        @column_names = order.map(&:to_s)
      else
        raise "columns or order option should be given"
      end
      @column_data = array_to_columns(data)
    end
  else
    raise "unknown data"
  end

  if order
    if @column_names.size != order.size
      raise 'invalid order option'
    end
    new_column_data = {}
    order.each do |key|
      if @column_data.has_key?(key.to_s)
        new_column_data[key.to_s] = @column_data[key.to_s]
      else
        # Double quotes are required here: in a single-quoted literal the
        # '#' started a comment and the offending name was never reported.
        raise "invalid column name '#{key.to_s}' in order option"
      end
    end
    @column_data = new_column_data
    @column_names = new_column_data.keys
  end

  # Sets @row_number and check column length
  if @column_data.empty?
    # Without columns the row count can only come from the index (0 if none).
    @row_number = index ? index.size : 0
  else
    @row_number = @column_data.first[1].size
    if @column_names.any? { |key| @column_data[key].size != @row_number }
      raise "column sizes mismatch"
    end
  end

  # Processing option 'index'
  set_index(index, inplace: true)
  @__methods__ = {}

  if clone
    raise NotImplementedError, "clone option is not implemented"
  end

  if block_given?
    arrange(&block)
  end
end
Dynamic Method Handling
This class handles dynamic methods through the method_missing method
#method_missing(name, *args) ⇒ Object
308 309 310 311 312 313 314 315 316 317 318 319 320 |
# File 'lib/carray-dataframe/dataframe.rb', line 308

# Resolves argument-less calls of unknown methods to columns.
#
# Lookup order: a column with exactly that name, a column whose name is
# obtained by replacing '_' with '.', then an alias stored in @__methods__.
#
# @raise [RuntimeError] when nothing matches or arguments were given
def method_missing(name, *args)
  if args.empty?
    name = name.to_s
    dotted = name.gsub(/_/, '.') ### For R
    return @column_data[name] if has_column?(name)
    return @column_data[dotted] if has_column?(dotted)
    return @column_data[@__methods__[name]] if @__methods__.include?(name)
  end
  raise "no method '#{name}' for CADataFrame"
end
Instance Attribute Details
#column_data ⇒ Object (readonly)
Returns the value of attribute column_data.
117 118 119 |
# File 'lib/carray-dataframe/dataframe.rb', line 117

# Reader for @column_data.
def column_data
  return @column_data
end
#column_names ⇒ Object (readonly)
Returns the value of attribute column_names.
117 118 119 |
# File 'lib/carray-dataframe/dataframe.rb', line 117

# Reader for @column_names.
def column_names
  return @column_names
end
#row_index ⇒ Object (readonly)
Returns the value of attribute row_index.
117 118 119 |
# File 'lib/carray-dataframe/dataframe.rb', line 117

# Reader for @row_index.
def row_index
  return @row_index
end
#row_number ⇒ Object (readonly)
Returns the value of attribute row_number.
117 118 119 |
# File 'lib/carray-dataframe/dataframe.rb', line 117

# Reader for @row_number.
def row_number
  return @row_number
end
Class Method Details
.concat(*args) ⇒ Object
954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 |
# File 'lib/carray-dataframe/dataframe.rb', line 954

# Concatenates the given frames row-wise.
#
# The column names of the first frame decide which columns are built.
# CATimeIndex columns are joined with CATimeIndex.concat, all others with
# CArray.bind using the data type of the first frame's column. A row index
# is produced only when every frame has one.
def self.concat(*args)
  ref = args.first
  new_columns = {}
  ref.column_names.each do |name|
    parts = args.map { |t| t.column(name) }
    new_columns[name] =
      if parts.first.is_a?(CATimeIndex)
        CATimeIndex.concat(*parts)
      else
        CArray.bind(parts.first.data_type, parts, 0)
      end
  end

  indices = args.map(&:row_index)
  new_row_index =
    if !indices.all?
      nil
    elsif indices.first.is_a?(CATimeIndex)
      CATimeIndex.concat(*indices)
    else
      CArray.join(*indices).flatten
    end

  CADataFrame.new(new_columns, index: new_row_index)
end
.from_csv(file, sep: ",", rs: $/, quote_char: '"', index: nil, &block) ⇒ Object
78 79 80 81 |
# File 'lib/carray-dataframe/io.rb', line 78

# Deprecated front end for parse_csv; emits a warning and delegates.
#
# All options, including quote_char (previously dropped, so a custom
# quote character was silently ignored), are forwarded unchanged.
def self.from_csv(file, sep: ",", rs: $/, quote_char: '"', index: nil, &block)
  warn "CADataFrame.from_csv will be obsolete, use CADataFrame.parse_csv"
  self.parse_csv(file, sep: sep, rs: rs, quote_char: quote_char, index: index, &block)
end
.load(filename) ⇒ Object
1321 1322 1323 1324 1325 1326 1327 |
# File 'lib/carray-dataframe/dataframe.rb', line 1321

# Restores a frame written with Marshal.
#
# File.open is used instead of Kernel#open so that a name starting with
# "|" is never run as a command, and the file is read in binary mode as
# Marshal data is binary.
#
# NOTE: Marshal.load can instantiate arbitrary objects; only load files
# from trusted sources.
#
# @param filename [String] path of the dump
# @raise [RuntimeError] "invalid data" when the dump is not a CADataFrame
def self.load(filename)
  out = File.open(filename, "rb") { |io| Marshal.load(io) }
  raise "invalid data" unless out.is_a?(CADataFrame)
  return out
end
.load_csv(file, sep: ",", rs: $/, encoding: nil, quote_char: '"', index: nil, &block) ⇒ Object
60 61 62 63 |
# File 'lib/carray-dataframe/io.rb', line 60

# Deprecated front end for read_csv; emits a warning and delegates.
#
# All options, including index (previously dropped, so the requested row
# index was silently ignored), are forwarded unchanged.
def self.load_csv(file, sep: ",", rs: $/, encoding: nil, quote_char: '"', index: nil, &block)
  warn "CADataFrame.load_csv will be obsolete, use CADataFrame.read_csv"
  self.read_csv(file, sep: sep, rs: rs, quote_char: quote_char, encoding: encoding, index: index, &block)
end
.load_sqlite3(*args) ⇒ Object
34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/carray-dataframe/io.rb', line 34

# Loads a table through CArray.load_sqlite3 and converts it to a frame,
# then applies `mask name, nil` to every column inside an arrange block.
#
# @return the arranged frame, or nil when the conversion yields a falsy value
def self.load_sqlite3(*args)
  df = CArray.load_sqlite3(*args).to_dataframe
  return nil unless df

  return df.arrange {
    column_names.each do |name|
      mask name, nil
    end
  }
end
.merge(*args) ⇒ Object
943 944 945 946 947 948 949 950 951 952 |
# File 'lib/carray-dataframe/dataframe.rb', line 943

# Combines the columns of all given frames into one frame.
#
# Columns are collected in argument order, so a later frame overwrites a
# same-named column of an earlier one. The row index of the first frame
# is reused.
def self.merge(*args)
  ref = args.first
  new_columns = {}
  args.each do |table|
    table.column_names.each { |name| new_columns[name] = table.col(name) }
  end
  CADataFrame.new(new_columns, index: ref.row_index)
end
.parse_csv(file, sep: ",", rs: $/, quote_char: '"', index: nil, &block) ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/carray-dataframe/io.rb', line 65

# Parses CSV through CArray.parse_csv, converts the result with
# to_dataframe(index: index) and applies `mask name, nil` to every column.
#
# @return the arranged frame, or nil when the conversion yields a falsy value
def self.parse_csv(file, sep: ",", rs: $/, quote_char: '"', index: nil, &block)
  table = CArray.parse_csv(file, sep: sep, rs: rs, quote_char: quote_char, &block)
  df = table.to_dataframe(index: index)
  return nil unless df

  return df.arrange {
    column_names.each do |name|
      mask name, nil
    end
  }
end
.read_csv(file, sep: ",", rs: $/, quote_char: '"', encoding: nil, index: nil, &block) ⇒ Object
47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/carray-dataframe/io.rb', line 47

# Reads CSV through CArray.read_csv, converts the result with
# to_dataframe(index: index) and applies `mask name, nil` to every column.
#
# @return the arranged frame, or nil when the conversion yields a falsy value
def self.read_csv(file, sep: ",", rs: $/, quote_char: '"', encoding: nil, index: nil, &block)
  table = CArray.read_csv(file, sep: sep, rs: rs, quote_char: quote_char, encoding: encoding, &block)
  df = table.to_dataframe(index: index)
  return nil unless df

  return df.arrange {
    column_names.each do |name|
      mask name, nil
    end
  }
end
Instance Method Details
#-@ ⇒ Object
988 989 990 |
# File 'lib/carray-dataframe/dataframe.rb', line 988

# Unary minus; delegates to cmp with the :-@ operator.
def -@
  cmp(:-@)
end
#<(other) ⇒ Object
992 993 994 |
# File 'lib/carray-dataframe/dataframe.rb', line 992

# Less-than; delegates to cmp with the :< operator and +other+.
def <(other)
  cmp(:<, other)
end
#<=(other) ⇒ Object
996 997 998 |
# File 'lib/carray-dataframe/dataframe.rb', line 996

# Less-than-or-equal; delegates to cmp with the :<= operator and +other+.
def <=(other)
  cmp(:<=, other)
end
#>(other) ⇒ Object
1000 1001 1002 |
# File 'lib/carray-dataframe/dataframe.rb', line 1000

# Greater-than; delegates to cmp with the :> operator and +other+.
def >(other)
  cmp(:>, other)
end
#>=(other) ⇒ Object
1004 1005 1006 |
# File 'lib/carray-dataframe/dataframe.rb', line 1004

# Greater-than-or-equal; delegates to cmp with the :>= operator and +other+.
def >=(other)
  cmp(:>=, other)
end
#[](arg, opt = :__dummy__) ⇒ Object
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
# File 'lib/carray-dataframe/reference.rb', line 73

# Element reference.
#
# * two arguments           -> loc[arg, opt]
# * Range                   -> iloc[arg] when the range starts with an Integer
#                              and there is no row index, otherwise loc[arg]
# * 1-dim CArray            -> loc[arg]
# * String / Symbol         -> the named column
# * anything else           -> a new frame of the columns chosen by
#                              select_columns(arg), sharing the row index
#
# @raise [RuntimeError] for a CArray whose rank is not 1
def [](arg, opt = :__dummy__)
  return loc[arg, opt] if opt != :__dummy__

  case arg
  when Range
    return iloc[arg] if arg.begin.is_a?(Integer) && !@row_index
    return loc[arg]
  when CArray
    raise "index should be 1-dim array" unless arg.rank == 1
    return loc[arg]
  when String, Symbol
    return column(arg.to_s)
  else
    picked = {}
    select_columns(arg).each do |key|
      picked[key] = @column_data[key]
    end
    return CADataFrame.new(picked, index: @row_index)
  end
end
#[]=(arg, opt = :__dummy__, value) ⇒ Object
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# File 'lib/carray-dataframe/reference.rb', line 102

# Element assignment; mirrors the dispatch of #[].
#
# * two index arguments -> loc[arg, opt] = value
# * Range               -> iloc[arg] = value when the range starts with an
#                          Integer and there is no row index, otherwise
#                          loc[arg] = value (exactly one of the two; the
#                          iloc case used to fall through and assign twice)
# * CArray              -> loc[arg] = value
# * String / Symbol     -> fills the existing column, or appends a new one
# * anything else       -> assigns to each column chosen by select_columns;
#                          a CADataFrame value is assigned column by column
def []=(arg, opt = :__dummy__, value)
  if opt != :__dummy__
    loc[arg, opt] = value
  else
    case arg
    when Range
      if arg.begin.is_a?(Integer) && !@row_index
        iloc[arg] = value
      else
        loc[arg] = value
      end
    when CArray
      loc[arg] = value
    when String, Symbol
      if column(arg.to_s)
        column(arg.to_s)[] = value
      else
        arrange {
          append arg, value
        }
      end
    else
      case value
      when CADataFrame
        column_selector = select_columns(arg)
        # Columns of the value frame are matched by position, not by name.
        values = column_selector.each_index.map { |i| value.column(i).to_ca }
        column_selector.each_with_index do |key, i|
          column(key)[] = values[i]
        end
      else
        column_selector = select_columns(arg)
        column_selector.each do |key|
          column(key)[] = value
        end
      end
    end
  end
end
#__methods__ ⇒ Object
123 124 125 |
# File 'lib/carray-dataframe/dataframe.rb', line 123

# Reader for the alias table @__methods__ (filled by #method).
def __methods__
  @__methods__
end
#add_suffix(suf) ⇒ Object
728 729 730 731 732 733 734 735 |
# File 'lib/carray-dataframe/dataframe.rb', line 728

# Returns a new frame whose column names have +suf+ appended.
# Column data and the row index are passed on unchanged.
def add_suffix(suf)
  renamed = {}
  each_column_name do |name|
    renamed[(name.to_s + suf).to_s] = column(name)
  end
  CADataFrame.new(renamed, index: @row_index)
end
#append_column(name, new_column = nil, &block) ⇒ Object Also known as: append
460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 |
# File 'lib/carray-dataframe/dataframe.rb', line 460

# Adds a column at the end of the frame (in place).
#
# The data comes from +new_column+, else from the block (run with
# instance_exec, receiving the frame), else from an object-typed template
# of the first column. Non-CArray data is converted with to_ca.
#
# When +name+ already exists its data is replaced and the name keeps its
# position; it is no longer pushed a second time onto @column_names.
#
# @param name       column name (converted with to_s)
# @param new_column column data, optional
# @return the stored column
# @raise [RuntimeError] unless the column is 1-dim with @row_number elements
def append_column(name, new_column = nil, &block)
  name = name.to_s
  if new_column
    # use the given data
  elsif block
    new_column = instance_exec(self, &block)
  else
    new_column = @column_data.first[1].template(:object)
  end
  new_column = new_column.to_ca unless new_column.is_a?(CArray)
  if new_column.rank != 1 or new_column.size != @row_number
    raise "invalid shape of appended column"
  end
  @column_names.push(name) unless @column_names.include?(name)
  @column_data[name] = new_column
  return new_column
end
#arrange(&block) ⇒ Object
Arrange
433 434 435 |
# File 'lib/carray-dataframe/dataframe.rb', line 433

# Runs the block through a new Arranger bound to this frame.
def arrange(&block)
  arranger = Arranger.new(self)
  arranger.arrange(&block)
end
#ascii_table(rowmax = :full, time_format: nil, index: true) ⇒ Object
833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 |
# File 'lib/carray-dataframe/dataframe.rb', line 833

# Renders the frame as a text table.
#
# @param rowmax      :full, or an Integer; when the frame has more rows than
#                    that, only leading and trailing rows are shown with a
#                    "..." row in between
# @param time_format strftime format for CATimeIndex columns and row index
#                    (default "%F %T%:z")
# @param index       when true, a leading " " column shows the row index
#                    (or a sequence number when there is no row index)
# @return [String] a "DataFrame: ..." header followed by the table
#
# NOTE(review): the two branches label the row count differently
# ("rows#=" vs "row#=") -- confirm which one is intended.
# NOTE(review): in the wide-character branch +datawidth+ is assigned only
# when datalen.size != 0 but is read unconditionally when computing +len+.
def ascii_table (rowmax = :full, time_format: nil, index: true)
  # Work on a shallow copy so the " " index column never leaks into @column_data.
  columns = @column_data.clone
  @column_names.each do |name|
    if columns[name].is_a?(CATimeIndex)
      if time_format
        columns[name] = columns[name].time.time_strftime(time_format)
      else
        columns[name] = columns[name].time.time_strftime("%F %T%:z")
      end
    end
  end
  if index
    if @row_index
      namelist = [" "] + @column_names
      if @row_index.is_a?(CATimeIndex)
        if time_format
          row_index = @row_index.time.time_strftime(time_format)
        else
          row_index = @row_index.time.time_strftime("%F %T%:z")
        end
      else
        row_index = @row_index
      end
      tbl = CADFArray.new(namelist, columns.update(" " => row_index))
    else
      namelist = [" "] + @column_names
      tbl = CADFArray.new(namelist, columns.update(" " => CArray.int(@row_number).seq))
    end
  else
    namelist = @column_names
    tbl = CADFArray.new(namelist, columns)
  end
  # Abbreviate long tables: head rows, a "..." row, tail rows.
  if rowmax.is_a?(Integer) and @row_number > rowmax
    list = tbl[0..(rowmax/2),nil].to_a
    list.push namelist.map { "..." }
    list.push *(tbl[-rowmax/2+1..-1,nil].to_a)
    tbl = list.to_ca
  end
  datastr = tbl.convert {|c| __obj_to_string__(c) }.unmask("")
  # mb flags the columns whose name or data contains non-ASCII text.
  datamb = datastr.convert(:boolean, &:"ascii_only?").not.sum(0).ne(0)
  namemb = namelist.to_ca.convert(:boolean) {|c| c.to_s.ascii_only? }.eq(0)
  mb = datamb.or(namemb)
  namelen = namelist.map(&:length).to_ca
  datalen = datastr.convert(&:length)
  if mb.max == 0
    # ASCII-only table: pad by character count.
    if datalen.size == 0
      lengths = namelen.to_a
    else
      lengths = datalen.max(0).pmax(namelen).to_a
    end
    hrule = "-" + lengths.map {|len| "-"*len}.join("--") + "-"
    header = " " + [namelist, lengths].transpose.map{|name, len| "#{name.to_s.ljust(len)}" }.join(" ") + " "
    ary = [hrule, header, hrule]
    if datalen.size > 0
      datastr[:i,nil].each_with_index do |blk, i|
        list = blk.flatten.to_a
        ary << " " + [list, lengths].transpose.map{|value, len| "#{value.ljust(len)}"}.join(" ") + " "
      end
    end
    ary << hrule
    return "DataFrame: rows#=#{@row_number}: \n" + ary.join("\n")
  else
    # Non-ASCII text present: pad using the widths reported by __strwidth__.
    namewidth = namelist.to_ca.convert{|c| __strwidth__(c.to_s) }
    if datalen.size == 0
      maxwidth = namewidth
    else
      datawidth = datastr.convert{|c| __strwidth__(c.to_s) }
      maxwidth = datawidth.max(0).pmax(namewidth)
    end
    # ljust counts characters, so the target is width padding plus the character length.
    len = maxwidth[:*,nil] - datawidth + datalen
    hrule = "-" + maxwidth.map {|len| "-"*len}.join("--") + "-"
    header = " " + [namelist, maxwidth.to_a].transpose.map{|name, len| "#{name.to_s.ljust(len-__strwidth__(name.to_s)+name.to_s.length)}" }.join(" ") + " "
    ary = [hrule, header, hrule]
    if datalen.size > 0
      datastr[:i,nil].each_with_addr do |blk, i|
        list = blk.flatten.to_a
        ary << " " + list.map.with_index {|value, j| "#{value.ljust(len[i,j])}"}.join(" ") + " "
      end
    end
    ary << hrule
    return "DataFrame: row#=#{@row_number}: \n" + ary.join("\n")
  end
end
#ca(*names) ⇒ Object
756 757 758 759 760 761 762 |
# File 'lib/carray-dataframe/dataframe.rb', line 756

# Wraps the column data in a CADFArray.
# Without arguments all columns are used in frame order; otherwise the
# given names (converted with to_s) select and order the columns.
def ca(*names)
  selected = names.empty? ? @column_names : names.map(&:to_s)
  CADFArray.new(selected, @column_data)
end
#calculate(label, &block) ⇒ Object
574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 |
# File 'lib/carray-dataframe/dataframe.rb', line 574

# Computes one value per column and returns them as a one-row frame
# indexed by +label+.
#
# With a block the value is block.(name, column); without one it is
# column.send(label). A column whose computation raises gets UNDEF.
def calculate(label, &block)
  results = {}
  each_column_name do |name|
    results[name] =
      begin
        if block
          [yield(name, column(name))]
        else
          [column(name).send(label.intern)]
        end
      rescue
        [UNDEF]
      end
  end
  CADataFrame.new(results, index: [label])
end
#classify(name, scale = nil, opt = {}) ⇒ Object
1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 |
# File 'lib/carray-dataframe/dataframe.rb', line 1070

# Classifies the values of column +name+.
#
# Without +scale+ each distinct value becomes its own class: the class
# number is the position of the value in column.uniq, and the _M/_L/_R
# columns all hold the distinct values. (This branch used to call
# `columns.convert`, i.e. convert on the Hash of all columns, and failed.)
#
# With +scale+ the values are binned by scale.bin and the mid point, left
# edge and right edge of each value's bin are reported.
#
# @param name  column name
# @param scale bin edges, or nil
# @param opt   overrides for :include_upper (false), :include_lowest (true)
#              and :offset (0); used only with +scale+
# @return a new frame with columns "<name>_M", "<name>_L", "<name>_R" and
#         "<name>_CLASS"
def classify(name, scale = nil, opt = {})
  if not scale
    column = @column_data[name.to_s]
    mids = column.uniq
    mapper = {}
    mids.each_with_index do |v, i|
      mapper[v] = i
    end
    cls = column.convert(:int32) { |v| mapper[v] }
    hash = {
      "#{name}_M" => mids,
      "#{name}_L" => mids,
      "#{name}_R" => mids,
      "#{name}_CLASS" => cls
    }
  else
    option = {
      :include_upper => false,
      :include_lowest => true,
      :offset => 0,
    }.update(opt)
    column = @column_data[name.to_s]
    cls = scale.bin(column, option[:include_upper], option[:include_lowest], option[:offset])
    mids = ((scale + scale.shifted(-1))/2)[0..-2].to_ca
    left = scale[0..-2]
    right = scale.shift(-1)[0..-2]
    hash = {
      "#{name}_M" => mids.project(cls).to_ca,
      "#{name}_L" => left.project(cls).to_ca,
      "#{name}_R" => right.project(cls).to_ca,
      "#{name}_CLASS" => cls
    }
  end
  return CADataFrame.new(hash)
end
#column(spec) ⇒ Object Also known as: col
Column, Row Access
247 248 249 250 251 252 253 254 255 256 |
# File 'lib/carray-dataframe/dataframe.rb', line 247

# Column access by position (Integer) or by name (String / Symbol).
#
# @return the column data, or nil when no such column exists
# @raise [RuntimeError] for any other kind of specifier
def column(spec)
  key =
    case spec
    when Integer then @column_names[spec]
    when String, Symbol then spec.to_s
    else raise "invalid column specifier"
    end
  return @column_data[key]
end
#column_types ⇒ Object
239 240 241 |
# File 'lib/carray-dataframe/dataframe.rb', line 239

# Lists data_type_name of every column, in @column_names order.
def column_types
  @column_names.map do |name|
    @column_data[name].data_type_name
  end
end
#columns ⇒ Object
119 120 121 |
# File 'lib/carray-dataframe/dataframe.rb', line 119

# Returns the Hash kept in @column_data (the object itself, not a copy).
def columns
  return @column_data
end
#columns_to_hash(key_name, value_names) ⇒ Object
781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 |
# File 'lib/carray-dataframe/dataframe.rb', line 781

# Builds a Hash that maps each value of the key column to the value(s)
# found in the same row of the value column(s). Later rows overwrite
# earlier rows with the same key.
#
# @param key_name    [String, Symbol] name of the key column
# @param value_names [String, Symbol, Array] one column name (values are
#                    scalars) or several (values are Arrays)
# @return [Hash]
# @raise [ArgumentError] for unknown column names or an unsupported
#                        +value_names+ type
def columns_to_hash(key_name, value_names)
  hash = {}
  key_name = key_name.to_s if key_name.is_a?(Symbol)
  unless @column_names.include?(key_name)
    raise ArgumentError, "include invalid key column name #{key_name}"
  end
  key_columns = @column_data[key_name]
  case value_names
  when String, Symbol
    value_name = value_names.to_s
    unless @column_names.include?(value_name)
      raise ArgumentError, "invalid value column name #{value_name}"
    end
    value_columns = @column_data[value_name]
    @row_number.times do |i|
      hash[key_columns[i]] = value_columns[i]
    end
  when Array
    names = value_names.map(&:to_s)
    unless names.all? { |s| @column_names.include?(s) }
      raise ArgumentError, "include invalid column name in #{names.join(' ')}"
    end
    value_columns = @column_data.values_at(*names)
    @row_number.times do |i|
      hash[key_columns[i]] = value_columns.map { |c| c[i] }
    end
  else
    raise ArgumentError, "invalid argument"
  end
  return hash
end
#cross(name1, name2) ⇒ Object
1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 |
# File 'lib/carray-dataframe/dataframe.rb', line 1109

# Cross tabulation of two columns.
#
# Counts how often each (value of name1, value of name2) pair occurs and
# returns the counts as a frame whose rows are the sorted distinct values
# of +name1+ and whose columns are the sorted distinct values of +name2+.
# (The unused local `count` of the previous version has been removed.)
def cross(name1, name2)
  col1 = column(name1)
  col2 = column(name2)
  var1 = col1.uniq.sort
  var2 = col2.uniq.sort

  # Pre-seed every combination so pairs that never occur report 0.
  hash = {}
  var1.each do |v1|
    var2.each do |v2|
      hash[[v1, v2]] = 0
    end
  end

  list = CArray.join([col1, col2]).to_a
  list.each do |item|
    hash[item] += 1
  end

  out = CArray.object(var1.size, var2.size) { 0 }
  var1.each_with_index do |v1, i|
    var2.each_with_index do |v2, j|
      out[i, j] = hash[[v1, v2]]
    end
  end
  return CADataFrame.new(out, index: var1, columns: var2)
end
#delete_masked_rows ⇒ Object
657 658 659 660 661 662 663 664 665 666 667 |
# File 'lib/carray-dataframe/dataframe.rb', line 657

# Returns a new frame keeping only the rows in which no column is masked.
#
# NOTE(review): the new frame is built without an index, so an existing
# row index is not carried over -- confirm this is intended.
def delete_masked_rows
  keep = @column_data.first[1].template(:boolean) { true }
  @column_names.each do |name|
    keep = keep & @column_data[name].is_not_masked
  end
  kept = {}
  @column_names.each do |name|
    kept[name] = @column_data[name].to_ca[keep]
  end
  CADataFrame.new(kept)
end
#delete_rows(&block) ⇒ Object
669 670 671 672 673 674 675 676 |
# File 'lib/carray-dataframe/dataframe.rb', line 669

# Evaluates the block with instance_eval and builds a new frame from each
# column indexed by the block's result.
#
# NOTE(review): the rows selected by the block result are the ones that
# end up in the new frame; given the method name, check whether the
# selection should be inverted. The row index is not carried over.
def delete_rows(&block)
  selector = instance_eval(&block)
  picked = {}
  @column_names.each do |name|
    picked[name] = @column_data[name].to_ca[selector]
  end
  CADataFrame.new(picked)
end
#describe ⇒ Object
1243 1244 1245 1246 1247 1248 1249 |
# File 'lib/carray-dataframe/dataframe.rb', line 1243

# Collects column.describe of every column into a frame indexed by the
# column names and returns its transpose.
def describe
  summaries = @column_data.map { |_name, column| column.describe }
  CADataFrame.new(summaries, index: @column_names).transpose
end
#detouch ⇒ Object
646 647 648 649 650 651 652 653 654 655 |
# File 'lib/carray-dataframe/dataframe.rb', line 646

# Replaces @column_data by a copy of the Hash whose columns are the to_ca
# results of the current columns, and clones the row index if present.
#
# @return self
def detouch
  @column_data = @column_data.clone
  each_column_name { |name| @column_data[name] = @column_data[name].to_ca }
  @row_index = @row_index.clone if @row_index
  self
end
#downcase ⇒ Object
448 449 450 451 452 453 454 455 456 457 458 |
# File 'lib/carray-dataframe/dataframe.rb', line 448

# Lower-cases every column name in place.
#
# @return self
def downcase
  lowered = []
  renamed = {}
  each_column_name do |name|
    key = name.downcase
    lowered << key
    renamed[key] = @column_data[name]
  end
  @column_names = lowered
  @column_data = renamed
  self
end
#drop_column(*columns) ⇒ Object Also known as: eliminate_column
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 |
# File 'lib/carray-dataframe/dataframe.rb', line 524

# Removes columns from the frame.
#
# Each argument is a String or Symbol naming one column, or a Regexp
# matching column names. The remaining columns are put into a new frame
# which is handed to #replace.
#
# @return self when called without arguments, else the result of #replace
# @raise [RuntimeError] for any other kind of argument
def drop_column(*columns)
  return self if columns.empty?

  names = []
  columns.each do |c|
    case c
    when String then names << c
    when Symbol then names << c.to_s
    when Regexp then names.push(*@column_names.grep(c))
    else raise "invalid column specification"
    end
  end

  kept = {}
  each_column_name do |name|
    kept[name] = column(name) unless names.include?(name)
  end
  return replace CADataFrame.new(kept, index: @row_index)
end
#each_column(&block) ⇒ Object
Iterators
326 327 328 |
# File 'lib/carray-dataframe/dataframe.rb', line 326

# Iterates over @column_data, yielding name and column.
def each_column(&block)
  @column_data.each(&block)
end
#each_column_name(&block) ⇒ Object
330 331 332 |
# File 'lib/carray-dataframe/dataframe.rb', line 330

# Iterates over the column names in @column_names order.
def each_column_name(&block)
  @column_names.each(&block)
end
#each_row(with: Array, columns: nil, &block) ⇒ Object
342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 |
# File 'lib/carray-dataframe/dataframe.rb', line 342

# Iterates over the rows.
#
# @param with    Array, Hash or CArray -- the class of the yielded row.
#                In Hash mode every row is a fresh Hash (one shared Hash
#                used to be refilled and yielded again, so collected rows
#                all ended up equal to the last one).
# @param columns Array of column names, a Regexp matched against the
#                column names, or nil for all columns
# @raise [RuntimeError] for any other +with+
def each_row(with: Array, columns: nil, &block)
  case columns
  when Array
    column_names = columns
  when Regexp
    column_names = @column_names.grep(columns)
  else
    column_names = @column_names
  end
  if with == Array
    @row_number.times do |i|
      yield column_names.map { |n| @column_data[n][i] }
    end
  elsif with == Hash
    @row_number.times do |i|
      row = {}
      column_names.each do |c|
        row[c] = @column_data[c][i]
      end
      yield row
    end
  elsif with == CArray
    joined = CArray.join(@column_data.values_at(*column_names))
    joined[:i,nil].each do |blk|
      yield blk.to_ca.compact
    end
  else
    raise "invalid data type for loop variable"
  end
end
#each_row_index(&block) ⇒ Object
334 335 336 337 338 339 340 |
# File 'lib/carray-dataframe/dataframe.rb', line 334

# Iterates over the row index entries, or over 0...@row_number when the
# frame has no row index.
def each_row_index(&block)
  return @row_index.each(&block) if @row_index
  @row_number.times(&block)
end
#each_row_with_row_index(with: Array, &block) ⇒ Object
373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 |
# File 'lib/carray-dataframe/dataframe.rb', line 373

# Iterates over the rows, yielding the row and its index label (the row
# index entry, or the row number when the frame has no row index).
#
# Fixes over the previous version:
# * Hash mode without a row index used `times do |idx, i|`, so +i+ was nil
#   and @row_index (nil) was indexed; it now yields the row number.
# * Hash mode yields a fresh Hash per row instead of one shared Hash.
# * Array mode lists the values in @column_names order, like #each_row,
#   rather than in the internal order of the @column_data Hash.
#
# @param with Array or Hash -- the class of the yielded row
# @raise [RuntimeError] for any other +with+
def each_row_with_row_index(with: Array, &block)
  if with == Array
    build_row = lambda { |i| @column_names.map { |n| @column_data[n][i] } }
  elsif with == Hash
    build_row = lambda do |i|
      row = {}
      @column_names.each { |c| row[c] = @column_data[c][i] }
      row
    end
  else
    raise "invalid data type for loop variable"
  end

  if @row_index
    @row_index.each_with_index do |idx, i|
      yield build_row.call(i), idx
    end
  else
    @row_number.times do |i|
      yield build_row.call(i), i
    end
  end
end
#execute(&block) ⇒ Object
565 566 567 568 569 570 571 572 |
# File 'lib/carray-dataframe/dataframe.rb', line 565

# Runs the block with instance_exec. A block of arity 1 additionally
# receives the frame as its argument.
#
# @return the value of the block
def execute(&block)
  return instance_exec(self, &block) if block.arity == 1
  instance_exec(&block)
end
#fill(*names, value) ⇒ Object
420 421 422 423 424 425 426 427 |
# File 'lib/carray-dataframe/dataframe.rb', line 420

# Calls fill(value) on every named column that has_column? reports as
# present; other names are skipped.
#
# @return self
def fill(*names, value)
  names.each do |name|
    next unless has_column?(name)
    column(name).fill(value)
  end
  self
end
#get_dummies(*names, prefix: nil, prefix_sep: "_") ⇒ Object
1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 |
# File 'lib/carray-dataframe/dataframe.rb', line 1277

# Replaces the named columns by the dummy columns that get_dummies of each
# column returns; all other columns are kept and listed first.
#
# New columns are named "<stem><prefix_sep><value>", where the stem is
#   the column name          when prefix is nil,
#   prefix                   when it is a String,
#   prefix[k]                when it is an Array (k counts the named
#                            columns met so far),
#   prefix[column name]      when it is a Hash.
# For any other prefix no dummy columns are added.
def get_dummies(*names, prefix: nil, prefix_sep: "_")
  kept = {}
  dummies = {}
  k = 0
  @column_names.each do |name|
    unless names.include?(name)
      kept[name] = @column_data[name]
      next
    end
    hash = @column_data[name].get_dummies
    # The stem is wrapped in an Array so that "no branch matched" (nil)
    # can be told apart from a stem that is itself nil.
    stem =
      case prefix
      when nil    then [name]
      when String then [prefix]
      when Array  then [prefix[k]]
      when Hash   then [prefix[name]]
      end
    if stem
      hash.each do |v, dummy|
        dummies["#{stem.first}#{prefix_sep}#{v}"] = dummy
      end
    end
    k += 1
  end
  CADataFrame.new(kept.update(dummies), index: @row_index)
end
#group_by(*names) ⇒ Object
9 10 11 12 13 14 15 |
# File 'lib/carray-dataframe/group.rb', line 9

# Groups the frame: one name gives a CADataFrameGroup, any other number
# of names a CADataFrameGroupMulti.
def group_by(*names)
  return CADataFrameGroup.new(self, names[0]) if names.size == 1
  CADataFrameGroupMulti.new(self, *names)
end
#has_column?(name) ⇒ Boolean
230 231 232 233 234 235 236 237 |
# File 'lib/carray-dataframe/dataframe.rb', line 230

# Tells whether the frame has a column called +name+.
#
# The name "index" is special: it reports whether the frame has a row
# index. Every other name is looked up in @column_names after to_s, so
# Symbols are accepted (they used to be compared unconverted and never
# matched).
#
# @param name [String, Symbol]
# @return [Boolean]
def has_column?(name)
  name = name.to_s
  return has_index? if name == "index"
  return @column_names.include?(name)
end
#has_index? ⇒ Boolean
217 218 219 |
# File 'lib/carray-dataframe/dataframe.rb', line 217

# True when @row_index is set, false otherwise.
def has_index?
  return true if @row_index
  false
end
#head(n = 10) ⇒ Object
290 291 292 293 |
# File 'lib/carray-dataframe/dataframe.rb', line 290

# Returns the leading rows: row[0..m-1] with m = min(@row_number, n).
#
# NOTE(review): for n = 0 the range becomes 0..-1 -- check what the row
# accessor returns in that case.
def head(n = 10)
  last = [@row_number, n].min - 1
  row[0..last]
end
#histogram(name, scale = nil, options = nil) ⇒ Object
1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 |
# File 'lib/carray-dataframe/dataframe.rb', line 1050

# Histogram of column +name+.
#
# Without +scale+ the frame is grouped by +name+ and the valid entries per
# group are counted. With +scale+ a CAHistogram over the given bin edges
# is filled from the column and returned as a frame holding the mid
# points, the left and right bin edges and the counts.
#
# (The identifier `options` had been lost from this listing, leaving
# invalid Ruby; it is restored to match the documented signature.)
#
# @param name    column name
# @param scale   bin edges, or nil
# @param options passed to CAHistogram.int when given
def histogram(name, scale = nil, options = nil)
  if scale.nil?
    return group_by(name).table{ { :count => col(name).count_valid } }
  else
    if options
      hist = CAHistogram.int(scale, options)
    else
      hist = CAHistogram.int(scale)
    end
    hist.increment(@column_data[name.to_s])
    hash = {
      name.to_s => hist.midpoints[0],
      "#{name}_L".to_s => scale[0..-2],
      "#{name}_R".to_s => scale.shift(-1)[0..-2],
      :count => hist[0..-2].to_ca,
    }
    return CADataFrame.new(hash)
  end
end
#iloc(&block) ⇒ Object
276 277 278 279 |
# File 'lib/carray-dataframe/dataframe.rb', line 276

# Returns the ILocAccessor of this frame, creating and caching it on
# first use. The block parameter is accepted but not used.
def iloc(&block)
  @iloc = CADataFrame::ILocAccessor.new(self) unless @iloc
  @iloc
end
#index ⇒ Object
TO BE FIXED
282 283 284 285 286 287 288 |
# File 'lib/carray-dataframe/dataframe.rb', line 282

# TO BE FIXED
#
# Returns @row_index.to_ca, or CArray.int(@row_number).seq when the frame
# has no row index.
def index
  return CArray.int(@row_number).seq unless @row_index
  @row_index.to_ca
end
#insert_column(pos, name, new_column = nil, &block) ⇒ Object
480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 |
# File 'lib/carray-dataframe/dataframe.rb', line 480

# Inserts a column at position +pos+ of the column order (in place).
#
# The data comes from +new_column+, else from the block (run with
# instance_exec, receiving the frame), else from an object-typed template
# of the first column. Non-CArray data is converted with to_ca.
#
# When +name+ already exists, its old entry is removed from @column_names
# before the insert, so the name is never listed twice. The error message
# now says "inserted" instead of the copied "appended".
#
# @param pos        position passed to Array#insert
# @param name       column name (converted with to_s)
# @param new_column column data, optional
# @return the stored column
# @raise [RuntimeError] unless the column is 1-dim with @row_number elements
def insert_column(pos, name, new_column = nil, &block)
  name = name.to_s
  if new_column
    # use the given data
  elsif block
    new_column = instance_exec(self, &block)
  else
    new_column = @column_data.first[1].template(:object)
  end
  new_column = new_column.to_ca unless new_column.is_a?(CArray)
  if new_column.rank != 1 or new_column.size != @row_number
    raise "invalid shape of inserted column"
  end
  @column_names.delete(name)
  @column_names.insert(pos, name)
  @column_data[name] = new_column
  return new_column
end
#inspect ⇒ Object
923 924 925 |
# File 'lib/carray-dataframe/dataframe.rb', line 923 def inspect return ascii_table(8) end |
#is_finite ⇒ Object
1012 1013 1014 |
# File 'lib/carray-dataframe/dataframe.rb', line 1012

# Delegates to cmp with :is_finite.
def is_finite
  cmp(:is_finite)
end
#is_masked ⇒ Object
1008 1009 1010 |
# File 'lib/carray-dataframe/dataframe.rb', line 1008

# Delegates to cmp with :is_masked.
def is_masked
  cmp(:is_masked)
end
#join(other_df, opts = {}) ⇒ Object
26 27 28 |
# File 'lib/carray-dataframe/join.rb', line 26
#
# Joins this frame with +other_df+ by delegating to
# CADataFrame::Merge.join(self, other_df, opts).
#
# @param other_df the frame to join with
# @param opts [Hash] options passed through unchanged
def join(other_df, opts = {})
  return CADataFrame::Merge.join(self, other_df, opts)
end
#loc ⇒ Object
271 272 273 274 |
# File 'lib/carray-dataframe/dataframe.rb', line 271
#
# Returns the CADataFrame::LocAccessor bound to this frame, creating it on
# first use and caching it in @loc.
def loc
  @loc = CADataFrame::LocAccessor.new(self) unless @loc
  @loc
end
#matchup(keyname, reference) ⇒ Object
1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 |
# File 'lib/carray-dataframe/dataframe.rb', line 1031
#
# Builds a new frame aligned to +reference+.
#
# The addresses returned by reference.matchup(key column) are used to
# project every column; the key column itself is replaced by +reference+.
# When a row index exists it is projected the same way and unmasked with nil.
#
# @param keyname   [String, Symbol] name of the key column
# @param reference object responding to #matchup
# @return [CADataFrame]
def matchup(keyname, reference)
  idx = reference.matchup(column(keyname))
  key_name = keyname.to_s
  new_columns = {}
  each_column_name do |name|
    new_columns[name] = name == key_name ? reference : column(name).project(idx)
  end
  new_row_index = @row_index ? @row_index.project(idx).unmask(nil) : nil
  CADataFrame.new(new_columns, index: new_row_index)
end
#mean ⇒ Object
1150 1151 1152 1153 1154 1155 1156 |
# File 'lib/carray-dataframe/dataframe.rb', line 1150
#
# Returns a one-row frame, indexed by "mean", holding col.mean for every
# column.
def mean
  means = {}
  each_column { |name, col| means[name] = [col.mean] }
  CADataFrame.new(means, index: ["mean"])
end
#merge(*args) ⇒ Object
561 562 563 |
# File 'lib/carray-dataframe/dataframe.rb', line 561
#
# Instance-level shortcut for CADataFrame.merge with this frame as the first
# argument, followed by +args+.
def merge(*args)
  CADataFrame.merge(self, *args)
end
#method(hash) ⇒ Object
300 301 302 303 304 305 306 |
# File 'lib/carray-dataframe/dataframe.rb', line 300
#
# Merges +hash+ into @__methods__ after converting every key and value to a
# String.
#
# NOTE(review): this definition shadows Object#method for instances of the
# class.
#
# @param hash [Hash] pairs to register
# @return the updated @__methods__
def method(hash)
  stringified = hash.each_with_object({}) do |(key, value), acc|
    acc[key.to_s] = value.to_s
  end
  @__methods__.update(stringified)
end
#objectify ⇒ Object
748 749 750 751 752 753 754 |
# File 'lib/carray-dataframe/dataframe.rb', line 748
#
# Returns a new frame whose columns are column(name).object for every column
# of this frame; the row index is passed through as is.
def objectify
  converted = {}
  each_column_name { |name| converted[name] = column(name).object }
  CADataFrame.new(converted, index: @row_index)
end
#order_by(*names, &block) ⇒ Object
690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 |
# File 'lib/carray-dataframe/dataframe.rb', line 690
#
# Returns the frame reordered by the addresses CA.sort_addr computes for the
# sort keys.
#
# Keys are the columns named in +names+; when no names are given the block is
# evaluated in the context of the frame and must yield a CArray (one key) or
# an Array of keys. Any other block result, or no block at all, leaves the
# key list unset.
def order_by(*names, &block)
  if !names.empty?
    list = @column_data.values_at(*names.map(&:to_s))
  elsif block
    ret = instance_exec(&block)
    case ret
    when CArray then list = [ret]
    when Array  then list = ret
    end
  end
  reorder { CA.sort_addr(*list) }
end
#pivot(name1, name2) ⇒ Object
8 9 10 |
# File 'lib/carray-dataframe/pivot.rb', line 8
#
# Wraps this frame and the two given names in a new CADataFramePivot.
def pivot(name1, name2)
  CADataFramePivot.new(self, name1, name2)
end
#prepend_column(name, new_column = nil, &block) ⇒ Object Also known as: lead
502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 |
# File 'lib/carray-dataframe/dataframe.rb', line 502
#
# Adds a column called +name+ at the front of the column name list.
#
# The column data is, in order of preference: +new_column+, the result of
# evaluating the block in the context of this frame (the frame is also passed
# as the block argument), or an object-typed template of the first column.
# Anything that is not a CArray is converted with #to_ca.
#
# @return the stored column
# @raise [RuntimeError] when the column is not rank 1 or its size differs
#   from @row_number
def prepend_column(name, new_column = nil, &block)
  name = name.to_s
  unless new_column
    new_column = block ? instance_exec(self, &block) : @column_data.first[1].template(:object)
  end
  new_column = new_column.to_ca unless new_column.is_a?(CArray)
  if new_column.rank != 1 || new_column.size != @row_number
    raise "invalid shape of appended column"
  end
  @column_names.unshift(name)
  @column_data[name] = new_column
  new_column
end
#rename(name1, name2) ⇒ Object
437 438 439 440 441 442 443 444 445 446 |
# File 'lib/carray-dataframe/dataframe.rb', line 437
#
# Renames column +name1+ to +name2+ in place, keeping its position in the
# column name list.
#
# Renaming onto the name of a different, already existing column is rejected
# before anything is modified: the data hash can hold only one entry per
# name, so the other column's data would be overwritten while its name stayed
# in the name list.
#
# @param name1 [String, Symbol] current column name
# @param name2 [String, Symbol] new column name
# @return the renamed column's data
# @raise [RuntimeError] when +name1+ is unknown or +name2+ is already taken
def rename(name1, name2)
  old_name = name1.to_s
  new_name = name2.to_s
  idx = @column_names.index(old_name)
  raise "unknown column name #{name1}" unless idx
  if new_name != old_name && @column_data.key?(new_name)
    raise "column name #{name2} already exists"
  end

  @column_names[idx] = new_name
  @column_data[new_name] = @column_data.delete(old_name)
end
#reorder(&block) ⇒ Object
Transformation
681 682 683 684 685 686 687 688 |
# File 'lib/carray-dataframe/dataframe.rb', line 681
#
# Returns a new frame whose columns (and row index, when present) are indexed
# by the value the block yields. The block is evaluated in the context of
# this frame.
def reorder(&block)
  order = instance_exec(&block)
  rearranged = {}
  each_column_name { |name| rearranged[name] = column(name)[order] }
  new_index = @row_index ? @row_index[order] : nil
  CADataFrame.new(rearranged, index: new_index)
end
#replace(other) ⇒ Object
221 222 223 224 225 226 227 228 |
# File 'lib/carray-dataframe/dataframe.rb', line 221
#
# Takes over the state of +other+: column names, column data, row index, row
# number and the __methods__ table are assigned directly (no copies are
# made here).
#
# @return [self]
def replace(other)
  @column_names, @column_data = other.column_names, other.column_data
  @row_index, @row_number = other.row_index, other.row_number
  @__methods__ = other.__methods__
  self
end
#resample(&block) ⇒ Object
590 591 592 593 594 595 596 597 598 599 |
# File 'lib/carray-dataframe/dataframe.rb', line 590
#
# Builds a new frame from the values the block returns for each
# (name, column) pair.
#
# Best effort: a column whose block call raises a StandardError is left out
# of the result and the error is discarded.
def resample(&block)
  resampled = {}
  each_column_name do |name|
    value = yield(name, column(name))
    resampled[name] = value
  rescue
    # deliberately ignored: the column is skipped
  end
  CADataFrame.new(resampled)
end
#reverse ⇒ Object
707 708 709 710 711 712 713 |
# File 'lib/carray-dataframe/dataframe.rb', line 707
#
# Returns a new frame with every column reversed; the row index is reversed
# as well when one is set.
def reverse
  flipped = {}
  each_column_name { |name| flipped[name] = column(name).reverse }
  flipped_index = @row_index ? @row_index.reverse : nil
  CADataFrame.new(flipped, index: flipped_index)
end
#save(filename) ⇒ Object
1315 1316 1317 1318 1319 |
# File 'lib/carray-dataframe/dataframe.rb', line 1315
#
# Serializes this object to +filename+ with Marshal.
#
# The file is opened through File.open in binary mode: Marshal output is
# binary data, and unlike Kernel#open, File.open never treats a name starting
# with "|" as a command to run.
#
# @param filename [String] path of the file to (over)write
# @return the value of Marshal.dump (the IO that was written to)
def save(filename)
  File.open(filename, "wb") do |io|
    Marshal.dump(self, io)
  end
end
#select(*columns, &block) ⇒ Object
601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 |
# File 'lib/carray-dataframe/dataframe.rb', line 601
#
# Returns a new frame restricted to the given columns and, optionally, to the
# rows chosen by the block.
#
# Each entry of +columns+ may be a String, a Symbol, or a Regexp matched
# against the column names; with no entries all columns are used. The block,
# when given, is evaluated in the context of the frame and its result is used
# to index every column and the row index; without a block nil is used.
#
# @raise [RuntimeError] for an unsupported column specification or an
#   unknown column
def select(*columns, &block)
  names =
    if columns.empty?
      @column_names
    else
      columns.flat_map do |c|
        case c
        when String then [c]
        when Symbol then [c.to_s]
        when Regexp then @column_names.grep(c)
        else raise "invalid column specification"
        end
      end
    end
  row = block ? instance_exec(&block) : nil
  picked = {}
  names.map(&:to_s).each do |name|
    raise "unknown column '#{name}'" unless column(name)
    picked[name] = column(name)[row]
  end
  CADataFrame.new(picked, index: @row_index ? @row_index[row] : nil)
end
#select_columns(selector = nil) ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
# File 'lib/carray-dataframe/reference.rb', line 4
#
# Resolves a column selector into an Array of column names.
#
# Supported selectors:
# * nil            -- all column names
# * Integer        -- the name at that position
# * String, Symbol -- that single column
# * Array          -- the listed columns (each must exist); an Array holding
#                     a single Hash is re-dispatched on that Hash
# * Range          -- a slice of the column name list; the ends may be
#                     positions, names or nil (open end)
#
# Range ends given by name are located with Array#index. An exclusive range
# is sliced with a three-dot range, so an end that resolves to position 0
# yields an empty selection.
#
# @param selector [nil, Integer, String, Symbol, Array, Range]
# @return [Array<String>]
# @raise [RuntimeError] when the selector is unsupported or names an unknown
#   column or position
def select_columns(selector = nil)
  case selector
  when nil
    return @column_names
  when Integer
    name = @column_names[selector]
    # check the looked-up name: an Integer selector itself is always truthy
    raise "invalid column index" unless name
    return [name]
  when String, Symbol
    unless @column_names.include?(selector.to_s)
      raise "invalid column specified #{selector}"
    end
    return [selector.to_s]
  when Array
    if selector.size == 1 && selector.first.is_a?(Hash)
      return select_columns(selector.first)
    end
    selector.each do |name|
      unless @column_names.include?(name.to_s)
        raise "invalid column specified #{name}"
      end
    end
    return selector.map(&:to_s)
  when Range
    case selector.begin
    when nil
      idx1 = 0
    when Integer
      idx1 = selector.begin
    when String, Symbol
      idx1 = @column_names.index(selector.begin.to_s)
      raise "can't find column #{selector.begin}" unless idx1
    else
      raise "invalid column specified #{selector.begin}"
    end
    case selector.end
    when nil
      idx2 = -1
    when Integer
      idx2 = selector.end
    when String, Symbol
      idx2 = @column_names.index(selector.end.to_s)
      raise "can't find column #{selector.end}" unless idx2
    else
      raise "invalid column specified #{selector.end}"
    end
    if selector.exclude_end?
      # a nil end keeps the previous behavior of stopping before the last column
      return @column_names[idx1...idx2]
    end
    return @column_names[idx1..idx2]
  else
    raise "invalid column selector #{selector}"
  end
end
#set_index(index, drop: true, inplace: false) ⇒ Object
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
# File 'lib/carray-dataframe/dataframe.rb', line 195
#
# Sets the row index.
#
# With inplace: false (default) the work is done on the frame returned by
# to_df, which is then returned. With inplace: true this frame is modified:
# * nil            -- the current row index is left as it is
# * String, Symbol -- the named column becomes the row index; with
#                     drop: true it is also removed from the columns
# * anything else  -- index.to_ca becomes the row index
#
# @return [CADataFrame] the frame whose index was set
# @raise [RuntimeError] when the named column does not exist
def set_index(index, drop: true, inplace: false)
  return to_df.set_index(index, drop: drop, inplace: true) unless inplace

  case index
  when nil
    # nothing to change
  when String, Symbol
    index = index.to_s
    raise "can't find column named '#{index}'" unless @column_names.include?(index)
    if drop
      @row_index = @column_data.delete(index)
      @column_names.delete(index)
    else
      @row_index = @column_data[index]
    end
  else
    @row_index = index.to_ca
  end
  self
end
#sum ⇒ Object
1142 1143 1144 1145 1146 1147 1148 |
# File 'lib/carray-dataframe/dataframe.rb', line 1142
#
# Returns a one-row frame, indexed by "sum", holding col.sum for every
# column.
def sum
  totals = {}
  each_column { |name, col| totals[name] = [col.sum] }
  CADataFrame.new(totals, index: ["sum"])
end
#summary(*names) ⇒ Object
1251 1252 1253 1254 1255 1256 1257 |
# File 'lib/carray-dataframe/dataframe.rb', line 1251
#
# Collects @column_data[name].summary for every given name, builds a frame
# from those results indexed by +names+, and returns its transpose.
#
# NOTE(review): names are used as hash keys as given (no to_s conversion).
def summary(*names)
  data = names.map { |name| @column_data[name].summary }
  CADataFrame.new(data, index: names).transpose
end
#tail(n = 10) ⇒ Object
295 296 297 298 |
# File 'lib/carray-dataframe/dataframe.rb', line 295
#
# Returns the trailing rows, selected through row[-k..-1] where k is the
# smaller of +n+ and @row_number.
#
# @param n [Integer] requested number of rows (default 10)
def tail(n = 10)
  count = [@row_number, n].min
  row[-count..-1]
end
#to_a(with_index: true) ⇒ Object
31 32 33 34 35 36 37 38 39 |
# File 'lib/carray-dataframe/converter.rb', line 31
#
# Converts the frame to a nested Array.
#
# When a row index is set and with_index is true, the index is included as
# an extra column named "" placed first; otherwise the result is
# ca.to_ca.to_a.
def to_a(with_index: true)
  return ca.to_ca.to_a unless @row_index && with_index

  namelist = [""] + @column_names
  labelled = @column_data.clone.update("" => index)
  CADFArray.new(namelist, labelled).to_a
end
#to_ary ⇒ Object
931 932 933 |
# File 'lib/carray-dataframe/dataframe.rb', line 931
#
# Array coercion: a one-element Array holding the to_s rendering.
def to_ary
  [to_s]
end
#to_ca(*names) ⇒ Object
764 765 766 |
# File 'lib/carray-dataframe/dataframe.rb', line 764
#
# Returns ca(*names).to_ca for the given column names.
def to_ca(*names)
  ca(*names).to_ca
end
#to_csv(io = "", rs: $/, sep: ",", fill: "", with_index: true, time_format: nil, &block) ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/carray-dataframe/converter.rb', line 41
#
# Writes the frame as CSV through CADFArray#to_csv.
#
# When a row index is set and with_index is true, the index is emitted as a
# leading column named "index". CATimeIndex columns are converted first:
# with +time_format+ through time.time_format(time_format), otherwise by
# calling to_s on every time value.
#
# @param io          destination handed to CADFArray#to_csv (default "")
# @param rs          record separator
# @param sep         field separator
# @param fill        fill value handed to CADFArray#to_csv
# @param with_index  [Boolean] include the row index column
# @param time_format [String, nil] format for CATimeIndex columns
# @return whatever CADFArray#to_csv returns
def to_csv(io = "", rs: $/, sep: ",", fill: "", with_index: true, time_format: nil, &block)
  columns = @column_data.clone
  if @row_index && with_index
    namelist = ["index"] + @column_names
    columns.update("index" => index)
  else
    namelist = @column_names
  end
  columns.each do |k, v|
    next unless v.is_a?(CATimeIndex)

    columns[k] =
      if time_format
        v.time.time_format(time_format)
      else
        v.time.convert(:object) { |t| t.to_s }
      end
  end
  CADFArray.new(namelist, columns).to_csv(io, rs: rs, sep: sep, fill: fill, &block)
end
#to_daru ⇒ Object
62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'lib/carray-dataframe/converter.rb', line 62
#
# Converts the frame to a Daru::DataFrame. The daru library is required on
# first use. Every column is turned into a plain Array via
# column(name).object.unmask(nil).to_a; the column order follows
# @column_names and the row index is passed along when one is set.
def to_daru
  require "daru"
  columns = {}
  each_column_name do |name|
    columns[name] = column(name).object.unmask(nil).to_a
  end
  unless @row_index
    return Daru::DataFrame.new(columns, order: @column_names)
  end

  Daru::DataFrame.new(columns, index: @row_index.to_a, order: @column_names)
end
#to_df ⇒ Object
Conversions
740 741 742 743 744 745 746 |
# File 'lib/carray-dataframe/dataframe.rb', line 740
#
# Builds a new CADataFrame from this frame's columns and row index and
# returns the result of calling detouch on it.
def to_df
  copied = {}
  each_column_name { |name| copied[name] = column(name) }
  CADataFrame.new(copied, index: @row_index).detouch
end
#to_hash ⇒ Object Also known as: to_h
768 769 770 771 772 773 774 775 776 777 |
# File 'lib/carray-dataframe/dataframe.rb', line 768
#
# Converts the frame to a Hash mapping column names to Arrays (v.to_a).
# When a row index is set it is stored first under the key "index", as is
# (it is not converted with to_a).
def to_hash
  hash = @row_index ? { "index" => @row_index } : {}
  @column_data.each { |k, v| hash[k] = v.to_a }
  hash
end
#to_html(threshold = 8, time_format: nil, index: true) ⇒ Object
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
# File 'lib/carray-dataframe/to_html.rb', line 3
#
# Renders the frame as an HTML <table> preceded by a "DataFrame: ..." line.
#
# Steps:
# 1. CATimeIndex columns (and a CATimeIndex row index) are converted to
#    strings, using +time_format+ when given and "%F %T%:z" otherwise.
# 2. With index: true a leading column named " " holds the row index, or a
#    0-based sequence when no row index is set.
# 3. When +threshold+ is an Integer and the frame has more rows than that,
#    only the leading and trailing rows are kept, separated by a row of
#    "..." cells.
# 4. Cells are converted with __obj_to_string__ and padded with ljust. When
#    every name and cell is ASCII-only the padding uses String#length;
#    otherwise it is based on __strwidth__.
#
# NOTE(review): cell and header text is interpolated into the markup without
# HTML escaping -- confirm callers never pass untrusted data.
# NOTE(review): the two branches emit "rows#=" and "row#=" respectively, and
# step 1 uses time_strftime for a custom format but time_format for the
# default; both are kept as found.
#
# @param threshold   [Integer, Object] row limit before truncation
# @param time_format [String, nil] format for time columns
# @param index       [Boolean] include the row index column
# @return [String]
def to_html(threshold = 8, time_format: nil, index: true)
  columns = @column_data.clone
  @column_names.each do |name|
    if columns[name].is_a?(CATimeIndex)
      if time_format
        columns[name] = columns[name].time.time_strftime(time_format)
      else
        columns[name] = columns[name].time.time_format("%F %T%:z")
      end
    end
  end
  if index
    if @row_index
      namelist = [" "] + @column_names
      if @row_index.is_a?(CATimeIndex)
        if time_format
          row_index = @row_index.time.time_strftime(time_format)
        else
          row_index = @row_index.time.time_format("%F %T%:z")
        end
      else
        row_index = @row_index
      end
      tbl = CADFArray.new(namelist, columns.update(" " => row_index))
    else
      namelist = [" "] + @column_names
      tbl = CADFArray.new(namelist, columns.update(" " => CArray.int(@row_number).seq))
    end
  else
    namelist = @column_names
    tbl = CADFArray.new(namelist, columns)
  end
  if threshold.is_a?(Integer) and @row_number > threshold
    list = tbl[0..(threshold/2),nil].to_a
    list.push namelist.map { "..." }
    list.push *(tbl[-threshold/2+1..-1,nil].to_a)
    tbl = list.to_ca
  end
  datastr = tbl.convert {|c| __obj_to_string__(c) }.unmask("")
  datamb  = datastr.convert(:boolean, &:"ascii_only?").not.sum(0).ne(0)
  namemb  = namelist.to_ca.convert(:boolean) {|c| c.to_s.ascii_only? }.eq(0)
  mb      = datamb.or(namemb)
  namelen = namelist.map(&:length).to_ca
  datalen = datastr.convert(&:length)
  if mb.max == 0
    if datalen.size == 0
      lengths = namelen.to_a
    else
      lengths = datalen.max(0).pmax(namelen).to_a
    end
    table_in = "<table>"
    header = "<thead><tr>" +
             [namelist, lengths].transpose.map{|name, len|
               "<th>#{name.to_s.ljust(len)}</th>"
             }.join() + "</tr></thead>"
    body_in = "<tbody>"
    ary = [table_in, header, body_in]
    if datalen.size > 0
      datastr[:i,nil].each_with_index do |blk, i|
        list = blk.flatten.to_a
        ary << "<tr>" + [list, lengths].transpose.map {|value, len|
          "<td>#{value.ljust(len)}</td>"
        }.join() + "</tr>"
      end
    end
    ary << "</tbody>"
    ary << "</table>"
    return "DataFrame: rows#=#{@row_number}: \n" + ary.join("\n")
  else
    namewidth = namelist.to_ca.convert{|c| __strwidth__(c.to_s) }
    if datalen.size == 0
      maxwidth = namewidth
    else
      datawidth = datastr.convert{|c| __strwidth__(c.to_s) }
      maxwidth  = datawidth.max(0).pmax(namewidth)
    end
    table_in = "<table>"
    header = "<thead><tr>" +
             [namelist, maxwidth.to_a].transpose.map{|name, len|
               "<th>#{name.to_s.ljust(len-__strwidth__(name.to_s)+name.to_s.length)}</th>"
             }.join() + "</tr></thead>"
    body_in = "<tbody>"
    ary = [table_in, header, body_in]
    if datalen.size > 0
      # Per-cell padding is only needed (and datawidth only assigned) when
      # there is data, so it is computed inside this guard.
      pad = maxwidth[:*,nil] - datawidth + datalen
      datastr[:i,nil].each_with_addr do |blk, i|
        list = blk.flatten.to_a
        ary << "<tr>" + list.map.with_index {|value, j|
          "<td>#{value.ljust(pad[i,j])}</td>"
        }.join() + "</tr>"
      end
    end
    ary << "</tbody>"
    ary << "</table>"
    return "DataFrame: row#=#{@row_number}: \n" + ary.join("\n")
  end
end
#to_s ⇒ Object
927 928 929 |
# File 'lib/carray-dataframe/dataframe.rb', line 927
#
# String rendering: the result of ascii_table called without arguments.
def to_s
  ascii_table
end
#to_sql(tablename) ⇒ Object
88 89 90 91 92 93 94 95 96 97 98 99 100 |
# File 'lib/carray-dataframe/io.rb', line 88
#
# Loads the frame into an in-memory SQLite3 database as table +tablename+
# (via to_sqlite3).
#
# When any column name contains ".", " " or "-", a temporary frame is built
# in which those characters are replaced by "_" and that frame is exported
# instead.
def to_sql(tablename)
  awkward = /[\. \-]/
  unless @column_names.any? { |s| s =~ awkward }
    return to_sqlite3(database: ":memory:", table: tablename)
  end

  columns = {}
  each_column_name do |name|
    columns[name.gsub(awkward, '_')] = column(name)
  end
  CADataFrame.new(columns).to_sqlite3(database: ":memory:", table: tablename)
end
#to_sqlite3(**args) ⇒ Object
84 85 86 |
# File 'lib/carray-dataframe/io.rb', line 84
#
# Converts the frame with to_ca and forwards the keyword arguments to that
# object's to_sqlite3.
def to_sqlite3(**args)
  table = to_ca
  table.to_sqlite3(**args)
end
#to_xlsx(filename, sheet_name: 'Sheet1', with_row_index: false, &block) ⇒ Object
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/carray-dataframe/converter.rb', line 75
#
# Writes the frame to an .xlsx file through the caxlsx library (required on
# first use).
#
# The data is taken from to_df.objectify with masked elements replaced by
# the string "=NA()". The first row holds the column names; with
# with_row_index: true each row is prefixed by its row index and the header
# by an empty cell. When a block is given it receives the worksheet before
# the package is serialized.
#
# @param filename       [String] output path
# @param sheet_name     [String] worksheet name
# @param with_row_index [Boolean] emit the row index as first column
# @return whatever Axlsx::Package#serialize returns
def to_xlsx(filename, sheet_name: 'Sheet1', with_row_index: false, &block)
  require "caxlsx"
  package = Axlsx::Package.new
  package.use_shared_strings = true
  sheet = package.workbook.add_worksheet(name: sheet_name)
  frame = to_df.objectify.unmask("=NA()")
  if with_row_index
    sheet.add_row([""] + column_names)
    frame.each_row_with_row_index(with: Array) { |list, i| sheet.add_row([i] + list) }
  else
    sheet.add_row(column_names)
    frame.each_row(with: Array) { |list| sheet.add_row(list) }
  end
  yield sheet if block_given?
  package.serialize(filename)
end
#transpose(columns: nil) ⇒ Object
715 716 717 718 719 720 721 722 723 724 725 726 |
# File 'lib/carray-dataframe/dataframe.rb', line 715
#
# Returns the transposed frame: the old column names become the row index.
#
# The new column names are, in order of preference: the given +columns+
# (converted with to_s), the string form of the current row index, or a
# sequence generated from "a" with :succ.
def transpose(columns: nil)
  columns =
    if columns
      columns.map(&:to_s)
    elsif @row_index
      @row_index.convert(:object) { |v| v.to_s }
    else
      CArray.object(@row_number).seq("a", :succ)
    end
  CADataFrame.new(ca.transpose, index: @column_names.to_ca, columns: columns)
end
#unmask(value = nil) ⇒ Object
642 643 644 |
# File 'lib/carray-dataframe/dataframe.rb', line 642
#
# Calls unmask!(value) on the frame returned by to_df and returns the
# result.
def unmask(value = nil)
  to_df.unmask!(value)
end
#unmask!(value = nil) ⇒ Object
Maintenance
635 636 637 638 639 640 |
# File 'lib/carray-dataframe/dataframe.rb', line 635
#
# Calls unmask(value) on every column of this frame.
#
# @return [self]
def unmask!(value = nil)
  each_column_name { |name| column(name).unmask(value) }
  self
end
#vacant_copy ⇒ Object
553 554 555 556 557 558 559 |
# File 'lib/carray-dataframe/dataframe.rb', line 553
#
# Returns a new frame with the same column names, each column being
# CArray.object(0).
def vacant_copy
  blanks = {}
  each_column_name { |key| blanks[key] = CArray.object(0) }
  CADataFrame.new(blanks)
end
#where(mask, *args) ⇒ Object
406 407 408 409 410 411 412 413 414 415 416 417 418 |
# File 'lib/carray-dataframe/dataframe.rb', line 406
#
# Assigns values into this frame's columns according to +mask+.
#
# For every column of +mask+ that also exists here:
# * one extra argument  -- positions where the mask is false get args[0]
# * two extra arguments -- positions where the mask is false get args[0],
#                          positions where it is true get args[1]
# Any other number of extra arguments changes nothing.
def where(mask, *args)
  mask.column_names.each do |key|
    next unless has_column?(key)

    case args.size
    when 1
      column(key)[mask.column(key).boolean.not] = args[0]
    when 2
      column(key)[mask.column(key).boolean.not] = args[0]
      column(key)[mask.column(key).boolean] = args[1]
    end
  end
end