Module: Yomise
- Defined in:
- lib/yomise.rb,
lib/yomise/version.rb
Defined Under Namespace
Classes: Error
Constant Summary collapse
- VERSION =
"0.1.1"
Class Method Summary collapse
- .read(path, **opt) ⇒ Object
-
.read_csv(path, format: :daru, encoding: "utf-8", col_sep: ",", index: nil, **opt) ⇒ Object
Generate a 2D Array from a CSV file and convert it to a Hash or DataFrame.
-
.read_excel(path, sheet_i: 0, format: :daru, encoding: "utf-8", index: nil, **opt) ⇒ Object
Generate a 2D Array from an Excel file and convert it to a Hash or DataFrame.
- .recognize_type(str, expected) ⇒ Object
-
.to_df(d, format: :daru) ⇒ Object
Convert Hash to DataFrame.
-
.to_hash(array2d, line_from: 1, line_until: nil, line_ignored: nil, column_from: nil, column_until: nil, header: 0, symbol_header: false, replaced_by_nil: [], analyze_type: true, index: nil) ⇒ Object
Convert a 2D Array to a Hash. With header: nil, default headers ("column0", "column1", …) are generated.
Class Method Details
.read(path, **opt) ⇒ Object
16 17 18 |
# File 'lib/yomise.rb', line 16
#
# Read a tabular file, choosing the reader from the file name.
#
# Paths whose name ends in "csv" (case-insensitive) are handed to
# +read_csv+; every other path is handed to +read_excel+. All keyword
# options are forwarded unchanged to the chosen reader.
#
# @param path [String, #to_s] path of the file to read (a Pathname works too)
# @param opt [Hash] keyword options forwarded to read_csv / read_excel
# @return [Object] whatever the selected reader returns
def read(path, **opt)
  # path.to_s lets Pathname-like objects be recognised; \z anchors at the real
  # end of the string (unlike $), and /i accepts upper-case extensions.
  if /csv\z/i.match?(path.to_s)
    read_csv(path, **opt)
  else
    read_excel(path, **opt)
  end
end
.read_csv(path, format: :daru, encoding: "utf-8", col_sep: ",", index: nil, **opt) ⇒ Object
Generate a 2D Array from a CSV file and convert it to a Hash or DataFrame. **opt candidates: line_from: 1, header: 0. Since ver. 0.3.8, the default format is :daru.
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/yomise.rb', line 23
#
# Parse a CSV file into a 2D Array and convert it to the requested format.
#
# The file is first read with +encoding+; if reading or parsing raises, it is
# re-read once as "cp932" (and "cp932" is then used as the source encoding for
# the Daru conversion below).
#
# @param path [String] path of the CSV file
# @param format [Symbol, String, nil] output format, compared via +to_s+:
#   "array" -> parsed rows (Array of Arrays);
#   "hash"  -> the Hash produced by +to_hash+ (+index:+ is not forwarded);
#   "csv"   -> CSV text, one line per parsed row;
#   "numo"  -> currently the parsed rows (under construction);
#   anything else, including nil -> DataFrame built by +to_df+
# @param encoding [String] encoding used for the first read attempt
# @param col_sep [String] column separator passed to CSV.parse
# @param index [Object, nil] forwarded to +to_hash+ in the DataFrame branch;
#   when truthy, the values returned by +to_hash+ become the Daru index
# @param opt [Hash] extra options forwarded to +to_hash+ (e.g. line_from:, header:)
# @return [Array, Hash, String, Object] depends on +format+ (see above)
def read_csv(path, format: :daru, encoding: "utf-8", col_sep: ",", index: nil, **opt)
  # TODO: index: option that designates the column number used to generate
  #       the DataFrame index (i.e. revising the set_index method).

  parse = ->(enc) { CSV.parse(File.read(path, encoding: enc), col_sep: col_sep) }

  # Get 2D Array
  begin
    csv = parse.call(encoding)
  rescue StandardError
    # Best-effort fallback: try another encoding once. A failure here propagates.
    csv = parse.call("cp932")
    encoding = "cp932"
  end

  case format.to_s
  when "array", "numo" # "numo" is under construction and returns the raw rows
    csv
  when "hash"
    h, = to_hash(csv, **opt)
    h
  when "csv"
    # Serialise row by row; calling to_csv on the outer Array would treat each
    # whole row as a single field.
    csv.map(&:to_csv).join
  else
    # Includes format.nil? (in this case, convert to Daru::DataFrame).
    h, ind_orig = to_hash(csv, index: index, **opt)
    ans = to_df(h, format: format)

    # Converting encoding and setting index (Rover is not supported yet).
    if format.to_s == "daru" || format.nil?
      # NOTE(review): convert_enc! is not defined in this view; presumably a
      # project extension on the Daru frame - confirm.
      ans.convert_enc!(from: encoding, to: "utf-8")
      begin
        ans.index = ind_orig if index
      rescue StandardError
        warn "Indexing failed (Parhaps due to duplicated index)."
      end
    end

    ans
  end
end
.read_excel(path, sheet_i: 0, format: :daru, encoding: "utf-8", index: nil, **opt) ⇒ Object
Generate a 2D Array from an Excel file and convert it to a Hash or DataFrame. **opt candidates: line_from: 1, header: 0.
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/yomise.rb', line 67
#
# Load one sheet of an Excel file as a 2D Array (via +open_excel+) and convert
# it to the requested format.
#
# @param path [String] path of the Excel file, passed to +open_excel+
# @param sheet_i [Integer] sheet selector passed to +open_excel+
# @param format [Symbol, String, nil] output format, compared via +to_s+:
#   "array" -> the 2D Array from +open_excel+;
#   "hash"  -> the Hash produced by +to_hash+ (+index:+ is not forwarded);
#   "csv"   -> CSV text, one line per row;
#   "numo"  -> currently the 2D Array (under construction);
#   anything else, including nil -> DataFrame built by +to_df+
# @param encoding [String] passed to +open_excel+
# @param index [Object, nil] forwarded to +to_hash+ in the DataFrame branch;
#   when truthy, the values returned by +to_hash+ become the Daru index
# @param opt [Hash] extra options forwarded to +to_hash+ (e.g. line_from:, header:)
# @return [Array, Hash, String, Object] depends on +format+ (see above)
def read_excel(path, sheet_i: 0, format: :daru, encoding: "utf-8", index: nil, **opt)
  a2d = open_excel(path, sheet_i, encoding: encoding) # Get 2D Array

  case format.to_s
  when "array", "numo" # "numo" is under construction and returns the raw rows
    a2d
  when "hash"
    h, = to_hash(a2d, **opt)
    h
  when "csv"
    # Serialise row by row; calling to_csv on the outer Array would treat each
    # whole row as a single field.
    a2d.map(&:to_csv).join
  else
    # Includes format.nil?
    h, ind_orig = to_hash(a2d, index: index, **opt)
    ans = to_df(h, format: format)

    # Index assignment is only attempted for Daru frames.
    if format.to_s == "daru" || format.nil?
      begin
        ans.index = ind_orig if index
      rescue StandardError
        warn "Indexing failed (Parhaps due to duplicated index)."
      end
    end

    ans
  end
end
.recognize_type(str, expected) ⇒ Object
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 |
# File 'lib/yomise.rb', line 220
#
# Classify a cell value as :int, :float or :string and return whichever of
# that classification and +expected+ ranks higher in the order
# :any < :int < :float < :string.
#
# A nil +str+ carries no information, so +expected+ is returned unchanged.
# Leading/trailing whitespace and an optional sign are allowed around numbers.
# Values that are neither String nor Symbol never match a pattern and are
# therefore classified as :string.
#
# @param str [String, nil] cell content to inspect
# @param expected [Symbol] type assumed so far; one of :any, :int, :float, :string
# @return [Symbol] the higher-ranked of the detected type and +expected+
def recognize_type(str, expected)
  return expected if str.nil?

  order = { any: 0, int: 1, float: 2, string: 3 }

  # \A / \z anchor the whole value, so a multi-line cell whose first line
  # looks numeric is not mistaken for a number.
  type_of_str =
    case str
    when /\A\s*[-+]?\d+\s*\z/
      :int
    when /\A\s*[-+]?(?:\d+\.\d*|\.\d+)\s*\z/, # decimal form; at least one digit required
         /\A\s*[-+]?(?:\d*\.\d+|\d+)[eE][-+]?\d+\s*\z/ # exponent form
      :float
    else
      :string
    end

  order[type_of_str] > order[expected] ? type_of_str : expected
end
.to_df(d, format: :daru) ⇒ Object
Convert Hash to DataFrame
145 146 147 148 149 150 151 |
# File 'lib/yomise.rb', line 145
#
# Wrap a Hash in a DataFrame.
#
# @param d [Hash] data handed to the DataFrame constructor
# @param format [Symbol, String, nil] "daru" (via +to_s+) or nil selects
#   Daru::DataFrame; any other value selects Rover::DataFrame
# @return [Daru::DataFrame, Rover::DataFrame] the newly built frame
def to_df(d, format: :daru)
  wants_daru = format.to_s == "daru" || format.nil?
  return Daru::DataFrame.new(d) if wants_daru

  Rover::DataFrame.new(d)
end
.to_hash(array2d, line_from: 1, line_until: nil, line_ignored: nil, column_from: nil, column_until: nil, header: 0, symbol_header: false, replaced_by_nil: [], analyze_type: true, index: nil) ⇒ Object
Convert a 2D Array to a Hash. With header: nil, default headers ("column0", "column1", …) are generated. The line_ignored option is not implemented yet.
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
# File 'lib/yomise.rb', line 96
#
# Convert a 2D Array (rows of cells) into a Hash of header => column values.
#
# @param array2d [Array<Array>] source rows
# @param line_from [Integer, Regexp] first data row; a Regexp selects the first
#   row whose cells, joined with ",", match it
# @param line_until [Integer, Regexp, nil] exclusive end row; a Regexp selects
#   the last matching row; nil reads to the end
# @param line_ignored [Object] accepted but not used (not implemented yet)
# @param column_from [Integer, nil] first column kept
# @param column_until [Integer, nil] exclusive end column
# @param header [Integer, nil] row of +array2d+ used as header; when nil,
#   headers "column0", "column1", ... are generated
# @param symbol_header [Boolean] when truthy, headers are interned to Symbols
# @param replaced_by_nil [Array] passed through to +fix_array+
# @param analyze_type [Boolean] passed through to +fix_array+
# @param index [Integer, nil] when given, that cell of every selected row is
#   collected and returned as the second element
# @return [Array] two elements: the header => column Hash, and the collected
#   index values (nil when +index+ is not given)
def to_hash(array2d, line_from: 1, line_until: nil, line_ignored: nil,
            column_from: nil, column_until: nil, header: 0, symbol_header: false,
            replaced_by_nil: [], analyze_type: true, index: nil)
  # TODO: column_from:, column_until:

  # Resolve the row range ---------------------------------------------------
  first_row = line_from
  stop_row = line_until
  from_is_pattern = line_from.kind_of?(Regexp)
  until_is_pattern = line_until.kind_of?(Regexp)
  if from_is_pattern || until_is_pattern
    # Regexp bounds are matched against each row joined into one string.
    joined_rows = array2d.map { |row| row.join(",") }
    if from_is_pattern
      first_row = joined_rows.find_index { |text| line_from === text }
    end
    if until_is_pattern
      # Searching the reversed list finds the last matching row.
      stop_row = (joined_rows.length - 1) - joined_rows.reverse.find_index { |text| line_until === text }
    end
  end

  # Slice out the data rows ---------------------------------------------------
  output = array2d[first_row...stop_row]

  # Collect the index column before any column selection ----------------------
  ind_orig = nil
  ind_orig = output.map { |row| row[index] } if index

  # Select columns --------------------------------------------------------------
  if column_from || column_until
    output = output.map { |row| row[column_from...column_until] }
  end

  # Turn rows into columns ----------------------------------------------------
  head_row = output[0]
  other_rows = output[1..]
  output_transpose = head_row.zip(*other_rows)
  # NOTE(review): fix_array is defined outside this view; it receives the
  # columns together with replaced_by_nil and analyze_type.
  output_transpose = fix_array(output_transpose, replaced_by_nil, analyze_type)

  # Build the headers -----------------------------------------------------------
  hd =
    if header
      # NOTE(review): check_header is defined outside this view.
      check_header(array2d[header])[column_from...column_until]
    else
      # NOTE(review): longest_line is not a core Array method; presumably a
      # project extension - confirm.
      (0...output.longest_line).map { |n| "column#{n}" }
    end
  hd = hd.map(&:intern) if symbol_header

  # Make Hash (header => column values) ---------------------------------------
  columns = {}
  hd.each_with_index { |name, i| columns[name] = output_transpose[i] }
  [columns, ind_orig]
end