Module: Yomise

Defined in:
lib/yomise.rb,
lib/yomise/version.rb

Defined Under Namespace

Classes: Error

Constant Summary collapse

VERSION =
"0.1.1"

Class Method Summary collapse

Class Method Details

.read(path, **opt) ⇒ Object



16
17
18
# File 'lib/yomise.rb', line 16

# Read a table file, dispatching on the file name.
#
# Names ending in "csv" (compared case-insensitively, so "DATA.CSV" also
# qualifies) are handed to read_csv; everything else goes to read_excel.
# The path is passed through unchanged, and may be anything that responds
# to to_s (String, Pathname, ...).
#
# @param path [String, #to_s] path of the file to read
# @param opt [Hash] keyword options forwarded untouched to the chosen reader
# @return [Object] whatever read_csv / read_excel returns
def read(path, **opt)
	# \z anchors at the real end of the string ("$" would also match before a newline).
	if /csv\z/i.match?(path.to_s)
		read_csv(path, **opt)
	else
		read_excel(path, **opt)
	end
end

.read_csv(path, format: :daru, encoding: "utf-8", col_sep: ",", index: nil, **opt) ⇒ Object

Generates a 2D Array from a CSV file and converts it to a Hash or DataFrame. Candidate keys for **opt: line_from: 1, header: 0. Since ver. 0.3.8 the default is format: :daru.



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/yomise.rb', line 23

# Read a CSV file and return its contents in the requested format.
#
# The file is read with +encoding+ and parsed with CSV.parse. If that raises,
# the read is retried once with "cp932"; an error from the retry propagates.
#
# @param path [String] path of the CSV file
# @param format [Symbol, String, nil] output format:
#   "array" -> the parsed 2D Array,
#   "hash"  -> the Hash built by to_hash,
#   "csv"   -> a CSV String (one line per parsed row),
#   "numo"  -> currently the parsed 2D Array (under construction),
#   anything else (including nil) -> a DataFrame built by to_df
# @param encoding [String] encoding used for the first read attempt
# @param col_sep [String] column separator handed to CSV.parse
# @param index [Integer, nil] column whose values become the Daru index
# @param opt [Hash] extra keyword options forwarded to to_hash
# @return [Array, Hash, String, Object] see +format+
def read_csv(path, format: :daru, encoding: "utf-8", col_sep: ",", index: nil, **opt)
	## TODO.. index: option that designates the column number used to generate the DF index.
	## That is, revising the set_index method.

	# Get 2D Array
	begin
		csv = CSV.parse(File.read(path, encoding: encoding), col_sep: col_sep)
	rescue
		# Best effort: try another encoding. Remember which one worked so the
		# DataFrame branch below converts from the right source encoding.
		csv = CSV.parse(File.read(path, encoding: "cp932"), col_sep: col_sep)
		encoding = "cp932"
	end

	case format.to_s
	when "array"
		return csv
	when "hash"
		h, _ind = to_hash(csv, **opt)
		return h
	when "csv"
		# Array#to_csv serialises ONE row; calling it on the 2D array would
		# stringify every row into a single field. Serialise row by row instead.
		return csv.map(&:to_csv).join
	when "numo"
		return csv  # Under Construction
	end

	# Everything else, including format.nil?, becomes a DataFrame.
	h, ind_orig = to_hash(csv, index: index, **opt)
	ans = to_df(h, format: format)

	# Converting encoding and setting the index.. rover not supported yet
	if format.to_s == "daru" || format.nil?
		ans.convert_enc!(from: encoding, to: "utf-8")
		begin
			ans.index = ind_orig if index
		rescue
			warn "Indexing failed (Parhaps due to duplicated index)."
		end
	end

	ans
end

.read_excel(path, sheet_i: 0, format: :daru, encoding: "utf-8", index: nil, **opt) ⇒ Object

Generates a 2D Array from an Excel file and converts it to a Hash or DataFrame. Candidate keys for **opt: line_from: 1, header: 0.



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/yomise.rb', line 67

# Read one sheet of an Excel file and return it in the requested format.
#
# The sheet is loaded through open_excel, which is expected to return a
# 2D Array (rows of cells).
#
# @param path [String] path of the Excel file
# @param sheet_i [Integer] sheet selector handed to open_excel
# @param format [Symbol, String, nil] output format:
#   "array" -> the 2D Array,
#   "hash"  -> the Hash built by to_hash,
#   "csv"   -> a CSV String (one line per row),
#   "numo"  -> currently the 2D Array (under construction),
#   anything else (including nil) -> a DataFrame built by to_df
# @param encoding [String] encoding handed to open_excel
# @param index [Integer, nil] column whose values become the Daru index
# @param opt [Hash] extra keyword options forwarded to to_hash
# @return [Array, Hash, String, Object] see +format+
def read_excel(path, sheet_i: 0, format: :daru, encoding: "utf-8", index: nil, **opt)
	a2d = open_excel(path, sheet_i, encoding: encoding) # Get 2D Array

	case format.to_s
	when "array"
		return a2d
	when "hash"
		h, _ind = to_hash(a2d, **opt)
		return h
	when "csv"
		# Array#to_csv serialises ONE row; calling it on the 2D array would
		# stringify every row into a single field. Serialise row by row instead.
		return a2d.map(&:to_csv).join
	when "numo"
		return a2d  # Under Construction
	end

	# Everything else, including format.nil?, becomes a DataFrame.
	h, ind_orig = to_hash(a2d, index: index, **opt)
	ans = to_df(h, format: format)
	if format.to_s == "daru" || format.nil?
		begin
			ans.index = ind_orig if index
		rescue
			warn "Indexing failed (Parhaps due to duplicated index)."
		end
	end
	ans
end

.recognize_type(str, expected) ⇒ Object



220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# File 'lib/yomise.rb', line 220

# Widen the expected column type so that it can also hold +str+.
#
# Types are ranked :any < :int < :float < :string. +str+ is classified as
# :int (optionally signed digits), :float (a decimal with at least one digit,
# or exponent notation) or :string (everything else, including non-String
# values, which never match the patterns). The higher-ranked of that
# classification and +expected+ is returned.
#
# @param str [String, nil] cell value; nil leaves +expected+ unchanged
# @param expected [Symbol] type assumed so far (:any, :int, :float or :string)
# @return [Symbol] the resulting type
def recognize_type(str, expected)
	return expected if str.nil?

	order = {:any => 0, :int => 1, :float => 2, :string => 3}

	# \A / \z anchor the WHOLE value. With ^ / $ a multi-line cell such as
	# "abc\n12" would be classified by one of its lines only.
	type_of_str =
		case str
		when /\A\s*[-+]?\d+\s*\z/
			:int
		when /\A\s*[-+]?(?:\d+\.\d*|\.\d+)\s*\z/,               # 1.  1.5  .5 (a bare "." is not a number)
		     /\A\s*[-+]?(?:\d*\.\d+|\d+)[eE][-+]?\d+\s*\z/      # 1e5  1.5E-3  .5e2
			:float
		else
			:string
		end

	order[type_of_str] > order[expected] ? type_of_str : expected
end

.to_df(d, format: :daru) ⇒ Object

Convert Hash to DataFrame



145
146
147
148
149
150
151
# File 'lib/yomise.rb', line 145

# Wrap a Hash in a DataFrame object.
#
# @param d [Hash] data handed to the DataFrame constructor
# @param format [Symbol, String, nil] "daru" or nil selects Daru::DataFrame;
#   any other value selects Rover::DataFrame
# @return [Daru::DataFrame, Rover::DataFrame]
def to_df(d, format: :daru)
	wants_daru = format.nil? || format.to_s == "daru"
	frame_class = wants_daru ? Daru::DataFrame : Rover::DataFrame
	frame_class.new(d)
end

.to_hash(array2d, line_from: 1, line_until: nil, line_ignored: nil, column_from: nil, column_until: nil, header: 0, symbol_header: false, replaced_by_nil: [], analyze_type: true, index: nil) ⇒ Object

Converts a 2D Array to a Hash. With header: nil, default headers ("column0", "column1", …) are generated. The line_ignored option is not implemented yet.



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# File 'lib/yomise.rb', line 96

# Convert a 2D Array (rows of cells) into a Hash of header => column values.
#
# @param array2d [Array<Array>] table rows
# @param line_from [Integer, Regexp] first data row; a Regexp selects the first
#   row whose cells, joined with ",", match it
# @param line_until [Integer, Regexp, nil] exclusive end of the data rows; a
#   Regexp selects the last row whose joined cells match it
# @param line_ignored accepted but not used by this method
# @param column_from [Integer, nil] first column kept
# @param column_until [Integer, nil] exclusive end of the kept columns
# @param header [Integer, nil] row of array2d holding the header; when nil,
#   "column0", "column1", ... are generated
# @param symbol_header [Boolean] intern the header names when true
# @param replaced_by_nil passed to fix_array
# @param analyze_type passed to fix_array
# @param index [Integer, nil] column whose values are also returned separately
# @return [Array(Hash, Array)] the Hash and the index column values (nil when
#   +index+ is not given)
def to_hash(array2d, line_from: 1, line_until: nil, line_ignored: nil,
            column_from: nil, column_until: nil,
            header: 0, symbol_header: false,
            replaced_by_nil: [], analyze_type: true,
            index: nil)
	# --- Resolve the row range ---------------------------------------------
	first_row = line_from
	last_row = line_until
	from_is_pattern = line_from.kind_of?(Regexp)
	until_is_pattern = line_until.kind_of?(Regexp)

	if from_is_pattern || until_is_pattern
		joined_rows = array2d.map { |row| row.join(",") }
		if from_is_pattern
			first_row = joined_rows.find_index { |line| line_from === line }
		end
		if until_is_pattern
			# Search from the end so that the LAST matching row is selected.
			offset_from_end = joined_rows.reverse.find_index { |line| line_until === line }
			last_row = (joined_rows.length - 1) - offset_from_end
		end
	end

	rows = array2d[first_row...last_row]

	# --- Index values are taken before any column selection ----------------
	index_values = nil
	index_values = rows.map { |row| row[index] } if index

	# --- Column selection ---------------------------------------------------
	if column_from || column_until
		rows = rows.map { |row| row[column_from...column_until] }
	end

	# --- Rows -> columns, then let fix_array post-process them --------------
	columns = rows[0].zip(*rows[1..])
	columns = fix_array(columns, replaced_by_nil, analyze_type)

	# --- Header names -------------------------------------------------------
	names =
		if header
			check_header(array2d[header])[column_from...column_until]
		else
			(0...(rows.longest_line)).map { |n| "column#{n}" }
		end
	names = names.map(&:intern) if symbol_header

	# --- Header => column ---------------------------------------------------
	table = {}
	names.each_with_index { |name, col| table[name] = columns[col] }
	return table, index_values
end