Module: ExcelX::Previewer

Defined in:
lib/excelx_preview/excelx.rb

Constant Summary collapse

STANDARD_FORMATS =
{
    0 => 'General',
    1 => '0',
    2 => '0.00',
    3 => '#,##0',
    4 => '#,##0.00',
    9 => '0%',
    10 => '0.00%',
    11 => '0.00E+00',
    12 => '# ?/?',
    13 => '# ??/??',
    14 => 'mm-dd-yy',
    15 => 'd-mmm-yy',
    16 => 'd-mmm',
    17 => 'mmm-yy',
    18 => 'h:mm AM/PM',
    19 => 'h:mm:ss AM/PM',
    20 => 'h:mm',
    21 => 'h:mm:ss',
    22 => 'm/d/yy h:mm',
    37 => '#,##0 ;(#,##0)',
    38 => '#,##0 ;[Red](#,##0)',
    39 => '#,##0.00;(#,##0.00)',
    40 => '#,##0.00;[Red](#,##0.00)',
    45 => 'mm:ss',
    46 => '[h]:mm:ss',
    47 => 'mmss.0',
    48 => '##0.0E+0',
    49 => '@',
}
FORMATS =
{
    "general"=>:float,
    "0"=>:float,
    "0.00"=>:float,
    "#,##0"=>:float,
    "#,##0.00"=>:float,
    "0%"=>:percentage,
    "0.00%"=>:percentage,
    "0.00E+00"=>:float,
    "# ?/?"=>:float,
    "# ??/??"=>:float,
    "mm-dd-yy"=>:date,
    "d-mmm-yy"=>:date,
    "d-mmm"=>:date,
    "mmm-yy"=>:date,
    "h:mm AM/PM"=>:time,
    "h:mm:ss AM/PM"=>:time,
    "hh:mm:ss AM/PM"=>:time,
    "h:mm"=>:time,
    "h:mm:ss"=>:time,
    "m/d/yy h:mm"=>:datetime,
    "#,##0 ;(#,##0)"=>:float,
    "#,##0 ;[Red](#,##0)"=>:float,
    "#,##0.00;(#,##0.00)"=>:float,
    "#,##0.00;[Red](#,##0.00)"=>:float,
    "mm:ss"=>:time,
    "[h]:mm:ss"=>:time,
    "mmss.0"=>:time,
    "##0.0E+0"=>:float,
    "@"=>:float,
    "yyyy\\-mm\\-dd"=>:date,
    "dd/mm/yy"=>:date,
    "hh:mm:ss"=>:time,
    "dd/mm/yy hh:mm"=>:datetime,
    "dd/mmm/yy"=>:date,
    "yyyy-mm-dd"=>:date,
    "hh:mm:ss am/pm" => :time,
    "mm/dd/yy hh:mm am/pm" =>:datetime,
    "mm/dd/yy" => :date,
    "h:mm am/pm" => :time,
    "m/d/yyyy" => :date,
    "m/d/yyyy h:mm" => :datetime,
    "hh:mm am/pm" => :time,
    "dd/mm/yyyy" => :date,
}
DATE_TIME_FORMAT =

add more formats when found

{#add more formats when found
                  "mm/dd/yy" => "%y,%m,%d", #coz excelx stores in this format
                  "m/d/yyyy" =>"%Y,%m,%d",
                  "h:mm am/pm" => "%I:%M:%S %p"
}

Class Method Summary collapse

Class Method Details

.attribute2format(s) ⇒ Object

taken from roo gem



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/excelx_preview/excelx.rb', line 106

def self.attribute2format(s) #taken from roo gem
  result = nil
  @numFmts.each { |nf|

    if nf.first.to_s == @cellXfs[s.to_i].first
      result = nf[1]
      break
    end
  }
  unless result
    id = @cellXfs[s.to_i].first.to_i
    if STANDARD_FORMATS.has_key? id
      result = STANDARD_FORMATS[id]
    end
  end
  result
end

.cleanupObject

TODO use fileutils or something.



247
248
249
# File 'lib/excelx_preview/excelx.rb', line 247

def self.cleanup()      #TODO use fileutils or something.
  FileUtils.remove_dir(@tmp_folder,force= true)
end


170
171
172
173
174
175
176
# File 'lib/excelx_preview/excelx.rb', line 170

def self.content_from_link(link)
  if  link.children.first.name=="f"
    content = link.children.last.children.last.text
  else
    content = link.content
  end
end

.datetime(value, type) ⇒ Object



146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/excelx_preview/excelx.rb', line 146

def self.datetime(value, type)
  seconds = (value.to_f - 25569) * 86400.0
  if type == :time
    (Time.at seconds).utc.strftime("%I:%M:%S %p") rescue value
  elsif type == :datetime
    (Time.at seconds).utc.strftime("%m/%d/%Y %I:%M:%S %p") rescue value
  elsif type == :date
    (Time.at seconds).utc.strftime("%m/%d/%Y") rescue value
  elsif type == :float
    value
  end
end

.datetime_whenis(value, type, format) ⇒ Object



159
160
161
162
163
164
165
166
167
168
# File 'lib/excelx_preview/excelx.rb', line 159

def self.datetime_whenis(value, type, format)

  date_or_time_format = datetimeformat(format, type)
  if  type == :date
    Date.strptime(value, "date(#{date_or_time_format}").to_s rescue value # TODO do same formatting as in the sheet
  elsif type == :time
    Time.parse("#{value}".delete('time(').chop.split(",").map(&:to_i).join(":")).strftime(date_or_time_format) rescue value
  end

end

.datetimeformat(format, type) ⇒ Object



132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/excelx_preview/excelx.rb', line 132

def self.datetimeformat(format, type)
  if DATE_TIME_FORMAT.has_key? format
    DATE_TIME_FORMAT[format]
  else
    if type ==:date
      "%Y,%m,%d"
    elsif type ==:time
      "%I:%M:%S %p"
    else
      ""
    end
  end
end

.extracted_sheetsObject



251
252
253
254
255
256
257
258
259
260
# File 'lib/excelx_preview/excelx.rb', line 251

def self.extracted_sheets()
  sheet_list_xml= Nokogiri::XML(File.open("#{@tmp_folder}/xl/workbook.xml"))
  @sheet_list = {}
  sheet_list_xml.xpath("//*[local-name()='sheet']").each do |sheet|
    sheet_name_in_xml = "sheet#{sheet.attributes["sheetId"].value}"
    sheet_name_in_excelx = sheet.attributes["name"].value.downcase
    @sheet_list[sheet_name_in_xml] = sheet_name_in_excelx
  end
  @sheet_list
end

.fetch_headersObject



298
299
300
301
302
303
304
305
306
307
308
309
310
# File 'lib/excelx_preview/excelx.rb', line 298

def self.fetch_headers()
  @headers =[]
  @row_count = @doc.xpath("//*[local-name()='row']").count
  return if @row_count <1
  @doc.xpath("//*[local-name()='row']")[0].children.each do |first_row|
    if  first_row['t']=="s"
      @headers<< @shared_strings[first_row.content.to_i]
    else
      @headers<< first_row.content
    end
  end
  @headers
end

.first_10_from_sheetObject



212
213
214
215
216
217
218
219
220
221
222
223
# File 'lib/excelx_preview/excelx.rb', line 212

def self.first_10_from_sheet()
  @preview[@sheet_name]= {}
  return if @row_count < 1
  @doc.xpath("//*[local-name()='row']")[0..10].each do |row|
    sheet = {}
    row.children.each do |c|
      content = get_content(c)
      sheet[c['r']] = content
    end
    @preview[@sheet_name]["Row#{row['r']}"]= sheet
  end
end

.format2type(format) ⇒ Object

taken from roo gem



124
125
126
127
128
129
130
# File 'lib/excelx_preview/excelx.rb', line 124

def self.format2type(format) #taken from roo gem
  if FORMATS.has_key? format
    FORMATS[format]
  else
    :float
  end
end

.get_content(link, is = false) ⇒ Object



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# File 'lib/excelx_preview/excelx.rb', line 187

def self.get_content(link, is=false)
  content = nil
  s_value = link["s"].to_i
  format = attribute2format(s_value).to_s.downcase.gsub(/\\/, "").gsub("-", "/")
  type = format2type(format)
  return nil if link.children.empty?
  if is?(link)
    value = link.content.downcase
    datetime_whenis(value, type, format)
    return datetime_whenis(value, type, format)
  end
  if s_value == 0
    if link['t']=="s"
      content = @shared_strings[link.content.to_i]
    else
      content = content_from_link(link)
    end
  elsif s_value >0 && s_value <48
    content = datetime(content_from_link(link), type)
  else
    content = content_from_link(link)
  end
  content
end

.is?(c) ⇒ Boolean

Returns:

  • (Boolean)


178
179
180
181
182
183
184
185
# File 'lib/excelx_preview/excelx.rb', line 178

def self.is?(c)
  c.children.each do |f_or_is|
    if f_or_is.name=="is"
      return true
    end
  end
  return false
end

.parse_xml(sheet_name_in_xml, sheet_name_in_excelx) ⇒ Object



278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
# File 'lib/excelx_preview/excelx.rb', line 278

def self.parse_xml(sheet_name_in_xml, sheet_name_in_excelx)
  @sheet_name = sheet_name_in_excelx
  sheet_xml_file = "#{@tmp_folder}/xl/worksheets/#{sheet_name_in_xml}.xml"
  xml_string =""
  sheets =[]
  counter = 0
  File.open(sheet_xml_file, "r") do |f|
    while counter<10 do
      buffer = f.read(1024)
      occurrence = 0
      occurrence = buffer.scan("</row>").size rescue break
      counter += occurrence
      xml_string = "#{xml_string}#{buffer}"
    end
  end
  rows = xml_string.split("</row>")[0..-2].join("</row>")
  @doc = Nokogiri.XML(rows)

end

.preview(filename, sheets = false) ⇒ Object



225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# File 'lib/excelx_preview/excelx.rb', line 225

def self.preview(filename, sheets=false)

  @filename = filename.chomp(File.extname(filename)) rescue filename
  @tmp_folder = "/tmp/#{UUID.new.generate}"
  unzip
  @sheet_list = extracted_sheets unless sheets
  @sheet_list = [sheets] if sheets
  shared_strings
  @sheet_list.each do |sheet_name_in_xml, sheet_name_in_excelx|
    parse_xml(sheet_name_in_xml, sheet_name_in_excelx)
    fetch_headers
    styles
    first_10_from_sheet
  end
  cleanup()
  @preview
end

.shared_stringsObject



262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
# File 'lib/excelx_preview/excelx.rb', line 262

def self.shared_strings()
  @shared_strings =[]
  begin
    shared = Nokogiri::XML(File.open ("#{@tmp_folder}/xl/sharedStrings.xml"))
  rescue
    "no shared file"
    return @shared_strings
  end
  shared.xpath("//*[local-name()='si']").each do |shared_strings|
    @shared_strings << shared_strings.content
  end


  @shared_strings
end

.stylesObject

taken from roo gem



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/excelx_preview/excelx.rb', line 89

def self.styles() #taken from roo gem
  @numFmts =[]
  @cellXfs =[]
  style= Nokogiri::XML(File.open("#{@tmp_folder}/xl/styles.xml"))
  style.xpath("//*[local-name()='numFmt']").each do |numFmt|
    numFmtId = numFmt.attributes['numFmtId']
    formatCode = numFmt.attributes['formatCode']
    @numFmts << [numFmtId, formatCode]
  end
  style.xpath("//*[local-name()='cellXfs']").each do |xfs|
    xfs.children.each do |xf|
      numFmtId = xf['numFmtId']
      @cellXfs << [numFmtId]
    end
  end
end

.unzipObject



243
244
245
# File 'lib/excelx_preview/excelx.rb', line 243

def self.unzip()
  `unzip -o  #{@filename}.xlsx -d #{@tmp_folder}`
end