Class: Parse::Algorithm::Ver0_0_1

Inherits:
Object
  • Object
show all
Defined in:
lib/parse/algorithm/ver0_0_1.rb

Constant Summary collapse

NULL =

Strings treated as null/missing values (taken from BigML’s list).

[ '', '-', '?', 'N/A', 'n/a', 'NULL', 'null', '#REF!', '#NAME?', 'NIL', 'nil', 'NA', 'na', '#VALUE!', '#NULL!']
# strptime format pairs keyed by date convention (day-first :euro, month-first :us).
# Each pair is [four-digit-year format, two-digit-year format]; #result destructures
# the pair in that order after replacing '/' separators with '-'.
DATE =
{
  euro: ['%d-%m-%Y', '%d-%m-%y'],
  us:   ['%m-%d-%Y', '%m-%d-%y'],
}

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(raw, options = nil) ⇒ Ver0_0_1

Returns a new instance of Ver0_0_1.



12
13
14
15
# File 'lib/parse/algorithm/ver0_0_1.rb', line 12

# Stores the value to be parsed together with the optional parsing options.
#
# @param raw [Object] the value to interpret; kept as given
# @param options [Hash, nil] optional settings; kept as given (nil when omitted)
def initialize(raw, options = nil)
  @raw, @options = raw, options
end

Instance Attribute Details

#options ⇒ Object (readonly)

Returns the value of attribute options.



11
12
13
# File 'lib/parse/algorithm/ver0_0_1.rb', line 11

# Reader for the options given to the constructor.
#
# @return [Object] the current value of @options (nil when none was supplied)
def options
  instance_variable_get(:@options)
end

#raw ⇒ Object (readonly)

Returns the value of attribute raw.



10
11
12
# File 'lib/parse/algorithm/ver0_0_1.rb', line 10

# Reader for the unparsed value given to the constructor.
#
# @return [Object] the current value of @raw
def raw
  instance_variable_get(:@raw)
end

Instance Method Details

#result ⇒ Object

Uses YAML to parse values such as ‘1.5’. Ruby’s YAML implements YAML 1.1, which treats values with leading zeros such as ‘001’ as octal; this is fixed in YAML 1.2 (which JRuby has), where octal is written with a ‘0o’ prefix.



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/parse/algorithm/ver0_0_1.rb', line 20

# Interprets +raw+ and returns the most specific Ruby value it appears to hold.
#
# Non-String input is returned unchanged. For Strings the surrounding whitespace
# is stripped and then, in order:
#
# 1. tokens listed in NULL / INFINITY / NEG_INFINITY / NAN map to nil,
#    Float::INFINITY, -Float::INFINITY and Float::NAN;
# 2. when options[:date] is set, the text is parsed as a date using the
#    matching DATE formats ('/' and '-' are both accepted as separators);
# 3. otherwise numeric decorations (currency '$', thousands commas, a trailing
#    '%', accounting-style parentheses, leading zeros) are removed and the
#    remainder is handed to SafeYAML.load; scientific notation and YAML 1.2
#    style octal ('0o17') are converted afterwards.
#
# Text that does not turn out to be a number is returned as a String with
# runs of whitespace collapsed to one space. Note that decorations removed in
# step 3 are not restored in that case.
#
# @return [Object] nil, a Float, an Integer, a Date, a String, or whatever
#   else SafeYAML.load produces for the text
# @raise [KeyError] if options[:date] is not a key of DATE
# @raise [ArgumentError] from Date.strptime when the text is not a valid date
def result
  return raw unless raw.is_a?(String)

  # String#strip returns a copy, so the in-place edits below never touch raw.
  memo = raw.strip

  return nil if NULL.include?(memo)
  return Float::INFINITY if INFINITY.include?(memo)
  return -Float::INFINITY if NEG_INFINITY.include?(memo)
  return Float::NAN if NAN.include?(memo)

  if options && options[:date]
    yyyy, yy = DATE.fetch(options[:date])
    # Strip *leading* zeros only. An unanchored /0+/ removes the first zero
    # anywhere, turning "10/05/2010" into "1/05/2010" (October -> January).
    memo.sub!(/\A0+/, '')
    memo.gsub!('/', '-')
    # Four or more consecutive digits means the year is written in full.
    pattern = memo =~ /\d{4,}/ ? yyyy : yy
    return Date.strptime(memo, pattern)
  end

  not_numeric = nil
  not_numeric ||= memo =~ /,\d{1,2},/ # comma not used for thousands, like 10,20,30
  not_numeric ||= memo =~ /\..*,/ # comma following a period, like 1.0,2
  not_numeric ||= memo =~ /\A[^(+\-\$0-9%]/ # starts with a letter or similar
  possible_numeric = !not_numeric
  accounting_negative = nil
  percentage = nil

  if possible_numeric
    accounting_negative = memo =~ /\A[0$]*\([0$]*/
    percentage = memo.end_with?('%')
    memo.sub!(/%\z/, '') if percentage
    memo.delete!('()') if accounting_negative
    # In YAML 1.1 anything starting with zero is treated as octal (1.2 uses 0o),
    # so drop leading zeros before the YAML pass. The guard ensures the first
    # run of zeros is the leading one.
    memo.sub!(/0+/, '') if memo =~ /\A[+\-]?0+[+\-\$]?[1-9]+/
    memo.delete!('$') if memo =~ /\A[+\-]?0*\$/
    if memo.include?(',')
      # Remove thousands separators from the integer part only.
      whole, fraction = memo.split('.', 2)
      whole.delete!(',')
      memo = fraction ? [whole, fraction].join('.') : whole
    end
  end

  not_safe_for_yaml = nil
  not_safe_for_yaml ||= memo.include?('#') # '#' would start a YAML comment
  not_safe_for_yaml ||= not_numeric && memo =~ /\A[\d,]+\z/ # 1,2,3 - maybe a csv
  safe_for_yaml = !not_safe_for_yaml

  if safe_for_yaml
    begin
      memo = SafeYAML.load(memo)
    rescue => e
      # Best effort: report the failure and carry on with the unparsed text.
      $stderr.puts "#{memo.inspect} => #{e}"
    end
  end

  if possible_numeric
    case memo
    when /\A[+\-]?[\d._]+[eE][+\-]?[\d._]+\z/
      # scientific notation
      memo = memo.to_f
    when /\A[+\-]?0o[0-7_]+\z/
      # Octal per YAML 1.2. The pattern is anchored to octal digits so that
      # ordinary text starting with "0o" (e.g. "0other") is not turned into 0.
      memo = memo.to_i(8)
    end
  end

  # compress whitespace
  memo.gsub!(/\s+/, ' ') if memo.is_a?(String)

  # Apply the numeric adjustments only when parsing really produced a number;
  # otherwise "1-2%" would raise NoMethodError on String#/.
  if memo.is_a?(Numeric)
    memo /= 100.0 if percentage
    memo = -memo if accounting_negative
  end
  memo
end