Module: SpsBill::BillParser

Included in:
Bill
Defined in:
lib/sps_bill/bill_parser.rb

Overview

all the bill scanning and parsing intelligence

Constant Summary collapse

ELECTRICITY_SERVICE_HEADER =
/Electricity Services/i
ELECTRICITY_SERVICE_FOOTER =
/Gas Services|Water Services/i
GAS_SERVICE_HEADER =
/Gas Services/i
GAS_SERVICE_FOOTER =
/Water Services/i
WATER_SERVICE_HEADER =
/Water Services/i
WATER_SERVICE_FOOTER =
/Waterborne Fee/i

Instance Method Summary collapse

Instance Method Details

#do_complete_parse ⇒ Object

Command: scans and extracts billing details from the pdf doc



19
20
21
22
23
24
25
26
27
28
# File 'lib/sps_bill/bill_parser.rb', line 19

# Command: scans and extracts billing details from the pdf doc.
#
# Calls every method reported by +methods+ whose name starts with "parse_".
# An exception raised by one parser is recorded in +errors+ (together with
# +source_file+ and the parser name) and the remaining parsers still run.
# Returns nil without doing anything when +reader+ is nil/false.
def do_complete_parse
  return unless reader
  methods.grep(/^parse_/).each do |parser_name|
    begin
      send(parser_name)
    rescue => failure
      # keep going: a single failed field must not abort the whole parse
      errors << "failure parsing #{source_file}:#{parser_name} #{failure.inspect}"
    end
  end
end

#errors ⇒ Object

Returns a collection of parser errors



14
15
16
# File 'lib/sps_bill/bill_parser.rb', line 14

# Returns the collection of parser errors, creating an empty Array on first
# access. The same Array is returned on every later call.
def errors
  return @errors if @errors
  @errors = []
end

#parse_account_number ⇒ Object

Command: extracts the account number



31
32
33
34
35
36
37
38
39
40
41
# File 'lib/sps_bill/bill_parser.rb', line 31

# Command: extracts the account number and stores it in @account_number.
#
# The value is taken as the last text fragment found in the region that
# +reader.bounding_box+ returns for the constraints below.
# Returns the extracted account number.
def parse_account_number
  region = reader.bounding_box do
    exclusive!
    below 'Dated'
    above 'Type'
    right_of 'Account No'
  end
  # text will be returned like this:
  #   [[":", "8123123123"]]
  @account_number = region.text.flatten.last
end

#parse_electricity_usage ⇒ Object

Command: extracts an array of electricity usage charges. Each element is a Hash:

{ kwh: float, rate: float, amount: float }


89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/sps_bill/bill_parser.rb', line 89

# Command: extracts an array of electricity usage charges and stores it in
# @electricity_usage. Each element is a Hash:
#
#   { :kwh => float, :rate => float, :amount => float }
#
# @electricity_usage is set to nil when the region contains no text.
def parse_electricity_usage
  usage_region = reader.bounding_box do
    exclusive!
    below ELECTRICITY_SERVICE_HEADER
    above ELECTRICITY_SERVICE_FOOTER
    right_of 240.0
    left_of 450.0
  end
  # rows are expected to look like this:
  #   [["4 kWh", "0.2410", "0.97"], ["616 kWh", "0.2558", "157.57"]]
  rows = usage_region.text
  if rows.empty?
    @electricity_usage = nil
  else
    @electricity_usage = rows.map do |row|
      {
        :kwh    => row[0].gsub(/kwh/i, '').to_f, # drop the unit before converting
        :rate   => row[1].to_f,
        :amount => row[2].to_f
      }
    end
  end
end

#parse_gas_usage ⇒ Object

Command: extracts an array of gas usage charges. Each element is a Hash:

{ kwh: float, rate: float, amount: float }


106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/sps_bill/bill_parser.rb', line 106

# Command: extracts an array of gas usage charges and stores it in
# @gas_usage. Each element is a Hash:
#
#   { :kwh => float, :rate => float, :amount => float }
#
# @gas_usage is set to nil when the region contains no text.
def parse_gas_usage
  usage_region = reader.bounding_box do
    exclusive!
    below GAS_SERVICE_HEADER
    above GAS_SERVICE_FOOTER
    right_of 240.0
    left_of 450.0
  end
  # rows are expected to look like this:
  #   [["4 kWh", "0.2410", "0.97"], ["616 kWh", "0.2558", "157.57"]]
  rows = usage_region.text
  if rows.empty?
    @gas_usage = nil
  else
    @gas_usage = rows.map do |row|
      {
        :kwh    => row[0].gsub(/kwh/i, '').to_f, # drop the unit before converting
        :rate   => row[1].to_f,
        :amount => row[2].to_f
      }
    end
  end
end

#parse_invoice_date ⇒ Object

Command: extracts the invoice date



58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/sps_bill/bill_parser.rb', line 58

# Command: extracts the invoice date and stores it in @invoice_date
# (a Date built with Date.parse).
def parse_invoice_date
  dated_region = reader.bounding_box do
    inclusive!
    below 'Dated'
    above 'Dated'
    right_of 'Dated'
  end
  # fragments are expected to look like this:
  #   ["Dated", "31", "May", "2011"]
  fragments = dated_region.text.flatten
  # skip the leading "Dated" label and keep day, month, year
  day_month_year = fragments[1, 3]
  @invoice_date = Date.parse(day_month_year.join('-'))
end

#parse_invoice_month ⇒ Object

Command: extracts the invoice month (as Date, set to 1st of the month)



72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/sps_bill/bill_parser.rb', line 72

# Command: extracts the invoice month (as Date, set to 1st of the month)
# and stores it in @invoice_month.
def parse_invoice_month
  header_region = reader.bounding_box do
    inclusive!
    below 'Dated'
    above 'Dated'
  end
  # fragments are expected to look like this:
  #   ["May", "11", "Bill", "Dated", "31", "May", "2011"]
  month, year = header_region.text.flatten.first(2)
  # WARNING: converting 2-digit date. Assumed to be 21st C
  year = "20#{year}" if year.length == 2
  @invoice_month = Date.parse(['01', month, year].join('-'))
end

#parse_total_amount ⇒ Object

Command: extracts the total amount due for the current month



44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/sps_bill/bill_parser.rb', line 44

# Command: extracts the total amount due for the current month and stores
# it (as a Float) in @total_amount.
def parse_total_amount
  total_label = /^Total Current Charges due on/
  total_region = reader.bounding_box do
    inclusive!
    below(total_label)
    above(total_label)
    right_of(total_label)
    left_of(400.0)
  end
  # cells are expected to look like this:
  #   ["Total Current Charges due on 14 Jun 2011 (Tue)", "251.44"]
  cells = total_region.text.flatten
  @total_amount = cells.last.to_f
end

#parse_water_usage ⇒ Object

Command: extracts an array of water usage charges. Each element is a Hash:

{ cubic_m: float, rate: float, amount: float }


123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/sps_bill/bill_parser.rb', line 123

# Command: extracts an array of water usage charges and stores it in
# @water_usage. Each element is a Hash:
#
#   { :cubic_m => float, :rate => float, :amount => float }
#
# @water_usage is set to nil when the region contains no text.
def parse_water_usage
  usage_region = reader.bounding_box do
    exclusive!
    below WATER_SERVICE_HEADER
    above WATER_SERVICE_FOOTER
    right_of 240.0
    left_of 450.0
  end
  # rows are expected to look like this:
  #   [["36.1 Cu M", "1.1700", "42.24"], ["-3.0 Cu M", "1.4000", "-4.20"]]
  rows = usage_region.text
  if rows.empty?
    @water_usage = nil
  else
    @water_usage = rows.map do |row|
      {
        :cubic_m => row[0].gsub(/cu m/i, '').to_f, # drop the unit before converting
        :rate    => row[1].to_f,
        :amount  => row[2].to_f
      }
    end
  end
end