Class: Indirizzo::Address
- Inherits:
-
Object
- Object
- Indirizzo::Address
- Defined in:
- lib/indirizzo/address.rb
Overview
The Address class takes a US street address or place name and constructs a list of possible structured parses of the address string.
Instance Attribute Summary collapse
-
#city ⇒ Object
Returns the value of attribute city.
-
#country ⇒ Object
Returns the value of attribute country.
-
#number ⇒ Object
Returns the value of attribute number.
-
#options ⇒ Object
Returns the value of attribute options.
-
#plus4 ⇒ Object
Returns the value of attribute plus4.
-
#prenum ⇒ Object
Returns the value of attribute prenum.
-
#state ⇒ Object
Returns the value of attribute state.
-
#street ⇒ Object
Returns the value of attribute street.
-
#sufnum ⇒ Object
Returns the value of attribute sufnum.
-
#text ⇒ Object
Returns the value of attribute text.
-
#zip ⇒ Object
Returns the value of attribute zip.
Instance Method Summary collapse
- #assign_text_to_address(text) ⇒ Object
- #city_parts ⇒ Object
-
#clean(value) ⇒ Object
Removes any characters that aren’t strictly part of an address string.
-
#expand_numbers(string) ⇒ Object
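Expands a token into a list of possible strings: the first numeral or number word found in it (when its value is below 100) is replaced by its digit form and by its entries in the Ordinals and Cardinals constants. Tokens without such a number are returned unchanged as a one-element list.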
- #expand_streets(street) ⇒ Object
-
#initialize(text, options = {}) ⇒ Address
constructor
Takes an address or place name string (or a Hash of address components) and an optional options Hash.
- #intersection? ⇒ Boolean
- #parse ⇒ Object
- #parse_state(regex_match, text) ⇒ Object
- #po_box? ⇒ Boolean
- #remove_noise_words(strings) ⇒ Object
- #street_parts ⇒ Object
Constructor Details
#initialize(text, options = {}) ⇒ Address
Takes an address or place name string (or a Hash of address components) and an optional options Hash.
30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/indirizzo/address.rb', line 30
# Builds an Address from free-form text or from a Hash of address parts.
#
# text    - a non-empty String, which is cleaned and then parsed, or a
#           non-empty Hash, which is handed to assign_text_to_address.
# options - Hash merged over the default {:expand_streets => true}.
#
# Raises ArgumentError ("no text provided") when text is nil, false or empty.
def initialize(text, options = {})
  @options = { :expand_streets => true }.merge(options)
  raise ArgumentError, "no text provided" unless text && !text.empty?

  if text.is_a?(Hash)
    # Structured input: no raw text to keep, the parts are assigned directly.
    @text = ""
    assign_text_to_address(text)
  else
    @text = clean(text)
    parse
  end
end
Instance Attribute Details
#city ⇒ Object
Returns the value of attribute city.
23 24 25 |
# File 'lib/indirizzo/address.rb', line 23
# Returns the current value of the @city instance variable.
def city
  @city
end
#country ⇒ Object
Returns the value of attribute country.
26 27 28 |
# File 'lib/indirizzo/address.rb', line 26
# Returns the current value of the @country instance variable.
def country
  @country
end
#number ⇒ Object
Returns the value of attribute number.
21 22 23 |
# File 'lib/indirizzo/address.rb', line 21
# Returns the current value of the @number instance variable.
def number
  @number
end
#options ⇒ Object
Returns the value of attribute options.
27 28 29 |
# File 'lib/indirizzo/address.rb', line 27
# Returns the current value of the @options instance variable.
def options
  @options
end
#plus4 ⇒ Object
Returns the value of attribute plus4.
25 26 27 |
# File 'lib/indirizzo/address.rb', line 25
# Returns the current value of the @plus4 instance variable.
def plus4
  @plus4
end
#prenum ⇒ Object
Returns the value of attribute prenum.
21 22 23 |
# File 'lib/indirizzo/address.rb', line 21
# Returns the current value of the @prenum instance variable.
def prenum
  @prenum
end
#state ⇒ Object
Returns the value of attribute state.
24 25 26 |
# File 'lib/indirizzo/address.rb', line 24
# Returns the current value of the @state instance variable.
def state
  @state
end
#street ⇒ Object
Returns the value of attribute street.
22 23 24 |
# File 'lib/indirizzo/address.rb', line 22
# Returns the current value of the @street instance variable.
def street
  @street
end
#sufnum ⇒ Object
Returns the value of attribute sufnum.
21 22 23 |
# File 'lib/indirizzo/address.rb', line 21
# Returns the current value of the @sufnum instance variable.
def sufnum
  @sufnum
end
#text ⇒ Object
Returns the value of attribute text.
20 21 22 |
# File 'lib/indirizzo/address.rb', line 20
# Returns the current value of the @text instance variable.
def text
  @text
end
#zip ⇒ Object
Returns the value of attribute zip.
25 26 27 |
# File 'lib/indirizzo/address.rb', line 25
# Returns the current value of the @zip instance variable.
def zip
  @zip
end
Instance Method Details
#assign_text_to_address(text) ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
# File 'lib/indirizzo/address.rb', line 50
# Populates the address fields from a Hash of already-separated parts.
#
# text - Hash read with the keys :address, :prenum, :sufnum, :street,
#        :number, :city, :region, :state, :country, :postal_code and :plus4.
#
# When text[:address] is present it is cleaned and run through #parse and
# every other key is ignored. Otherwise each field is taken from its key:
#   * the street number comes from text[:number], or is peeled off the front
#     of each street string when text[:number] is absent;
#   * the state comes from :region (looked up in State when longer than two
#     characters), else :state, else :country;
#   * a missing :postal_code blanks both @zip and @plus4.
def assign_text_to_address(text)
  if !text[:address].nil?
    @text = clean(text[:address])
    parse
  else
    @prenum = text[:prenum]
    @sufnum = text[:sufnum]
    @street = text[:street].nil? ? [] : text[:street].scan(Match[:street])

    @number = ""
    if text[:number].nil?
      @street.map! do |single_street|
        single_street = single_street.downcase
        # A street with no leading number yields no match; leave it and
        # @number untouched instead of calling #reject on nil.
        parts = single_street.scan(Match[:number]).first
        if parts
          @number = parts.reject { |n| n.nil? || n.empty? }.first.to_s
          single_street = single_street.sub(@number, "")
        end
        # Drop the separator left behind where the number used to be.
        single_street.sub(/^\s*,?\s*/, "")
      end
    else
      @number = text[:number].to_s
    end
    @street = expand_streets(@street) if @options[:expand_streets]
    # Result is discarded; the call is kept so behaviour matches the original.
    street_parts

    @city = []
    if !text[:city].nil?
      @city.push(text[:city])
      @text = text[:city].to_s
    else
      @city.push("")
    end

    if !text[:region].nil?
      @state = text[:region]
      # Anything longer than two characters is looked up in State.
      @state = State[@state] if @state.length > 2
    elsif !text[:state].nil?
      @state = text[:state]
    elsif !text[:country].nil?
      @state = text[:country]
    end

    @zip = text[:postal_code]
    @plus4 = text[:plus4]
    @zip = @plus4 = "" if !@zip
  end
end
#city_parts ⇒ Object
263 264 265 266 267 268 269 270 271 272 273 274 |
# File 'lib/indirizzo/address.rb', line 263
# Returns every contiguous run of whitespace-separated tokens found in the
# @city entries, minus runs that are themselves keys of Std_Abbr, with
# duplicates removed.
#
# Runs are generated starting from the last token and moving backwards, so
# "new york city" yields "city", "york", "york city", "new", "new york",
# "new york city" in that order.
#
# @city is wrapped with Array() so that a bare String (parse_state assigns
# one) or nil is handled instead of raising NoMethodError.
def city_parts
  strings = []
  Array(@city).each do |string|
    tokens = string.split(" ")
    strings |= (0...tokens.length).reverse_each.flat_map { |i|
      (i...tokens.length).map { |j| tokens[i..j].join(" ") }
    }
  end
  # Don't return strings that consist solely of abbreviations.
  # NOTE: Is this a micro-optimization that has edge cases that will break?
  # Answer: Yes, it breaks on "Prairie"
  strings.reject { |s| Std_Abbr.key?(s) }.uniq
end
#clean(value) ⇒ Object
Removes any characters that aren’t strictly part of an address string.
44 45 46 47 48 |
# File 'lib/indirizzo/address.rb', line 44
# Removes any characters that aren't strictly part of an address string.
#
# value - the raw String to normalise.
#
# Returns a new String in which every run of whitespace is a single space,
# only letters, digits, spaces and , ' & @ / - remain, and there is no
# leading or trailing space.
def clean(value)
  # Turn tabs/newlines into spaces first: they are token separators, and
  # deleting them with the other disallowed characters would fuse the words
  # on either side ("St\nBoston" -> "StBoston").
  spaced = value.strip.gsub(/\s+/, " ")
  allowed = spaced.gsub(/[^a-z0-9 ,'&@\/-]+/i, "")
  # Removing a symbol can leave doubled or leading spaces behind.
  allowed.squeeze(" ").strip
end
#expand_numbers(string) ⇒ Object
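Expands a token into a list of possible strings: the first numeral or number word found in it (when its value is below 100) is replaced by its digit form and by its entries in the Ordinals and Cardinals constants. Tokens without such a number are returned unchanged as a one-element list.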
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
# File 'lib/indirizzo/address.rb', line 107
# Expands a token into the list of strings obtained by rewriting the first
# number it contains in each available form.
#
# string - the token to expand.
#
# The number is located by trying, in order: a run of digits with an
# optional st/nd/rd/th suffix, a match of Ordinals.regexp, then a match of
# Cardinals.regexp. When a number below 100 is found, the matched text is
# replaced by the digits, by Ordinals[num] and by Cardinals[num] (forms that
# are missing from those tables are skipped).
#
# Returns an Array of Strings; [string] when no number below 100 is found.
def expand_numbers(string)
  if (found = /\b\d+(?:st|nd|rd|th)?\b/.match(string))
    match = found[0]
    num = match.to_i
  elsif (found = Ordinals.regexp.match(string))
    match = found[0]
    num = Ordinals[match]
  elsif (found = Cardinals.regexp.match(string))
    match = found[0]
    num = Cardinals[match]
  end

  return [string] unless num && num < 100

  [num.to_s, Ordinals[num], Cardinals[num]].compact.map do |replacement|
    string.sub(match, replacement)
  end
end
#expand_streets(street) ⇒ Object
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
# File 'lib/indirizzo/address.rb', line 210
# Expands a list of street strings into all of their spelling variants.
#
# street - Array of street Strings.
#
# Each entry is stripped, then the list is extended with copies in which
# matches of Name_Abbr.regexp, and then of Std_Abbr.regexp, are replaced by
# the corresponding table entry. Every resulting string is passed through
# expand_numbers, and the combined list is downcased and de-duplicated.
#
# Returns a new Array; [] when street is empty or its first entry is nil.
# The argument itself is not modified.
def expand_streets(street)
  return [] if street.empty? || street[0].nil?

  names = street.map { |s| s.strip }
  names |= names.map { |item| item.gsub(Name_Abbr.regexp) { |m| Name_Abbr[m] } }
  names |= names.map { |item| item.gsub(Std_Abbr.regexp) { |m| Std_Abbr[m] } }
  names.map { |item| expand_numbers(item) }.flatten.map { |s| s.downcase }.uniq
end
#intersection? ⇒ Boolean
288 289 290 |
# File 'lib/indirizzo/address.rb', line 288
# Returns true when Match[:at] matches somewhere in @text, false otherwise.
def intersection?
  !Match[:at].match(@text).nil?
end
#parse ⇒ Object
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
# File 'lib/indirizzo/address.rb', line 137
# Splits @text into its components and stores them in @zip, @plus4,
# @country, @state, @full_state, @prenum, @number, @sufnum, @street and
# @city.
#
# Works on a downcased copy of @text and proceeds in a fixed order, each
# step narrowing the region the next one searches:
#   1. last Match[:zip] hit            -> @zip / @plus4
#   2. whatever follows the ZIP        -> @country
#   3. last Match[:state] hit          -> @state / @full_state (parse_state)
#   4. first Match[:number] hit        -> @prenum / @number / @sufnum
#   5. text between number and the
#      earliest of state/ZIP/end       -> @street (optionally expanded)
#   6. text after the street           -> @city, unless already set
#
# Statement order matters: several steps read $& immediately after a scan.
def parse
  text = @text.clone.downcase

  @zip = text.scan(Match[:zip]).last
  if @zip
    last_match = $&
    zip_index = text.rindex(last_match)
    zip_end_index = zip_index + last_match.length - 1
    @zip, @plus4 = @zip.map { |s| s && s.strip }
  else
    @zip = @plus4 = ""
    zip_index = text.length
    zip_end_index = -1
  end

  @country = @text[zip_end_index + 1..-1].sub(/^\s*,\s*/, '').strip
  # NOTE(review): @country keeps @text's case while text is downcased, so
  # this only clears the country when @text had no capitals - confirm intent.
  @country = nil if @country == text

  @state = text.scan(Match[:state]).last
  if @state
    last_match = $&
    state_index = text.rindex(last_match)
    text = parse_state(last_match, text)
  else
    @full_state = ""
    @state = ""
  end

  @number = text.scan(Match[:number]).first
  # FIXME: 230 Fish And Game Rd, Hudson NY 12534
  if @number # and not intersection?
    last_match = $&
    number_index = text.index(last_match)
    number_end_index = number_index + last_match.length - 1
    @prenum, @number, @sufnum = @number.map { |s| s && s.strip }
  else
    number_end_index = -1
    @prenum = @number = @sufnum = ""
  end

  # FIXME: special case: Name_Abbr gets a bit aggressive
  # about replacing St with Saint. exceptional case:
  # Sault Ste. Marie

  # FIXME: PO Box should geocode to ZIP
  # state_index is nil when no state matched, hence the compact.
  street_search_end_index = [state_index, zip_index, text.length].compact.min - 1
  @street = text[number_end_index + 1..street_search_end_index].scan(Match[:street]).map { |s| s && s.strip }

  @street = expand_streets(@street) if @options[:expand_streets]
  # SPECIAL CASE: 1600 Pennsylvania 20050
  @street << @full_state if @street.empty? && @state.downcase != @full_state.downcase

  street_end_index = @street.map { |s| text.rindex(s) }.compact.min || 0

  if @city.nil? || @city.empty?
    @city = text[street_end_index..street_search_end_index + 1].scan(Match[:city])
    if !@city.empty?
      # Only the last candidate is kept, plus its Name_Abbr expansion.
      @city = [@city.last.strip]
      add = @city.map { |item| item.gsub(Name_Abbr.regexp) { |m| Name_Abbr[m] } }
      @city |= add
      @city.map! { |s| s.downcase }
      @city.uniq!
    else
      @city = []
    end

    # SPECIAL CASE: no city, but a state with the same name. e.g. "New York"
    @city << @full_state if @state.downcase != @full_state.downcase
  end
end
#parse_state(regex_match, text) ⇒ Object
129 130 131 132 133 134 135 |
# File 'lib/indirizzo/address.rb', line 129
# Resolves the state captured by the caller's scan.
#
# regex_match - the String that matched the state pattern.
# text        - the text being parsed; returned unchanged.
#
# Expects @state to hold the capture list of that match. Stores the stripped
# first capture in @full_state, replaces @state with State[@full_state], and
# sets @city to "Washington" when the state is "DC" and the matched text
# itself reads "washington dc" (with or without dots).
#
# NOTE(review): @city is assigned a String here, while the other visible
# assignments to @city use an Array - confirm callers expect this.
def parse_state(regex_match, text)
  @full_state = @state[0].strip # special case: New York
  @state = State[@full_state]
  # The original sliced text at rindex(regex_match), which just reproduces
  # regex_match; test the match directly instead.
  @city = "Washington" if @state == "DC" && regex_match =~ /washington\s+d\.?c\.?/i
  text
end
#po_box? ⇒ Boolean
284 285 286 |
# File 'lib/indirizzo/address.rb', line 284
# Returns true when Match[:po_box] matches somewhere in @text, false
# otherwise.
def po_box?
  !Match[:po_box].match(@text).nil?
end
#remove_noise_words(strings) ⇒ Object
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 |
# File 'lib/indirizzo/address.rb', line 242
# Strips leading/trailing directionals and street-type words from each
# string.
#
# strings - Array of Strings; neither the array nor its elements are
#           modified.
#
# For every string a leading and a trailing Directional match are removed,
# then a leading Prefix_Type match and a trailing Suffix_Type match (all
# case-insensitive, together with the adjacent whitespace). Strings that end
# up empty are dropped.
#
# Returns the stripped list, or the original strings when stripping left
# nothing at all.
def remove_noise_words(strings)
  # Don't return strings that consist solely of abbreviations.
  # NOTE: Is this a micro-optimization that has edge cases that will break?
  # Answer: Yes, it breaks on simple things like "Prairie St" or "Front St"

  # Single-quoted '\s*' keeps the backslash for the regexp engine; inside
  # double quotes "\s" is just a literal space character.
  prefix = Regexp.new("^" + Prefix_Type.regexp.source + '\s*', Regexp::IGNORECASE)
  suffix = Regexp.new('\s*' + Suffix_Type.regexp.source + "$", Regexp::IGNORECASE)
  predxn = Regexp.new("^" + Directional.regexp.source + '\s*', Regexp::IGNORECASE)
  sufdxn = Regexp.new('\s*' + Directional.regexp.source + "$", Regexp::IGNORECASE)

  good_strings = strings.map { |s|
    [predxn, sufdxn, prefix, suffix].inject(s) { |acc, noise| acc.gsub(noise, "") }
  }.reject { |s| s.empty? }

  # The original passed a Std_Abbr/Name_Abbr block to #empty?, which ignores
  # blocks; it never ran, so it is dropped here without changing behaviour.
  good_strings.empty? ? strings : good_strings
end
#street_parts ⇒ Object
227 228 229 230 231 232 233 234 235 236 237 238 239 240 |
# File 'lib/indirizzo/address.rb', line 227
# Returns the candidate street substrings for the current @street entries.
#
# Every contiguous run of whitespace-separated tokens of every @street
# string is collected, the list is passed through remove_noise_words, and
# @number is appended when every remaining string is a key of Std_Abbr or
# Name_Abbr (which is also the case when nothing remains). Duplicates are
# removed from the result.
def street_parts
  strings = []
  # Get all the substrings delimited by whitespace
  @street.each do |string|
    tokens = string.split(" ")
    strings |= (0...tokens.length).flat_map { |i|
      (i...tokens.length).map { |j| tokens[i..j].join(" ") }
    }
  end
  strings = remove_noise_words(strings)

  # Try a simpler case of adding the @number in case everything is an abbr.
  strings += [@number] if strings.all? { |s| Std_Abbr.key?(s) || Name_Abbr.key?(s) }
  strings.uniq
end