Class: Geocoder::US::Address
- Inherits:
-
Object
- Object
- Geocoder::US::Address
- Defined in:
- lib/geocoder/us/address.rb
Overview
The Address class takes a US street address or place name and constructs a list of possible structured parses of the address string.
Instance Attribute Summary collapse
-
#city ⇒ Object
Returns the value of attribute city.
-
#number ⇒ Object
Returns the value of attribute number.
-
#plus4 ⇒ Object
Returns the value of attribute plus4.
-
#prenum ⇒ Object
Returns the value of attribute prenum.
-
#state ⇒ Object
Returns the value of attribute state.
-
#street ⇒ Object
Returns the value of attribute street.
-
#sufnum ⇒ Object
Returns the value of attribute sufnum.
-
#text ⇒ Object
Returns the value of attribute text.
-
#zip ⇒ Object
Returns the value of attribute zip.
Instance Method Summary collapse
- #assign_text_to_address(text) ⇒ Object
- #city_parts ⇒ Object
-
#clean(value) ⇒ Object
Removes any characters that aren’t strictly part of an address string.
-
#expand_numbers(string) ⇒ Object
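Expands a token into a list of possible strings by rewriting the first numeral, ordinal word, or cardinal word it contains (when its value is below 100) as a digit string, an ordinal, and a cardinal. Abbreviation expansion based on the Geocoder::US::Name_Abbr constant is performed by #expand_streets, not by this method.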
- #expand_streets(street) ⇒ Object
-
#initialize(text) ⇒ Address
constructor
Takes an address or place name string as its sole argument.
- #intersection? ⇒ Boolean
- #parse ⇒ Object
- #parse_number(regex_match, text) ⇒ Object
- #parse_state(regex_match, text) ⇒ Object
- #parse_zip(regex_match, text) ⇒ Object
- #po_box? ⇒ Boolean
- #remove_noise_words(strings) ⇒ Object
- #street_parts ⇒ Object
Constructor Details
#initialize(text) ⇒ Address
Takes an address or place name string as its sole argument.
28 29 30 31 32 33 34 35 36 37 |
# File 'lib/geocoder/us/address.rb', line 28
#
# Builds an Address from either a free-form string or a Hash of fields.
#
# A Hash (or any Hash subclass) is passed to #assign_text_to_address after
# @text is reset to ""; anything else is run through #clean, stored in
# @text, and then #parse is invoked.
#
# @param text [String, Hash] address / place-name text, or a field Hash
# @raise [ArgumentError] if +text+ is nil, false, or empty
def initialize(text)
  raise ArgumentError, "no text provided" unless text && !text.empty?

  # is_a? (rather than an exact class comparison) also accepts Hash
  # subclasses, which would otherwise be sent to #clean and fail on #strip.
  if text.is_a?(Hash)
    @text = ""
    assign_text_to_address text
  else
    @text = clean text
    parse
  end
end
Instance Attribute Details
#city ⇒ Object
Returns the value of attribute city.
23 24 25 |
# File 'lib/geocoder/us/address.rb', line 23
#
# Reader for the @city instance variable.
#
# @return [Object] the current value of @city (nil if never assigned)
def city
  @city
end
#number ⇒ Object
Returns the value of attribute number.
21 22 23 |
# File 'lib/geocoder/us/address.rb', line 21
#
# Reader for the @number instance variable.
#
# @return [Object] the current value of @number (nil if never assigned)
def number
  @number
end
#plus4 ⇒ Object
Returns the value of attribute plus4.
25 26 27 |
# File 'lib/geocoder/us/address.rb', line 25
#
# Reader for the @plus4 instance variable.
#
# @return [Object] the current value of @plus4 (nil if never assigned)
def plus4
  @plus4
end
#prenum ⇒ Object
Returns the value of attribute prenum.
21 22 23 |
# File 'lib/geocoder/us/address.rb', line 21
#
# Reader for the @prenum instance variable.
#
# @return [Object] the current value of @prenum (nil if never assigned)
def prenum
  @prenum
end
#state ⇒ Object
Returns the value of attribute state.
24 25 26 |
# File 'lib/geocoder/us/address.rb', line 24
#
# Reader for the @state instance variable.
#
# @return [Object] the current value of @state (nil if never assigned)
def state
  @state
end
#street ⇒ Object
Returns the value of attribute street.
22 23 24 |
# File 'lib/geocoder/us/address.rb', line 22
#
# Reader for the @street instance variable.
#
# @return [Object] the current value of @street (nil if never assigned)
def street
  @street
end
#sufnum ⇒ Object
Returns the value of attribute sufnum.
21 22 23 |
# File 'lib/geocoder/us/address.rb', line 21
#
# Reader for the @sufnum instance variable.
#
# @return [Object] the current value of @sufnum (nil if never assigned)
def sufnum
  @sufnum
end
#text ⇒ Object
Returns the value of attribute text.
20 21 22 |
# File 'lib/geocoder/us/address.rb', line 20
#
# Reader for the @text instance variable.
#
# @return [Object] the current value of @text (nil if never assigned)
def text
  @text
end
#zip ⇒ Object
Returns the value of attribute zip.
25 26 27 |
# File 'lib/geocoder/us/address.rb', line 25
#
# Reader for the @zip instance variable.
#
# @return [Object] the current value of @zip (nil if never assigned)
def zip
  @zip
end
Instance Method Details
#assign_text_to_address(text) ⇒ Object
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
# File 'lib/geocoder/us/address.rb', line 47
#
# Populates the address fields from a Hash instead of parsing free text.
#
# When the Hash has a non-nil :address entry, that value is cleaned, stored
# in @text and parsed exactly like a plain string. Otherwise the individual
# keys are read: :prenum, :sufnum, :street, :number, :city, :region /
# :country / :state (first non-nil wins, in that order), :postal_code and
# :plus4.
#
# @param text [Hash] field Hash indexed with the symbol keys listed above
# @return [Object] the result of #parse on the :address path; otherwise ""
#   when no :postal_code was given, or nil
def assign_text_to_address(text)
  return parse_address_entry(text[:address]) unless text[:address].nil?

  @street = []
  @prenum = text[:prenum]
  @sufnum = text[:sufnum]
  @street = text[:street].scan(Match[:street]) unless text[:street].nil?

  @number = ""
  unless @street.nil?
    if text[:number].nil?
      # No explicit number: pull one off the front of each street string.
      # NOTE(review): #parse_number destructures the same scan result into
      # three parts, so `[0].to_s` here may be the string form of a capture
      # array rather than the number text - confirm against Match[:number].
      @street.map! do |single_street|
        single_street.downcase!
        @number = single_street.scan(Match[:number])[0].to_s
        single_street.sub!(@number, "")
        single_street.sub!(/^\s*,?\s*/, "")
        single_street
      end
    else
      @number = text[:number].to_s
    end
    @street = expand_streets(@street)
    street_parts
  end

  @city = []
  if text[:city].nil?
    @city.push("")
  else
    @city.push(text[:city])
    @text = text[:city].to_s
  end

  if !text[:region].nil?
    # @state = []
    @state = text[:region]
    # full_state = @state.strip # special case: New York
    # Regions longer than two characters are looked up in State.
    @state = State[@state] if @state.length > 2
  elsif !text[:country].nil?
    @state = text[:country]
  elsif !text[:state].nil?
    @state = text[:state]
  end

  @zip = text[:postal_code]
  @plus4 = text[:plus4]
  @zip = @plus4 = "" if !@zip
end

# Handles the :address shortcut: clean the text, remember it, and parse it.
def parse_address_entry(address)
  @text = clean(address)
  parse
end
private :parse_address_entry
#city_parts ⇒ Object
260 261 262 263 264 265 266 267 268 269 270 271 272 273 |
# File 'lib/geocoder/us/address.rb', line 260
#
# Lists every contiguous run of whitespace-separated tokens found in the
# @city strings, starting with runs that begin at the last token.
#
# Strings that are keys of Std_Abbr are dropped, unless that would leave
# nothing, in which case the unfiltered list is returned.
#
# @return [Array<String>] unique candidate city strings
def city_parts
  strings = []
  @city.each do |string|
    tokens = string.split(" ")
    last = tokens.length - 1
    # Walk start positions from the final token back to the first.
    last.downto(0) do |i|
      i.upto(last) { |j| strings |= [tokens[i..j].join(" ")] }
    end
  end
  # Don't return strings that consist solely of abbreviations.
  # NOTE: Is this a micro-optimization that has edge cases that will break?
  # Answer: Yes, it breaks on "Prairie"
  good_strings = strings.reject { |s| Std_Abbr.key?(s) }
  strings = good_strings unless good_strings.empty?
  strings.uniq
end
#clean(value) ⇒ Object
Removes any characters that aren’t strictly part of an address string.
40 41 42 43 44 |
# File 'lib/geocoder/us/address.rb', line 40
#
# Removes any characters that aren't strictly part of an address string.
#
# Only letters, digits, spaces and the characters , ' & @ / - are kept.
# Whitespace (including tabs and newlines) is normalised to single spaces
# *before* the disallowed characters are stripped, so that words separated
# only by a tab or line break are not fused together.
#
# @param value [String] raw address text
# @return [String] a new, cleaned string with no leading/trailing spaces
def clean(value)
  value.gsub(/\s+/, " ")
       .gsub(/[^a-z0-9 ,'&@\/-]+/i, "")
       .squeeze(" ") # removing punctuation can leave doubled spaces
       .strip
end
#expand_numbers(string) ⇒ Object
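Expands a token into a list of possible strings by rewriting the first numeral, ordinal word, or cardinal word it contains (when its value is below 100) as a digit string, an ordinal, and a cardinal. Abbreviation expansion based on the Geocoder::US::Name_Abbr constant is performed by #expand_streets, not by this method.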
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# File 'lib/geocoder/us/address.rb', line 104
#
# Expands the first number found in +string+ into its alternative spellings.
#
# The number is located by trying, in order: a run of digits with an
# optional st/nd/rd/th suffix, Ordinals.regexp, then Cardinals.regexp.
# If a value below 100 is found, one copy of +string+ is returned per
# available form (digits, Ordinals[num], Cardinals[num]) with the matched
# text replaced at the position where it was matched. Otherwise the
# original string is returned on its own.
#
# @param string [String] a street (or similar) string
# @return [Array<String>] the expanded variants, or [string]
def expand_numbers(string)
  if (found = /\b\d+(?:st|nd|rd|th)?\b/.match(string))
    num = found[0].to_i
  elsif (found = Ordinals.regexp.match(string))
    num = Ordinals[found[0]]
  elsif (found = Cardinals.regexp.match(string))
    num = Cardinals[found[0]]
  end

  return [string] unless num && num < 100

  # Splice around the MatchData instead of re-searching for the matched
  # text, which could hit an earlier, unrelated occurrence (e.g. the "1"
  # inside "a1"). Forms with no table entry are skipped.
  [num.to_s, Ordinals[num], Cardinals[num]].compact.map do |replacement|
    found.pre_match + replacement + found.post_match
  end
end
#expand_streets(street) ⇒ Object
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
# File 'lib/geocoder/us/address.rb', line 207
#
# Produces the list of alternative spellings for a set of street strings.
#
# Each string is stripped, then variants are added by substituting
# Name_Abbr matches and then Std_Abbr matches; every resulting string is
# passed through #expand_numbers, downcased, and de-duplicated.
# The argument array itself is left untouched.
#
# @param street [Array<String>] candidate street strings
# @return [Array<String>] expanded, lower-cased, unique strings; [] when
#   +street+ is empty or its first element is nil
def expand_streets(street)
  return [] if street.empty? || street[0].nil?

  expanded = street.map { |s| s.strip }
  expanded |= expanded.map { |item| item.gsub(Name_Abbr.regexp) { |m| Name_Abbr[m] } }
  # Std_Abbr is applied to the Name_Abbr variants as well as the originals.
  expanded |= expanded.map { |item| item.gsub(Std_Abbr.regexp) { |m| Std_Abbr[m] } }

  expanded.map { |item| expand_numbers(item) }
          .flatten
          .map { |s| s.downcase }
          .uniq
end
#intersection? ⇒ Boolean
286 287 288 |
# File 'lib/geocoder/us/address.rb', line 286
#
# Tests @text against the Match[:at] pattern.
#
# @return [MatchData, nil] the match (truthy) when the pattern is found,
#   nil otherwise
def intersection?
  Match[:at].match(@text)
end
#parse ⇒ Object
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 |
# File 'lib/geocoder/us/address.rb', line 152
#
# Splits a lower-cased copy of @text into zip, state, number, street and
# city components, consuming the text in that order.
#
# Sets @zip, @plus4, @state, @full_state, @prenum, @number, @sufnum,
# @street and @city. The order of statements matters: each parse_* helper
# is given $&, the text of the most recent match made by the scan on the
# line before it.
def parse
  text = @text.clone.downcase

  @zip = text.scan(Match[:zip])[-1]
  if @zip
    text = parse_zip($&, text)
  else
    @zip = @plus4 = ""
  end

  @state = text.scan(Match[:state])[-1]
  if @state
    text = parse_state($&, text)
  else
    @full_state = ""
    @state = ""
  end

  # NOTE(review): @number is the FIRST scan result while $& is the text of
  # the LAST match; they agree only if Match[:number] matches at most once
  # - confirm against the pattern definition.
  @number = text.scan(Match[:number])[0]
  # FIXME: 230 Fish And Game Rd, Hudson NY 12534
  if @number # and not intersection?
    text = parse_number($&, text)
  else
    @prenum = @number = @sufnum = ""
  end

  # FIXME: special case: Name_Abbr gets a bit aggressive
  # about replacing St with Saint. exceptional case:
  # Sault Ste. Marie

  # FIXME: PO Box should geocode to ZIP
  @street = text.scan(Match[:street])
  @street = expand_streets(@street)
  # SPECIAL CASE: 1600 Pennsylvania 20050
  @street << @full_state if @street.empty? && @state.downcase != @full_state.downcase

  @city = text.scan(Match[:city])
  if !@city.empty?
    @city = [@city[-1].strip]
    add = @city.map { |item| item.gsub(Name_Abbr.regexp) { |m| Name_Abbr[m] } }
    @city |= add
    @city.map! { |s| s.downcase }
    @city.uniq!
  else
    @city = []
  end

  # SPECIAL CASE: no city, but a state with the same name. e.g. "New York"
  @city << @full_state if @state.downcase != @full_state.downcase

  # SPECIAL CASE: if given a single city string, and it's not the
  # same as the street string, remove it from the street parts
  self.city = @city if @city.length == 1 && @city != @street
end
#parse_number(regex_match, text) ⇒ Object
143 144 145 146 147 148 149 150 |
# File 'lib/geocoder/us/address.rb', line 143
#
# Removes the matched house-number text from +text+ and splits @number
# (expected to hold three parts) into @prenum, @number and @sufnum,
# stripping each non-nil part.
#
# +text+ is modified in place. If +regex_match+ does not occur in +text+,
# nothing is cut out.
#
# @param regex_match [String] the exact text that was matched
# @param text [String] the working address text (mutated)
# @return [String] +text+, with the number and any leading comma removed
def parse_number(regex_match, text)
  # FIXME: What if this string appears twice?
  idx = text.index(regex_match)
  text[idx...idx + regex_match.length] = "" if idx
  text.sub!(/^\s*,?\s*/, "")
  @prenum, @number, @sufnum = @number.map { |s| s && s.strip }
  text
end
#parse_state(regex_match, text) ⇒ Object
134 135 136 137 138 139 140 141 |
# File 'lib/geocoder/us/address.rb', line 134
#
# Removes the last occurrence of the matched state text from +text+,
# records the stripped first element of @state as @full_state, and
# replaces @state with State[@full_state].
#
# +text+ is modified in place. If +regex_match+ does not occur in +text+,
# nothing is cut out.
#
# @param regex_match [String] the exact text that was matched
# @param text [String] the working address text (mutated)
# @return [String] +text+, with the state and any trailing comma removed
def parse_state(regex_match, text)
  idx = text.rindex(regex_match)
  text[idx...idx + regex_match.length] = "" if idx
  text.sub!(/\s*,?\s*$/, "")
  @full_state = @state[0].strip # special case: New York
  @state = State[@full_state]
  text
end
#parse_zip(regex_match, text) ⇒ Object
126 127 128 129 130 131 132 |
# File 'lib/geocoder/us/address.rb', line 126
#
# Removes the last occurrence of the matched ZIP text from +text+ and
# splits @zip into @zip and @plus4, stripping each non-nil part.
#
# A missing (nil) part is left as nil rather than raising, matching the
# nil-safe handling in #parse_number. +text+ is modified in place. If
# +regex_match+ does not occur in +text+, nothing is cut out.
#
# @param regex_match [String] the exact text that was matched
# @param text [String] the working address text (mutated)
# @return [String] +text+, with the ZIP and any trailing comma removed
def parse_zip(regex_match, text)
  idx = text.rindex(regex_match)
  text[idx...idx + regex_match.length] = "" if idx
  text.sub!(/\s*,?\s*$/, "")
  @zip, @plus4 = @zip.map { |s| s && s.strip }
  text
end
#po_box? ⇒ Boolean
282 283 284 |
# File 'lib/geocoder/us/address.rb', line 282
#
# Tests @text against the Match[:po_box] pattern.
#
# @return [MatchData, nil] the match (truthy) when the pattern is found,
#   nil otherwise
def po_box?
  Match[:po_box].match(@text)
end
#remove_noise_words(strings) ⇒ Object
239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 |
# File 'lib/geocoder/us/address.rb', line 239
#
# Strips leading/trailing directionals, leading prefix types and trailing
# suffix types (as defined by Directional, Prefix_Type and Suffix_Type)
# from each string, matching case-insensitively.
#
# Strings that become empty are discarded. If every string becomes empty,
# the original list is returned unchanged. The input strings are not
# modified.
#
# @param strings [Array<String>] candidate street strings
# @return [Array<String>] the stripped strings, or +strings+ itself when
#   stripping would leave nothing
def remove_noise_words(strings)
  # Don't return strings that consist solely of abbreviations.
  # NOTE: Is this a micro-optimization that has edge cases that will break?
  # Answer: Yes, it breaks on simple things like "Prairie St" or "Front St"

  # '\s*' must be single-quoted: inside double quotes "\s" is a literal
  # space, which would make the pattern miss tabs and other whitespace.
  prefix = Regexp.new("^" + Prefix_Type.regexp.source + '\s*', Regexp::IGNORECASE)
  suffix = Regexp.new('\s*' + Suffix_Type.regexp.source + "$", Regexp::IGNORECASE)
  predxn = Regexp.new("^" + Directional.regexp.source + '\s*', Regexp::IGNORECASE)
  sufdxn = Regexp.new('\s*' + Directional.regexp.source + "$", Regexp::IGNORECASE)

  good_strings = strings.map do |s|
    s.gsub(predxn, "").gsub(sufdxn, "").gsub(prefix, "").gsub(suffix, "")
  end
  good_strings.reject! { |s| s.empty? }

  good_strings.empty? ? strings : good_strings
end
#street_parts ⇒ Object
224 225 226 227 228 229 230 231 232 233 234 235 236 237 |
# File 'lib/geocoder/us/address.rb', line 224
#
# Lists every contiguous run of whitespace-separated tokens found in the
# @street strings, passes the list through #remove_noise_words, and
# appends @number when every remaining string is a key of Std_Abbr or
# Name_Abbr (which is also the case when no strings remain).
#
# @return [Array<String>] unique candidate street strings
def street_parts
  strings = []
  # Get all the substrings delimited by whitespace
  @street.each do |string|
    tokens = string.split(" ")
    tokens.each_index do |i|
      (i...tokens.length).each { |j| strings |= [tokens[i..j].join(" ")] }
    end
  end
  strings = remove_noise_words(strings)

  # Try a simpler case of adding the @number in case everything is an abbr.
  only_abbreviations = strings.all? { |s| Std_Abbr.key?(s) || Name_Abbr.key?(s) }
  strings += [@number] if only_abbreviations
  strings.uniq
end