Class: Indirizzo::Address

Inherits:

Object

Object
Indirizzo::Address

show all

Defined in: lib/indirizzo/address.rb

Overview

The Address class takes a US street address or place name and constructs a list of possible structured parses of the address string.

Instance Attribute Summary collapse

#city ⇒ Object

Returns the value of attribute city.
#country ⇒ Object

Returns the value of attribute country.
#number ⇒ Object

Returns the value of attribute number.
#options ⇒ Object

Returns the value of attribute options.
#plus4 ⇒ Object

Returns the value of attribute plus4.
#prenum ⇒ Object

Returns the value of attribute prenum.
#state ⇒ Object

Returns the value of attribute state.
#street ⇒ Object

Returns the value of attribute street.
#sufnum ⇒ Object

Returns the value of attribute sufnum.
#text ⇒ Object

Returns the value of attribute text.
#zip ⇒ Object

Returns the value of attribute zip.

Instance Method Summary collapse

#assign_text_to_address(text) ⇒ Object
#city_parts ⇒ Object
#clean(value) ⇒ Object

Removes any characters that aren’t strictly part of an address string.
#expand_numbers(string) ⇒ Object

Expands a string into a list of possible strings by replacing the first numeral, ordinal word, or cardinal word it contains (when its value is below 100) with its numeral, ordinal, and cardinal equivalents, using the Ordinals and Cardinals constants.
#expand_streets(street) ⇒ Object
#initialize(text, options = {}) ⇒ Address constructor

Takes an address or place name string (or a Hash of pre-split address fields) plus an optional options hash.
#intersection? ⇒ Boolean
#parse ⇒ Object
#parse_state(regex_match, text) ⇒ Object
#po_box? ⇒ Boolean
#remove_noise_words(strings) ⇒ Object
#street_parts ⇒ Object

Constructor Details

#initialize(text, options = {}) ⇒ `Address`

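Takes an address or place name string (or a Hash of pre-split address fields) plus an optional options hash. Raises ArgumentError when the text is nil or empty.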

Raises:

(ArgumentError)

# File 'lib/indirizzo/address.rb', line 30

# Builds an Address from free-form text or from a Hash of address fields.
#
# text    - a String to be cleaned and parsed, or a Hash that is handed to
#           #assign_text_to_address.
# options - Hash merged over the default {:expand_streets => true}.
#
# Raises ArgumentError ("no text provided") when text is nil/false or empty.
# Note that @options is assigned before the argument check runs.
def initialize(text, options = {})
  @options = { :expand_streets => true }.merge(options)

  raise ArgumentError, "no text provided" if !text || text.empty?

  # Only an exact Hash (not a subclass) takes the structured path.
  if text.instance_of?(Hash)
    @text = ""
    assign_text_to_address(text)
  else
    @text = clean(text)
    parse
  end
end

Instance Attribute Details

#city ⇒ `Object`

Returns the value of attribute city.



23
24
25

# File 'lib/indirizzo/address.rb', line 23

# Reader for @city. After #parse this is an Array of lowercased candidate
# city strings (possibly empty); #parse_state may instead set it to the
# String "Washington" for "washington dc" input. For Hash input without
# :address, #assign_text_to_address stores a one-element Array holding the
# given :city, or "" when none was given.
def city
  @city
end

#country ⇒ `Object`

Returns the value of attribute country.



26
27
28

# File 'lib/indirizzo/address.rb', line 26

# Reader for @country. #parse sets it to whatever follows the last ZIP match
# in the cleaned text (a leading comma is removed and the result stripped),
# and resets it to nil when that remainder is just the entire text.
# For Hash input without :address it is never assigned; a given :country is
# stored in @state instead.
def country
  @country
end

#number ⇒ `Object`

Returns the value of attribute number.



21
22
23

# File 'lib/indirizzo/address.rb', line 21

# Reader for @number. #parse stores the second capture of the first
# Match[:number] match (stripped), or "" when nothing matches. For Hash input
# it is text[:number].to_s, or a number pulled out of the street string.
def number
  @number
end

#options ⇒ `Object`

Returns the value of attribute options.



27
28
29

# File 'lib/indirizzo/address.rb', line 27

# Reader for @options: the Hash passed to the constructor merged over the
# default {:expand_streets => true}.
def options
  @options
end

#plus4 ⇒ `Object`

Returns the value of attribute plus4.



25
26
27

# File 'lib/indirizzo/address.rb', line 25

# Reader for @plus4. #parse stores the second capture of the last
# Match[:zip] match (nil when that group did not participate), or "" when no
# ZIP was found. Presumably the ZIP+4 extension -- confirm against
# Match[:zip]. For Hash input it is text[:plus4] ("" when no :postal_code).
def plus4
  @plus4
end

#prenum ⇒ `Object`

Returns the value of attribute prenum.



21
22
23

# File 'lib/indirizzo/address.rb', line 21

# Reader for @prenum. #parse stores the first capture of the first
# Match[:number] match (stripped; may be nil), or "" when nothing matches.
# For Hash input it is text[:prenum].
def prenum
  @prenum
end

#state ⇒ `Object`

Returns the value of attribute state.



24
25
26

# File 'lib/indirizzo/address.rb', line 24

# Reader for @state. #parse stores State[...] of the last Match[:state]
# match (via #parse_state), or "" when no state matches. For Hash input it is
# :region (looked up through State when longer than two characters), else
# :state, else :country.
def state
  @state
end

#street ⇒ `Object`

Returns the value of attribute street.



22
23
24

# File 'lib/indirizzo/address.rb', line 22

# Reader for @street: an Array of street-name strings taken from
# Match[:street] matches and, when options[:expand_streets] is truthy,
# passed through #expand_streets.
def street
  @street
end

#sufnum ⇒ `Object`

Returns the value of attribute sufnum.



21
22
23

# File 'lib/indirizzo/address.rb', line 21

# Reader for @sufnum. #parse stores the third capture of the first
# Match[:number] match (stripped; may be nil), or "" when nothing matches.
# For Hash input it is text[:sufnum].
def sufnum
  @sufnum
end

#text ⇒ `Object`

Returns the value of attribute text.



20
21
22

# File 'lib/indirizzo/address.rb', line 20

# Reader for @text: the input string after #clean. For Hash input without
# :address it is the :city value converted to a String, or "" when no :city
# was given.
def text
  @text
end

#zip ⇒ `Object`

Returns the value of attribute zip.



25
26
27

# File 'lib/indirizzo/address.rb', line 25

# Reader for @zip. #parse stores the first capture of the last Match[:zip]
# match (stripped), or "" when no ZIP is found. For Hash input it is
# text[:postal_code], or "" when that is missing.
def zip
  @zip
end

Instance Method Details

#assign_text_to_address(text) ⇒ `Object`

# File 'lib/indirizzo/address.rb', line 50

# Populates the address fields from a Hash instead of parsing free text.
#
# text - Hash of address fields. Keys read here: :address, :prenum, :sufnum,
#        :street, :number, :city, :region, :state, :country, :postal_code
#        and :plus4.
#
# When :address is present it is cleaned and run through #parse, and every
# other key is ignored. Otherwise each field is assigned directly:
#
# * @street - Match[:street] matches of :street, lowercased; when :number is
#             absent the house number is cut out of the street string.
# * @number - :number as a String, the number found in the street, or "".
# * @city   - one-element Array holding :city (or ""); @text is set to the
#             city string when :city is given.
# * @state  - :region (looked up in State when longer than two characters),
#             else :state, else :country.
# * @zip / @plus4 - :postal_code / :plus4; both "" when there is no
#             :postal_code.
def assign_text_to_address(text)
  if !text[:address].nil?
    @text = clean(text[:address])
    parse
  else
    @prenum = text[:prenum]
    @sufnum = text[:sufnum]
    @street = text[:street].nil? ? [] : text[:street].scan(Match[:street])
    @number = ""
    if text[:number].nil?
      @street.map! do |single_street|
        single_street.downcase!
        # scan returns [] when the street carries no house number; guard the
        # [0] lookup so such input no longer raises NoMethodError on nil.
        captures = single_street.scan(Match[:number])[0] || []
        found = captures.reject { |n| n.nil? || n.empty? }.first.to_s
        unless found.empty?
          @number = found
          single_street.sub!(found, "")
        end
        single_street.sub!(/^\s*,?\s*/, "")
        # Return the string explicitly rather than relying on sub!'s result,
        # which is nil whenever no substitution happens.
        single_street
      end
    else
      @number = text[:number].to_s
    end
    @street = expand_streets(@street) if @options[:expand_streets]
    # Return value is unused; the call is kept to preserve the original
    # call sequence.
    street_parts

    @city = []
    if !text[:city].nil?
      @city.push(text[:city])
      @text = text[:city].to_s
    else
      @city.push("")
    end

    if !text[:region].nil?
      @state = text[:region]
      # Anything longer than two characters is looked up in State.
      @state = State[@state] if @state.length > 2
    elsif !text[:state].nil?
      @state = text[:state]
    elsif !text[:country].nil?
      @state = text[:country]
    end

    @zip = text[:postal_code]
    @plus4 = text[:plus4]
    if !@zip
      @zip = @plus4 = ""
    end
  end
end

#city_parts ⇒ `Object`

# File 'lib/indirizzo/address.rb', line 263

# Returns every contiguous run of whitespace-separated tokens from each
# city candidate, without duplicates and without entries that are keys of
# Std_Abbr.
#
# For "new york" the runs come out as ["york", "new", "new york"]: start
# positions are visited from the last token backwards.
#
# @city is wrapped with Array() because #parse_state can leave it as a bare
# String ("Washington"), and it is nil before any parsing has happened;
# both previously raised NoMethodError here.
def city_parts
  strings = []
  Array(@city).each do |string|
    tokens = string.split(" ")
    runs = (tokens.length - 1).downto(0).flat_map do |i|
      (i...tokens.length).map { |j| tokens[i..j].join(" ") }
    end
    strings |= runs
  end
  # Don't return strings that consist solely of abbreviations.
  # NOTE: Is this a micro-optimization that has edge cases that will break?
  # Answer: Yes, it breaks on "Prairie"
  strings.reject { |s| Std_Abbr.key?(s) }.uniq
end

#clean(value) ⇒ `Object`

Removes any characters that aren’t strictly part of an address string.

# File 'lib/indirizzo/address.rb', line 44

# Normalizes raw input into the character set the parser works with.
#
# Keeps letters, digits, whitespace and the characters , ' & @ / - and
# drops everything else. Each run of whitespace is then collapsed to one
# space and the result is trimmed.
#
# Whitespace is kept through the filtering pass on purpose: tabs and
# newlines used to be deleted before the collapse step could turn them into
# a space, which glued words together ("St\nBrooklyn" -> "StBrooklyn").
# Trimming last also removes spaces exposed by deleted leading or trailing
# punctuation.
#
# value - the String to clean.
#
# Returns a new String.
def clean(value)
  filtered = value.gsub(/[^a-z0-9\s,'&@\/-]+/i, "")
  filtered.gsub(/\s+/, " ").strip
end

#expand_numbers(string) ⇒ `Object`

Expands a token into a list of possible strings based on the Geocoder::US::Name_Abbr constant, and expands numerals and number words into their possible equivalents.

# File 'lib/indirizzo/address.rb', line 107

# Produces spelling variants of +string+ for the first number it contains.
#
# The number is located by trying, in order: a digit run with an optional
# st/nd/rd/th suffix, Ordinals.regexp, then Cardinals.regexp. When a value
# is found and is below 100, three strings are returned in which the
# matched token is replaced by the numeral, Ordinals[value] and
# Cardinals[value] respectively. Otherwise the result is [string].
#
# string - the String to expand.
#
# Returns an Array of Strings.
def expand_numbers(string)
  if (hit = /\b\d+(?:st|nd|rd|th)?\b/.match(string))
    token = hit[0]
    value = token.to_i
  elsif (hit = Ordinals.regexp.match(string))
    token = hit[0]
    value = Ordinals[token]
  elsif (hit = Cardinals.regexp.match(string))
    token = hit[0]
    value = Cardinals[token]
  end

  return [string] unless value && value < 100

  # The token is a String, so sub replaces its first literal occurrence.
  [value.to_s, Ordinals[value], Cardinals[value]].map do |replacement|
    string.sub(token, replacement)
  end
end

#expand_streets(street) ⇒ `Object`

# File 'lib/indirizzo/address.rb', line 210

# Expands a list of street strings into all of their spelling variants.
#
# Each entry is stripped (in place, so the caller's Array is modified),
# then the list is grown with copies in which Name_Abbr matches and then
# Std_Abbr matches are substituted. Every entry is passed through
# #expand_numbers, and the result is lowercased and de-duplicated.
#
# street - Array of Strings.
#
# Returns a new Array; [] when +street+ is empty or its first entry is nil.
def expand_streets(street)
  return [] if street.empty? || street[0].nil?

  street.map!(&:strip)

  with_names = street | street.map { |entry|
    entry.gsub(Name_Abbr.regexp) { |found| Name_Abbr[found] }
  }
  with_types = with_names | with_names.map { |entry|
    entry.gsub(Std_Abbr.regexp) { |found| Std_Abbr[found] }
  }

  with_types.map { |entry| expand_numbers(entry) }
            .flatten
            .map(&:downcase)
            .uniq
end

#intersection? ⇒ `Boolean`

Returns:

(Boolean)



288
289
290

# File 'lib/indirizzo/address.rb', line 288

# Reports whether Match[:at] matches anywhere in @text.
#
# Returns true or false.
def intersection?
  at_match = Match[:at].match(@text)
  !at_match.nil?
end

#parse ⇒ `Object`

# File 'lib/indirizzo/address.rb', line 137

# Splits @text into @zip/@plus4, @country, @state, @prenum/@number/@sufnum,
# @street and @city.
#
# All matching is done on a lowercased copy of @text; only @country is cut
# from the original-case string. The steps run in a fixed order because each
# one reads `$&` straight after its scan and later steps use the indexes
# computed by earlier ones:
#
# 1. ZIP     - last Match[:zip] match.
# 2. Country - whatever follows the ZIP.
# 3. State   - last Match[:state] match, resolved by #parse_state.
# 4. Number  - first Match[:number] match.
# 5. Street  - Match[:street] matches between the number and the earliest
#              of state/ZIP/end of text, optionally run through
#              #expand_streets.
# 6. City    - last Match[:city] match after the street, unless @city was
#              already set (e.g. by #parse_state).
def parse
  text = @text.clone.downcase

  @zip = text.scan(Match[:zip]).last
  if @zip
    last_match = $&
    zip_index = text.rindex(last_match)
    zip_end_index = zip_index + last_match.length - 1
    @zip, @plus4 = @zip.map {|s| s and s.strip }
  else
    @zip = @plus4 = ""
    zip_index = text.length
    zip_end_index = -1
  end

  @country = @text[zip_end_index+1..-1].sub(/^\s*,\s*/, '').strip
  # With no ZIP the "remainder" is the entire input, which is not a country.
  # `text` is lowercased while @country keeps the original case, so the
  # comparison has to ignore case; a plain == only caught all-lowercase input.
  @country = nil if @country.downcase == text

  @state = text.scan(Match[:state]).last
  if @state
    last_match = $&
    state_index = text.rindex(last_match)
    text = parse_state(last_match, text)
  else
    # state_index stays nil here; it is filtered out below.
    @full_state = ""
    @state = ""
  end

  @number = text.scan(Match[:number]).first
  # FIXME: 230 Fish And Game Rd, Hudson NY 12534
  if @number # and not intersection?
    last_match = $&
    number_index = text.index(last_match)
    number_end_index = number_index + last_match.length - 1
    @prenum, @number, @sufnum = @number.map {|s| s and s.strip}
  else
    number_end_index = -1
    @prenum = @number = @sufnum = ""
  end

  # FIXME: special case: Name_Abbr gets a bit aggressive
  # about replacing St with Saint. exceptional case:
  # Sault Ste. Marie

  # FIXME: PO Box should geocode to ZIP
  # The street search stops just before whichever comes first: the state,
  # the ZIP, or the end of the text.
  street_search_end_index = [state_index,zip_index,text.length].reject(&:nil?).min-1
  @street = text[number_end_index+1..street_search_end_index].scan(Match[:street]).map { |s| s and s.strip }

  @street = expand_streets(@street) if @options[:expand_streets]
  # SPECIAL CASE: 1600 Pennsylvania 20050
  @street << @full_state if @street.empty? and @state.downcase != @full_state.downcase

  # Smallest last-occurrence position of any street variant found in the
  # text; 0 when none of them occurs.
  street_end_index = @street.map { |s| text.rindex(s) }.reject(&:nil?).min||0

  if @city.nil? || @city.empty?
    @city = text[street_end_index..street_search_end_index+1].scan(Match[:city])
    if !@city.empty?
      #@city = [@city[-1].strip]
      @city = [@city.last.strip]
      add = @city.map {|item| item.gsub(Name_Abbr.regexp) {|m| Name_Abbr[m]}}
      @city |= add
      @city.map! {|s| s.downcase}
      @city.uniq!
    else
      @city = []
    end

    # SPECIAL CASE: no city, but a state with the same name. e.g. "New York"
    @city << @full_state if @state.downcase != @full_state.downcase
  end

end

#parse_state(regex_match, text) ⇒ `Object`

# File 'lib/indirizzo/address.rb', line 129

# Resolves the state captured by #parse.
#
# Expects @state to hold the capture Array of the state match. Stores the
# stripped first capture in @full_state (special case: New York) and
# replaces @state with State[@full_state]. When the state is "DC" and the
# matched text reads "washington dc" (dots optional), @city is set to the
# String "Washington".
#
# regex_match - the matched state text.
# text        - the lowercased address text being parsed.
#
# Returns +text+ unchanged.
def parse_state(regex_match, text)
  offset = text.rindex(regex_match)
  @full_state = @state[0].strip
  @state = State[@full_state]

  if @state == "DC"
    matched_span = text[offset...offset + regex_match.length]
    @city = "Washington" if matched_span =~ /washington\s+d\.?c\.?/i
  end

  text
end

#po_box? ⇒ `Boolean`

Returns:

(Boolean)



284
285
286

# File 'lib/indirizzo/address.rb', line 284

# Reports whether Match[:po_box] matches anywhere in @text.
#
# Returns true or false.
def po_box?
  box_match = Match[:po_box].match(@text)
  !box_match.nil?
end

#remove_noise_words(strings) ⇒ `Object`

# File 'lib/indirizzo/address.rb', line 242

# Strips directionals and street-type words from the edges of each string.
#
# For every entry a copy is made, then (in this order) a leading
# directional, a trailing directional, a leading Prefix_Type and a trailing
# Suffix_Type are removed, all case-insensitively. Entries that end up
# empty are dropped.
#
# strings - Array of Strings; neither the Array nor its Strings are modified.
#
# Returns the stripped list, or +strings+ itself when nothing survives.
#
# Don't return strings that consist solely of abbreviations.
# NOTE: Is this a micro-optimization that has edge cases that will break?
# Answer: Yes, it breaks on simple things like "Prairie St" or "Front St"
#
# NOTE(review): the original passed a Std_Abbr/Name_Abbr block to
# Array#empty?, which ignores blocks, so no abbreviation filtering has ever
# taken place here. That behavior is kept as is.
def remove_noise_words(strings)
  # "\s" inside a double-quoted Ruby string is a literal space, so these
  # patterns skip spaces only.
  leading_type  = Regexp.new("^#{Prefix_Type.regexp.source}\s*", Regexp::IGNORECASE)
  trailing_type = Regexp.new("\s*#{Suffix_Type.regexp.source}$", Regexp::IGNORECASE)
  leading_dir   = Regexp.new("^#{Directional.regexp.source}\s*", Regexp::IGNORECASE)
  trailing_dir  = Regexp.new("\s*#{Directional.regexp.source}$", Regexp::IGNORECASE)
  removal_order = [leading_dir, trailing_dir, leading_type, trailing_type]

  stripped = strings.map do |original|
    removal_order.each_with_object(original.clone) do |pattern, candidate|
      candidate.gsub!(pattern, "")
    end
  end
  kept = stripped.reject(&:empty?)

  kept.empty? ? strings : kept
end

#street_parts ⇒ `Object`

# File 'lib/indirizzo/address.rb', line 227

# Lists the searchable pieces of the street names.
#
# Every contiguous run of whitespace-separated tokens from each entry of
# @street is collected (without duplicates) and passed through
# #remove_noise_words. If every remaining piece is a key of Std_Abbr or
# Name_Abbr, which is also the case when nothing remains, @number is
# appended as a simpler fallback.
#
# Returns an Array without duplicates.
def street_parts
  substrings = @street.reduce([]) do |collected, name|
    words = name.split(" ")
    runs = (0...words.length).flat_map do |first|
      (first...words.length).map { |last| words[first..last].join(" ") }
    end
    collected | runs
  end

  candidates = remove_noise_words(substrings)

  # Try a simpler case of adding the @number in case everything is an abbr.
  only_abbreviations = candidates.all? do |part|
    Std_Abbr.key?(part) || Name_Abbr.key?(part)
  end
  candidates += [@number] if only_abbreviations

  candidates.uniq
end

Class: Indirizzo::Address

Overview

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text, options = {}) ⇒ Address

Instance Attribute Details

#city ⇒ Object

#country ⇒ Object

#number ⇒ Object

#options ⇒ Object

#plus4 ⇒ Object

#prenum ⇒ Object

#state ⇒ Object

#street ⇒ Object

#sufnum ⇒ Object

#text ⇒ Object

#zip ⇒ Object

Instance Method Details

#assign_text_to_address(text) ⇒ Object

#city_parts ⇒ Object

#clean(value) ⇒ Object

#expand_numbers(string) ⇒ Object

#expand_streets(street) ⇒ Object

#intersection? ⇒ Boolean

#parse ⇒ Object

#parse_state(regex_match, text) ⇒ Object

#po_box? ⇒ Boolean

#remove_noise_words(strings) ⇒ Object

#street_parts ⇒ Object