Module: GenderMachine::Core

Defined in:
lib/gender_machine/core.rb

Constant Summary

# Absolute path of the name dictionary, nam_dict.txt.
#
# The path is anchored to the directory of this source file, which lives in
# lib/gender_machine next to the dictionary. Expanding the bare relative path
# "lib/gender_machine/nam_dict.txt" would resolve it against the current
# working directory and therefore only work when the process is started from
# the project root.
DATA_FILE =
File.expand_path("nam_dict.txt", File.dirname(__FILE__)).freeze
# File handle on the dictionary. The mode string reads the bytes as
# ISO-8859-1 and transcodes them to UTF-8.
#
# File.open is used instead of Kernel#open because the argument is always a
# plain file path. The handle is closed as soon as FILE_CONTENT has been read,
# so this constant refers to a closed File once the module has loaded.
RAW_CONTENT =
File.open(DATA_FILE, "r:ISO-8859-1:UTF-8")

# Complete text of the dictionary as a UTF-8 string. The ensure clause closes
# the handle even if the read fails, so no file descriptor is left open.
FILE_CONTENT =
begin
  RAW_CONTENT.read
ensure
  RAW_CONTENT.close
end
# A run of 80 "#" characters. SECTIONS splits the dictionary header on lines
# that begin with this string. Frozen so the shared constant cannot be mutated.
SECTION_DIVIDER =
("#" * 80).freeze
# Header of the dictionary cut into sections.
#
# Only the text before the first "begin of name list" marker is kept; it is
# then split wherever a line starts with SECTION_DIVIDER.
SECTIONS =
FILE_CONTENT
  .split(/begin of name list/)[0]
  .split(Regexp.new("^#{SECTION_DIVIDER}"))
# Header section at index 1 (presumably the licence text of the data file;
# confirm against nam_dict.txt).
DATA_LICENSE = SECTIONS.at(1)

# Header section at index 2 (presumably the notes on character encoding;
# confirm against nam_dict.txt).
ENCODING_INFO = SECTIONS.at(2)

# Header section at index 4 (presumably the description of the row syntax;
# confirm against nam_dict.txt).
SYNTAX_EXPLANATION = SECTIONS.at(4)
# Lines of header section 6 with the first 6 and the last 18 lines removed.
# COUNTRY_MAP reads the remaining lines in groups of three.
COUNTRIES = SECTIONS.at(6).split("\n").slice(6..-19)
# Maps a country symbol to a character offset taken from the header.
#
# COUNTRIES is consumed three lines at a time; the third line of each group
# is ignored.
# * Line 1 is the country label. It becomes the key after removing "#" and
#   "$", stripping surrounding whitespace, downcasing, replacing every
#   character outside a-z with "_", dropping one trailing "_" and converting
#   to a symbol.
# * Line 2 contains a "|"; its offset within that line becomes the value.
COUNTRY_MAP =
COUNTRIES.each_slice(3).each_with_object({}) do |(label, marker_line, _), map|
  cleaned = label.gsub(/[#\$]/, '').strip.downcase
  key = cleaned.gsub(/[^a-z]/, '_').chomp("_").to_sym
  map[key] = marker_line.index('|')
end

# Reverse lookup of COUNTRY_MAP: character offset => country symbol.
COUNTRY_MAP_NUMERIC = COUNTRY_MAP.invert
# Every line of the dictionary (header included), split on "\n", with a single
# trailing "$" removed from each line that ends in one.
CONTENT =
FILE_CONTENT.split("\n").map do |line|
  line.chomp("$")
end
# One four-element array per line of CONTENT that does not start with "#":
#
#   [gender, name, remainder, line]
#
# gender and name are the first two fields when the line is split on runs of
# whitespace. remainder is the last piece when the same line is split on
# single whitespace characters into at most four pieces. line is the
# unmodified row.
NAME_ROWS =
CONTENT.each_with_object([]) do |line, rows|
  next if line.start_with?("#")

  gender, name = line.split(/\s+/, 3)
  remainder = line.split(/\s/, 4).last
  rows << [gender, name, remainder, line]
end
# One Name object per entry of NAME_ROWS, built from the gender, the name, the
# remainder of the row (passed as frequency_string) and the raw row itself.
NAMES =
NAME_ROWS.map do |gender, name, frequency_string, raw|
  Name.new(
    gender: gender,
    name: name,
    frequency_string: frequency_string,
    raw: raw
  )
end