Top Level Namespace

Defined Under Namespace

Classes: JapaneseDeinflector

Instance Method Summary collapse

Instance Method Details

#parse(fpath) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/deinflect_to_json.rb', line 4

# Parses a deinflection rules file into a hash of rules grouped by the
# length of the suffix they match.
#
# The first line of the file is treated as a header and ignored. Every
# following non-blank line is either:
#
# * a reason: a line containing no tab characters, or
# * a rule: tab-separated as <from>\t<to>\t<type>\t<reason_index>
#
# A rule's reason is looked up by index among the reason lines read so far.
#
# @param fpath [String] path to the rules file
# @return [Hash{Integer => Array<Hash>}] rules keyed by the size of their
#   from-suffix; each rule is a hash with :from_suffix, :to_suffix and
#   :reason (nil when the reason index matches no reason read so far)
def parse(fpath)
  reasons = []
  rules_hash = {}
  # File.foreach closes the file once iteration finishes, unlike a bare
  # File.open without a block, which leaves the descriptor open.
  File.foreach(fpath).with_index do |line, i|
    next if i == 0 # Skip header

    parts = line.strip.split(/\t/)
    # Blank lines carry no data; without this guard they would be treated
    # as rules and fail on the missing from-suffix.
    next if parts.empty?

    if parts.size == 1
      # Reasons are not tab-separated, so they yield a single field.
      reasons << parts[0]
    else
      # Rule fields: <from>\t<to>\t<type>\t<reason_index>
      from_suffix = parts.first
      reason_id = parts[3].to_i
      (rules_hash[from_suffix.size] ||= []) << {
        :from_suffix => from_suffix,
        :to_suffix => parts[1],
        :reason => reasons[reason_id]
      }
    end
  end
  rules_hash
end