Class: LucaRecord::Dict

Inherits:
Object
  • Object
show all
Includes:
LucaSupport::Code
Defined in:
lib/luca_record/dict.rb

Class Method Summary collapse

Instance Method Summary collapse

Methods included from LucaSupport::Code

decimalize, decode_date, decode_id, decode_month, decode_term, decode_txid, delimit_num, encode_date, encode_dirname, encode_month, encode_term, encode_txid, has_status?, issue_random_id, keys_stringify, match_score, parse_current, readable, take_current, take_history, to_ngram

Constructor Details

#initialize(file = @filename) ⇒ Dict

Returns a new instance of Dict.



16
17
18
19
# File 'lib/luca_record/dict.rb', line 16

# Build a dictionary object bound to +file+.
#
# The file is resolved through the class-level +dict_path+ and the result is
# kept in @path; +set_driver+ (defined elsewhere in this class) then runs.
#
# @param file [String] dictionary file name. The default expression reads
#   the @filename instance variable of the object being initialized.
#   NOTE(review): presumably nil unless something assigns it earlier - confirm.
def initialize(file = @filename)
  resolved_path = self.class.dict_path(file)
  @path = resolved_path
  set_driver
end

Class Method Details

.load(file = @filename) ⇒ Object

Load dictionary data from a TSV/CSV or YAML file, chosen by the file extension.



70
71
72
73
74
75
76
77
78
79
# File 'lib/luca_record/dict.rb', line 70

# Load dictionary data, picking the parser from the extension of +file+.
#
# * '.tsv' and '.csv' are both handed to load_tsv_dict.
# * '.yaml' and '.yml' are parsed with YAML.safe_load, with Date permitted.
#
# The extension comparison is case-sensitive. The file name is resolved
# through dict_path before it is read.
#
# @param file [String] dictionary file name (defaults to the class-level @filename)
# @return [Object] whatever the selected parser returns
# @raise [RuntimeError] when the extension is none of the above
def self.load(file = @filename)
  extension = File.extname(file)
  if %w[.tsv .csv].include?(extension)
    load_tsv_dict(dict_path(file))
  elsif %w[.yaml .yml].include?(extension)
    YAML.safe_load(File.read(dict_path(file)), permitted_classes: [Date])
  else
    raise 'cannot load this filetype'
  end
end

.load_tsv_dict(path) ⇒ Object

Generate a dictionary from a TSV file. Minimum assumption: the 1st row is a header row whose names are converted to symbols.

  • row[0] is ‘code’. Converted to hash keys

  • row[1] is ‘label’. Should be human readable labels

  • row[2] and after can be app specific data



88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/luca_record/dict.rb', line 88

# Build a dictionary Hash from a tab-separated file with a header row.
#
# The first field of every data row becomes the dictionary key. The
# remaining fields are stored in a per-row Hash keyed by the symbolized
# header name; fields that are nil (empty cells) are left out.
#
# @param path [String] path of the TSV file, read as UTF-8
# @return [Hash{String => Hash{Symbol => String}}] code => attributes
def self.load_tsv_dict(path)
  table = CSV.read(path, headers: true, col_sep: "\t", encoding: 'UTF-8')
  table.each_with_object({}) do |row, dict|
    # Skip the code column by position, not by looking its header up again:
    # a by-name lookup is a linear scan per field and would also drop any
    # later column whose header repeats the first header's name.
    attributes = row.to_a.drop(1)
    dict[row[0]] = attributes.each_with_object({}) do |(header, field), entry|
      entry[header.to_sym] = field unless field.nil?
    end
  end
end

.validate(filename, target_key = :label) ⇒ Object



103
104
105
106
107
108
109
110
111
112
# File 'lib/luca_record/dict.rb', line 103

# Report dictionary entries whose +target_key+ value is nil.
#
# Loads +filename+ through load, collects the keys of every entry for which
# entry[target_key] is nil, and prints a one-line summary with puts.
#
# @param filename [String] dictionary file passed to load
# @param target_key [Object] key looked up in each entry (default :label)
# @return [Integer, nil] number of offending keys, or nil when there are none
def self.validate(filename, target_key = :label)
  missing = load(filename).each_with_object([]) do |(code, entry), found|
    found << code if entry[target_key].nil?
  end.compact

  unless missing.empty?
    puts "Key #{missing.join(', ')} has nil #{target_key}."
    return missing.count
  end

  puts 'No error detected.'
  nil
end

Instance Method Details

#dig(*args) ⇒ Object

Search with unique code.



41
42
43
# File 'lib/luca_record/dict.rb', line 41

# Look an entry up by its exact code path.
#
# Forwards every argument to @data.dig and returns its result.
# NOTE(review): @data is not assigned in this method; presumably it is
# populated elsewhere before dig is called - confirm.
#
# @param args [Array] keys passed straight through to @data.dig
# @return [Object, nil] the value found at that path, as returned by @data.dig
def dig(*args)
  dictionary = @data
  dictionary.dig(*args)
end

#hash2multiassign(obj, main_key = 'label', options: nil) ⇒ Object

Separate the main item from the other options. If options is specified as an Array of strings, it works as a safelist filter: only the listed keys are kept.



48
49
50
51
52
53
54
55
56
57
58
# File 'lib/luca_record/dict.rb', line 48

# Split +obj+ into its main value and a Hash of the remaining entries.
#
# Every key other than +main_key+ is copied with its key converted by
# to_sym. When +options+ is given, only keys it includes are copied.
# Entries whose value is nil are removed from the result.
#
# @param obj [Hash] source entry
# @param main_key [Object] key of the main value (default 'label')
# @param options [#include?, nil] safelist of keys to keep; nil keeps all
# @return [Array] two elements: obj[main_key] and the Hash of other entries
def hash2multiassign(obj, main_key = 'label', options: nil)
  extras = obj.each_with_object({}) do |(key, value), collected|
    next if key == main_key
    next unless options.nil? || options.include?(key)

    collected[key.to_sym] = value
  end
  [obj[main_key], extras.compact]
end

#load_csv(path) ⇒ Object

Load CSV with config options



62
63
64
65
66
# File 'lib/luca_record/dict.rb', line 62

# Read a CSV file with headers and yield each row to the caller's block.
#
# The source encoding comes from @config's 'encoding' entry and falls back
# to 'utf-8'; rows are transcoded to UTF-8. The whole file is read with
# CSV.read before iteration starts.
#
# @param path [String] path of the CSV file
# @yield [row] each data row of the parsed file
# @return [Object] the value returned by iterating the parsed table
def load_csv(path)
  source_encoding = @config.dig('encoding') || 'utf-8'
  table = CSV.read(path, headers: true, encoding: "#{source_encoding}:utf-8")
  table.each { |row| yield row }
end

#search(word, default_word = nil, main_key: 'label', options: nil) ⇒ Object

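Search for a code by n-gram matching on the given word. If the matched entry is a Hash, it returns [label, options]; if it is an Array, it returns one such pair per item; otherwise the entry itself is returned.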



24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/luca_record/dict.rb', line 24

# Find the best-matching dictionary entry for +word+.
#
# All whitespace is stripped from +word+ before it is handed to
# max_score_code, which supplies the matched entry and its score. A score
# below 0.4 yields +default_word+ instead of the entry.
#
# @param word [String] search text
# @param default_word [Object] value returned when the score is below 0.4
# @param main_key [Object] forwarded to hash2multiassign (default 'label')
# @param options [Object, nil] forwarded to hash2multiassign
# @return [Object] for a Hash entry, the hash2multiassign result; for an
#   Array entry, one hash2multiassign result per item; otherwise the entry
def search(word, default_word = nil, main_key: 'label', options: nil)
  definitions_lazyload
  squeezed_word = word.gsub(/[[:space:]]/, '')
  res, score = max_score_code(squeezed_word)
  return default_word if score < 0.4

  if res.is_a?(Hash)
    hash2multiassign(res, main_key, options: options)
  elsif res.is_a?(Array)
    res.map { |item| hash2multiassign(item, main_key, options: options) }
  else
    res
  end
end