Class: Natsukantou::SubstitudeGlossary

Inherits:
Object
  • Object
show all
Includes:
UtilityBase
Defined in:
lib/natsukantou/substitude_glossary.rb

Constant Summary collapse

# Language codes that, per the constant's name, are treated as having no
# word dividers. NOTE(review): presumably consulted by with_space_divider?,
# which is not visible here -- confirm.
LANGUAGES_WITHOUT_WORD_DIVIDERS = %w[ja zh th lo].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Logger

#logger

Methods included from ParseXml

#dom, #dom_node

Constructor Details

#initialize(app, filepath: nil, glossary: []) ⇒ SubstitudeGlossary

Returns a new instance of SubstitudeGlossary.

Parameters:

  • filepath (String) (defaults to: nil)

    path to TSV glossary file.

  • glossary (Array(Array(String, String))) (defaults to: [])

    array representing the glossary as [source, target] pairs, e.g. [['book', '本']]



22
23
24
25
26
27
28
29
30
31
32
# File 'lib/natsukantou/substitude_glossary.rb', line 22

# Builds the middleware and normalises its glossary.
#
# The glossary is the +glossary+ rows followed by any rows read from
# +filepath+. Rows are de-duplicated by source term (the first occurrence
# wins) and then ordered so that longer source terms come first.
#
# @param app [#call] next application in the chain; stored in @app
# @param filepath [String, nil] path to a TSV glossary file, handed to
#   parse_tsv (defined elsewhere; presumably returns [source, target] rows)
# @param glossary [Array<Array(String, String)>] rows of [source, target],
#   e.g. [['book', '本']]; the array itself is left untouched
def initialize(app, filepath: nil, glossary: [])
  @app = app

  # Copy first: the in-place operations below must not modify (or fail on)
  # an array owned by the caller, which may be shared or frozen.
  @glossary = glossary.dup
  @glossary.concat(parse_tsv(filepath)) if filepath

  @glossary.uniq!(&:first)

  # Longer term has higher priority
  @glossary.sort_by! { |row| -row.first.length }
end

Instance Attribute Details

#glossary ⇒ Object (readonly)

Returns the value of attribute glossary.



34
35
36
# File 'lib/natsukantou/substitude_glossary.rb', line 34

# Reader for the glossary rows held in @glossary.
#
# @return [Array<Array(String, String)>] rows of [source term, target term]
#   as stored by #initialize (de-duplicated by source term, longest source
#   term first)
def glossary
  @glossary
end

#regex ⇒ Object (readonly)

Returns the value of attribute regex.



34
35
36
# File 'lib/natsukantou/substitude_glossary.rb', line 34

# Reader for the pattern held in @regex.
#
# In the code visible here @regex is only assigned by
# #prepare_regular_expression, so this is nil until that method has run.
#
# @return [Regexp, nil] union of the glossary's source terms
def regex
  @regex
end

#replacement_mapping ⇒ Object (readonly)

Returns the value of attribute replacement_mapping.



34
35
36
# File 'lib/natsukantou/substitude_glossary.rb', line 34

# Reader for the lookup table held in @replacement_mapping.
#
# In the code visible here it is only assigned by
# #prepare_regular_expression, so this is nil until that method has run.
#
# @return [Hash{String => String}, nil] source term mapped to its target
#   term wrapped as "<skip>target</skip>"
def replacement_mapping
  @replacement_mapping
end

Instance Method Details

#call(env) ⇒ Object



36
37
38
39
40
41
42
# File 'lib/natsukantou/substitude_glossary.rb', line 36

# Middleware entry point.
#
# Rebuilds the glossary pattern for this environment, feeds every node
# yielded by env[:dom].each_node to #process_node, and then passes the
# environment on to the next application.
#
# @param env [Hash] environment; env[:dom] must respond to #each_node
# @return [Object] whatever the downstream application returns
def call(env)
  prepare_regular_expression(env)

  node_handler = method(:process_node)
  env[:dom].each_node(&node_handler)

  @app.call(env)
end

#prepare_regular_expression(env) ⇒ Object



44
45
46
47
48
49
50
51
# File 'lib/natsukantou/substitude_glossary.rb', line 44

# Builds the matcher and the replacement table from the glossary.
#
# Sets @regex to a union of all source terms and @replacement_mapping to a
# hash from each source term to its target wrapped in <skip>...</skip>.
# When with_space_divider?(env) is truthy, each term is additionally
# anchored with \b on both sides so it only matches between word boundaries.
# NOTE(review): with_space_divider? is defined elsewhere; presumably it
# reports whether the source language separates words with spaces -- confirm.
#
# @param env [Hash] environment, passed through to with_space_divider?
# @return [Hash{String => String}] the replacement mapping
def prepare_regular_expression(env)
  source_terms = glossary.map(&:first)

  if with_space_divider?(env)
    # Escape before interpolating: glossary terms are literal text, so
    # characters such as "." or "(" must not be read as regex syntax.
    # (Regexp.union already escapes plain strings in the other branch.)
    source_terms.map! { |term| /\b#{Regexp.escape(term)}\b/ }
  end

  @regex = Regexp.union(source_terms)

  @replacement_mapping =
    glossary.to_h.transform_values { |target| "<skip>#{target}</skip>" }
end