Class: IsoDoc::I18n

Inherits:
Object
  • Object
show all
Defined in:
lib/isodoc/date.rb,
lib/isodoc/i18n.rb,
lib/isodoc/l10n.rb,
lib/isodoc/i18n-yaml.rb,
lib/isodoc/i18n/version.rb,
lib/isodoc/liquid/liquid.rb

Defined Under Namespace

Modules: Liquid

Constant Summary collapse

# Default value for each grammatical category. #inflect and #ordinal_key fall
# back to these when the caller does not supply a value for a category.
INFLECTIONS =
{
  number: "sg",
  case: "nom",
  gender: "m",
  person: "3rd",
  voice: "act",
  mood: "ind",
  tense: "pres",
}.freeze
# Order in which #inflect walks the grammatical categories when descending
# into a nested inflection table.
INFLECTION_ORDER =
%i(voice mood tense number case gender person).freeze
# Regexp *source* (a String, interpolated into the patterns below) matching one
# character in the Han script or in the CJK Symbols And Punctuation /
# Halfwidth And Fullwidth Forms blocks.
ZH_CHAR =
"(\\p{Han}|\\p{In CJK Symbols And Punctuation}|" \
"\\p{In Halfwidth And Fullwidth Forms})".freeze
# Context that must precede a punctuation mark for #l10n_zh_punct to convert it.
ZH1_PUNCT =
/(#{ZH_CHAR}|^)   # CJK character, or start of string
     (\s*)$                   # Latin spaces optional
/xo.freeze
# Context that must follow a punctuation mark for #l10n_zh_punct to convert it.
ZH2_PUNCT =
/^\s*             # followed by ignorable Latin spaces
          [:,.()\[\];?!-]*  # Latin punct which will also convert to CJK
          (#{ZH_CHAR}|$)    # CJK character, or end of string
/xo.freeze
# Context that must precede a dash for #l10n_zh_dash to convert it.
ZH1_DASH =
/(#{ZH_CHAR}|^)    # CJK character, or start of string
            (\d*)             # optional digits
$/xo.freeze
# Context that must follow a dash for #l10n_zh_dash to convert it.
ZH2_DASH =
/^\d*              # followed by optional digits
          (#{ZH_CHAR}|$)    # CJK character, or end of string
/xo.freeze
# Version string of this library.
VERSION =
"1.3.0".freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(lang, script, locale: nil, i18nyaml: nil, i18nhash: nil) ⇒ I18n

Returns a new instance of I18n.



15
16
17
18
19
20
21
22
23
24
# File 'lib/isodoc/i18n.rb', line 15

# Builds a localisation helper for one language/script pair.
#
# @param lang [String] language code (compared elsewhere against values such
#   as "zh" and "fr")
# @param script [String, nil] script code (compared elsewhere against "Hans",
#   "Hant")
# @param locale [String, nil] locale forwarded by #l10n
# @param i18nyaml [String, nil] path of a YAML file merged over the built-in
#   labels
# @param i18nhash [Hash, nil] labels merged over the built-in ones when no
#   YAML path is given
def initialize(lang, script, locale: nil, i18nyaml: nil, i18nhash: nil)
  @lang, @script, @locale = lang, script, locale
  # calendar_data reads @lang/@script through tw_cldr_lang, so it has to run
  # after the assignments above.
  @cal = calendar_data
  @cal_en = TwitterCldr::Shared::Calendar.new(:en)
  @c = HTMLEntities.new
  init_labels(i18nyaml, i18nhash)
  liquid_init
end

Instance Attribute Details

#labelsObject

Returns the value of attribute labels.



13
14
15
# File 'lib/isodoc/i18n.rb', line 13

# Reader for the label collection that #init_labels stores in @labels.
#
# @return [Object] the current value of @labels
def labels
  @labels
end

Class Method Details

.cjk_extend(text) ⇒ Object



149
150
151
# File 'lib/isodoc/l10n.rb', line 149

# Class-level convenience wrapper around the instance method #cjk_extend.
#
# The previous body called +cjk_extend+ with the class as receiver, i.e. it
# called itself and recursed until SystemStackError. It now delegates to an
# instance. The instance is created with +allocate+ so that the constructor's
# side effects (label loading, Liquid filter registration) are not triggered;
# @c is the only state the instance method reads.
#
# @param text [String] text to process
# @return [String] result of the instance-level #cjk_extend
def self.cjk_extend(text)
  formatter = allocate
  formatter.instance_variable_set(:@c, HTMLEntities.new)
  formatter.cjk_extend(text)
end

.l10n(text, lang = @lang, script = @script, locale = @locale) ⇒ Object



3
4
5
# File 'lib/isodoc/l10n.rb', line 3

# Class-level convenience wrapper around the instance method #l10n.
#
# The previous body called +l10n+ with the class as receiver, i.e. it called
# itself and recursed until SystemStackError. It now delegates to an instance
# created with +allocate+ (so the constructor's label loading and Liquid
# registration are not triggered) and seeded with the state the instance-level
# localisation methods read: @lang, @script, @locale and @c.
#
# NOTE(review): the defaults read class-level instance variables, which are
# nil unless something sets them on the class — confirm callers pass
# lang/script explicitly.
#
# @param text [String] text (possibly containing markup) to localise
# @param lang [String, nil] language code
# @param script [String, nil] script code
# @param locale [String, nil] locale code
# @return [String] result of the instance-level #l10n
def self.l10n(text, lang = @lang, script = @script, locale = @locale)
  formatter = allocate
  { :@lang => lang, :@script => script, :@locale => locale,
    :@c => HTMLEntities.new }.each do |ivar, value|
    formatter.instance_variable_set(ivar, value)
  end
  formatter.l10n(text, lang, script, locale)
end

Instance Method Details

#am_pm_i18n(val) ⇒ Object



19
20
21
22
23
24
# File 'lib/isodoc/date.rb', line 19

# Replaces the AM/PM placeholders produced by #convert_date_format (after
# strftime has filled them in) with the period names from @cal.
#
# @param val [String] strftime output containing placeholders
# @return [String] new string with lower-case (%P) and upper-case (%p)
#   period names substituted
def am_pm_i18n(val)
  periods = @cal.periods
  am = periods[:am]
  pm = periods[:pm]
  substitutions = {
    "P<am>" => am.downcase,
    "P<pm>" => pm.downcase,
    "p<AM>" => am.upcase,
    "p<PM>" => pm.upcase,
  }
  substitutions.reduce(val) do |acc, (tag, name)|
    acc.gsub(/%\u200c#{tag}/, name)
  end
end

#bidiwrap(text, lang, script) ⇒ Object



14
15
16
17
18
19
20
21
22
# File 'lib/isodoc/l10n.rb', line 14

# Wraps text in directional marks when its direction differs from the
# direction of this instance's own language/script.
#
# @param text [String] text to wrap
# @param lang [String] language of the text
# @param script [String, nil] script of the text
# @return [String] text wrapped in &#x61c; (Arab/Aran) or &#x200f; when the
#   text is RTL inside LTR, in &#x200e; when LTR inside RTL, otherwise
#   unchanged
def bidiwrap(text, lang, script)
  text_script, text_rtl, context_rtl = bidiwrap_vars(lang, script)
  if text_rtl
    return text if context_rtl

    mark = %w(Arab Aran).include?(text_script) ? "&#x61c;" : "&#x200f;"
    "#{mark}#{text}#{mark}"
  elsif context_rtl
    "&#x200e;#{text}&#x200e;"
  else
    text
  end
end

#bidiwrap_vars(lang, script) ⇒ Object



24
25
26
27
28
29
30
# File 'lib/isodoc/l10n.rb', line 24

# Computes the inputs #bidiwrap needs.
#
# @param lang [String] language of the text being wrapped
# @param script [String, nil] script of the text; when nil the default script
#   for +lang+ is looked up
# @return [Array] [script of the text, whether that script is RTL, whether
#   this instance's own script (@script, or the default for @lang) is RTL]
def bidiwrap_vars(lang, script)
  utils = Metanorma::Utils
  text_script = script || utils.default_script(lang)
  text_rtl = utils.rtl_script?(text_script)
  own_script = @script || utils.default_script(@lang)
  [text_script, text_rtl, utils.rtl_script?(own_script)]
end

#boolean_conj(list, conn) ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/isodoc/i18n.rb', line 52

# Joins a list of items with a conjunction, using the label templates
# "binary_<conn>" (two items) and "multiple_<conn>" (three or more). In the
# templates %1 stands for the leading item(s) and %2 for the last item.
#
# Substitution uses the block form of String#sub so that list items are
# inserted literally; with a string replacement, backslash sequences such as
# "\\0" or "\\1" inside an item would be interpreted as back-references.
#
# @param list [Array<String>] items to join
# @param conn [String] conjunction name used to pick the label (e.g. "and")
# @return [String] "" for an empty list, the sole item for a one-item list,
#   otherwise the filled-in template
def boolean_conj(list, conn)
  case list.size
  when 0 then ""
  when 1 then list.first
  when 2
    @labels["binary_#{conn}"].sub(/%1/) { list[0] }.sub(/%2/) { list[1] }
  else
    # all but the last item are joined with the enumeration comma and localised
    head = l10n(list[0..-2].join(enum_comma), @lang, @script)
    @labels["multiple_#{conn}"].sub(/%1/) { head }.sub(/%2/) { list[-1] }
  end
end

#calendar_dataObject



31
32
33
34
35
# File 'lib/isodoc/i18n.rb', line 31

# Builds the TwitterCldr calendar for this instance's language.
#
# @return [TwitterCldr::Shared::Calendar] calendar for #tw_cldr_lang, or the
#   English calendar if looking up / constructing the localised one raises a
#   StandardError
def calendar_data
  calendar_class = TwitterCldr::Shared::Calendar
  begin
    calendar_class.new(tw_cldr_lang)
  rescue StandardError
    calendar_class.new(:en)
  end
end

#cjk_extend(title) ⇒ Object



153
154
155
156
157
158
159
160
# File 'lib/isodoc/l10n.rb', line 153

# Inserts an ideographic space (U+3000) between adjacent characters of a
# title wherever #interleave_space_cjk? allows it.
#
# The adjacency test is done on the *decoded* characters. Previously the
# decoded string was iterated but the undecoded +title+ was indexed for the
# pair, so the two got out of step as soon as the title contained an entity.
#
# @param title [String] title text, possibly containing HTML entities
# @return [String] decoded title with U+3000 inserted where permitted
def cjk_extend(title)
  chars = @c.decode(title).chars
  chars.each_with_index.map do |char, i|
    if i.zero? || !interleave_space_cjk?(chars[i - 1] + char)
      char
    else "\u3000#{char}"
    end
  end.join
end

#cleanup_entities(text, is_xml: true) ⇒ Object



70
71
72
73
74
75
76
77
78
79
# File 'lib/isodoc/i18n.rb', line 70

# Decodes HTML entities in text via @c.
#
# @param text [String] text to decode
# @param is_xml [Boolean] when true the text is split on "<" and ">" and only
#   the first piece of every group of four (text, "<", tag body, ">") is
#   decoded, so tag contents are left untouched; when false the whole string
#   is decoded
# @return [String] decoded text
def cleanup_entities(text, is_xml: true)
  return @c.decode(text) unless is_xml

  text.split(/([<>])/).each_slice(4).flat_map do |outside, *markup|
    [@c.decode(outside), *markup]
  end.join
end

#convert_date_format(fmt) ⇒ Object



10
11
12
13
# File 'lib/isodoc/date.rb', line 10

# Rewrites a strftime format so that name-producing directives can be
# localised afterwards: "%_" becomes a space, and each of %B %b %h %P %p %A %a
# (optionally with the ^ flag) becomes
# "%<U+200C><flag><letter><%<letter>>", i.e. a marker that strftime leaves
# alone followed by the English value in angle brackets.
#
# @param fmt [String] strftime format
# @return [String] rewritten format
def convert_date_format(fmt)
  spaced = fmt.gsub(/%_/, " ")
  spaced.gsub(/%(\^?)([BbhPpAa])/) do
    flag = Regexp.last_match(1)
    letter = Regexp.last_match(2)
    "%\u200c#{flag}#{letter}<%#{letter}>"
  end
end

#date(value, format) ⇒ Object



5
6
7
8
# File 'lib/isodoc/date.rb', line 5

# Formats an ISO 8601 date/time string with a strftime format, localising
# month, day and AM/PM names.
#
# @param value [String] ISO 8601 date or date-time
# @param format [String] strftime format
# @return [String] formatted, localised date
def date(value, format)
  moment = DateTime.iso8601(value)
  pattern = convert_date_format(format)
  date_i18n(moment.strftime(pattern))
end

#date_i18n(val) ⇒ Object



15
16
17
# File 'lib/isodoc/date.rb', line 15

# Localises the placeholders in a formatted date: AM/PM first, then month
# names, then day names.
#
# @param val [String] strftime output containing placeholders
# @return [String] localised string
def date_i18n(val)
  %i(am_pm_i18n month_i18n day_i18n).reduce(val) do |acc, step|
    send(step, acc)
  end
end

#day_i18n(val) ⇒ Object



37
38
39
40
41
42
43
44
45
46
# File 'lib/isodoc/date.rb', line 37

# Replaces day-name placeholders (%A wide, %a abbreviated, each optionally
# with the ^ upper-case flag) with the day names from @cal. The English names
# from @cal_en identify which day a placeholder refers to.
#
# Unlike the earlier gsub!-based version this does not mutate +val+ (so
# frozen strings are accepted), and the English name is regexp-escaped before
# being interpolated into the pattern.
#
# @param val [String] text containing placeholders
# @return [String] new string with day names localised
def day_i18n(val)
  { A: :wide, a: :abbreviated }.reduce(val) do |acc, (flag, width)|
    localized = @cal.calendar_data[:days][:format][width]
    english = @cal_en.calendar_data[:days][:format][width]
    english.reduce(acc) do |str, (key, en_name)|
      name = localized[key]
      token = Regexp.escape(en_name.to_s)
      str.gsub(/%\u200c#{flag}<#{token}>/, name)
        .gsub(/%\u200c\^#{flag}<#{token}>/, name.upcase)
    end
  end
end

#enum_commaObject



65
66
67
68
# File 'lib/isodoc/i18n.rb', line 65

# Separator used between enumerated items, wrapped in an <enum-comma> element.
#
# @return [String] the ideographic comma for Hans/Hant scripts, otherwise a
#   Latin comma followed by a space
def enum_comma
  if %w(Hans Hant).include?(@script)
    "<enum-comma>、</enum-comma>"
  else
    "<enum-comma>,</enum-comma> "
  end
end

#getObject



50
51
52
# File 'lib/isodoc/i18n-yaml.rb', line 50

# Returns the label collection held in @labels. The per-label accessor methods
# generated by #init_labels read their values through this method.
#
# @return [Object] the current value of @labels
def get
  @labels
end

#inflect(word, options) ⇒ Object

A grammatical category can be skipped if it is not present in the inflection table.



124
125
126
127
128
129
130
131
132
133
# File 'lib/isodoc/i18n.rb', line 124

# Looks up the inflected form of a word in @labels["inflection"].
#
# The entry for a word is either a String (returned as is) or a table nested
# by grammatical category. The table is descended in INFLECTION_ORDER, using
# the value from +options+ or the INFLECTIONS default for each category;
# a category with no matching key at the current level is skipped.
#
# @param word [String] word to inflect
# @param options [Hash] category (Symbol) => requested value
# @return [String] the inflected form, or +word+ itself when there is no
#   entry or the descent never reaches a String
def inflect(word, options)
  entry = @labels.dig("inflection", word)
  return word unless entry
  return entry if entry.is_a?(String)

  INFLECTION_ORDER.each do |category|
    wanted = options[category] || INFLECTIONS[category]
    narrowed = entry[wanted]
    entry = narrowed if narrowed
    return entry if entry.is_a?(String)
  end
  word
end

#inflect_ordinal(num, term, ord_class) ⇒ Object

ord class is either SpelloutRules or OrdinalRules



82
83
84
85
86
87
88
89
90
# File 'lib/isodoc/i18n.rb', line 82

# Renders a number through a rule-based number format.
#
# The rule name comes from @labels[ord_class]: used directly when
# @labels["ordinal_keys"] is nil or empty, otherwise indexed by
# #ordinal_key(term).
#
# @param num [Object] number responding to #localize
# @param term [Hash] grammatical features of the term, passed to #ordinal_key
# @param ord_class [String] "SpelloutRules" or "OrdinalRules"
# @return [String] formatted number; if anything above raises a StandardError
#   the number is localised with @lang instead, using whatever rule name had
#   been resolved by then (possibly nil)
def inflect_ordinal(num, term, ord_class)
  lbl = nil
  begin
    keys = @labels["ordinal_keys"]
    lbl = if keys.nil? || keys.empty? then @labels[ord_class]
          else @labels[ord_class][ordinal_key(term)]
          end
    tw_cldr_localize(num).to_rbnf_s(ord_class, lbl)
  rescue StandardError
    num.localize(@lang.to_sym).to_rbnf_s(ord_class, lbl)
  end
end

#init_labels(i18nyaml, i18nhash) ⇒ Object



37
38
39
40
41
42
43
44
# File 'lib/isodoc/i18n.rb', line 37

# Loads the labels into @labels, adds "language" and "script" entries, and
# defines, on this object's class, one reader method per label key (named
# after the down-cased key) that returns the label via #get.
#
# @param i18nyaml [String, nil] path of a YAML override file
# @param i18nhash [Hash, nil] override labels
# @return [Object] @labels
def init_labels(i18nyaml, i18nhash)
  @labels = load_yaml(@lang, @script, i18nyaml, i18nhash)
  { "language" => @lang, "script" => @script }.each do |name, value|
    @labels[name] = value
  end
  owner = self.class
  @labels.each_key do |key|
    owner.send(:define_method, key.downcase) { get[key] }
  end
end

#interleave_space_cjk?(text) ⇒ Boolean

Returns:

  • (Boolean)


162
163
164
165
166
167
168
169
170
171
# File 'lib/isodoc/l10n.rb', line 162

# Decides whether a space may be inserted between the two characters of a
# pair (used by #cjk_extend).
#
# @param text [String] a two-character string
# @return [Boolean, nil] nil unless the string has exactly two characters;
#   false when the pair is a doubled dash/ellipsis, two digits, two Latin
#   letters, contains whitespace, starts with an opening bracket/quote, ends
#   with a closing bracket/quote, or contains one of the listed punctuation
#   marks; true otherwise
def interleave_space_cjk?(text)
  return unless text.size == 2

  doubled = ["\u2014\u2014", "\u2025\u2025", "\u2026\u2026", "\u22ef\u22ef"]
  return false if doubled.include?(text)

  blockers = [
    /\d\d|\p{Latin}\p{Latin}|[[:space:]]/,
    /^[\u2018\u201c(\u3014\[{\u3008\u300a\u300c\u300e\u3010\u2985\u3018\u3016\u00ab\u301d]/,
    /[\u2019\u201d)\u3015\]}\u3009\u300b\u300d\u300f\u3011\u2986\u3019\u3017\u00bb\u301f]$/,
    /[\u3002.\u3001,\u30fb:;\u2010\u301c\u30a0\u2013!?\u203c\u2047\u2048\u2049]/,
  ]
  blockers.none? { |pattern| pattern.match?(text) }
end

#l10_context_valid?(context, idx, delim, regex) ⇒ Boolean

Returns:

  • (Boolean)


122
123
124
125
126
127
128
129
130
131
# File 'lib/isodoc/l10n.rb', line 122

# Checks whether the token at +idx+ is the delimiter to convert and sits in
# the required context.
#
# @param context [Array<String>] tokens: preceding text, the split pieces of
#   the current text, following text
# @param idx [Integer] index of the token under test
# @param delim [Array] delim[0] is the delimiter, a Regexp or a String
# @param regex [Array<Regexp>] regex[0] must match everything before the
#   token, regex[1] everything after it
# @return [Boolean] true when the token is the delimiter and both contexts
#   match
def l10_context_valid?(context, idx, delim, regex)
  wanted = delim[0]
  token = context[idx]
  is_delim = wanted.is_a?(Regexp) ? wanted.match?(token) : token == wanted
  return is_delim unless is_delim

  before, after = regex
  before.match?(context[0...idx].join) && # preceding context
    after.match?(context[(idx + 1)..-1].join) # following context
end

#l10_zh1(text, prev, foll, _script) ⇒ Object

Note: we cannot differentiate an ordinary comma from the enumeration comma (、). (Former signature: def l10_zh1(text, _script).)



70
71
72
73
74
75
# File 'lib/isodoc/l10n.rb', line 70

# Localises one CJK text node: punctuation conversion, then space removal,
# then dash conversion, each given the same surrounding context.
#
# @param text [String] text of the current node
# @param prev [String] concatenated text preceding the node
# @param foll [String] concatenated text following the node
# @param _script [String] unused
# @return [String] localised text
def l10_zh1(text, prev, foll, _script)
  %i(l10n_zh_punct l10n_zh_remove_space l10n_zh_dash).reduce(text) do |acc, step|
    send(step, acc, prev, foll)
  end
end

#l10n(text, lang = @lang, script = @script, locale = @locale) ⇒ Object

function localising spaces and punctuation.



8
9
10
11
12
# File 'lib/isodoc/l10n.rb', line 8

# Localises spaces and punctuation in text, then applies bidirectional
# wrapping.
#
# @param text [String] text, possibly containing markup
# @param lang [String] language; "zh", "ja" and "ko" get CJK processing,
#   "fr" gets French processing
# @param script [String, nil] script of the text
# @param locale [String, nil] locale used for French; "FR" when nil
# @return [String] localised text
def l10n(text, lang = @lang, script = @script, locale = @locale)
  text = l10n_zh(text, script) if %w(zh ja ko).include?(lang)
  text = l10n_fr(text, locale || "FR") if lang == "fr"
  bidiwrap(text, lang, script)
end

#l10n_context(nodes, idx) ⇒ Object

Returns the previous and following context of the current text node. Do not use just the immediately adjoining text tokens for context; deal with spaces and empty text by simply concatenating the entire context.



48
49
50
51
52
# File 'lib/isodoc/l10n.rb', line 48

# Collects the text surrounding one node of a list.
#
# @param nodes [#[], #size] list of objects responding to #text
# @param idx [Integer] index of the current node
# @return [Array<String>] [text of all nodes before idx concatenated, text of
#   all nodes after idx concatenated]
def l10n_context(nodes, idx)
  [0...idx, (idx + 1)...(nodes.size)].map do |span|
    nodes[span].map(&:text).join
  end
end

#l10n_fr(text, locale) ⇒ Object



54
55
56
57
58
59
60
61
62
63
# File 'lib/isodoc/l10n.rb', line 54

# Applies French punctuation spacing to every text node of a markup fragment.
#
# NOTE(review): Nokogiri's Node#replace parses a String argument as markup —
# confirm that decoded text containing "<" or "&" is handled as intended.
#
# @param text [String] markup fragment
# @param locale [String] locale, forwarded to #l10n_fr1
# @return [String] serialised fragment
def l10n_fr(text, locale)
  fragment = Nokogiri::XML::DocumentFragment.parse(text)
  text_nodes = fragment.xpath(".//text()")
  text_nodes.each_with_index do |node, pos|
    before, after = l10n_context(text_nodes, pos)
    decoded = cleanup_entities(node.text, is_xml: false)
    node.replace(l10n_fr1(decoded, before, after, locale))
  end
  to_xml(fragment)
end

#l10n_fr1(text, prev, foll, locale) ⇒ Object



140
141
142
143
144
145
146
147
# File 'lib/isodoc/l10n.rb', line 140

# French spacing for one text node: a narrow no-break space (U+202F) before
# » › ; ? ! and after « ‹, and a space before ":" (U+202F for locale "CH",
# U+00A0 otherwise). The space before a mark is only added when the mark
# follows an alphanumeric and is followed by whitespace or end of text.
#
# @param text [String] text of the current node
# @param prev [String] text preceding the node
# @param foll [String] text following the node
# @param locale [String] locale code
# @return [String] text with spacing applied
def l10n_fr1(text, prev, foll, locale)
  after_word = [/\p{Alnum}$/, /^(\s|$)/]
  colon_space = locale == "CH" ? "\u202f" : "\u00a0"
  [
    [[/[»›;?!]/, "\u202f\\0"], after_word],
    [[/[«‹]/, "\\0\u202f"], [/$/, /^./]],
    [[":", "#{colon_space}\\0"], after_word],
  ].reduce(text) do |acc, (delim, context)|
    l10n_gsub(acc, prev, foll, delim, context)
  end
end

#l10n_gsub(text, prev, foll, delim, regex) ⇒ Object



106
107
108
109
110
111
112
113
# File 'lib/isodoc/l10n.rb', line 106

# Context-sensitive replacement of a delimiter inside one text node.
#
# The text is split around the delimiter and framed by the surrounding text;
# every delimiter occurrence whose context passes #l10_context_valid? is
# replaced by delim[1], in which the two characters "\0" stand for the matched
# delimiter. Replacements are applied left to right, so later checks see
# earlier replacements in their preceding context.
#
# @param text [String] text of the current node
# @param prev [String] text preceding the node
# @param foll [String] text following the node
# @param delim [Array] [delimiter (String or Regexp), replacement String]
# @param regex [Array<Regexp>] [preceding-context, following-context]
# @return [String] text with replacements applied; +text+ itself when the
#   delimiter does not occur
def l10n_gsub(text, prev, foll, delim, regex)
  context = l10n_gsub_context(text, prev, foll, delim)
  return text unless context

  inner = 1...(context.size - 1) # skip the prev/foll frame
  inner.each do |pos|
    next unless l10_context_valid?(context, pos, delim, regex)

    context[pos] = delim[1].gsub("\\0", context[pos]) # Full-width equivalent
  end
  context[inner].join
end

#l10n_gsub_context(text, prev, foll, delim) ⇒ Object



115
116
117
118
119
120
# File 'lib/isodoc/l10n.rb', line 115

# Splits text around a delimiter (keeping the delimiters as separate tokens)
# and frames the pieces with the preceding and following text.
#
# @param text [String] text of the current node
# @param prev [String] text preceding the node
# @param foll [String] text following the node
# @param delim [Array] delim[0] is the delimiter: a Regexp, or a String that
#   is matched literally
# @return [Array<String>, nil] [prev, *pieces, foll], or nil when splitting
#   yields exactly one piece (the delimiter does not occur)
def l10n_gsub_context(text, prev, foll, delim)
  needle = delim[0]
  pattern = needle.is_a?(Regexp) ? needle : Regexp.quote(needle)
  pieces = text.split(/(#{pattern})/) # delimiter kept as its own token
  return if pieces.size == 1

  [prev, pieces, foll].flatten
end

#l10n_zh(text, script = "Hans") ⇒ Object

CJK



33
34
35
36
37
38
39
40
41
42
43
# File 'lib/isodoc/l10n.rb', line 33

# Applies CJK localisation to every text node of a markup fragment, then
# strips <b>/</b> tags and processing instructions from the serialised
# result.
#
# NOTE(review): Nokogiri's Node#replace parses a String argument as markup —
# confirm that decoded text containing "<" or "&" is handled as intended.
#
# @param text [String] markup fragment
# @param script [String] script, forwarded to #l10_zh1
# @return [String] serialised, localised fragment
def l10n_zh(text, script = "Hans")
  fragment = Nokogiri::XML::DocumentFragment.parse(text)
  text_nodes = fragment.xpath(".//text()")
  text_nodes.each_with_index do |node, pos|
    before, after = l10n_context(text_nodes, pos)
    decoded = cleanup_entities(node.text, is_xml: false)
    node.replace(l10_zh1(decoded, before, after, script))
  end
  [/<b>/, "</b>", /<\?[^>]+>/].reduce(to_xml(fragment)) do |acc, junk|
    acc.gsub(junk, "")
  end
end

#l10n_zh_dash(text, prev, foll) ⇒ Object



102
103
104
# File 'lib/isodoc/l10n.rb', line 102

# Converts a dash to its CJK form when it sits in the context described by
# ZH1_DASH / ZH2_DASH (CJK character or text boundary, optional digits).
#
# The delimiter pair was `%w( )`, which evaluates to an empty array: delim[0]
# was nil and #l10n_gsub_context raised on Regexp.quote(nil).
# NOTE(review): the pair below (en dash U+2013 => fullwidth tilde U+FF5E) is
# a reconstruction of characters that appear to have been lost — confirm
# against the upstream source.
#
# @param text [String] text of the current node
# @param prev [String] text preceding the node
# @param foll [String] text following the node
# @return [String] text with dashes converted
def l10n_zh_dash(text, prev, foll)
  l10n_gsub(text, prev, foll, ["\u2013", "\uff5e"], [ZH1_DASH, ZH2_DASH])
end

#l10n_zh_punct(text, prev, foll) ⇒ Object

CJK punct if (^|CJK).($|CJK)



86
87
88
89
90
91
92
# File 'lib/isodoc/l10n.rb', line 86

# Converts Latin punctuation to its full-width CJK equivalent when the mark
# sits in CJK context: (start of text | CJK character) before it and
# (CJK character | end of text) after it, per ZH1_PUNCT / ZH2_PUNCT.
#
# Each pair previously consisted of the same ASCII character twice, so every
# "replacement" was a no-op. The full-width targets are written as escapes so
# they cannot be silently folded back to ASCII.
# NOTE(review): "." is mapped to the ideographic full stop U+3002; confirm
# against upstream (the fullwidth full stop U+FF0E is the other candidate).
#
# @param text [String] text of the current node
# @param prev [String] text preceding the node
# @param foll [String] text following the node
# @return [String] text with punctuation converted
def l10n_zh_punct(text, prev, foll)
  {
    ":" => "\uff1a", "," => "\uff0c", "." => "\u3002", ")" => "\uff09",
    "]" => "\uff3d", ";" => "\uff1b", "?" => "\uff1f", "!" => "\uff01",
    "(" => "\uff08", "[" => "\uff3b"
  }.reduce(text) do |acc, (latin, wide)|
    l10n_gsub(acc, prev, foll, [latin, wide], [ZH1_PUNCT, ZH2_PUNCT])
  end
end

#l10n_zh_remove_space(text, prev, foll) ⇒ Object



133
134
135
136
137
138
# File 'lib/isodoc/l10n.rb', line 133

# Removes Latin spaces adjacent to CJK text. Two passes: a space is dropped
# when it follows a CJK character or digit and precedes a CJK character; then
# when it follows a CJK character and precedes a digit, or a single Latin
# letter that is itself followed by a CJK character or the end of the text.
#
# @param text [String] text of the current node
# @param prev [String] text preceding the node
# @param foll [String] text following the node
# @return [String] text with those spaces removed
def l10n_zh_remove_space(text, prev, foll)
  drop_space = [" ", ""]
  [
    [/(#{ZH_CHAR}|\d)$/o, /^#{ZH_CHAR}/o],
    [/#{ZH_CHAR}$/o, /^(\d|[A-Za-z](#{ZH_CHAR}|$))/o],
  ].reduce(text) do |acc, context|
    l10n_gsub(acc, prev, foll, drop_space, context)
  end
end

#liquid_initObject



26
27
28
29
# File 'lib/isodoc/i18n.rb', line 26

# Hands this instance to the IsoDoc::I18n::Liquid module and registers that
# module as a filter with Liquid::Template.
def liquid_init
  filters = ::IsoDoc::I18n::Liquid
  filters.set(self)
  ::Liquid::Template.register_filter(filters)
end

#load_yaml(lang, script, i18nyaml = nil, i18nhash = nil) ⇒ Object



8
9
10
11
12
13
14
15
# File 'lib/isodoc/i18n-yaml.rb', line 8

# Loads the built-in labels for a language/script and merges overrides on
# top. A YAML file takes precedence over a hash; when both are absent the
# built-in labels are used as is. The result is passed through
# #normalise_hash.
#
# @param lang [String] language code
# @param script [String, nil] script code
# @param i18nyaml [String, nil] path of a YAML override file
# @param i18nhash [Hash, nil] override labels
# @return [Object] normalised labels
def load_yaml(lang, script, i18nyaml = nil, i18nhash = nil)
  base = load_yaml1(lang, script)
  merged = if i18nyaml then base.deep_merge(YAML.load_file(i18nyaml))
           elsif i18nhash then base.deep_merge(i18nhash)
           else base
           end
  normalise_hash(merged)
end

#load_yaml1(lang, script) ⇒ Object



30
31
32
33
34
35
36
37
38
39
# File 'lib/isodoc/i18n-yaml.rb', line 30

# Picks which built-in label file to load: "zh-Hans" for Chinese in the Hans
# script, "en" for Chinese in any other script, otherwise the language code
# itself.
#
# @param lang [String] language code
# @param script [String, nil] script code
# @return [Object] whatever #load_yaml2 returns for the chosen code
def load_yaml1(lang, script)
  return load_yaml2(lang) unless "zh" == lang

  load_yaml2(script == "Hans" ? "zh-Hans" : "en")
end

#load_yaml2(lang) ⇒ Object

locally defined in calling class



42
43
44
45
46
47
48
# File 'lib/isodoc/i18n-yaml.rb', line 42

# Loads ../isodoc-yaml/i18n-<lang>.yaml relative to this source file.
#
# @param lang [String] language code used in the file name
# @return [Object] parsed YAML; the English file is loaded instead when
#   loading the requested one raises a StandardError
def load_yaml2(lang)
  path_for = lambda do |code|
    File.join(File.dirname(__FILE__), "../isodoc-yaml/i18n-#{code}.yaml")
  end
  begin
    YAML.load_file(path_for.call(lang))
  rescue StandardError
    YAML.load_file(path_for.call("en"))
  end
end

#month_i18n(val) ⇒ Object



26
27
28
29
30
31
32
33
34
35
# File 'lib/isodoc/date.rb', line 26

# Replaces month-name placeholders (%B wide, %b and %h abbreviated, each
# optionally with the ^ upper-case flag) with the month names from @cal. The
# English names from @cal_en identify which month a placeholder refers to.
#
# Unlike the earlier gsub!-based version this does not mutate +val+ (so
# frozen strings are accepted), and the English name is regexp-escaped before
# being interpolated into the pattern.
#
# @param val [String] text containing placeholders
# @return [String] new string with month names localised
def month_i18n(val)
  { B: :wide, b: :abbreviated, h: :abbreviated }.reduce(val) do |acc, (flag, width)|
    localized = @cal.calendar_data[:months][:format][width]
    english = @cal_en.calendar_data[:months][:format][width]
    english.reduce(acc) do |str, (key, en_name)|
      name = localized[key]
      token = Regexp.escape(en_name.to_s)
      str.gsub(/%\u200c#{flag}<#{token}>/, name)
        .gsub(/%\u200c\^#{flag}<#{token}>/, name.upcase)
    end
  end
end

#normalise_hash(ret) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/isodoc/i18n-yaml.rb', line 17

# Recursively normalises label data: every String is NFC-normalised and has
# its entities cleaned up via #cleanup_entities; Hash values are replaced in
# place; Arrays are mapped to new Arrays; anything else is returned as is.
#
# @param ret [Object] label data
# @return [Object] normalised data (the same Hash object for a Hash input)
def normalise_hash(ret)
  if ret.is_a?(Hash)
    ret.transform_values! { |value| normalise_hash(value) }
  elsif ret.is_a?(Array)
    ret.map { |item| normalise_hash(item) }
  elsif ret.is_a?(String)
    cleanup_entities(ret.unicode_normalize(:nfc))
  else
    ret
  end
end

#ordinal_key(term) ⇒ Object



110
111
112
113
114
# File 'lib/isodoc/i18n.rb', line 110

# Builds the lookup key for an ordinal rule: for each category listed in
# @labels["ordinal_keys"], the term's value for that category (or the
# INFLECTIONS default), joined with ".".
#
# @param term [Hash] category name (String) => value
# @return [String] dot-joined key, e.g. "f.sg"
def ordinal_key(term)
  parts = @labels["ordinal_keys"].map do |category|
    term[category.to_s] || INFLECTIONS[category.to_sym]
  end
  parts.join(".")
end

#populate(keys, vars = {}) ⇒ Object

populate with variables, Liquid, inflections, ordinals/spellout



47
48
49
50
# File 'lib/isodoc/i18n.rb', line 47

# Renders a label as a Liquid template.
#
# @param keys [String, Array<String>] key, or path of keys, into @labels
# @param vars [Hash] template variables; "labels" is added (bound to @labels)
# @return [String] rendered template
def populate(keys, vars = {})
  source = @labels.dig(*Array(keys))
  template = ::Liquid::Template.parse(source)
  template.render(vars.merge("labels" => @labels))
end

#set(key, val) ⇒ Object



54
55
56
# File 'lib/isodoc/i18n-yaml.rb', line 54

# Stores (or overwrites) one label.
#
# @param key [String] label key
# @param val [Object] label value
# @return [Object] the stored value
def set(key, val)
  @labels.store(key, val)
end

#to_xml(node) ⇒ Object



173
174
175
176
# File 'lib/isodoc/l10n.rb', line 173

# Serialises a node as UTF-8 XML with no indentation.
#
# @param node [#to_xml, nil] node to serialise
# @return [String, nil] serialised XML, or nil when node is nil
def to_xml(node)
  return nil if node.nil?

  options = { encoding: "UTF-8", indent: 0,
              save_with: Nokogiri::XML::Node::SaveOptions::AS_XML }
  node.to_xml(**options)
end

#tw_cldr_langObject



116
117
118
119
120
121
# File 'lib/isodoc/i18n.rb', line 116

# Locale symbol to use with TwitterCldr for this instance.
#
# @return [Symbol] :"zh-cn" for zh/Hans, :"zh-tw" for zh/Hant, otherwise
#   @lang as a Symbol
def tw_cldr_lang
  return @lang.to_sym unless @lang == "zh"

  case @script
  when "Hans" then :"zh-cn"
  when "Hant" then :"zh-tw"
  else @lang.to_sym
  end
end

#tw_cldr_localize(num) ⇒ Object



92
93
94
95
96
# File 'lib/isodoc/i18n.rb', line 92

# Localises a number for this instance's language.
#
# @param num [#localize] number to localise
# @return [Object] num.localize(tw_cldr_lang), or num.localize(:en) when that
#   raises a StandardError
def tw_cldr_localize(num)
  begin
    num.localize(tw_cldr_lang)
  rescue StandardError
    num.localize(:en)
  end
end