Class: IsoDoc::I18n
- Inherits:
-
Object
- Object
- IsoDoc::I18n
- Defined in:
- lib/isodoc/date.rb,
lib/isodoc/i18n.rb,
lib/isodoc/l10n.rb,
lib/isodoc/i18n-yaml.rb,
lib/isodoc/i18n/version.rb,
lib/isodoc/liquid/liquid.rb
Defined Under Namespace
Modules: Liquid
Constant Summary collapse
- INFLECTIONS =
{ number: "sg", case: "nom", gender: "m", person: "3rd", voice: "act", mood: "ind", tense: "pres", }.freeze
- INFLECTION_ORDER =
%i(voice mood tense number case gender person).freeze
- ZH_CHAR =
"(\\p{Han}|\\p{In CJK Symbols And Punctuation}|" \ "\\p{In Halfwidth And Fullwidth Forms})".freeze
- ZH1_PUNCT =
/(#{ZH_CHAR}|^) # CJK character, or start of string
 (\s*)$ # Latin spaces optional
/xo.freeze
- ZH2_PUNCT =
/^\s* # followed by ignorable Latin spaces
 [:,.()\[\];?!-]* # Latin punct which will also convert to CJK
 (#{ZH_CHAR}|$) # CJK character, or end of string
/xo.freeze
- ZH1_DASH =
/(#{ZH_CHAR}|^) # CJK character, or start of string
 (\d*) # optional digits
 $/xo.freeze
- ZH2_DASH =
/^\d* # followed by optional digits
 (#{ZH_CHAR}|$) # CJK character, or end of string
/xo.freeze
- VERSION =
"1.3.0".freeze
Instance Attribute Summary collapse
-
#labels ⇒ Object
Returns the value of attribute labels.
Class Method Summary collapse
Instance Method Summary collapse
- #am_pm_i18n(val) ⇒ Object
- #bidiwrap(text, lang, script) ⇒ Object
- #bidiwrap_vars(lang, script) ⇒ Object
- #boolean_conj(list, conn) ⇒ Object
- #calendar_data ⇒ Object
- #cjk_extend(title) ⇒ Object
- #cleanup_entities(text, is_xml: true) ⇒ Object
- #convert_date_format(fmt) ⇒ Object
- #date(value, format) ⇒ Object
- #date_i18n(val) ⇒ Object
- #day_i18n(val) ⇒ Object
- #enum_comma ⇒ Object
- #get ⇒ Object
-
#inflect(word, options) ⇒ Object
can skip category if not present.
-
#inflect_ordinal(num, term, ord_class) ⇒ Object
ord class is either SpelloutRules or OrdinalRules.
- #init_labels(i18nyaml, i18nhash) ⇒ Object
-
#initialize(lang, script, locale: nil, i18nyaml: nil, i18nhash: nil) ⇒ I18n
constructor
A new instance of I18n.
- #interleave_space_cjk?(text) ⇒ Boolean
- #l10_context_valid?(context, idx, delim, regex) ⇒ Boolean
-
#l10_zh1(text, prev, foll, _script) ⇒ Object
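Note: a comma cannot be differentiated from an enumeration comma (、). (The trailing `def l10_zh1(text, _script)` appears to be a commented-out earlier signature carried over from the source comment.)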
-
#l10n(text, lang = @lang, script = @script, locale = @locale) ⇒ Object
function localising spaces and punctuation.
-
#l10n_context(nodes, idx) ⇒ Object
Previous and following context of the current text node: do not use just the immediately adjoining text tokens for context; deal with spaces and empty text by simply concatenating the entire context.
- #l10n_fr(text, locale) ⇒ Object
- #l10n_fr1(text, prev, foll, locale) ⇒ Object
- #l10n_gsub(text, prev, foll, delim, regex) ⇒ Object
- #l10n_gsub_context(text, prev, foll, delim) ⇒ Object
-
#l10n_zh(text, script = "Hans") ⇒ Object
CJK.
- #l10n_zh_dash(text, prev, foll) ⇒ Object
-
#l10n_zh_punct(text, prev, foll) ⇒ Object
CJK punct if (^|CJK).($|CJK).
- #l10n_zh_remove_space(text, prev, foll) ⇒ Object
- #liquid_init ⇒ Object
- #load_yaml(lang, script, i18nyaml = nil, i18nhash = nil) ⇒ Object
- #load_yaml1(lang, script) ⇒ Object
-
#load_yaml2(lang) ⇒ Object
locally defined in calling class.
- #month_i18n(val) ⇒ Object
- #normalise_hash(ret) ⇒ Object
- #ordinal_key(term) ⇒ Object
-
#populate(keys, vars = {}) ⇒ Object
populate with variables, Liquid, inflections, ordinals/spellout.
- #set(key, val) ⇒ Object
- #to_xml(node) ⇒ Object
- #tw_cldr_lang ⇒ Object
- #tw_cldr_localize(num) ⇒ Object
Constructor Details
#initialize(lang, script, locale: nil, i18nyaml: nil, i18nhash: nil) ⇒ I18n
Returns a new instance of I18n.
15 16 17 18 19 20 21 22 23 24 |
# File 'lib/isodoc/i18n.rb', line 15
# Set up localisation state for one language/script pair.
#
# @param lang [String] language code, stored in @lang
# @param script [String, nil] script code, stored in @script
# @param locale [String, nil] optional locale, stored in @locale
# @param i18nyaml [String, nil] forwarded to init_labels
# @param i18nhash [Hash, nil] forwarded to init_labels
def initialize(lang, script, locale: nil, i18nyaml: nil, i18nhash: nil)
  @lang, @script, @locale = lang, script, locale
  # calendar_data goes through tw_cldr_lang, which reads @lang and @script,
  # so it has to run after the assignments above.
  @cal = calendar_data
  @cal_en = TwitterCldr::Shared::Calendar.new(:en)
  # The entity decoder is created before init_labels, whose label
  # normalisation ends up calling cleanup_entities (which uses @c).
  @c = HTMLEntities.new
  init_labels(i18nyaml, i18nhash)
  liquid_init
end
Instance Attribute Details
#labels ⇒ Object
Returns the value of attribute labels.
13 14 15 |
# File 'lib/isodoc/i18n.rb', line 13
# Reader for the label table held in @labels.
#
# @return [Object] the current value of @labels
def labels
  @labels
end
Class Method Details
.cjk_extend(text) ⇒ Object
149 150 151 |
# File 'lib/isodoc/l10n.rb', line 149 def self.cjk_extend(text) cjk_extend(text) end |
.l10n(text, lang = @lang, script = @script, locale = @locale) ⇒ Object
3 4 5 |
# File 'lib/isodoc/l10n.rb', line 3 def self.l10n(text, lang = @lang, script = @script, locale = @locale) l10n(text, lang, script, locale) end |
Instance Method Details
#am_pm_i18n(val) ⇒ Object
19 20 21 22 23 24 |
# File 'lib/isodoc/date.rb', line 19
# Replace the am/pm placeholders (a percent sign, U+200C, the directive letter
# and the English value in angle brackets) with the period names from @cal.
# "P" placeholders get the lower-cased name, "p" placeholders the upper-cased.
#
# @param val [String] formatted date text containing placeholders
# @return [String] a new string with the am/pm placeholders substituted
def am_pm_i18n(val)
  lower_am = val.gsub(/%\u200cP<am>/, @cal.periods[:am].downcase)
  lower_pm = lower_am.gsub(/%\u200cP<pm>/, @cal.periods[:pm].downcase)
  upper_am = lower_pm.gsub(/%\u200cp<AM>/, @cal.periods[:am].upcase)
  upper_am.gsub(/%\u200cp<PM>/, @cal.periods[:pm].upcase)
end
#bidiwrap(text, lang, script) ⇒ Object
14 15 16 17 18 19 20 21 22 |
# File 'lib/isodoc/l10n.rb', line 14
# Wrap text in invisible directional marks when its direction differs from
# the direction of the enclosing document (as reported by bidiwrap_vars).
#
# The marks are written as escapes so they are visible in the source:
# U+061C for the Arab/Aran scripts, U+200F for other right-to-left text
# inside left-to-right, and U+200E for left-to-right text inside right-to-left.
#
# @param text [String] text to wrap
# @param lang [String] language of the text
# @param script [String, nil] script of the text
# @return [String] text, wrapped on both sides when directions differ
def bidiwrap(text, lang, script)
  my_script, my_rtl, outer_rtl = bidiwrap_vars(lang, script)
  return "\u200e#{text}\u200e" if outer_rtl && !my_rtl
  return text unless my_rtl && !outer_rtl

  mark = %w(Arab Aran).include?(my_script) ? "\u061c" : "\u200f"
  "#{mark}#{text}#{mark}"
end
#bidiwrap_vars(lang, script) ⇒ Object
24 25 26 27 28 29 30 |
# File 'lib/isodoc/l10n.rb', line 24
# Work out the script of a text run and the directionality of both the run
# and the enclosing document (@script, or the default script of @lang).
#
# @param lang [String] language of the text run
# @param script [String, nil] script of the run; defaulted from lang when nil
# @return [Array] [script of the run, run is RTL?, document is RTL?]
def bidiwrap_vars(lang, script)
  my_script = script || Metanorma::Utils.default_script(lang)
  my_rtl = Metanorma::Utils.rtl_script?(my_script)
  outer_script = @script || Metanorma::Utils.default_script(@lang)
  [my_script, my_rtl, Metanorma::Utils.rtl_script?(outer_script)]
end
#boolean_conj(list, conn) ⇒ Object
52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/isodoc/i18n.rb', line 52
# Join a list of items with a localised conjunction.
#
# Templates come from @labels["binary_<conn>"] (two items) and
# @labels["multiple_<conn>"] (three or more); "%1" and "%2" are the slots.
#
# @param list [Array<String>] items to join
# @param conn [String] conjunction name used to pick the label template
# @return [String] "" for an empty list, the sole item for one element,
#   otherwise the filled-in template
def boolean_conj(list, conn)
  case list.size
  when 0 then ""
  when 1 then list.first
  when 2
    # Block form of sub inserts the item literally; a replacement *string*
    # would have "\1", "\0" etc. inside the item treated as backreferences.
    @labels["binary_#{conn}"].sub(/%1/) { list[0] }.sub(/%2/) { list[1] }
  else
    head = l10n(list[0..-2].join(enum_comma), @lang, @script)
    @labels["multiple_#{conn}"].sub(/%1/) { head }.sub(/%2/) { list[-1] }
  end
end
#calendar_data ⇒ Object
31 32 33 34 35 |
# File 'lib/isodoc/i18n.rb', line 31
# Build the TwitterCldr calendar for the current language (tw_cldr_lang).
# Any StandardError raised while doing so falls back to the English calendar.
#
# @return [TwitterCldr::Shared::Calendar]
def calendar_data
  begin
    TwitterCldr::Shared::Calendar.new(tw_cldr_lang)
  rescue StandardError
    TwitterCldr::Shared::Calendar.new(:en)
  end
end
#cjk_extend(title) ⇒ Object
153 154 155 156 157 158 159 160 |
# File 'lib/isodoc/l10n.rb', line 153
# Insert an ideographic space (U+3000) before every character whose pairing
# with the preceding character is accepted by interleave_space_cjk?.
#
# Entities are decoded first, and the neighbour pairs are taken from the
# decoded characters. (Indexing the undecoded title, as before, compared the
# wrong characters whenever the title contained an entity.)
#
# @param title [String] text, possibly containing HTML entities
# @return [String] decoded text with U+3000 interleaved where applicable
def cjk_extend(title)
  chars = @c.decode(title).chars
  chars.each_with_index.map do |char, i|
    if i.zero? || !interleave_space_cjk?(chars[i - 1] + char)
      char
    else
      "\u3000#{char}"
    end
  end.join
end
#cleanup_entities(text, is_xml: true) ⇒ Object
70 71 72 73 74 75 76 77 78 79 |
# File 'lib/isodoc/i18n.rb', line 70
# Decode HTML entities with @c.
#
# With is_xml the text is split on "<" and ">" (keeping the delimiters) and
# taken four pieces at a time; only the first piece of each group, i.e. the
# text outside a tag, is decoded, so tag contents are left as they are.
#
# @param text [String] text or XML markup
# @param is_xml [Boolean] when false the whole string is decoded
# @return [String]
def cleanup_entities(text, is_xml: true)
  return @c.decode(text) unless is_xml

  text.split(/([<>])/).each_slice(4).map do |outside, *tag_parts|
    [@c.decode(outside), *tag_parts]
  end.join
end
#convert_date_format(fmt) ⇒ Object
10 11 12 13 |
# File 'lib/isodoc/date.rb', line 10
# Rewrite a strftime format so that name-valued directives can be localised
# after formatting: each of %B %b %h %P %p %A %a (optionally with "^") becomes
# a placeholder made of "%", U+200C, the flag and letter, and the original
# directive in angle brackets. "%_" is replaced by a single space.
#
# @param fmt [String] strftime format
# @return [String] rewritten format
def convert_date_format(fmt)
  spaced = fmt.gsub(/%_/, " ")
  spaced.gsub(/%(\^?)([BbhPpAa])/) do
    flag = Regexp.last_match(1)
    letter = Regexp.last_match(2)
    "%\u200c#{flag}#{letter}<%#{letter}>"
  end
end
#date(value, format) ⇒ Object
5 6 7 8 |
# File 'lib/isodoc/date.rb', line 5
# Format an ISO 8601 date/time string and localise the result.
#
# @param value [String] ISO 8601 date/time, parsed with DateTime.iso8601
# @param format [String] strftime format; passed through convert_date_format
# @return [String] formatted text after date_i18n substitution
def date(value, format)
  parsed = DateTime.iso8601(value)
  formatted = parsed.strftime(convert_date_format(format))
  date_i18n(formatted)
end
#date_i18n(val) ⇒ Object
15 16 17 |
# File 'lib/isodoc/date.rb', line 15
# Localise all date placeholders: am/pm first, then months, then days.
#
# @param val [String] formatted date text containing placeholders
# @return [String] localised text
def date_i18n(val)
  with_periods = am_pm_i18n(val)
  with_months = month_i18n(with_periods)
  day_i18n(with_months)
end
#day_i18n(val) ⇒ Object
37 38 39 40 41 42 43 44 45 46 |
# File 'lib/isodoc/date.rb', line 37
# Replace day-name placeholders (directives A = wide, a = abbreviated) with
# the matching names from @cal. The English names in @cal_en identify which
# placeholder belongs to which day key; "^" placeholders get upper-cased names.
#
# @param val [String] text to localise; modified in place (gsub!)
# @return [String] the same string object
def day_i18n(val)
  { A: :wide, a: :abbreviated }.each_pair do |directive, width|
    @cal_en.calendar_data[:days][:format][width].each do |day_key, english|
      localized = @cal.calendar_data[:days][:format][width][day_key]
      val.gsub!(/%\u200c#{directive}<#{english}>/, localized)
      val.gsub!(/%\u200c\^#{directive}<#{english}>/, localized.upcase)
    end
  end
  val
end
#enum_comma ⇒ Object
65 66 67 68 |
# File 'lib/isodoc/i18n.rb', line 65
# Separator used between enumerated items, wrapped in <enum-comma> markup:
# the ideographic comma for the Hans/Hant scripts, otherwise an ASCII comma
# followed by a space outside the element.
#
# @return [String]
def enum_comma
  if %w(Hans Hant).include?(@script)
    "<enum-comma>、</enum-comma>"
  else
    "<enum-comma>,</enum-comma> "
  end
end
#get ⇒ Object
50 51 52 |
# File 'lib/isodoc/i18n-yaml.rb', line 50
# Return the label table (@labels). The per-label accessor methods defined
# by init_labels read through this method.
#
# @return [Object] the current value of @labels
def get
  @labels
end
#inflect(word, options) ⇒ Object
can skip category if not present
124 125 126 127 128 129 130 131 132 133 |
# File 'lib/isodoc/i18n.rb', line 124
# Look up the inflected form of a word in @labels["inflection"].
#
# The entry is either a String (returned as is) or a nested Hash that is
# walked category by category in INFLECTION_ORDER. For each category the
# value from options is used, falling back to the default in INFLECTIONS.
# A category that is not present at the current level is skipped.
#
# @param word [String] key under @labels["inflection"]
# @param options [Hash] category (Symbol) => value overrides
# @return [String] the inflected form, or word when none is found
def inflect(word, options)
  i = @labels.dig("inflection", word) or return word
  i.is_a? String and return i
  INFLECTION_ORDER.each do |x|
    infl = options[x] || INFLECTIONS[x]
    # Descend only when this level actually distinguishes the category.
    i = i[infl] if i[infl]
    i.is_a? String and return i
  end
  word
end
#inflect_ordinal(num, term, ord_class) ⇒ Object
ord class is either SpelloutRules or OrdinalRules
82 83 84 85 86 87 88 89 90 |
# File 'lib/isodoc/i18n.rb', line 82
# Render a number through rule-based number formatting (to_rbnf_s).
# ord_class is either SpelloutRules or OrdinalRules.
#
# The rule name comes from @labels[ord_class]; when @labels["ordinal_keys"]
# is non-empty, that entry is further indexed by ordinal_key(term).
# Any StandardError falls back to localising num with @lang directly.
#
# @param num [Object] number responding to #localize
# @param term [Hash] inflection values, consumed by ordinal_key
# @param ord_class [String] rule group name, also the key into @labels
# @return [String]
def inflect_ordinal(num, term, ord_class)
  keys = @labels["ordinal_keys"]
  lbl = (keys.nil? || keys.empty?) ? @labels[ord_class] : @labels[ord_class][ordinal_key(term)]
  tw_cldr_localize(num).to_rbnf_s(ord_class, lbl)
rescue StandardError
  num.localize(@lang.to_sym).to_rbnf_s(ord_class, lbl)
end
#init_labels(i18nyaml, i18nhash) ⇒ Object
37 38 39 40 41 42 43 44 |
# File 'lib/isodoc/i18n.rb', line 37
# Load the label table into @labels, record the language and script in it,
# and define one reader method per label key on the class.
#
# NOTE(review): methods are defined on the class, not the instance, under the
# down-cased key, so a label key matching an existing method name would
# replace that method — confirm that keys cannot collide.
#
# @param i18nyaml [String, nil] forwarded to load_yaml
# @param i18nhash [Hash, nil] forwarded to load_yaml
# @return [Hash] the label table
def init_labels(i18nyaml, i18nhash)
  @labels = load_yaml(@lang, @script, i18nyaml, i18nhash)
  @labels.merge!("language" => @lang, "script" => @script)
  @labels.each_key do |label|
    self.class.send(:define_method, label.downcase) { get[label] }
  end
end
#interleave_space_cjk?(text) ⇒ Boolean
162 163 164 165 166 167 168 169 170 171 |
# File 'lib/isodoc/l10n.rb', line 162
# Decide whether a space may be interleaved between the two characters of
# text. Each check below vetoes the pair.
#
# @param text [String] a pair of adjacent characters
# @return [Boolean, nil] nil when text is not exactly two characters,
#   false when any veto applies, true otherwise
def interleave_space_cjk?(text)
  return unless text.size == 2

  # doubled dashes / leaders / ellipses
  return false if ["\u2014\u2014", "\u2025\u2025", "\u2026\u2026",
                   "\u22ef\u22ef"].include?(text)
  # two digits, two Latin letters, or any whitespace
  return false if /\d\d|\p{Latin}\p{Latin}|[[:space:]]/.match?(text)
  # pair begins with an opening quote or bracket
  return false if /^[\u2018\u201c(\u3014\[{\u3008\u300a\u300c\u300e\u3010\u2985\u3018\u3016\u00ab\u301d]/.match?(text)
  # pair ends with a closing quote or bracket
  return false if /[\u2019\u201d)\u3015\]}\u3009\u300b\u300d\u300f\u3011\u2986\u3019\u3017\u00bb\u301f]$/.match?(text)
  # pair contains other punctuation
  return false if /[\u3002.\u3001,\u30fb:;\u2010\u301c\u30a0\u2013!?\u203c\u2047\u2048\u2049]/.match?(text)

  true
end
#l10_context_valid?(context, idx, delim, regex) ⇒ Boolean
122 123 124 125 126 127 128 129 130 131 |
# File 'lib/isodoc/l10n.rb', line 122
# Check whether the token at idx is the delimiter to convert and sits in the
# required context.
#
# @param context [Array<String>] preceding text, split tokens, following text
# @param idx [Integer] position of the candidate token
# @param delim [Array] delim[0] is the punctuation to convert (String or Regexp)
# @param regex [Array<Regexp>] regex[0] must match everything before idx,
#   regex[1] everything after it (each side joined into one string)
# @return [Boolean]
def l10_context_valid?(context, idx, delim, regex)
  token = context[idx]
  wanted = delim[0]
  found = wanted.is_a?(Regexp) ? wanted.match?(token) : token == wanted
  return found unless found

  regex[0].match?(context[0...idx].join) && # preceding context
    regex[1].match?(context[(idx + 1)..-1].join) # following context
end
#l10_zh1(text, prev, foll, _script) ⇒ Object
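Note: a comma cannot be differentiated from an enumeration comma (、). (The trailing `def l10_zh1(text, _script)` appears to be a commented-out earlier signature carried over from the source comment.)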
70 71 72 73 74 75 |
# File 'lib/isodoc/l10n.rb', line 70
# Localise one text node for CJK: punctuation, then space removal, then
# dashes, each step receiving the previous step's output.
#
# @param text [String] content of the text node
# @param prev [String] text preceding the node
# @param foll [String] text following the node
# @param _script [String] unused
# @return [String]
def l10_zh1(text, prev, foll, _script)
  %i[l10n_zh_punct l10n_zh_remove_space l10n_zh_dash].reduce(text) do |acc, step|
    send(step, acc, prev, foll)
  end
end
#l10n(text, lang = @lang, script = @script, locale = @locale) ⇒ Object
function localising spaces and punctuation.
8 9 10 11 12 |
# File 'lib/isodoc/l10n.rb', line 8
# Localise spaces and punctuation in text, then apply bidi wrapping.
#
# @param text [String] text or markup to localise
# @param lang [String] language; zh/ja/ko use l10n_zh, fr uses l10n_fr
# @param script [String, nil] script, passed to l10n_zh and bidiwrap
# @param locale [String, nil] locale for French; "FR" when nil
# @return [String]
def l10n(text, lang = @lang, script = @script, locale = @locale)
  text = l10n_zh(text, script) if %w(zh ja ko).include?(lang)
  text = l10n_fr(text, locale || "FR") if lang == "fr"
  bidiwrap(text, lang, script)
end
#l10n_context(nodes, idx) ⇒ Object
Previous and following context of the current text node: do not use just the immediately adjoining text tokens for context; deal with spaces and empty text by simply concatenating the entire context.
48 49 50 51 52 |
# File 'lib/isodoc/l10n.rb', line 48
# Previous and following context of the text node at idx: the text of all
# nodes before it and of all nodes after it, each concatenated into one string
# (rather than only the immediately adjoining nodes).
#
# @param nodes [#[]] ordered collection of nodes responding to #text
# @param idx [Integer] position of the current node
# @return [Array<String>] [preceding text, following text]
def l10n_context(nodes, idx)
  before = nodes[0...idx]
  after = nodes[(idx + 1)...(nodes.size)]
  [before, after].map { |side| side.map(&:text).join }
end
#l10n_fr(text, locale) ⇒ Object
54 55 56 57 58 59 60 61 62 63 |
# File 'lib/isodoc/l10n.rb', line 54
# Apply French punctuation spacing to every text node of an XML fragment.
#
# @param text [String] XML fragment
# @param locale [String] locale passed on to l10n_fr1
# @return [String] serialised fragment
def l10n_fr(text, locale)
  xml = Nokogiri::XML::DocumentFragment.parse(text)
  text_nodes = xml.xpath(".//text()")
  text_nodes.each_with_index do |node, pos|
    prev, foll = l10n_context(text_nodes, pos)
    decoded = cleanup_entities(node.text, is_xml: false)
    node.replace(l10n_fr1(decoded, prev, foll, locale))
  end
  to_xml(xml)
end
#l10n_fr1(text, prev, foll, locale) ⇒ Object
140 141 142 143 144 145 146 147 |
# File 'lib/isodoc/l10n.rb', line 140
# French spacing for one text node, in three passes:
# 1. U+202F before » › ; ? ! when preceded by an alphanumeric and followed
#    by whitespace or the end of the text;
# 2. U+202F after « ‹ when anything follows;
# 3. a space before ":" in the same context as pass 1 — U+202F for locale
#    "CH", U+00A0 otherwise.
#
# @param text [String] content of the text node
# @param prev [String] text preceding the node
# @param foll [String] text following the node
# @param locale [String] locale code
# @return [String]
def l10n_fr1(text, prev, foll, locale)
  after_word = [/\p{Alnum}$/, /^(\s|$)/]
  closing = [/[»›;?!]/, "\u202f\\0"]
  opening = [/[«‹]/, "\\0\u202f"]
  text = l10n_gsub(text, prev, foll, closing, after_word)
  text = l10n_gsub(text, prev, foll, opening, [/$/, /^./])
  colonsp = locale == "CH" ? "\u202f" : "\u00a0"
  l10n_gsub(text, prev, foll, [":", "#{colonsp}\\0"], after_word)
end
#l10n_gsub(text, prev, foll, delim, regex) ⇒ Object
106 107 108 109 110 111 112 113 |
# File 'lib/isodoc/l10n.rb', line 106
# Context-sensitive substitution: replace each occurrence of delim[0] in text
# by delim[1] (with "\0" standing for the matched token) when
# l10_context_valid? accepts its surrounding context.
#
# @param text [String] content of the text node
# @param prev [String] text preceding the node
# @param foll [String] text following the node
# @param delim [Array] [token to replace (String or Regexp), replacement]
# @param regex [Array<Regexp>] required preceding / following context
# @return [String] text unchanged when it contains no delimiter
def l10n_gsub(text, prev, foll, delim, regex)
  context = l10n_gsub_context(text, prev, foll, delim)
  return text unless context

  # First and last entries are prev/foll: context only, never rewritten.
  inner = 1...(context.size - 1)
  inner.each do |pos|
    next unless l10_context_valid?(context, pos, delim, regex)

    context[pos] = delim[1].gsub("\\0", context[pos]) # Full-width equivalent
  end
  context[inner].join
end
#l10n_gsub_context(text, prev, foll, delim) ⇒ Object
115 116 117 118 119 120 |
# File 'lib/isodoc/l10n.rb', line 115
# Split text on the delimiter (keeping the delimiters as separate tokens) and
# surround the tokens with the preceding and following context.
#
# @param text [String] content of the text node
# @param prev [String] text preceding the node
# @param foll [String] text following the node
# @param delim [Array] delim[0] is the delimiter (String or Regexp)
# @return [Array<String>, nil] nil when the split yields exactly one piece
def l10n_gsub_context(text, prev, foll, delim)
  pattern = delim[0]
  pattern = Regexp.quote(pattern) unless pattern.is_a?(Regexp)
  pieces = text.split(/(#{pattern})/) # capture group keeps the delimiters
  return if pieces.size == 1

  [prev, pieces, foll].flatten
end
#l10n_zh(text, script = "Hans") ⇒ Object
CJK
33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/isodoc/l10n.rb', line 33
# CJK localisation of every text node in an XML fragment.
# After serialisation, <b> and </b> tags and any <?...?> instruction are
# stripped from the output.
#
# @param text [String] XML fragment
# @param script [String] script code, passed on to l10_zh1
# @return [String]
def l10n_zh(text, script = "Hans")
  xml = Nokogiri::XML::DocumentFragment.parse(text)
  text_nodes = xml.xpath(".//text()")
  text_nodes.each_with_index do |node, pos|
    prev, foll = l10n_context(text_nodes, pos)
    decoded = cleanup_entities(node.text, is_xml: false)
    node.replace(l10_zh1(decoded, prev, foll, script))
  end
  serialised = to_xml(xml)
  without_bold = serialised.gsub(/<b>/, "").gsub("</b>", "")
  without_bold.gsub(/<\?[^>]+>/, "")
end
#l10n_zh_dash(text, prev, foll) ⇒ Object
102 103 104 |
# File 'lib/isodoc/l10n.rb', line 102
# Replace an en dash by a tilde where the context matches ZH1_DASH before it
# and ZH2_DASH after it.
#
# @param text [String] content of the text node
# @param prev [String] text preceding the node
# @param foll [String] text following the node
# @return [String]
def l10n_zh_dash(text, prev, foll)
  dash_to_tilde = ["–", "~"]
  required_context = [ZH1_DASH, ZH2_DASH]
  l10n_gsub(text, prev, foll, dash_to_tilde, required_context)
end
#l10n_zh_punct(text, prev, foll) ⇒ Object
CJK punct if (^|CJK).($|CJK)
86 87 88 89 90 91 92 |
# File 'lib/isodoc/l10n.rb', line 86
# CJK punct if (^|CJK).($|CJK): convert Latin punctuation to its full-width
# form where the context matches ZH1_PUNCT before it and ZH2_PUNCT after it.
#
# Each pair is [Latin mark, replacement]. The listing had both halves of every
# pair identical, which made the method a no-op.
# NOTE(review): the targets below are the Unicode fullwidth forms (code point
# + 0xFEE0); confirm against the project source, in particular whether "."
# should map to U+FF0E or to the ideographic full stop U+3002.
#
# @param text [String] content of the text node
# @param prev [String] text preceding the node
# @param foll [String] text following the node
# @return [String]
def l10n_zh_punct(text, prev, foll)
  [":\uff1a", ",\uff0c", ".\uff0e", ")\uff09", "]\uff3d",
   ";\uff1b", "?\uff1f", "!\uff01", "(\uff08", "[\uff3b"].each do |m|
    text = l10n_gsub(text, prev, foll, [m[0], m[1]],
                     [ZH1_PUNCT, ZH2_PUNCT])
  end
  text
end
#l10n_zh_remove_space(text, prev, foll) ⇒ Object
133 134 135 136 137 138 |
# File 'lib/isodoc/l10n.rb', line 133
# Delete a space in two contexts:
# 1. preceded by a CJK character or digit and followed by a CJK character;
# 2. preceded by a CJK character and followed by a digit, or by a single
#    ASCII letter that is itself followed by a CJK character or the end.
#
# @param text [String] content of the text node
# @param prev [String] text preceding the node
# @param foll [String] text following the node
# @return [String]
def l10n_zh_remove_space(text, prev, foll)
  drop_space = [" ", ""]
  first_context = [/(#{ZH_CHAR}|\d)$/o, /^#{ZH_CHAR}/o]
  second_context = [/#{ZH_CHAR}$/o, /^(\d|[A-Za-z](#{ZH_CHAR}|$))/o]
  once = l10n_gsub(text, prev, foll, drop_space, first_context)
  l10n_gsub(once, prev, foll, drop_space, second_context)
end
#liquid_init ⇒ Object
26 27 28 29 |
# File 'lib/isodoc/i18n.rb', line 26
# Hand this instance to the IsoDoc::I18n::Liquid module and register that
# module as a Liquid template filter.
def liquid_init
  filters = ::IsoDoc::I18n::Liquid
  filters.set(self)
  ::Liquid::Template.register_filter(filters)
end
#load_yaml(lang, script, i18nyaml = nil, i18nhash = nil) ⇒ Object
8 9 10 11 12 13 14 15 |
# File 'lib/isodoc/i18n-yaml.rb', line 8
# Load the built-in labels for lang/script and deep-merge an override on top:
# the YAML file i18nyaml if given, otherwise the hash i18nhash if given.
# The result is passed through normalise_hash.
#
# @param lang [String] language code
# @param script [String, nil] script code
# @param i18nyaml [String, nil] path of a YAML override file (takes precedence)
# @param i18nhash [Hash, nil] override hash
# @return [Hash] normalised label table
def load_yaml(lang, script, i18nyaml = nil, i18nhash = nil)
  base = load_yaml1(lang, script)
  merged = if i18nyaml then base.deep_merge(YAML.load_file(i18nyaml))
           elsif i18nhash then base.deep_merge(i18nhash)
           else base
           end
  normalise_hash(merged)
end
#load_yaml1(lang, script) ⇒ Object
30 31 32 33 34 35 36 37 38 39 |
# File 'lib/isodoc/i18n-yaml.rb', line 30
# Pick which built-in label file to load: "zh-Hans" for Chinese in the Hans
# script, "en" for Chinese in any other script, otherwise the language itself.
#
# @param lang [String] language code
# @param script [String, nil] script code
# @return [Object] whatever load_yaml2 returns for the chosen code
def load_yaml1(lang, script)
  code = if "zh" == lang
           script == "Hans" ? "zh-Hans" : "en"
         else
           lang
         end
  load_yaml2(code)
end
#load_yaml2(lang) ⇒ Object
locally defined in calling class
42 43 44 45 46 47 48 |
# File 'lib/isodoc/i18n-yaml.rb', line 42
# Load ../isodoc-yaml/i18n-<lang>.yaml relative to this file. Any
# StandardError (for instance a missing file) falls back to the English file.
# Locally defined in calling class.
#
# @param lang [String] language code interpolated into the file name
# @return [Object] parsed YAML content
def load_yaml2(lang)
  here = File.dirname(__FILE__)
  begin
    YAML.load_file(File.join(here, "../isodoc-yaml/i18n-#{lang}.yaml"))
  rescue StandardError
    YAML.load_file(File.join(here, "../isodoc-yaml/i18n-en.yaml"))
  end
end
#month_i18n(val) ⇒ Object
26 27 28 29 30 31 32 33 34 35 |
# File 'lib/isodoc/date.rb', line 26
# Replace month-name placeholders (directives B = wide, b and h = abbreviated)
# with the matching names from @cal. The English names in @cal_en identify
# which placeholder belongs to which month key; "^" placeholders get
# upper-cased names.
#
# @param val [String] text to localise; modified in place (gsub!)
# @return [String] the same string object
def month_i18n(val)
  { B: :wide, b: :abbreviated, h: :abbreviated }.each_pair do |directive, width|
    @cal_en.calendar_data[:months][:format][width].each do |month_key, english|
      localized = @cal.calendar_data[:months][:format][width][month_key]
      val.gsub!(/%\u200c#{directive}<#{english}>/, localized)
      val.gsub!(/%\u200c\^#{directive}<#{english}>/, localized.upcase)
    end
  end
  val
end
#normalise_hash(ret) ⇒ Object
17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/isodoc/i18n-yaml.rb', line 17
# Recursively normalise a loaded label structure: strings are NFC-normalised
# and passed through cleanup_entities; hashes and arrays are walked; anything
# else is returned untouched.
#
# @param ret [Hash, Array, String, Object] value to normalise
# @return [Object] a Hash is updated in place and returned; an Array or
#   String yields a new object
def normalise_hash(ret)
  case ret
  when Hash then ret.transform_values! { |value| normalise_hash(value) }
  when Array then ret.map { |item| normalise_hash(item) }
  when String then cleanup_entities(ret.unicode_normalize(:nfc))
  else ret
  end
end
#ordinal_key(term) ⇒ Object
110 111 112 113 114 |
# File 'lib/isodoc/i18n.rb', line 110
# Build the dot-separated key used to select an ordinal rule: for each
# category in @labels["ordinal_keys"], take the value from term (string key)
# or else the default from INFLECTIONS (symbol key).
#
# @param term [Hash] category name (String) => value
# @return [String] values joined with "."
def ordinal_key(term)
  parts = @labels["ordinal_keys"].map do |category|
    term[category.to_s] || INFLECTIONS[category.to_sym]
  end
  parts.join(".")
end
#populate(keys, vars = {}) ⇒ Object
populate with variables, Liquid, inflections, ordinals/spellout
47 48 49 50 |
# File 'lib/isodoc/i18n.rb', line 47
# Populate a label with variables, Liquid, inflections, ordinals/spellout:
# the label found at keys is parsed as a Liquid template and rendered with
# vars plus the whole label table under "labels".
#
# @param keys [String, Array<String>] key or key path into @labels
# @param vars [Hash] template variables
# @return [String] rendered label
def populate(keys, vars = {})
  source = @labels.dig(*Array(keys))
  template = ::Liquid::Template.parse(source)
  template.render(vars.merge("labels" => @labels))
end
#set(key, val) ⇒ Object
54 55 56 |
# File 'lib/isodoc/i18n-yaml.rb', line 54
# Store a label in @labels.
#
# @param key [Object] label key
# @param val [Object] label value
# @return [Object] val
def set(key, val)
  @labels.store(key, val)
end
#to_xml(node) ⇒ Object
173 174 175 176 |
# File 'lib/isodoc/l10n.rb', line 173
# Serialise a node as UTF-8 XML without indentation.
#
# @param node [#to_xml, nil] node to serialise
# @return [String, nil] nil when node is nil
def to_xml(node)
  return if node.nil?

  node.to_xml(encoding: "UTF-8", indent: 0,
              save_with: Nokogiri::XML::Node::SaveOptions::AS_XML)
end
#tw_cldr_lang ⇒ Object
116 117 118 119 120 121 |
# File 'lib/isodoc/i18n.rb', line 116
# Locale symbol handed to TwitterCldr: :"zh-cn" for Chinese in Hans,
# :"zh-tw" for Chinese in Hant, otherwise @lang as a symbol.
#
# @return [Symbol]
def tw_cldr_lang
  case [@lang, @script]
  when %w(zh Hans) then :"zh-cn"
  when %w(zh Hant) then :"zh-tw"
  else @lang.to_sym
  end
end
#tw_cldr_localize(num) ⇒ Object
92 93 94 95 96 |
# File 'lib/isodoc/i18n.rb', line 92
# Localise a number for the current language (tw_cldr_lang); any
# StandardError falls back to English.
#
# @param num [#localize] number to localise
# @return [Object] whatever num.localize returns
def tw_cldr_localize(num)
  begin
    num.localize(tw_cldr_lang)
  rescue StandardError
    num.localize(:en)
  end
end