Module: Grc
- Included in:
- String
- Defined in:
- lib/grc.rb,
lib/grc/version.rb
Overview
Methods for working with Ancient Greek in Ruby.
Defined Under Namespace
Classes: Error
Constant Summary collapse
- VERSION =
'0.1.4'
Instance Method Summary collapse
- #acute_to_grave ⇒ Object
- #grave_to_acute ⇒ Object
-
#grc? ⇒ Boolean
`grc?` (str → bool) Returns true if the string contains Greek characters.
-
#grc_downcase ⇒ Object
`grc_downcase` (str → str) Returns the lowercase version of the string for Greek characters, resolving confusable characters.
-
#grc_upcase ⇒ Object
`grc_upcase` (str → str) The default `upcase` method strips diacritical marks from Greek characters.
-
#hash_dump ⇒ Object
`hash_dump` (str → hash) Returns a hash that maps each character of the string to its escaped `dump` representation (Char: Unicode_points).
- #lower? ⇒ Boolean
-
#nfc ⇒ Object
‘nfc` (str → str) Returns a string with the canonical composition of the string.
-
#nfd ⇒ Object
‘nfd` (str → str) Returns a string with the canonical decomposition of the string.
-
#no_diacritics ⇒ Object
‘no_diacritics` (str → str) Returns a string with the diacritics removed.
-
#no_downcase_diacritics ⇒ Object
‘no_downcase_diacritics` (str → str) Returns a string with the diacritics removed from lowercase characters.
-
#no_upcase_diacritics ⇒ Object
‘no_upcase_diacritics` (str → str) Returns a string with the diacritics removed from uppercase characters.
- #oxia_to_tonos ⇒ Object
-
#to_acute ⇒ Object
‘to_acute` (str → str) Returns a string with the acute replacing the grave accent.
-
#to_grave ⇒ Object
‘to_grave` (str → str) Returns a string with the grave replacing the acute accent.
-
#to_oxia ⇒ Object
‘to_oxia` (str → str) Returns a string with the oxia replacing the tonos.
-
#to_tonos ⇒ Object
‘to_tonos` (str → str) Returns a string with the tonos replacing the oxia.
-
#tokenize ⇒ Object
‘tokenize` (str → array) Returns an array of tokens from the string.
- #tonos_to_oxia ⇒ Object
-
#transliterate ⇒ Object
‘transliterate` (str → str) Returns a string with greek characters replaced with their transliteration.
-
#unicode_name ⇒ Object
‘unicode_name` (str → array) Returns an array of unicode names from the string.
-
#unicode_points ⇒ Object
‘unicode_points` (str → array) Returns an array of unicode points from the string.
- #upper? ⇒ Boolean
Instance Method Details
#acute_to_grave ⇒ Object
243 244 245 246 247 |
# File 'lib/grc.rb', line 243
# Thin wrapper that forwards to `to_grave` after checking for Greek content.
#
# @return [String, nil] the result of `to_grave` when the receiver contains
#   Greek characters; otherwise the value of `@std_error`.
#   NOTE(review): `@std_error` is not assigned anywhere in the visible source,
#   so this is presumably nil — confirm.
def acute_to_grave
  grc? ? to_grave : @std_error
end
#grave_to_acute ⇒ Object
237 238 239 240 241 |
# File 'lib/grc.rb', line 237
# Thin wrapper that forwards to `to_acute` after checking for Greek content.
#
# @return [String, nil] the result of `to_acute` when the receiver contains
#   Greek characters; otherwise the value of `@std_error`.
#   NOTE(review): `@std_error` is not assigned anywhere in the visible source,
#   so this is presumably nil — confirm.
def grave_to_acute
  grc? ? to_acute : @std_error
end
#grc? ⇒ Boolean
`grc?` (str → bool) Returns true if the string contains Greek characters.
15 16 17 |
# File 'lib/grc.rb', line 15
# `grc?` (str → bool)
# Reports whether the string contains at least one character of the Greek
# script (`\p{Greek}`).
#
# @return [Boolean] true if any character matches `\p{Greek}`, false otherwise
def grc?
  # `match?` stops at the first hit and builds no array of matches,
  # unlike the previous `scan(...).empty?` check.
  match?(/\p{Greek}/)
end
#grc_downcase ⇒ Object
`grc_downcase` (str → str) Returns the lowercase version of the string for Greek characters, resolving confusable characters. See www.w3.org/TR/charmod-norm/#PreNormalization
89 90 91 |
# File 'lib/grc.rb', line 89
# `grc_downcase` (str → str)
# Lowercases the string by way of a decompose → downcase → recompose round trip.
#
# @return [String] the lowercased string in composed (NFC) form
def grc_downcase
  decomposed = nfd
  lowered = decomposed.downcase
  lowered.nfc
end
#grc_upcase ⇒ Object
`grc_upcase` (str → str) The default `upcase` method strips diacritical marks from Greek characters. This method returns the corresponding uppercase version of the string for Greek characters, preserving diacritical marks. See pages 1-7 of www.tlg.uci.edu/encoding/precomposed.pdf and icu.unicode.org/design/case/greek-upper
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
# File 'lib/grc.rb', line 98
# `grc_upcase` (str → str)
# Uppercases Greek characters one at a time after composing the string (NFC).
# Characters listed in the lookup table are replaced by their mapped
# counterpart; every other Greek character goes through `String#upcase`.
# Non-Greek characters are passed through unchanged.
#
# @return [String] the converted string
def grc_upcase
  precomposed_upper = {
    ᾀ: 'ᾈ', ᾁ: 'ᾉ', ᾂ: 'ᾊ', ᾃ: 'ᾋ', ᾄ: 'ᾌ', ᾅ: 'ᾍ', ᾆ: 'ᾎ', ᾇ: 'ᾏ',
    ᾐ: 'ᾘ', ᾑ: 'ᾙ', ᾒ: 'ᾚ', ᾓ: 'ᾛ', ᾔ: 'ᾜ', ᾕ: 'ᾝ', ᾖ: 'ᾞ', ᾗ: 'ᾟ',
    ᾠ: 'ᾨ', ᾡ: 'ᾩ', ᾢ: 'ᾪ', ᾣ: 'ᾫ', ᾤ: 'ᾬ', ᾥ: 'ᾭ', ᾦ: 'ᾮ', ᾧ: 'ᾯ',
    ᾳ: 'ᾼ', ῃ: 'ῌ', ῳ: 'ῼ'
  }

  converted = nfc.each_char.map do |char|
    # Leave anything that is not Greek exactly as it was.
    next char unless char.grc?

    # Table lookup first; fall back to the built-in upcase.
    precomposed_upper.fetch(char.to_sym) { char.upcase }
  end
  converted.join
end
#hash_dump ⇒ Object
`hash_dump` (str → hash) Returns a hash that maps each character of the string to its escaped `dump` representation (Char: Unicode_points).
55 56 57 58 59 60 61 |
# File 'lib/grc.rb', line 55
# `hash_dump` (str → hash)
# Builds a hash keyed by each character of the string, whose value is that
# character's `String#dump` representation. A character that occurs more than
# once yields a single key.
#
# @return [Hash{String => String}] character => dumped form
def hash_dump
  each_char.each_with_object({}) do |character, dumps|
    dumps[character] = character.dump
  end
end
#lower? ⇒ Boolean
221 222 223 |
# File 'lib/grc.rb', line 221
# Reports whether the string contains at least one lowercase character
# (`\p{Lower}`).
#
# @return [Boolean] true if any character matches `\p{Lower}`, false otherwise
def lower?
  # `match?` returns a boolean directly, so no `!!` coercion or MatchData is needed.
  match?(/\p{Lower}/)
end
#nfc ⇒ Object
‘nfc` (str → str) Returns a string with the canonical composition of the string.
80 81 82 |
# File 'lib/grc.rb', line 80
# `nfc` (str → str)
# Shorthand for `String#unicode_normalize` with the `:nfc` form
# (canonical composition).
#
# @return [String] the NFC-normalized copy of the string
def nfc
  unicode_normalize(:nfc)
end
#nfd ⇒ Object
‘nfd` (str → str) Returns a string with the canonical decomposition of the string.
74 75 76 |
# File 'lib/grc.rb', line 74
# `nfd` (str → str)
# Shorthand for `String#unicode_normalize` with the `:nfd` form
# (canonical decomposition).
#
# @return [String] the NFD-normalized copy of the string
def nfd
  unicode_normalize(:nfd)
end
#no_diacritics ⇒ Object
‘no_diacritics` (str → str) Returns a string with the diacritics removed.
170 171 172 173 174 |
# File 'lib/grc.rb', line 170
# `no_diacritics` (str → str)
# Removes diacritics from both lowercase and uppercase Greek characters by
# chaining `no_downcase_diacritics` and `no_upcase_diacritics`.
#
# @return [String, nil] the stripped string, or the value of `@std_error`
#   when the receiver contains no Greek characters.
#   NOTE(review): `@std_error` is not assigned anywhere in the visible source,
#   so this is presumably nil — confirm.
def no_diacritics
  return @std_error unless grc?

  lowercase_stripped = no_downcase_diacritics
  lowercase_stripped.no_upcase_diacritics
end
#no_downcase_diacritics ⇒ Object
‘no_downcase_diacritics` (str → str) Returns a string with the diacritics removed from lowercase characters.
142 143 144 145 146 147 148 149 150 151 152 |
# File 'lib/grc.rb', line 142
# `no_downcase_diacritics` (str → str)
# Removes diacritics from lowercase Greek characters. Uppercase Greek
# characters and all non-Greek text are left untouched.
#
# Each Greek base character is handled together with any combining marks that
# follow it. The previous character-by-character loop skipped those marks when
# the input was already decomposed (NFD), because a lone combining mark is not
# itself a Greek-script character.
#
# @return [String, nil] the stripped string, or the value of `@std_error`
#   when the receiver contains no Greek characters.
#   NOTE(review): `@std_error` is not assigned anywhere in the visible source,
#   so this is presumably nil — confirm.
def no_downcase_diacritics
  return @std_error unless grc?

  gsub(/\p{Greek}\p{Mn}*/) do |cluster|
    if cluster[0].lower?
      # Decompose, drop non-spacing marks, recompose.
      cluster.nfd.gsub(/\p{Mn}/, '').nfc
    else
      cluster
    end
  end
end
#no_upcase_diacritics ⇒ Object
‘no_upcase_diacritics` (str → str) Returns a string with the diacritics removed from uppercase characters.
156 157 158 159 160 161 162 163 164 165 166 |
# File 'lib/grc.rb', line 156
# `no_upcase_diacritics` (str → str)
# Removes diacritics from uppercase Greek characters. Lowercase Greek
# characters and all non-Greek text are left untouched.
#
# Each Greek base character is handled together with any combining marks that
# follow it. The previous character-by-character loop skipped those marks when
# the input was already decomposed (NFD), because a lone combining mark is not
# itself a Greek-script character.
#
# @return [String, nil] the stripped string, or the value of `@std_error`
#   when the receiver contains no Greek characters.
#   NOTE(review): `@std_error` is not assigned anywhere in the visible source,
#   so this is presumably nil — confirm.
def no_upcase_diacritics
  return @std_error unless grc?

  gsub(/\p{Greek}\p{Mn}*/) do |cluster|
    if cluster[0].upper?
      # Decompose, drop non-spacing marks, recompose.
      cluster.nfd.gsub(/\p{Mn}/, '').nfc
    else
      cluster
    end
  end
end
#oxia_to_tonos ⇒ Object
225 226 227 228 229 |
# File 'lib/grc.rb', line 225
# Thin wrapper that forwards to `to_tonos` after checking for Greek content.
#
# @return [String, nil] the result of `to_tonos` when the receiver contains
#   Greek characters; otherwise the value of `@std_error`.
#   NOTE(review): `@std_error` is not assigned anywhere in the visible source,
#   so this is presumably nil — confirm.
def oxia_to_tonos
  grc? ? to_tonos : @std_error
end
#to_acute ⇒ Object
‘to_acute` (str → str) Returns a string with the acute replacing the grave accent.
190 191 192 193 194 195 196 |
# File 'lib/grc.rb', line 190
# `to_acute` (str → str)
# Replaces grave-accented Greek characters with their acute-accented
# counterparts via a positional `tr` mapping (grave → acute).
#
# NOTE(review): several entries appear twice in each literal; presumably they
# once distinguished look-alike code points (oxia vs. tonos). Verify that the
# literals have not been altered by Unicode normalization.
#
# @return [String, nil] the converted string, or the value of `@std_error`
#   when the receiver contains no Greek characters.
#   NOTE(review): `@std_error` is not assigned anywhere in the visible source,
#   so this is presumably nil — confirm.
def to_acute
  return @std_error unless grc?

  grave = 'ἂᾂἃᾃὰὰᾲἒἓὲὲἢᾒἣᾓὴὴῂἲἳὶὶῒὂὃὸὸὒὓὺὺῢῢὢᾢὣᾣὼῲ'
  acute = 'ἄᾄἅᾅάάᾴἔἕέέἤᾔἥᾕήήῄἴἵίίΐὄὅόόὔὕύύΰΰὤᾤὥᾥώῴ'
  tr(grave, acute)
end
#to_grave ⇒ Object
‘to_grave` (str → str) Returns a string with the grave replacing the acute accent.
180 181 182 183 184 185 186 |
# File 'lib/grc.rb', line 180
# `to_grave` (str → str)
# Replaces acute-accented Greek characters with their grave-accented
# counterparts via a positional `tr` mapping (acute → grave).
#
# NOTE(review): several entries appear twice in each literal; presumably they
# once distinguished look-alike code points (oxia vs. tonos). Verify that the
# literals have not been altered by Unicode normalization.
#
# @return [String, nil] the converted string, or the value of `@std_error`
#   when the receiver contains no Greek characters.
#   NOTE(review): `@std_error` is not assigned anywhere in the visible source,
#   so this is presumably nil — confirm.
def to_grave
  return @std_error unless grc?

  acute = 'ἄᾄἅᾅάάᾴἔἕέέἤᾔἥᾕήήῄἴἵίίΐὄὅόόὔὕύύΰΰὤᾤὥᾥώῴ'
  grave = 'ἂᾂἃᾃὰὰᾲἒἓὲὲἢᾒἣᾓὴὴῂἲἳὶὶῒὂὃὸὸὒὓὺὺῢῢὢᾢὣᾣὼῲ'
  tr(acute, grave)
end
#to_oxia ⇒ Object
‘to_oxia` (str → str) Returns a string with the oxia replacing the tonos.
200 201 202 203 204 205 |
# File 'lib/grc.rb', line 200
# `to_oxia` (str → str)
# Replaces each tonos-accented character (Greek and Coptic block) with the
# corresponding oxia-accented character (Greek Extended block).
#
# The two sets render identically, so they are written as explicit code-point
# escapes. Literal characters cannot be told apart by eye and are folded
# together by Unicode normalization, which would turn the mapping into a no-op.
#
# @return [String, nil] the converted string, or the value of `@std_error`
#   when the receiver contains no Greek characters.
#   NOTE(review): `@std_error` is not assigned anywhere in the visible source,
#   so this is presumably nil — confirm.
def to_oxia
  return @std_error unless grc?

  # Order: ά Ά έ Έ ή Ή ί Ί ΐ ό Ό ύ Ύ ΰ ώ Ώ
  tonos = "\u03AC\u0386\u03AD\u0388\u03AE\u0389\u03AF\u038A\u0390\u03CC\u038C\u03CD\u038E\u03B0\u03CE\u038F"
  oxia = "\u1F71\u1FBB\u1F73\u1FC9\u1F75\u1FCB\u1F77\u1FDB\u1FD3\u1F79\u1FF9\u1F7B\u1FEB\u1FE3\u1F7D\u1FFB"
  tr(tonos, oxia)
end
#to_tonos ⇒ Object
‘to_tonos` (str → str) Returns a string with the tonos replacing the oxia. See page 9 of www.tlg.uci.edu/encoding/precomposed.pdf
210 211 212 213 214 215 |
# File 'lib/grc.rb', line 210
# `to_tonos` (str → str)
# Replaces each oxia-accented character (Greek Extended block) with the
# corresponding tonos-accented character (Greek and Coptic block).
#
# The two sets render identically, so they are written as explicit code-point
# escapes. Literal characters cannot be told apart by eye and are folded
# together by Unicode normalization, which would turn the mapping into a no-op.
#
# @return [String, nil] the converted string, or the value of `@std_error`
#   when the receiver contains no Greek characters.
#   NOTE(review): `@std_error` is not assigned anywhere in the visible source,
#   so this is presumably nil — confirm.
def to_tonos
  return @std_error unless grc?

  # Order: ά Ά έ Έ ή Ή ί Ί ΐ ό Ό ύ Ύ ΰ ώ Ώ
  oxia = "\u1F71\u1FBB\u1F73\u1FC9\u1F75\u1FCB\u1F77\u1FDB\u1FD3\u1F79\u1FF9\u1F7B\u1FEB\u1FE3\u1F7D\u1FFB"
  tonos = "\u03AC\u0386\u03AD\u0388\u03AE\u0389\u03AF\u038A\u0390\u03CC\u038C\u03CD\u038E\u03B0\u03CE\u038F"
  tr(oxia, tonos)
end
#tokenize ⇒ Object
‘tokenize` (str → array) Returns an array of tokens from the string.
21 22 23 |
# File 'lib/grc.rb', line 21
# `tokenize` (str → array)
# Splits the string into whitespace-separated tokens after inserting a space
# in front of every punctuation character, so a punctuation mark that follows
# a word becomes a token of its own.
#
# Besides `[[:punct:]]`, the Greek-specific marks are listed as explicit code
# points because several of them are visually indistinguishable when typed
# literally:
#   U+00B7 middle dot, U+0387 Greek ano teleia, U+2027 hyphenation point,
#   U+2E31 word separator middle dot, U+10101 Aegean word separator dot,
#   full stop, semicolon, U+037E Greek question mark.
#
# @return [Array<String>] the tokens in order of appearance
def tokenize
  punctuation = /[[:punct:]\u00B7\u0387\u2027\u2E31\u{10101}.;\u037E]/
  gsub(punctuation, ' \0').split
end
#tonos_to_oxia ⇒ Object
231 232 233 234 235 |
# File 'lib/grc.rb', line 231
# Thin wrapper that forwards to `to_oxia` after checking for Greek content.
#
# @return [String, nil] the result of `to_oxia` when the receiver contains
#   Greek characters; otherwise the value of `@std_error`.
#   NOTE(review): `@std_error` is not assigned anywhere in the visible source,
#   so this is presumably nil — confirm.
def tonos_to_oxia
  grc? ? to_oxia : @std_error
end
#transliterate ⇒ Object
‘transliterate` (str → str) Returns a string with greek characters replaced with their transliteration.
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/grc.rb', line 27
# `transliterate` (str → str)
# Returns the string with Greek words replaced by a Latin transliteration.
# The string is tokenized first, so tokens (including punctuation split off by
# `tokenize`) come back joined by single spaces.
#
# Per Greek token:
#   1. `ῥ` becomes `rh`;
#   2. if the token contains a vowel carrying a rough breathing, `h` is
#      prefixed to the word;
#   3. diacritics are removed and every remaining Greek letter is replaced
#      through the letter table.
# Tokens without Greek characters are passed through unchanged.
#
# Fixes over the previous version: the block used to be handed to `tokenize`,
# which ignores it, so the result was always an empty string; the body also
# referenced an undefined `hash` table and an undefined `word` variable.
#
# NOTE(review): the letter table below was added as part of this fix, since
# none was visible in the source. It is a plain letter-by-letter romanization
# (η → ē, ω → ō, υ → u); confirm it matches the intended scheme.
#
# @return [String, nil] the transliterated string, or the value of
#   `@std_error` when the receiver contains no Greek characters.
#   NOTE(review): `@std_error` is not assigned anywhere in the visible source,
#   so this is presumably nil — confirm.
def transliterate
  return @std_error unless grc?

  lowercase = {
    'α' => 'a', 'β' => 'b', 'γ' => 'g', 'δ' => 'd', 'ε' => 'e', 'ζ' => 'z',
    'η' => 'ē', 'θ' => 'th', 'ι' => 'i', 'κ' => 'k', 'λ' => 'l', 'μ' => 'm',
    'ν' => 'n', 'ξ' => 'x', 'ο' => 'o', 'π' => 'p', 'ρ' => 'r', 'σ' => 's',
    'ς' => 's', 'τ' => 't', 'υ' => 'u', 'φ' => 'ph', 'χ' => 'ch', 'ψ' => 'ps',
    'ω' => 'ō'
  }
  # Derive the capital entries instead of typing look-alike Greek capitals.
  uppercase = lowercase.map { |greek, latin| [greek.upcase, latin.capitalize] }.to_h
  latin_for = lowercase.merge(uppercase)
  rough_breathing = /[ἁἅᾅἃᾃἇᾇᾁἑἕἓἡἥᾕἣᾓἧᾗᾑἱἵἳἷὁὅὃὑὕὓὗὡὥᾥὣᾣὧᾧᾡ]/

  words = tokenize.map do |token|
    next token unless token.grc?

    word = token.gsub(/ῥ/, 'rh')
    prefix = word.match?(rough_breathing) ? 'h' : ''
    stripped = word.no_diacritics
    prefix + stripped.gsub(/\p{Greek}/) { |letter| latin_for.fetch(letter, letter) }
  end
  words.join(' ')
end
#unicode_name ⇒ Object
‘unicode_name` (str → array) Returns an array of unicode names from the string.
65 66 67 68 |
# File 'lib/grc.rb', line 65
# `unicode_name` (str → array)
# Looks up the Unicode name of every character in the string using
# `Unicode::Name.of`.
#
# @return [Array] one lookup result per character, in order
def unicode_name
  # Loaded on first use so the dependency is only needed by callers of this method.
  require 'unicode/name'

  chars.map do |glyph|
    Unicode::Name.of(glyph)
  end
end
#unicode_points ⇒ Object
‘unicode_points` (str → array) Returns an array of unicode points from the string.
49 50 51 |
# File 'lib/grc.rb', line 49
# `unicode_points` (str → array)
# Lists the code points of the string as `\uXXXX` text, using uppercase
# hexadecimal digits zero-padded to at least four places.
#
# @return [Array<String>] one `\uXXXX` entry per code point, in order
def unicode_points
  code_points = unpack('U*')
  code_points.map { |code_point| format('\\u%04X', code_point) }
end
#upper? ⇒ Boolean
217 218 219 |
# File 'lib/grc.rb', line 217
# Reports whether the string contains at least one uppercase character
# (`\p{Upper}`).
#
# @return [Boolean] true if any character matches `\p{Upper}`, false otherwise
def upper?
  # `match?` returns a boolean directly, so no `!!` coercion or MatchData is needed.
  match?(/\p{Upper}/)
end