Class: String::UTF8
Constant Summary
collapse
- UTF8 =
'utf-8'
Constants inherited
from String
Binary, COLORS, Encodings, Escapes, UNICODE_LEADERS_AND_TRAILERS, UNICODE_LT_PAT, UNICODE_L_PAT, UNICODE_T_PAT, UNICODE_WHITESPACE
Instance Attribute Summary collapse
Class Method Summary
collapse
Instance Method Summary
collapse
Methods inherited from String
#arguments, #ascii, #binary, #camelcase, #chunks, encodings, #mirc_formatted, #mirc_stripped, #mirc_translated_color, #post_arguments, #strip_user_prefixes, #to_flags, #to_s, #unescaped, #user_prefixes, #valid_channelname?, #valid_nickname?, #valid_user?
Constructor Details
#initialize(string, collation = nil) ⇒ UTF8
Returns a new instance of UTF8.
22
23
24
25
|
# File 'lib/string/utf8.rb', line 22
# Creates the string from +string+ after running it through
# Unicode::normalize_KC, and stores the collation.
#
# string    - text to normalize and use as the string's content
# collation - kept as-is in @collation (nil when omitted)
def initialize(string, collation = nil)
  normalized = Unicode.normalize_KC(string)
  super(normalized)
  @collation = collation
end
|
Instance Attribute Details
#collation ⇒ Object
Returns the value of attribute collation.
20
21
22
|
# File 'lib/string/utf8.rb', line 20
# Returns the value held in @collation (nil when none was set).
def collation; @collation; end
|
Class Method Details
.new(string, encoding, collation = nil) ⇒ Object
13
14
15
16
17
18
|
# File 'lib/string/utf8.rb', line 13
# Factory that keeps the (string, encoding, collation) calling convention
# while #initialize itself only takes (string, collation).
#
# string    - content for the new instance
# encoding  - must equal the UTF8 constant, otherwise a RuntimeError is raised
# collation - forwarded to #initialize (defaults to nil)
#
# Returns the freshly allocated and initialized instance.
def self.new(string, encoding, collation = nil)
  if encoding != UTF8
    raise "Encoding must be 'utf-8' but is '#{encoding}'"
  end

  instance = allocate
  # initialize is private, hence the explicit send.
  instance.send(:initialize, string, collation)
  instance
end
|
Instance Method Details
#<=>(other) ⇒ Object
56
57
58
59
|
# File 'lib/string/utf8.rb', line 56
# Orders this string against +other+ using Unicode.strcmp.
#
# Raises a RuntimeError when the two operands do not report the same
# collation (+other+ must respond to #collation).
def <=>(other)
  if @collation != other.collation
    raise "Can't compare strings with different collation"
  end

  Unicode::strcmp(self, other)
end
|
#==(other) ⇒ Object
52
53
54
|
# File 'lib/string/utf8.rb', line 52
# Equality check: +other+ is converted with #utf8 first and the result is
# handed to the inherited ==.
def ==(other)
  converted = other.utf8
  super(converted)
end
|
#[](arg1, arg2 = nil) ⇒ Object
Also known as:
slice
See String#[]. May return an integer > 255 when used like "\342\210\205"[0] # => 8709
33
34
35
36
37
38
39
40
41
|
# File 'lib/string/utf8.rb', line 33
# Codepoint-based element reference (see String#[]).
#
# arg1 - an Integer codepoint index, or a Range of codepoint indices
# arg2 - optional length in codepoints when arg1 is a start index
#
# With (start, length) or a Range, returns the selected codepoints re-packed
# as a string and converted with #utf8. With a single index, returns the
# codepoint itself as an Integer, which may be > 255
# (e.g. "\342\210\205"[0] # => 8709).
# Returns nil when the requested position lies outside the string.
def [](arg1, arg2=nil)
  codepoints = unpack("U*")
  if arg2 then
    part = codepoints.slice(arg1, arg2)
    # Array#slice yields nil for an out-of-range start; pass that through
    # instead of calling pack on nil.
    part && part.pack("U*").utf8
  elsif Range === arg1 then
    part = codepoints.slice(arg1)
    part && part.pack("U*").utf8
  else
    # Single index: hand back the raw codepoint.
    codepoints.slice(arg1)
  end
end
|
#[]=(*args) ⇒ Object
44
45
46
47
48
49
50
|
# File 'lib/string/utf8.rb', line 44
# Codepoint-based element assignment.
#
# args - the index arguments (a single index, a start/length pair, or a
#        Range), followed by the replacement value as the last element.
#        The value is converted with #utf8 before being spliced in.
#
# Replaces the receiver's content in place and returns self.
def []=(*args)
  value = args.pop
  codepoints = unpack("U*")
  codepoints[*args] = value.utf8.unpack("U*")
  # A single-index assignment stores the replacement as a nested Array;
  # flatten so pack always sees a flat list of codepoints.
  replace(codepoints.flatten.pack("U*"))
  self
end
|
#byte_insert ⇒ Object
6
|
# File 'lib/string/utf8.rb', line 6
# Keeps the #insert that exists at this point reachable as #byte_insert.
# NOTE(review): presumably this is the inherited byte-offset String#insert,
# preserved because this class defines its own codepoint-offset #insert — confirm.
alias byte_insert insert
|
#capitalize ⇒ Object
64
|
# File 'lib/string/utf8.rb', line 64
# Returns the result of Unicode::capitalize for this string, converted with #utf8.
def capitalize
  Unicode.capitalize(self).utf8
end
|
#chop ⇒ Object
117
118
119
|
# File 'lib/string/utf8.rb', line 117
# Returns a copy with the final character removed. A trailing "\n" or
# "\r\n" counts as one unit and is removed as a whole.
def chop
  last_unit = /(?:.|\r?\n)\z/u
  # The pattern is anchored at the end, so at most one match exists.
  sub(last_unit, '')
end
|
#chop! ⇒ Object
113
114
115
|
# File 'lib/string/utf8.rb', line 113
# In-place variant of #chop: removes the final character (or a trailing
# "\n" / "\r\n" as one unit). Returns nil when nothing was removed.
def chop!
  last_unit = /(?:.|\r?\n)\z/u
  # The pattern is anchored at the end, so at most one match exists.
  sub!(last_unit, '')
end
|
#decompose ⇒ Object
Decomposes the string and returns the decomposed string
135
136
137
|
# File 'lib/string/utf8.rb', line 135
# Decomposes the string and returns the decomposed string
# (the result of Unicode::decompose for the receiver).
def decompose
  Unicode::decompose(self)
end
|
#downcase ⇒ Object
63
|
# File 'lib/string/utf8.rb', line 63
# Returns the result of Unicode::downcase for this string, converted with #utf8.
def downcase
  Unicode.downcase(self).utf8
end
|
#each_char(&block) ⇒ Object
121
122
123
124
|
# File 'lib/string/utf8.rb', line 121
# Passes every character of the string (newlines included, thanks to the
# /m flag) to the given block, one at a time. Returns self.
def each_char(&block)
  any_char = /./um
  scan(any_char, &block)
  self
end
|
#encoding ⇒ Object
27
28
29
|
# File 'lib/string/utf8.rb', line 27
# Returns the encoding name constant String::UTF8::UTF8.
def encoding; String::UTF8::UTF8; end
|
#index(item, offset = 0) ⇒ Object
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
# File 'lib/string/utf8.rb', line 77
# Returns the codepoint index of the first occurrence of +item+ at or after
# codepoint +offset+, or nil when there is no such occurrence.
#
# item   - a Regexp, an Integer codepoint, or a String
# offset - codepoint position to start searching from (default 0);
#          a negative value counts from the end of the string
#
# Raises a RuntimeError when +item+ is a String whose encoding differs from
# the receiver's.
def index(item, offset=0)
  case item
  when Regexp, Integer
    codepoints = unpack("U*")
    offset += codepoints.size if offset < 0
    # tail stays nil when the offset falls outside the string.
    tail = codepoints[offset..-1] if offset >= 0
    return nil unless tail

    if Integer === item
      found = tail.index(item)
      # Only translate back to an absolute position when something was found.
      found && found + offset
    else
      mb = tail.pack("U*")
      bi = mb.byte_index(item)
      # Convert the byte position of the match into a codepoint count.
      bi && mb.byte_slice(0, bi).unpack("U*").size + offset
    end
  else
    raise "Must be of same encoding" if String === item and encoding != item.encoding
    if offset.zero? then
      bi = byte_index(item)
      bi && byte_slice(0, bi).unpack("U*").size
    else
      # With an offset, search for the literal text through the Regexp branch.
      index(Regexp.new(Regexp.escape(item)), offset)
    end
  end
end
|
#insert(offset, fragment) ⇒ Object
Inserts the string at codepoint offset specified in offset.
109
110
111
|
# File 'lib/string/utf8.rb', line 109
# Inserts +fragment+ at the codepoint offset given in +offset+.
# The receiver is modified in place; returns the result of #replace.
def insert(offset, fragment)
  codepoints = unpack("U*")
  # The fragment's codepoints go in as one nested Array and are flattened
  # right before packing.
  codepoints.insert(offset, fragment.unpack("U*"))
  replace(codepoints.flatten.pack("U*"))
end
|
#inspect ⇒ Object
154
155
156
|
# File 'lib/string/utf8.rb', line 154
# Returns "<encoding>(<collation or 'none'>):<inherited inspect output>".
def inspect
  label = collation || 'none'
  inherited = super
  "#{encoding}(#{label}):#{inherited}"
end
|
#length ⇒ Object
61
|
# File 'lib/string/utf8.rb', line 61
# Returns the number of codepoints in the string.
# The count is computed on every call: a cached value would go stale once
# the string is modified in place, and could not be stored on a frozen string.
def length; unpack("U*").size; end
|
#lstrip ⇒ Object
74
|
# File 'lib/string/utf8.rb', line 74
# Returns a copy with every match of UNICODE_L_PAT removed, converted with #utf8.
def lstrip
  stripped = gsub(UNICODE_L_PAT, '')
  stripped.utf8
end
|
#normalize_C ⇒ Object
Normalizes the string to form C and returns the result
140
141
142
|
# File 'lib/string/utf8.rb', line 140
# Normalizes the string to form C and returns the result
# (the result of Unicode::normalize_C for the receiver).
def normalize_C
  Unicode::normalize_C(self)
end
|
#normalize_D ⇒ Object
Normalizes the string to form D and returns the result
145
146
147
|
# File 'lib/string/utf8.rb', line 145
# Normalizes the string to form D and returns the result
# (the result of Unicode::normalize_D for the receiver).
def normalize_D
  Unicode::normalize_D(self)
end
|
#normalize_KC ⇒ Object
Normalizes the string to form KC and returns the result
150
151
152
|
# File 'lib/string/utf8.rb', line 150
# Normalizes the string to form KC and returns the result
# (the result of Unicode::normalize_KC for the receiver).
def normalize_KC
  Unicode::normalize_KC(self)
end
|
#reverse ⇒ Object
72
|
# File 'lib/string/utf8.rb', line 72
# Returns a copy with the codepoints in reverse order, converted with #utf8.
def reverse
  backwards = unpack("U*").reverse
  backwards.pack("U*").utf8
end
|
#rindex(item, offset = -1) ⇒ Object
97
98
99
100
101
102
103
104
105
106
|
# File 'lib/string/utf8.rb', line 97
# Returns the codepoint index of the last occurrence of +item+, or nil when
# it does not occur.
#
# item   - an Integer codepoint, or a value accepted by #byte_rindex
# offset - for an Integer item, the last codepoint position considered
#          (default -1, i.e. the whole string)
#
# Raises a RuntimeError when +item+ is a String whose encoding differs from
# the receiver's.
def rindex(item, offset = -1)
  if Integer === item
    return unpack("U*")[0..offset].rindex(item)
  end

  if String === item && encoding != item.encoding
    raise "Must be of same encoding"
  end

  # NOTE(review): offset is passed to byte_rindex unchanged; it looks like a
  # codepoint offset is used as a byte offset here — confirm for multibyte text.
  byte_pos = byte_rindex(item, offset)
  # Convert the byte position of the match into a codepoint count.
  byte_pos && byte_slice(0, byte_pos).unpack("U*").size
end
|
#rstrip ⇒ Object
75
|
# File 'lib/string/utf8.rb', line 75
# Returns a copy with every match of UNICODE_T_PAT removed, converted with #utf8.
def rstrip
  stripped = gsub(UNICODE_T_PAT, '')
  stripped.utf8
end
|
#swapcase ⇒ Object
65
66
67
68
69
70
71
|
# File 'lib/string/utf8.rb', line 65
# Returns a copy with the case of every character swapped: a character that
# differs from its Unicode::upcase form is replaced by that form, any other
# character by its Unicode::downcase form.
#
# Each character is converted on its own, so a case mapping that changes
# the number of codepoints cannot shift the characters that follow it.
def swapcase
  unpack("U*").map { |codepoint|
    char = [codepoint].pack("U")
    upper = Unicode::upcase(char)
    char == upper ? Unicode::downcase(char) : upper
  }.join
end
|
#upcase ⇒ Object
62
|
# File 'lib/string/utf8.rb', line 62
# Returns the result of Unicode::upcase for this string, converted with #utf8.
def upcase
  Unicode.upcase(self).utf8
end
|
#utf8(collation = nil) ⇒ Object