Class: String::UTF8

Inherits:
String show all
Defined in:
lib/string/utf8.rb

Constant Summary collapse

UTF8 =
'utf-8'

Constants inherited from String

Binary, COLORS, Encodings, Escapes, UNICODE_LEADERS_AND_TRAILERS, UNICODE_LT_PAT, UNICODE_L_PAT, UNICODE_T_PAT, UNICODE_WHITESPACE

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from String

#arguments, #ascii, #binary, #camelcase, #chunks, #config_key, encodings, #mirc_formatted, #mirc_stripped, #mirc_translated_color, #post_arguments, #same_nick?, #strip_user_prefixes, #to_flags, #to_s, #unescaped, #user_prefixes, #valid_channelname?, #valid_nickname?, #valid_user?

Constructor Details

#initialize(string, collation = nil) ⇒ UTF8

Returns a new instance of UTF8.



22
23
24
25
# File 'lib/string/utf8.rb', line 22

# Builds the string from +string+ after running it through
# Unicode.normalize_KC and remembers the collation.
#
# string    - text handed to Unicode.normalize_KC before being stored
# collation - value kept in @collation (nil when omitted)
def initialize(string, collation=nil)
	normalized = Unicode.normalize_KC(string)
	super(normalized)
	@collation = collation
end

Instance Attribute Details

#collation ⇒ Object (readonly)

Returns the value of attribute collation.



20
21
22
# File 'lib/string/utf8.rb', line 20

# Returns the collation stored in @collation by #initialize
# (nil when none was given).
def collation
  @collation
end

Class Method Details

.new(string, encoding, collation = nil) ⇒ Object



13
14
15
16
17
18
# File 'lib/string/utf8.rb', line 13

# Factory that only accepts the 'utf-8' encoding label.
#
# string    - text for the new instance
# encoding  - must equal the UTF8 constant, otherwise a RuntimeError is raised
# collation - forwarded to #initialize (nil when omitted)
#
# The instance is allocated manually and #initialize is invoked through
# send, because #initialize takes (string, collation) rather than the
# three arguments accepted here.
def self.new(string, encoding, collation=nil)
	unless encoding == UTF8
		raise "Encoding must be 'utf-8' but is '#{encoding}'"
	end
	instance = allocate
	instance.send(:initialize, string, collation)
	instance
end

Instance Method Details

#<=>(other) ⇒ Object



56
57
58
59
# File 'lib/string/utf8.rb', line 56

# Orders this string against +other+ using Unicode.strcmp.
#
# Raises a RuntimeError when the two operands do not report the same
# collation; +other+ therefore has to respond to #collation.
def <=>(other)
	same_collation = (@collation == other.collation)
	raise "Can't compare strings with different collation" unless same_collation
	Unicode.strcmp(self, other)
end

#==(other) ⇒ Object



52
53
54
# File 'lib/string/utf8.rb', line 52

# Equality against +other+ after converting it with #utf8.
#
# Objects that cannot be converted (they do not respond to #utf8) are
# simply unequal; previously such a comparison raised NoMethodError,
# which breaks the usual expectation that == never raises for foreign
# operands such as nil.
def ==(other)
	return false unless other.respond_to?(:utf8)
	super(other.utf8)
end

#[](arg1, arg2 = nil) ⇒ Object Also known as: slice

See String#[]. May return an integer > 255 when used like "\342\210\205"[0] # => 8709



33
34
35
36
37
38
39
40
41
# File 'lib/string/utf8.rb', line 33

# Codepoint-based element reference.
#
# arg1, arg2 - (start, length): returns the selected codepoints as a utf8 string
# arg1       - Range: returns the selected codepoints as a utf8 string
# arg1       - anything else (e.g. Integer): returns what Array#slice yields
#              for it on the codepoint list, i.e. the codepoint as an
#              Integer, which may be > 255
#
# Returns nil when the requested position lies outside the string.
def [](arg1, arg2=nil) #:nodoc:
	codepoints = unpack("U*")
	if arg2 then
		part = codepoints.slice(arg1, arg2)
		# Array#slice yields nil when out of range; pass that through
		part && part.pack("U*").utf8
	elsif Range === arg1 then
		part = codepoints.slice(arg1)
		part && part.pack("U*").utf8
	else
		# was slice(*args) -- `args` is not defined in this method
		codepoints.slice(arg1)
	end
end

#[]=(*args) ⇒ Object



44
45
46
47
48
49
50
# File 'lib/string/utf8.rb', line 44

# Codepoint-based element assignment.
#
# args - the index arguments (Integer, (start, length) or Range) followed
#        by the replacement value; the value is converted with #utf8.
#
# The receiver is modified in place via #replace and returned.
def []=(*args)
	value = args.pop
	codepoints = unpack("U*")
	codepoints[*args] = value.utf8.unpack("U*")
	# With a single Integer index the assignment above stores the
	# replacement as a nested Array, which pack("U*") cannot handle;
	# flattening splices it in. It is a no-op for the (start, length)
	# and Range forms, which are already flat.
	replace(codepoints.flatten.pack("U*"))
	self
end

#byte_insert ⇒ Object



6
# File 'lib/string/utf8.rb', line 6

# Keeps the #insert implementation in effect at this point of the file
# (line 6, before this class defines its own codepoint-based #insert at
# line 109) reachable as #byte_insert -- presumably the inherited
# String#insert; verify against the superclass.
alias byte_insert insert

#capitalize ⇒ Object



64
# File 'lib/string/utf8.rb', line 64

# Returns the result of Unicode.capitalize for this string, converted
# with #utf8.
def capitalize
	capitalized = Unicode.capitalize(self)
	capitalized.utf8
end

#chop ⇒ Object



117
118
119
# File 'lib/string/utf8.rb', line 117

# Returns a copy with the final character removed; a trailing "\n" or
# "\r\n" is removed as a single unit. An empty string stays empty.
def chop
	# Anchored at \z, so at most one match is possible.
	last_char = /(?:.|\r?\n)\z/u
	sub(last_char, '')
end

#chop! ⇒ Object



113
114
115
# File 'lib/string/utf8.rb', line 113

# In-place variant of #chop: removes the final character (or a trailing
# "\n" / "\r\n" as one unit) from the receiver.
#
# Returns the receiver, or nil when nothing was removed.
def chop!
	# Anchored at \z, so at most one match is possible.
	last_char = /(?:.|\r?\n)\z/u
	sub!(last_char, '')
end

#decompose ⇒ Object

Decomposes the string and returns the decomposed string



135
136
137
# File 'lib/string/utf8.rb', line 135

# Hands the string to Unicode.decompose and returns that result
# unchanged (it is not converted with #utf8).
def decompose #:nodoc:
	Unicode.decompose(self)
end

#downcase ⇒ Object



63
# File 'lib/string/utf8.rb', line 63

# Returns the result of Unicode.downcase for this string, converted
# with #utf8.
def downcase
	lowered = Unicode.downcase(self)
	lowered.utf8
end

#dup ⇒ Object



130
131
132
# File 'lib/string/utf8.rb', line 130

# Returns a new String::UTF8 built from this string that carries the
# same collation as the receiver.
def dup
	klass = String::UTF8
	klass.new(self, klass::UTF8, @collation)
end

#each_char(&block) ⇒ Object



121
122
123
124
# File 'lib/string/utf8.rb', line 121

# Passes every character of the string (newlines included, thanks to
# the /m flag) to the given block, one at a time.
#
# Returns the receiver.
def each_char(&block)
	any_char = /./um
	scan(any_char, &block)
	self
end

#encoding ⇒ Object



27
28
29
# File 'lib/string/utf8.rb', line 27

# Returns the encoding label of this class, i.e. the value of the
# String::UTF8::UTF8 constant ('utf-8').
def encoding
	String::UTF8::UTF8
end

#index(item, offset = 0) ⇒ Object



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/string/utf8.rb', line 77

# Finds the first occurrence of +item+ and reports its position in
# codepoints (not bytes).
#
# item   - Regexp, Integer (a codepoint) or a string-like object
# offset - codepoint position at which the search starts (default 0)
#
# Returns the codepoint index, or nil when +item+ is not found or
# +offset+ lies beyond the end of the string.
# Raises a RuntimeError when +item+ is a String whose #encoding differs
# from the receiver's.
def index(item, offset=0)
	case item
		when Regexp
			tail = unpack("U*")[offset..-1]
			# offset past the end: nothing left to search
			return nil unless tail
			mb = tail.pack("U*")
			bi = mb.byte_index(item)
			# translate the byte position back into a codepoint count
			bi && mb.byte_slice(0,bi).unpack("U*").size+offset
		when Integer
			# sucks, but Array#index does not accept an offset-arg
			tail = unpack("U*")[offset..-1]
			pos  = tail && tail.index(item)
			# was `...index(item)+offset`, which raised NoMethodError on a miss
			pos && pos+offset
		else
			raise "Must be of same encoding" if String === item and encoding != item.encoding
			if offset.zero? then
				bi = byte_index(item)
				bi && byte_slice(0,bi).unpack("U*").size
			else
				index(Regexp.new(Regexp.escape(item)), offset)
			end
	end
end

#insert(offset, fragment) ⇒ Object

Inserts the string at codepoint offset specified in offset.



109
110
111
# File 'lib/string/utf8.rb', line 109

# Inserts +fragment+ before the codepoint at position +offset+ and
# replaces the receiver's content with the result.
#
# offset   - codepoint position, interpreted by Array#insert
# fragment - object responding to unpack("U*")
#
# Returns whatever #replace returns.
def insert(offset, fragment)  #:nodoc:
	codepoints = unpack("U*")
	# the fragment goes in as one nested Array and is spliced by flatten
	codepoints.insert(offset, fragment.unpack("U*"))
	replace(codepoints.flatten.pack("U*"))
end

#inspect ⇒ Object



154
155
156
# File 'lib/string/utf8.rb', line 154

# Prefixes the inherited #inspect output with "<encoding>(<collation>):",
# using 'none' when no collation is set.
def inspect
	collation_label = collation || 'none'
	"#{encoding}(#{collation_label}):#{super}"
end

#length ⇒ Object



61
# File 'lib/string/utf8.rb', line 61

# Number of codepoints in the string.
#
# The count is computed on every call. Caching it in an instance
# variable returned a stale value once the receiver had been modified
# in place (e.g. through #replace or #<<).
def length
	unpack("U*").size
end

#lstrip ⇒ Object



74
# File 'lib/string/utf8.rb', line 74

# Returns a copy with every match of UNICODE_L_PAT removed, converted
# with #utf8.
def lstrip
	stripped = gsub(UNICODE_L_PAT, '')
	stripped.utf8
end

#normalize_C ⇒ Object

Normalizes the string to form C and returns the result



140
141
142
# File 'lib/string/utf8.rb', line 140

# Hands the string to Unicode.normalize_C (normalization form C) and
# returns that result unchanged (it is not converted with #utf8).
def normalize_C #:nodoc:
	Unicode.normalize_C(self)
end

#normalize_D ⇒ Object

Normalizes the string to form D and returns the result



145
146
147
# File 'lib/string/utf8.rb', line 145

# Hands the string to Unicode.normalize_D (normalization form D) and
# returns that result unchanged (it is not converted with #utf8).
def normalize_D #:nodoc:
	Unicode.normalize_D(self)
end

#normalize_KC ⇒ Object

Normalizes the string to form KC and returns the result



150
151
152
# File 'lib/string/utf8.rb', line 150

# Hands the string to Unicode.normalize_KC (normalization form KC) and
# returns that result unchanged (it is not converted with #utf8).
def normalize_KC #:nodoc:
	Unicode.normalize_KC(self)
end

#reverse ⇒ Object



72
# File 'lib/string/utf8.rb', line 72

# Returns a copy with the codepoints in reverse order (multibyte
# characters stay intact), converted with #utf8.
def reverse
	backwards = unpack("U*").reverse
	backwards.pack("U*").utf8
end

#rindex(item, offset = -1) ⇒ Object



97
98
99
100
101
102
103
104
105
106
# File 'lib/string/utf8.rb', line 97

# Finds the last occurrence of +item+ and reports its position in
# codepoints.
#
# item   - Integer (a codepoint) or anything accepted by byte_rindex
# offset - for an Integer item, the last codepoint position considered
#          (default -1, the end of the string)
#
# Returns the codepoint index or nil when nothing is found.
# Raises a RuntimeError when +item+ is a String whose #encoding differs
# from the receiver's.
def rindex(item, offset=-1)
	if Integer === item
		return unpack("U*")[0..offset].rindex(item)
	end

	if String === item && encoding != item.encoding
		raise "Must be of same encoding"
	end
	# NOTE(review): offset is handed to byte_rindex as-is; if byte_rindex
	# counts bytes, a codepoint offset other than -1 may select the wrong
	# position in multibyte text -- confirm.
	byte_pos = byte_rindex(item, offset)
	# translate the byte position back into a codepoint count
	byte_pos && byte_slice(0,byte_pos).unpack("U*").size
end

#rstrip ⇒ Object



75
# File 'lib/string/utf8.rb', line 75

# Returns a copy with every match of UNICODE_T_PAT removed, converted
# with #utf8.
def rstrip
	stripped = gsub(UNICODE_T_PAT, '')
	stripped.utf8
end

#strip ⇒ Object



73
# File 'lib/string/utf8.rb', line 73

# Returns a copy with matches of UNICODE_L_PAT (the pattern #lstrip
# uses) and then of UNICODE_T_PAT (the pattern #rstrip uses) removed,
# converted with #utf8.
#
# The second pass used UNICODE_L_PAT again, so the trailing side was
# never stripped.
def strip
	gsub(UNICODE_L_PAT, '').gsub(UNICODE_T_PAT, '').utf8
end

#swapcase ⇒ Object



65
66
67
68
69
70
71
# File 'lib/string/utf8.rb', line 65

# Returns a copy in which every codepoint that equals its upcased form
# is replaced by the downcased form, and every other codepoint by the
# upcased form. The result is converted with #utf8, matching #upcase,
# #downcase and #capitalize.
#
# NOTE(review): the three codepoint lists are paired by position, so
# this relies on Unicode::upcase / Unicode::downcase keeping the number
# of codepoints unchanged -- confirm for characters whose case mapping
# expands to several codepoints.
def swapcase
	up   = Unicode::upcase(self)
	down = Unicode::downcase(self)
	swapped = unpack("U*").zip(up.unpack("U*"), down.unpack("U*")).map { |n,u,d|
		n == u ? d : u
	}
	swapped.pack("U*").utf8
end

#upcase ⇒ Object



62
# File 'lib/string/utf8.rb', line 62

# Returns the result of Unicode.upcase for this string, converted
# with #utf8.
def upcase
	raised = Unicode.upcase(self)
	raised.utf8
end

#utf8(collation = nil) ⇒ Object



126
127
128
# File 'lib/string/utf8.rb', line 126

# Returns a new String::UTF8 built from this string.
#
# collation - collation for the new instance (nil when omitted; the
#             receiver's own collation is not carried over)
def utf8(collation=nil)
	klass = String::UTF8
	klass.new(self, klass::UTF8, collation)
end