Class: String
- Inherits:
-
Object
show all
- Defined in:
- lib/rcs-common/sanitize.rb,
lib/rcs-common/binary.rb,
lib/rcs-common/utf16le.rb,
lib/rcs-common/keywords.rb,
lib/rcs-common/pascalize.rb
Overview
Here we re-open Ruby's String class; the namespace must not be specified.
Constant Summary
collapse
- REMOVE_INVALID_CHARS_REGEXP =
Regexp.new(/([^[:alnum:][:graph:]\n\r])+/u)
Instance Method Summary
collapse
Instance Method Details
#binary_add_at_offset(offset, value) ⇒ Object
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
# File 'lib/rcs-common/binary.rb', line 46
# Adds +value+ to the unsigned integer (pack directive 'I', read as 4 bytes)
# stored at byte +offset+ of the receiver and writes the sum back in place.
#
# @param offset [Integer] byte position of the integer to update
# @param value [Integer] amount to add to the stored integer
# @return [String] the receiver, modified in place
# @raise [OutOfBounds] if +offset+ is negative, lies past the end of the
#   receiver, or leaves fewer than 4 bytes to read
def binary_add_at_offset(offset, value)
  io = StringIO.new(self)
  raise OutOfBounds if offset < 0
  raise OutOfBounds if offset > io.size
  # A whole 4-byte integer must be available: a short (or nil) read would
  # otherwise surface as an unrelated NoMethodError further down.
  raise OutOfBounds if offset + 4 > io.size

  io.pos = offset
  current = io.read(4).unpack('I').first
  # Rewind to the same position so the sum overwrites the value just read.
  io.pos = offset
  io.write([current + value].pack('I'))
  self
ensure
  io.close if io
end
|
#binary_patch(match, replace) ⇒ Object
24
25
26
27
28
29
30
|
# File 'lib/rcs-common/binary.rb', line 24
# Replaces every occurrence of the byte sequence +match+ inside the receiver
# with +replace+, modifying the receiver in place.
#
# Both arguments are compared and inserted as ASCII-8BIT data. They are
# copied first, so the caller's strings keep their encoding and may be frozen.
#
# @param match [String] byte sequence to look for
# @param replace [String] byte sequence to insert instead
# @return [String] the receiver
# @raise [MatchNotFound] if +match+ does not occur in the receiver
def binary_patch(match, replace)
  needle = match.dup.force_encoding('ASCII-8BIT')
  # Check with the same binary needle that the substitution uses, so the
  # check and the replacement cannot disagree because of encoding tags.
  raise MatchNotFound unless self[needle]

  patch = replace.dup.force_encoding('ASCII-8BIT')
  # Block form: the replacement is inserted verbatim, without the backslash
  # back-reference expansion a replacement-string argument would get.
  gsub!(needle) { patch }
end
|
#binary_patch_at_offset(offset, replace) ⇒ Object
32
33
34
35
36
37
38
39
40
41
42
43
44
|
# File 'lib/rcs-common/binary.rb', line 32
# Overwrites the receiver's content, starting at byte +offset+, with +replace+.
#
# @param offset [Integer] byte position where writing starts
# @param replace [String] data written over the existing content
# @return [String] the receiver, modified in place
# @raise [OutOfBounds] if +offset+ is negative or greater than the receiver's size
# @raise [OutOfBoundsString] if +replace+ would extend past the receiver's end
def binary_patch_at_offset(offset, replace)
  buffer = StringIO.new(self)
  limit = buffer.size
  raise OutOfBounds if offset < 0 || offset > limit
  raise OutOfBoundsString if offset + replace.bytesize > limit

  buffer.seek(offset)
  buffer.write(replace)
  buffer.close
  self
end
|
#force_utf8(modify_self = false) ⇒ Object
13
14
15
16
17
18
19
20
|
# File 'lib/rcs-common/sanitize.rb', line 13
# Converts the receiver to UTF-8, dropping anything that cannot be converted.
#
# When the receiver is valid in its current encoding, that encoding is used
# as the conversion source. Otherwise the bytes are treated as BINARY.
# Invalid and undefined sequences are replaced with the empty string.
#
# @param modify_self [Boolean] when true, convert the receiver in place
#   (via encode!) instead of returning a converted copy
# @return [String] the converted string (the receiver itself when
#   +modify_self+ is true)
def force_utf8(modify_self = false)
  source = valid_encoding? ? encoding.to_s : 'BINARY'
  options = { invalid: :replace, undef: :replace, replace: '' }
  # The options must be passed as keywords: splatting a Hash as a trailing
  # positional argument is not treated as keywords by Ruby 3.
  if modify_self
    encode!('UTF-8', source, **options)
  else
    encode('UTF-8', source, **options)
  end
end
|
#force_utf8! ⇒ Object
22
23
24
|
# File 'lib/rcs-common/sanitize.rb', line 22
# In-place variant of #force_utf8: calls it with +modify_self+ set to true.
#
# @return [Object] whatever #force_utf8 returns for an in-place conversion
def force_utf8!
  in_place = true
  force_utf8(in_place)
end
|
#keywords ⇒ Object
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
# File 'lib/rcs-common/keywords.rb', line 10
# Extracts a sorted list of unique, lower-cased keywords from the receiver.
#
# A copy of the receiver is converted with #force_utf8!. Every run of
# non-alphanumeric characters then becomes a single space, and the text is
# stripped, down-cased and split on whitespace. Words longer than 25
# characters are discarded.
#
# @return [Array<String>] unique keywords in sorted order, or an empty array
#   if extraction fails with a StandardError
def keywords
  text = dup
  text.force_utf8!
  words = text.gsub(/([^[:alnum:]])+/u, ' ').strip.downcase.split(' ')
  words.reject { |word| word.size > 25 }.uniq.sort
rescue StandardError
  # Best effort: a string that cannot be processed yields no keywords.
  # Only StandardError is rescued so that Interrupt, SystemExit and similar
  # are not silently swallowed.
  []
end
|
#pascalize ⇒ Object
returns a string encoded into a pascalized form
7
8
9
10
11
12
13
14
15
16
17
18
|
# File 'lib/rcs-common/pascalize.rb', line 7
# Builds the "pascalized" form of the receiver: a length header packed with
# the 'I' directive, the receiver's UTF-16LE bytes, and two trailing NUL
# bytes. The header value is the UTF-16LE byte size plus 2, i.e. it counts
# the terminator.
#
# @return [String] the pascalized data, tagged ASCII-8BIT
def pascalize
  utf16 = encode('UTF-16LE')
  header = [utf16.bytesize + 2].pack('I')
  # The hex round-trip yields the encoded bytes as a plain binary string.
  payload = utf16.unpack('H*').pack('H*')
  (header + payload + "\x00\x00").encode!('ASCII-8BIT')
end
|
#remove_invalid_chars ⇒ Object
9
10
11
|
# File 'lib/rcs-common/sanitize.rb', line 9
# Returns the #force_utf8 conversion of the receiver with every match of
# REMOVE_INVALID_CHARS_REGEXP replaced by a single space.
#
# @return [String] the cleaned-up string
def remove_invalid_chars
  utf8_text = force_utf8
  utf8_text.gsub(REMOVE_INVALID_CHARS_REGEXP, ' ')
end
|
#safe_utf8_encode ⇒ Object
79
80
81
82
|
# File 'lib/rcs-common/utf16le.rb', line 79
# Re-tags the receiver as UTF-8 and then re-encodes it in place from UTF-8
# to UTF-8, asking for invalid and undefined sequences to be replaced with
# the empty string.
#
# @return [String] the receiver
def safe_utf8_encode
  scrub_options = { invalid: :replace, undef: :replace, replace: '' }
  force_encoding('UTF-8')
  encode!('UTF-8', 'UTF-8', **scrub_options)
end
|
#safe_utf8_encode_invalid ⇒ Object
71
72
73
74
75
76
77
|
# File 'lib/rcs-common/utf16le.rb', line 71
# Makes a best-effort attempt to leave the receiver as valid text.
#
# Returns immediately when the receiver is already valid UTF-8. Otherwise it
# runs #safe_utf8_encode and returns if the result is valid. As a last
# resort it re-tags the receiver as BINARY and re-encodes it in place with
# '?' as the replacement.
#
# @return [String] the receiver
def safe_utf8_encode_invalid
  already_clean = encoding == Encoding::UTF_8 && valid_encoding?
  return self if already_clean

  safe_utf8_encode
  return self if valid_encoding?

  # NOTE(review): this call converts *to* BINARY *from* UTF-8; given the
  # method name the arguments look swapped -- confirm the intent before
  # relying on this fallback.
  force_encoding('BINARY')
  encode!('BINARY', 'UTF-8', invalid: :replace, undef: :replace, replace: '?')
end
|
#strip_html_tags ⇒ Object
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
# File 'lib/rcs-common/sanitize.rb', line 26
# Returns a copy of the receiver with HTML markup removed.
#
# Three substitutions are applied in order: literal <...> tags are deleted,
# entity-escaped tags (&lt;...&gt;, optionally with repeated "amp;") are
# deleted, and any remaining character entity becomes a single space.
#
# @return [String] a new string; the receiver is left unchanged
def strip_html_tags
  rules = [
    [/<[^>]*>/, ''],
    [/&(amp;)*lt;.*?&(amp;)*gt;/im, ''],
    [/&(amp;)*((#x?)?[a-f0-9]+|[a-z]+);/i, ' ']
  ]
  rules.each_with_object(dup) do |(pattern, replacement), text|
    text.gsub!(pattern, replacement)
  end
end
|
#terminate_utf16le ⇒ Object
59
60
61
|
# File 'lib/rcs-common/utf16le.rb', line 59
# Re-tags the receiver as UTF-16LE (in place) and returns a new string
# consisting of the receiver followed by a UTF-16LE NUL character.
#
# @return [String] the NUL-terminated UTF-16LE string
def terminate_utf16le
  terminator = "\0".encode('UTF-16LE')
  force_encoding('UTF-16LE') + terminator
end
|
#to_binary ⇒ Object
46
47
48
|
# File 'lib/rcs-common/utf16le.rb', line 46
# Returns the receiver's bytes as a new string by unpacking them to a hex
# digit string and packing that back.
#
# @return [String] a copy of the receiver's bytes
def to_binary
  hex_digits = unpack('H*')
  hex_digits.pack('H*')
end
|
#to_utf16le ⇒ Object
63
64
65
|
# File 'lib/rcs-common/utf16le.rb', line 63
# Returns a copy of the receiver transcoded to UTF-16LE.
#
# @return [String] the UTF-16LE encoded string
def to_utf16le
  target = 'UTF-16LE'
  encode(target)
end
|
#to_utf16le_binary ⇒ Object
50
51
52
|
# File 'lib/rcs-common/utf16le.rb', line 50
# Transcodes the receiver to UTF-16LE and passes the result through
# #to_binary.
#
# @return [String] the result of #to_binary on the UTF-16LE encoded string
def to_utf16le_binary
  wide = encode('UTF-16LE')
  wide.to_binary
end
|
#to_utf16le_binary_null ⇒ Object
54
55
56
57
|
# File 'lib/rcs-common/utf16le.rb', line 54
# Appends a NUL character to the receiver and converts the result with
# #to_utf16le_binary.
#
# @return [String] the result of #to_utf16le_binary on the terminated string
def to_utf16le_binary_null
  terminated = self + "\0"
  terminated.to_utf16le_binary
end
|
#unpascalize ⇒ Object
returns a string decoded from its pascalized form
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
# File 'lib/rcs-common/pascalize.rb', line 21
# Decodes a string from its pascalized form: a length header (pack directive
# 'I') followed by that many bytes of NUL-terminated UTF-16LE text.
#
# Sizes and offsets are measured in bytes, so the result does not depend on
# the encoding the receiver happens to be tagged with.
#
# @return [String, nil] the decoded UTF-8 text without its terminator, or nil
#   when the header is missing, the declared length exceeds the available
#   data, or the payload cannot be converted
def unpascalize
  size = unpack('I').first
  # No complete header, or the header promises more bytes than are present.
  return nil if size.nil? || size > bytesize - 4

  decoded = byteslice(4, size).force_encoding('UTF-16LE').encode('UTF-8')
  # Drop the last character, which is the NUL terminator for well-formed data.
  decoded.chop!
  decoded
rescue
  nil
end
|
#unpascalize_ary ⇒ Object
returns an array containing all the concatenated pascalized strings
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
# File 'lib/rcs-common/pascalize.rb', line 41
# Decodes a buffer made of several concatenated pascalized strings.
#
# Records are walked front to back: each one starts with a length header
# (pack directive 'I') and is decoded with #unpascalize. Records that fail to
# decode are skipped. Trailing data too short to hold a header is ignored.
#
# @return [Array<String>] the decoded strings, in buffer order; empty when
#   the receiver holds no complete record header
def unpascalize_ary
  strings = []
  offset = 0
  total = bytesize
  # A record needs at least its 4-byte header; this also covers an empty
  # receiver, which would otherwise fail when reading the length.
  while total - offset >= 4
    record = byteslice(offset, total - offset)
    decoded = record.unpascalize
    strings << decoded unless decoded.nil?
    # Advance past the header plus the payload length it declares.
    offset += record.unpack('I').first + 4
  end
  strings
end
|
#utf16le_to_utf8 ⇒ Object
67
68
69
|
# File 'lib/rcs-common/utf16le.rb', line 67
# Re-tags the receiver as UTF-16LE (in place), transcodes it to UTF-8 and
# removes one trailing NUL character if present.
#
# @return [String] the UTF-8 text
def utf16le_to_utf8
  wide = force_encoding('UTF-16LE')
  wide.encode('UTF-8').chomp("\0")
end
|