Module: String::StringExtensions

Included in:
String
Defined in:
lib/libmatty/string.rb

Instance Method Summary collapse

Instance Method Details

#adlerObject

A hacked up adler16 checksum, a la Andrew Tridgell. This is probably even slower than Ruby’s native CRC support. A weak, trivial checksum, part of rsync.



210
211
212
213
214
215
216
217
# File 'lib/libmatty/string.rb', line 210

# A hacked-up Adler-style checksum over the bytes of the string.
#
# +a+ is the plain sum of the byte values and +b+ is a position-weighted sum
# in which byte i is weighted by (length - i) + 1.  Each is reduced modulo
# 65536 and the two halves are packed as a | (b << 16).
#
# Byte values are read with unpack("C*") instead of String#[]: indexing a
# String with an integer only yields an integer on Ruby 1.8; on 1.9+ it
# yields a one-character String and the additions raise TypeError.
#
# Returns an Integer (0 for the empty string).
def adler
    data = unpack("C*")
    len = data.size
    a = 0
    b = 0
    data.each_with_index do |byte, i|
        a += byte
        b += ((len - i) + 1) * byte
    end
    (a % 65536) | ((b % 65536) << 16)
end

#asciizObject

Sometimes string buffers passed through Win32 interfaces come with garbage after the trailing NUL; this method gets rid of that, like String#trim



45
46
47
48
49
50
51
# File 'lib/libmatty/string.rb', line 45

# Returns the part of the string that precedes the first NUL byte.
#
# When the string contains no NUL the receiver itself is returned (not a
# copy).  A NUL at index 0 yields an empty string; the previous range-based
# slice (0..index-1) turned into 0..-1 in that case and returned everything,
# garbage included.
def asciiz
    nul = index("\x00")
    nul ? self[0, nul] : self
end

#asciiz!Object



53
54
55
# File 'lib/libmatty/string.rb', line 53

# In-place variant of #asciiz: replaces the receiver's contents with the
# value #asciiz returns.
def asciiz!
  trimmed = asciiz
  replace(trimmed)
end

#b64(len = nil) ⇒ Object

Base64 encode



353
354
355
356
357
358
359
360
# File 'lib/libmatty/string.rb', line 353

# Base64-encodes the string.
#
# len - optional Numeric line width.  When given, the encoded text is broken
#       into lines of at most len characters, each terminated by "\n".
#       Any other value (including nil) returns a single unbroken line.
def b64(len=nil)
  encoded = [self].pack("m").delete("\n")
  return encoded unless len && Numeric === len

  encoded.scan(/.{1,#{len}}/).join("\n") + "\n"
end

#class_nameObject

convert a string to its idiomatic ruby class name



104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/libmatty/string.rb', line 104

# Converts an underscored name into a Ruby-style class name.
#
# The character following an underscore (and the very first character) is
# upcased.  An underscore seen while the next character is already due to be
# upcased (a leading underscore, or the second of a doubled one) emits "::",
# so "foo_bar" => "FooBar" and "foo__bar" => "Foo::Bar".
def class_name
    out = ""
    capitalize_next = true
    each_byte do |byte|
        ch = byte.chr
        if ch != "_"
            out << (capitalize_next ? ch.upcase : ch.to_s)
            capitalize_next = false
        elsif capitalize_next
            out << "::"
        else
            capitalize_next = true
        end
    end
    out
end

#crc32Object

returns CRC32 checksum for the string object



377
378
379
380
381
382
383
384
385
386
387
388
389
390
# File 'lib/libmatty/string.rb', line 377

# Returns the CRC32 checksum of the string, as computed by Zlib.crc32.
#
# A pure-Ruby version was kept in the original source for reference only
# (described there as slower):
#
#   r = 0xFFFFFFFF
#   each_byte do |b|
#     r ^= b
#     8.times { r = (r >> 1) ^ (0xEDB88320 * (r & 1)) }
#   end
#   r ^ 0xFFFFFFFF
def crc32
  require 'zlib' # loaded on first call rather than at file load
  Zlib.crc32(self)
end

#cstring(off = 0) ⇒ Object

Returns a single null-terminated ascii string from beginning of self. This will return the entire string if no null is encountered.

Parameters:

off = specify an optional beginning offset


372
373
374
# File 'lib/libmatty/string.rb', line 372

# Returns the NUL-terminated string that starts at offset +off+, without the
# terminator.  If no NUL follows +off+, everything from +off+ to the end of
# the string is returned.
#
# off - optional starting offset (default 0); a negative value counts from
#       the end of the string.
#
# The NUL is searched for starting at +off+ and the slice length is measured
# relative to +off+.  Previously the search always began at 0 and the NUL's
# absolute index was used as a length, so any non-zero offset gave the wrong
# span.
def cstring(off=0)
  off += size if off < 0
  nul = index("\x00", off)
  self[off, (nul || size) - off]
end

#d64Object

Base64 decode



363
# File 'lib/libmatty/string.rb', line 363

# Base64-decodes the string and returns the decoded bytes.
def d64
  unpack("m").first
end

#dehexifyObject

Convert a string of raw hex characters (no %'s or anything) into binary



278
279
280
281
# File 'lib/libmatty/string.rb', line 278

def dehexify
    (ret||="") << (me||=clone).shift(2).to_i(16).chr while not (me||=clone).empty?
    return ret
end

#dehexify!Object

Convert a string of raw hex characters (no %'s or anything) into binary, in place



284
285
286
287
# File 'lib/libmatty/string.rb', line 284

def dehexify!
    (ret||="") << (me||=clone).shift(2).to_i(16).chr while not (me||=clone).empty?
    self.replace ret
end

#ends_with?(x) ⇒ Boolean

Returns:

  • (Boolean)


132
133
134
# File 'lib/libmatty/string.rb', line 132

# Returns true when the string ends with +x+.
#
# An empty +x+ is a suffix of every string.  The previous range-based slice
# (-(0)..-1, i.e. the whole string) compared the entire receiver against ""
# and so answered false for any non-empty receiver.
def ends_with?(x)
    return true if x.empty?
    self[-x.size, x.size] == x
end

#entropyObject

Cribbed from Ero Carrera’s pefile; a relatively expensive entropy function, gives a float result of random-bits-per-byte.



139
140
141
142
143
144
145
146
147
148
149
# File 'lib/libmatty/string.rb', line 139

# Shannon entropy of the string's bytes, in bits per byte.
#
# Builds a 256-entry byte histogram in a single pass and sums
# -p * log2(p) over the byte values that occur.  Counting bytes directly
# (rather than calling String#count once per possible byte value) avoids
# encoding-compatibility errors on non-ASCII text and 256 scans of the data.
#
# Returns a Float for non-empty input and the Integer 0 for the empty string.
def entropy
    total = bytesize.to_f
    histogram = Array.new(256, 0)
    each_byte { |byte| histogram[byte] += 1 }

    e = 0
    histogram.each do |occurrences|
        next if occurrences == 0
        x = occurrences / total
        e += - x * Math.log2(x)
    end
    e
end

#from_utf16_bufferObject

Convenience for parsing UNICODE strings from a buffer. Assumes the last char ends in 00, which is not always true but works in English.



38
39
40
# File 'lib/libmatty/string.rb', line 38

# Cuts the buffer after the first run of three NUL bytes (keeping the first
# three bytes of that run) and hands the result to #from_utf16.
#
# Raises NoMethodError when the buffer contains no such run, because the
# index lookup then returns nil.
def from_utf16_buffer
    terminator = index("\0\0\0")
    self[0..terminator+2].from_utf16
end

#hashcodeObject

Fast multiplicative hash of a string, for non-security stuff. (Described as a "37 hash", but the code shown below multiplies by 31.)



434
435
436
437
438
439
440
441
# File 'lib/libmatty/string.rb', line 434

# Fast, non-cryptographic hash of the string's bytes.
#
# Each step computes code = code * 31 + byte, written as
# ((code << 5) - code) + byte.  The result is not truncated, so it grows with
# the length of the input.  The empty string hashes to 5381.
#
# Bytes are taken from each_byte: String#[] with an integer index only
# returns an integer on Ruby 1.8, and raises TypeError in the addition on
# later versions.
def hashcode
    return 5381 if empty?
    code = 0
    each_byte do |byte|
        code = ((code << 5) - code) + byte
    end
    code
end

#hexdump(capture = false) ⇒ Object

My entry into the hexdump race. Outputs canonical hexdump, uses StringIO for speed, could be cleaned up with “ljust”, and should probably use table lookup instead of to_s(16) method calls.



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/libmatty/string.rb', line 64

# Canonical hexdump of the string's bytes.
#
# Each output row is: the offset as 8 hex digits, two spaces, up to 16 bytes
# as two hex digits followed by a space (with one extra space after the
# eighth byte), "-- " for every missing byte of a short final row, and the
# row's bytes between pipes with non-printable bytes shown as ".".
#
# capture - when true the dump is returned as a String; otherwise it is
#           written with puts and nil is returned.
#
# Printability is decided by Integer#printable?, which this method expects
# to be provided elsewhere in the project.
#
# Rows are cut from an array of byte values, so every byte is emitted.  The
# previous countdown loop dropped the final byte whenever the length was one
# more than a multiple of 16 (including one-byte strings), and relied on
# String#[] returning integers, which is only true on Ruby 1.8.
def hexdump(capture=false)
    require 'stringio'
    sio = StringIO.new
    data = unpack("C*")
    off = 0

    while off < data.size
        row = data[off, 16]
        pad = 16 - row.size
        pbuf = ""

        sio.write(off.to_s(16).rjust(8, "0") + "  ")

        row.each_with_index do |c, i|
            sio.write(c.to_s(16).rjust(2, "0") + " ")
            pbuf << (c.printable? ? c.chr : ".")
            sio.write(" ") if i == 7
        end

        sio.write("-- " * pad) if pad > 0
        sio.write(" |#{ pbuf }|\n")
        off += row.size
    end

    out = sio.string
    if capture
        out
    else
        puts out
    end
end

#hexifyObject

Convert a string into hex characters



265
266
267
268
269
# File 'lib/libmatty/string.rb', line 265

# Returns the string's bytes rendered as lowercase hex, two digits per byte
# ("ABC" => "414243").
def hexify
    unpack("C*").map { |byte| "%02x" % byte }.join
end

#hexify!Object

convert a string to hex characters in place



272
273
274
# File 'lib/libmatty/string.rb', line 272

# In-place variant of #hexify: replaces the receiver's contents with the
# value #hexify returns.
def hexify!
    encoded = hexify
    replace(encoded)
end

#is_hex?Boolean

Shortcut for hex sanity with regex. Remember kids, always practice safe hex.

Returns:

  • (Boolean)


59
# File 'lib/libmatty/string.rb', line 59

# Returns true when the whole string consists of one or more hex digits
# (case-insensitive), false otherwise.
#
# The pattern is anchored with \A and \z so the entire string must match.
# The ^ and $ anchors match at line boundaries, which let multi-line input
# through as long as any single line was hex.
def is_hex?
    (self =~ /\A[a-f0-9]+\z/i) ? true : false
end

#md5Object



413
414
415
416
# File 'lib/libmatty/string.rb', line 413

# Returns the MD5 digest of the string as lowercase hex.
#
# Uses Digest::MD5.hexdigest directly, so the method depends only on the
# standard library rather than on a separate hex-encoding helper.
def md5
  require 'digest/md5'
  Digest::MD5.hexdigest(self)
end

#method_nameObject

oh, it’s exactly what it sounds like.



232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# File 'lib/libmatty/string.rb', line 232

# Converts a class-style name into an underscored, method-style name.
#
# The first colon of each pair is written as "_" and the second is dropped;
# the first output character is downcased; any later byte for which
# Integer#upper? is true is written as "_" plus its lowercase form.
# For example "FooBar" => "foo_bar" and "Foo::BarBaz" => "foo__bar_baz".
#
# Integer#upper? is expected to be provided elsewhere in the project.
def method_name
    out = ""
    in_scope_op = false
    each_byte do |byte|
        if byte == 58
            # 58 is ":" -- emit "_" for the first colon, swallow the second
            out << "_" unless in_scope_op
            in_scope_op = !in_scope_op
        elsif out.size == 0
            out << byte.chr.downcase
        elsif byte.upper?
            out << "_" << byte.chr.downcase
        else
            out << byte.chr
        end
    end
    out
end

#nextstring(opts = {}) ⇒ Object

The driver function for String#strings below; really, this will run on any Enumerable that contains Fixnums.



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/libmatty/string.rb', line 153

# Finds the next run of printable bytes; the driver for #strings.
#
# opts - Hash of options:
#        :offset  - index at which to start scanning (default 0)
#        :minimum - minimum number of printable bytes in a run (default 7)
#        :unicode - when true, a single zero byte between printable bytes
#                   does not end the run (superficial UTF-16 support); the
#                   zero bytes are included in the returned length but not
#                   counted towards :minimum
#
# Returns [start, length] for the first qualifying run at or after :offset,
# or [false, false] when there is none.
#
# Elements are read with getbyte when the receiver provides it, so the scan
# sees integer byte values on every Ruby version; otherwise it falls back to
# integer indexing with [] (Ruby 1.8 strings, or other containers of
# integers).  Integer#printable? is expected to be provided elsewhere in the
# project.
def nextstring(opts={})
    off = opts[:offset] || 0
    sz = opts[:minimum] || 7
    u = opts[:unicode] || false

    bytewise = respond_to?(:getbyte)
    l = bytewise ? bytesize : size
    byte_at = lambda { |idx| bytewise ? getbyte(idx) : self[idx] }

    i = off
    while i < l
        unless byte_at.call(i).printable?
            i += 1
            next
        end

        start = i
        cnt = 1
        i += 1
        lastu = false
        while i < l
            c = byte_at.call(i)
            if c.printable?
                lastu = false
                cnt += 1
            elsif u && c == 0 && !lastu
                # tolerate one interleaved NUL, but not two in a row
                lastu = true
            else
                break
            end
            i += 1
        end

        return [start, i - start] if cnt >= sz
    end

    return false, false
end

#or(str) ⇒ Object

OR two strings together. Slow. Handles mismatched lengths by zero-extending



290
291
292
293
294
295
296
297
298
299
# File 'lib/libmatty/string.rb', line 290

# Bitwise-ORs this string with +str+, byte by byte.
#
# The shorter operand is zero-extended, so the result is as long as the
# longer of the two.  Returns a new binary String; neither operand is
# modified.
#
# Byte values come from unpack("C*"): String#[] with an integer index only
# returns an integer on Ruby 1.8.
def or(str)
    mine = unpack("C*")
    theirs = str.unpack("C*")
    max = mine.size < theirs.size ? theirs.size : mine.size
    (0...max).map { |i| (mine[i] || 0) | (theirs[i] || 0) }.pack("C*")
end

#pad(size, char = "\x00") ⇒ Object

I love you String#ljust



260
261
262
# File 'lib/libmatty/string.rb', line 260

# Right-pads the string to +size+ characters using +char+ (NUL by default).
# A thin wrapper around String#ljust: strings already at least +size+ long
# come back unpadded.
def pad(size, char="\x00")
    self.ljust(size, char)
end

#pbcopyObject

A pbcopy from irb on any String object. Yes, we're a Mac shop.



409
410
411
# File 'lib/libmatty/string.rb', line 409

# Pipes the string into the external `pbcopy` command.
# Returns whatever the write call returns.
def pbcopy
  IO.popen("pbcopy", "w") do |clipboard|
    clipboard.write(self)
  end
end

#rotate_bytes(k = 0) ⇒ Object

byte rotation cypher (yes it’s been useful)



317
318
319
320
321
322
323
# File 'lib/libmatty/string.rb', line 317

# Byte rotation cipher: adds +k+ to every byte, wrapping modulo 256, and
# returns the result as a new String.  A negative +k+ rotates the other way.
def rotate_bytes(k=0)
    unpack("C*").map { |byte| ((byte + k) % 256).chr }.join
end

#sha1Object



418
419
420
421
# File 'lib/libmatty/string.rb', line 418

# Returns the SHA-1 digest of the string as lowercase hex.
#
# Uses Digest::SHA1.hexdigest directly, so the method depends only on the
# standard library rather than on a separate hex-encoding helper.
def sha1
  require 'digest/sha1'
  Digest::SHA1.hexdigest(self)
end

#sha256Object



423
424
425
426
# File 'lib/libmatty/string.rb', line 423

# Returns the SHA-256 digest of the string as lowercase hex.
#
# The SHA-2 family is provided by the 'digest/sha2' library; there is no
# 'digest/sha256' file in the standard library, so requiring that name
# raises LoadError.
def sha256
  require 'digest/sha2'
  Digest::SHA256.hexdigest(self)
end

#sha512Object



428
429
430
431
# File 'lib/libmatty/string.rb', line 428

# Returns the SHA-512 digest of the string as lowercase hex.
#
# The SHA-2 family is provided by the 'digest/sha2' library; there is no
# 'digest/sha512' file in the standard library, so requiring that name
# raises LoadError.
def sha512
  require 'digest/sha2'
  Digest::SHA512.hexdigest(self)
end

#shift(count = 1) ⇒ Object

Insanely useful shorthand: pop bytes off the front of a string



326
327
328
329
# File 'lib/libmatty/string.rb', line 326

# Removes the first +count+ characters from the string (in place) and
# returns them.
#
# count - number of characters to remove (default 1).
#
# shift(0) removes nothing and returns a new empty string.  It used to return
# the receiver itself, so a caller that kept the result and then shifted
# again saw its "removed" value change underneath it.
def shift(count=1)
    return "" if count == 0
    slice!(0..(count-1))
end

#shift_b16Object



228
# File 'lib/libmatty/string.rb', line 228

# Removes two bytes from the front (via #shift) and decodes them with #to_b16.
def shift_b16
    shift(2).to_b16
end

#shift_b32Object



226
# File 'lib/libmatty/string.rb', line 226

# Removes four bytes from the front (via #shift) and decodes them with #to_b32.
def shift_b32
    shift(4).to_b32
end

#shift_l16Object



227
# File 'lib/libmatty/string.rb', line 227

# Removes two bytes from the front (via #shift) and decodes them with #to_l16.
def shift_l16
    shift(2).to_l16
end

#shift_l32Object



225
# File 'lib/libmatty/string.rb', line 225

# Removes four bytes from the front (via #shift) and decodes them with #to_l32.
def shift_l32
    shift(4).to_l32
end

#shift_tok(rx) ⇒ Object

“foo: bar”.shift_tok /:\s*/ => “foo” # leaving “bar”



339
340
341
342
343
344
345
346
347
348
349
350
# File 'lib/libmatty/string.rb', line 339

# Removes and returns the text before the first match of +rx+, and also
# removes the matched delimiter itself:
#
#   s = "foo: bar"
#   s.shift_tok(/:\s*/)   # => "foo", leaving s == "bar"
#
# rx - a Regexp (its options, such as /i, are honoured), or any other object,
#      whose to_s is treated as a literal delimiter.
#
# When the delimiter does not occur, the whole string is removed and
# returned, leaving the receiver empty.  When the delimiter is at the very
# start, an empty string is returned and only the delimiter is removed.
#
# Offsets come from the MatchData of a single match, so the method does not
# depend on $1 surviving intermediate calls or on how a zero-length shift
# behaves.
def shift_tok(rx)
    pattern = rx.kind_of?(Regexp) ? rx : Regexp.new(Regexp.escape(rx.to_s))
    found = pattern.match(self)
    return slice!(0, size) unless found

    head_len = found.begin(0)
    delim_len = found.end(0) - head_len
    token = slice!(0, head_len)
    slice!(0, delim_len)
    token
end

#shift_u8Object



229
# File 'lib/libmatty/string.rb', line 229

# Removes one byte from the front (via #shift) and decodes it with #to_u8.
def shift_u8
    shift(1).to_u8
end

#starts_with?(x) ⇒ Boolean

Insane that this isn’t in the library by default. 1.9 does so we don’t add it then

Returns:

  • (Boolean)


126
127
128
# File 'lib/libmatty/string.rb', line 126

# Returns true when the string begins with +x+.
#
# The comparison slices by start and length, so an empty +x+ is a prefix of
# every string.  The previous range slice (0..x.size-1) became 0..-1 for an
# empty +x+ and compared the whole receiver against "".
def starts_with?(x)
    self[0, x.size] == x
end

#strings(opts = {}) ⇒ Object

A la Unix strings(1). With a block, yields offset, string length, and contents. Otherwise returns a list. Accepts options:

  • :unicode: superficial but effective Win32 Unicode support, skips NULs
  • :minimum: minimum length of returned strings, a la strings -10



191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/libmatty/string.rb', line 191

# A la Unix strings(1): walks the string with #nextstring and reports every
# run it finds.
#
# opts - options passed through to #nextstring (:offset, :minimum,
#        :unicode).  The caller's hash is copied, not modified.
#
# With a block, yields offset, length and contents for each run and returns
# an empty Array; without one, returns an Array of [offset, length, contents]
# triples.
#
# #nextstring reports absolute offsets, so the scan resumes at
# offset + length.  Adding that sum to the previous resume point, as was done
# before, counted the earlier data twice and skipped later runs.
def strings(opts={})
    ret = []
    opts = opts.dup
    opts[:offset] ||= 0
    loop do
        off, len = nextstring(opts)
        break if not off
        opts[:offset] = off + len
        # offsets are byte offsets; prefer a byte-wise slice where available
        text = respond_to?(:byteslice) ? byteslice(off, len) : self[off, len]
        if block_given?
            yield off, len, text
        else
            ret << [off, len, text]
        end
    end
    ret
end

#time_at(fmt = 0) ⇒ Object

Returns a Time object for a string representing seconds since epoch fmt: format to be passed to String#to_i (see String#to_i)



445
446
447
# File 'lib/libmatty/string.rb', line 445

# Interprets the string as a count of seconds since the epoch and returns
# the corresponding Time.
#
# fmt - base handed to String#to_i (default 0).
def time_at(fmt=0)
  seconds = to_i(fmt)
  Time.at(seconds)
end

#to_b16Object



223
# File 'lib/libmatty/string.rb', line 223

# Decodes the first two bytes as a big-endian unsigned 16-bit integer
# (unpack directive "n").
def to_b16
    unpack("n")[0]
end

#to_b32Object



221
# File 'lib/libmatty/string.rb', line 221

# Decodes the first four bytes as a big-endian unsigned 32-bit integer
# (unpack directive "N").
def to_b32
    unpack("N")[0]
end

#to_l16Object



222
# File 'lib/libmatty/string.rb', line 222

# Decodes the first two bytes as a little-endian unsigned 16-bit integer
# (unpack directive "v").
def to_l16
    unpack("v")[0]
end

#to_l32Object

Convert binary strings back to integers



220
# File 'lib/libmatty/string.rb', line 220

# Decodes the first four bytes as a little-endian unsigned 32-bit integer.
#
# Uses the "V" directive, which is little-endian on every platform.  The "L"
# directive is native-endian and only matches little-endian on
# little-endian hosts.
def to_l32
    unpack("V").first
end

#to_stringioObject

Return a self encapsulated in a StringIO object.



402
403
404
405
# File 'lib/libmatty/string.rb', line 402

# Wraps the string in a StringIO and returns it.
def to_stringio
  require 'stringio' # loaded on first call rather than at file load
  io = StringIO.new(self)
  io
end

#to_u8Object



224
# File 'lib/libmatty/string.rb', line 224

# Returns the first byte of the string as an Integer (0..255), or nil when
# the string is empty.
#
# Uses unpack("C"): String#[] with an integer index only returns an integer
# on Ruby 1.8; later versions return a one-character String.
def to_u8
    unpack("C").first
end

#underscoreObject



331
332
333
334
335
336
# File 'lib/libmatty/string.rb', line 331

# Inserts an underscore at every lowercase-or-digit to uppercase boundary and
# downcases the uppercase letter: "fooBar" => "foo_bar".  Other characters,
# including a leading capital or a run of capitals, are left as they are.
def underscore
    gsub(/([a-z0-9])([A-Z])/) { "#{ $1 }_#{ $2.downcase }" }
end

#xor(str) ⇒ Object

XOR two strings. wrapping around if str is shorter than self.



302
303
304
305
306
307
308
# File 'lib/libmatty/string.rb', line 302

# XORs this string with the key +str+, byte by byte, repeating the key when
# it is shorter than the string.  Returns a new binary String the same
# length as the receiver.
#
# Raises ZeroDivisionError when +str+ is empty and the receiver is not.
#
# Byte values come from unpack("C*"): String#[] with an integer index only
# returns an integer on Ruby 1.8.
def xor(str)
    key = str.unpack("C*")
    r = []
    unpack("C*").each_with_index do |byte, i|
        r << (byte ^ key[i % key.size])
    end
    r.pack("C*")
end

#xor!(str) ⇒ Object



310
311
312
313
314
# File 'lib/libmatty/string.rb', line 310

# In-place XOR: replaces the receiver's contents with the result of XORing
# each of its bytes with the key +str+, repeating the key when it is shorter.
#
# Returns the number of bytes processed (an Integer), which is what the
# original times-loop evaluated to.
# Raises ZeroDivisionError when +str+ is empty and the receiver is not; the
# receiver is left untouched in that case.
#
# Byte values come from unpack("C*"): String#[] with an integer index only
# returns an integer on Ruby 1.8.
def xor!(str)
    key = str.unpack("C*")
    data = unpack("C*")
    data.each_index do |i|
        data[i] ^= key[i % key.size]
    end
    replace(data.pack("C*"))
    data.size
end