Class: String

Inherits:
Object show all
Defined in:
lib/rbkb/extends.rb,
lib/rbkb/plug/blit.rb

Overview

of module Plug

Direct Known Subclasses

Rbkb::Http::Body

Instance Method Summary collapse

Instance Method Details

#^(x) ⇒ Object

convert bytes to number then xor against another byte-string or number



204
205
206
207
# File 'lib/rbkb/extends.rb', line 204

# XOR this string, read as a number via dat_to_num, against +x+.
#
# +x+ is used directly when it is Numeric; anything else is first converted
# with its own dat_to_num. The result is returned as a number, not as bytes.
def ^(x)
  rhs = x.is_a?(Numeric) ? x : x.dat_to_num
  dat_to_num ^ rhs
end

#b64(len = nil) ⇒ Object

Base64 encode



57
58
59
60
61
62
63
64
# File 'lib/rbkb/extends.rb', line 57

# Base64-encode the string.
#
# len:: optional line width. When it is Numeric, the encoded text is wrapped
#       into lines of at most +len+ characters, each terminated by "\n".
#       Any other value (including nil) yields a single unbroken line.
def b64(len=nil)
  encoded = [self].pack("m").delete("\n")
  return encoded unless len && Numeric === len

  encoded.scan(/.{1,#{len}}/).join("\n") + "\n"
end

#bgrep(find, align = nil) ⇒ Object

Binary grep

Parameters:

find  : A Regexp or string to search for in self
align : nil | numeric alignment (matches only made if aligned)


324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
# File 'lib/rbkb/extends.rb', line 324

# Binary grep: find every occurrence of +find+ in self.
#
# find::  a Regexp or a String to search for.
# align:: nil, or an Integer alignment; a match is only reported when its
#         start offset needs no padding to reach that alignment (as decided
#         by Integer#pad, which is defined elsewhere in this library).
#
# Returns an array of [start_offset, end_offset, matched_string] hits.
# When a block is given, each hit is yielded (as one array) and is only kept
# if the block returns a true value.
#
# Raises RuntimeError if +align+ is given but is not an Integer >= 0.
def bgrep(find, align=nil)
  if align and (not align.is_a?(Integer) or align < 0)
    raise "alignment must be a integer >= 0"
  end

  dat = self

  # Both searchers return [offset, end_offset, match] relative to the start
  # of the buffer they are handed, or nil when nothing matches.
  search =
    if find.kind_of? Regexp
      lambda do |rx, buf|
        m = rx.match(buf)
        m ? [m.begin(0), m.end(0), m[0]] : nil
      end
    else
      lambda do |needle, buf|
        at = buf.index(needle)
        at ? [at, at + needle.size, needle] : nil
      end
    end

  ret = []
  pos = 0
  # The pos guard keeps us from slicing past the end of the data (which
  # yields nil) after an alignment skip or an empty match at the very end.
  while pos <= dat.size and (res = search.call(find, dat[pos..-1]))
    off, endoff, match = res
    if align and (pad = (pos+off).pad(align)) != 0
      pos += pad
    else
      hit = [pos+off, pos+endoff, match]
      if not block_given? or yield([pos+off, pos+endoff, match])
        ret << hit
      end
      # A zero-length match would otherwise leave pos unchanged (or re-match
      # at the same spot) forever; step one position past it instead.
      pos += (endoff > off ? endoff : endoff + 1)
    end
  end
  ret
end

#blit(idx = 0) ⇒ Object


A Blit sender convenience method for strings



218
219
220
221
# File 'lib/rbkb/plug/blit.rb', line 218

# Convenience wrapper that sends this string through Plug::Blit.
#
# idx:: passed straight through to Plug::Blit.blit_send (defaults to 0).
#
# Raises RuntimeError unless Plug::Blit reports that it has been initialized.
def blit(idx=0)
  unless Plug::Blit.initialized?
    raise "blit must be initialized with blit_init"
  end

  Plug::Blit.blit_send(self, idx)
end

#camelizeObject

Converts a ‘_’ delimited string to CamelCase like ‘foo_class’ into ‘FooClass’. See also: camelize_meth, decamelize



506
507
508
# File 'lib/rbkb/extends.rb', line 506

# Turn a '_' delimited name into CamelCase ('foo_class' => 'FooClass').
#
# A lowercase letter at the start of a line, or one following '_', is
# upcased; the '_' itself is dropped.
def camelize
  gsub(/(^|_)([a-z])/) { Regexp.last_match(2).upcase }
end

#camelize_methObject

Converts a ‘_’ delimited string to method style camelCase like ‘foo_method’ into ‘fooMethod’. See also: camelize, decamelize



513
514
515
# File 'lib/rbkb/extends.rb', line 513

# Turn a '_' delimited name into method-style camelCase
# ('foo_method' => 'fooMethod'). Only a lowercase letter that directly
# follows '_' is upcased; the first character is left as it is.
def camelize_meth
  gsub(/_([a-z])/) { Regexp.last_match(1).upcase }
end

#class_nameObject

convert a string to its idiomatic ruby class name



532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
# File 'lib/rbkb/extends.rb', line 532

# Build a Ruby-style class name from this string, byte by byte.
#
# * The first byte, and the byte after a single '_', is upcased.
# * A single '_' is dropped.
# * A '_' seen while an upcase is already pending (i.e. a second consecutive
#   '_', or a leading '_') is emitted as "::".
#
#   "foo_bar"  => "FooBar"
#   "foo__bar" => "Foo::Bar"
def class_name
  out = ""
  upcase_next = true
  each_byte do |byte|
    if byte != 95 # 95 is '_'
      ch = byte.chr
      out << (upcase_next ? ch.upcase : ch.to_s)
      upcase_next = false
    elsif upcase_next
      out << "::"
    else
      upcase_next = true
    end
  end
  out
end

#const_lookup(ns = Object) ⇒ Object

Returns a reference to actual constant for a given name in namespace can be used to lookup classes from enums and such



555
556
557
558
559
# File 'lib/rbkb/extends.rb', line 555

# Look up the constant in +ns+ whose name equals this string's class_name.
#
# ns:: the Module/Class to search (defaults to Object).
#
# Returns the constant's value, or nil when +ns+ has no such constant.
def const_lookup(ns=Object)
  wanted = class_name
  # Module#constants yields Symbols on Ruby >= 1.9 (Strings on 1.8), so the
  # names are compared as strings to work on both.
  found = ns.constants.find { |c| c.to_s == wanted }
  ns.const_get(found) if found
end

#crc32Object

returns CRC32 checksum for the string object



475
476
477
478
479
480
481
482
483
484
485
486
487
# File 'lib/rbkb/extends.rb', line 475

# CRC32 checksum of the string, computed by Zlib.
#
# For reference, a slower pure-ruby equivalent (found on some forum):
#
#   r = 0xFFFFFFFF
#   self.each_byte do |b|
#     r ^= b
#     8.times do
#       r = (r>>1) ^ (0xEDB88320 * (r & 1))
#     end
#   end
#   r ^ 0xFFFFFFFF
def crc32
  Zlib.crc32(self)
end

#cstring(off = 0) ⇒ Object

Returns a single null-terminated ascii string from beginning of self. This will return the entire string if no null is encountered.

Parameters:

off = specify an optional beginning offset


470
471
472
# File 'lib/rbkb/extends.rb', line 470

# Return the null-terminated string that starts at +off+.
#
# off:: optional starting offset (default 0); a negative value counts from
#       the end of the string.
#
# Returns the characters from +off+ up to (not including) the first "\x00"
# found at or after +off+, or everything from +off+ to the end when there is
# no null. Returns nil when +off+ lies outside the string.
def cstring(off=0)
  start = off < 0 ? off + size : off
  return nil if start < 0 or start > size

  # Search from the starting offset, and convert the absolute position of
  # the terminator into a length relative to that offset.
  stop = index("\x00", start) || size
  self[start, stop - start]
end

#d64Object

Base64 decode



67
# File 'lib/rbkb/extends.rb', line 67

# Base64-decode the string and return the decoded data.
def d64
  unpack("m").first
end

#dat_to_num(order = :big) ⇒ Object Also known as: lazy_to_n, lazy_to_num, dat_to_n

A “generalized” lazy bytestring -> numeric converter.

Parameters:

order => :big or :little endian (default is :big)

Bonus: should work seamlessly with really large strings.

>> ("\xFF"*10).dat_to_num
=> 1208925819614629174706175
>> ("\xFF"*20).dat_to_num
=> 1461501637330902918203684832716283019655932542975


162
163
164
165
166
167
168
# File 'lib/rbkb/extends.rb', line 162

# A "generalized" lazy bytestring -> numeric converter.
#
# order:: :little to treat the first byte as least significant; any other
#         value (default :big) treats the first byte as most significant.
#
# Returns an Integer built from all bytes of the string (0 for an empty
# string). The receiver is never modified.
def dat_to_num(order=:big)
  octets = bytes.to_a
  # Reverse a private copy of the bytes; reversing the receiver in place
  # would corrupt the caller's data and fail on frozen strings.
  octets.reverse! if order == :little
  octets.inject(0) { |acc, b| (acc << 8) | b }
end

#decamelizeObject

Converts a CamelCase or camelCase string into ‘_’ delimited form like ‘FooBar’ or ‘fooBar’ into ‘foo_bar’.

Note: This method only handles camel humps. Strings with consecutive uppercase chars like ‘FooBAR’ will be converted to ‘foo_bar’

See also: camelize, camelize_meth



525
526
527
528
529
# File 'lib/rbkb/extends.rb', line 525

# Convert CamelCase or camelCase into '_' delimited lowercase
# ('FooBar' / 'fooBar' => 'foo_bar').
#
# An underscore is inserted only between a lowercase letter and the
# uppercase letter that follows it, so runs of capitals are not split
# ('FooBAR' => 'foo_bar').
def decamelize
  underscored = gsub(/(^|[a-z])([A-Z])/) do
    head = Regexp.last_match(1)
    hump = Regexp.last_match(2)
    head.empty? ? hump : "#{head}_#{hump}"
  end
  underscored.downcase
end

#dehexdump(opt = {}) ⇒ Object Also known as: dedump, undump, unhexdump

Converts a hexdump back to binary - takes the same options as hexdump(). Fairly flexible. Should work both with ‘xxd’ and ‘hexdump -C’ style dumps.



286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
# File 'lib/rbkb/extends.rb', line 286

# Convert a hexdump back to binary.
#
# Options:
# :out::        object to write the decoded bytes to (default: a new StringIO)
# :len::        maximum number of hex byte pairs read per line (default 16)
# :start_addr:: offset expected on the first line (default 0)
#
# Every line must begin with a hex offset equal to the number of bytes
# decoded so far (plus :start_addr); the hex pairs that follow are decoded
# with unhexify.
#
# Returns the decoded string when the output object is a StringIO,
# otherwise nil.
#
# Raises RuntimeError on the first line that does not parse or whose offset
# is not the expected one.
def dehexdump(opt={})
  out = opt[:out] || StringIO.new
  len = (opt[:len] and opt[:len] > 0) ? opt[:len] : 16

  hcrx = /[A-Fa-f0-9]/
  dumprx = /^(#{hcrx}+):?\s*((?:#{hcrx}{2}\s*){0,#{len}})/
  off = opt[:start_addr] || 0

  # iterate each line of hexdump
  split(/\r?\n/).each_with_index do |hl, idx|
    # match and check offset
    m = dumprx.match(hl)
    unless m and m[1].hex == off
      # Report the offending line rather than the entire input.
      raise "Hexdump parse error on line #{idx + 1}: #{hl.inspect}"
    end

    # take the data chunk and unhexify it
    off += out.write(m[2].unhexify)
  end

  out.string if out.class == StringIO
end

#entropyObject

calculates entropy in string

TQBF’s description: “I also added a chi-squared test to quickly figure out entropy of a string, in ”bits of randomness per byte“. This is useful, so…”



181
182
183
184
185
186
187
188
189
190
# File 'lib/rbkb/extends.rb', line 181

# Calculate the entropy of the string in bits per byte.
#
# Each distinct byte value contributes -p * log2(p), where p is that value's
# share of all bytes in the string.
#
# Returns 0 for an empty string, otherwise a Float between 0.0 and 8.0.
def entropy
  total = bytesize.to_f

  # One pass to build a histogram of byte values. This works on raw bytes,
  # so it is independent of the string's encoding.
  counts = Array.new(256, 0)
  each_byte { |b| counts[b] += 1 }

  e = 0
  counts.each do |n|
    next if n.zero?
    x = n / total
    e += -x * Math.log2(x)
  end
  e
end

#hex_to_num(order = :big) ⇒ Object

Converts a hex value to numeric.

Parameters:

order => :big or :little endian (default is :big)


135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/rbkb/extends.rb', line 135

# Convert a string of hex digits to a number.
#
# order:: :big (default) reads the digits as written; :little reverses the
#         order of the two-digit pairs before converting.
#
# Raises RuntimeError when the string fails ishex?, or when +order+ is
# neither :big nor :little.
def hex_to_num(order=:big)
  raise "invalid hex value: '#{self.inspect}'" unless ishex?

  digits =
    case order
    when :little
      scan(/.{2}/).reverse.join
    when :big
      self
    else
      raise "Invalid byte order #{order.inspect}"
    end

  digits.hex
end

#hexdump(opt = {}) ⇒ Object

Returns or prints a hexdump in the style of ‘hexdump -C’

:len => optionally specify a length other than 16 for a wider or thinner dump. If length is an odd number, it will be rounded up.

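:out => optionally specify an alternate IO object for output. By default, hexdump writes to a new StringIO object and returns its contents as a string. Pass your own StringIO object and its contents are returned the same way; pass any other IO object (e.g. STDERR) and the dump is written to it and nil is returned.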

Example:

Here’s the default behavior done explicitly:

>> xxd = dat.hexdump(:len => 16, :out => StringIO.new)
=> <a string containing hexdump>

Here’s how to change it to STDERR

>> xxd = dat.hexdump(:len => 16, :out => STDERR)
<prints hexdump on STDERR>
-> nil # return value is nil!


250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
# File 'lib/rbkb/extends.rb', line 250

# Produce a hexdump of the string's bytes in the style of 'hexdump -C'.
#
# Options:
# :len::        bytes per line (default 16); an odd value is rounded up
# :out::        object to write to (default: a new StringIO)
# :start_addr:: offset shown for the first byte (default 0)
# :start_len::  width to which the offset column is zero-padded (default 8)
#
# Each line shows the offset, the bytes in hex (with an extra gap after the
# first half of the line), and the bytes as text between '|' characters,
# where anything outside 0x20..0x7e is shown as '.'. A final line holds the
# end offset.
#
# Returns the dump as a string when the output object is a StringIO,
# otherwise nil.
def hexdump(opt={})
  out = opt[:out] || StringIO.new
  len = (opt[:len] and opt[:len] > 0) ? opt[:len] + (opt[:len] % 2) : 16

  off = opt[:start_addr] || 0
  offlen = opt[:start_len] || 8

  hlen = len / 2

  # Slice the raw bytes rather than matching characters, so the dump works
  # for any encoding (including invalid or multibyte data) and offsets
  # always count bytes.
  bytes.each_slice(len) do |chunk|
    out.write(off.to_s(16).rjust(offlen, "0") + '  ')

    chunk.each_with_index do |c, idx|
      out.write(c.to_s(16).rjust(2, "0") + " ")
      out.write(' ') if idx + 1 == hlen
    end

    count = chunk.size
    out.write("   " * (len - count)) # pad a short final line
    out.write(" ") if count < hlen   # the mid-line gap was never written

    ascii = chunk.map { |c| (c >= 0x20 && c <= 0x7e) ? c.chr : '.' }.join
    out.write(" |" + ascii + "|\n")
    off += count
  end

  out.write(off.to_s(16).rjust(offlen, '0') + "\n")

  out.string if out.class == StringIO
end

#hexify(opts = {}) ⇒ Object

Convert a string to ASCII hex string. Supports a few options for format:

:delim - delimiter between each hex byte
:prefix - prefix before each hex byte
:suffix - suffix after each hex byte


94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/rbkb/extends.rb', line 94

# Convert the string to ASCII hex, one byte at a time.
#
# Options:
# :delim::  string placed between the per-byte pieces (passed to Array#join)
# :prefix:: string placed before each hex byte (default "")
# :suffix:: string placed after each hex byte (default "")
# :rx::     optional Regexp; when given, only bytes whose character matches
#           it are hex-encoded and all other bytes pass through unchanged
#
# Hex digits are taken from Rbkb::HEXCHARS.
#
# Raises RuntimeError when :rx is given but is not a Regexp.
def hexify(opts={})
  delim  = opts[:delim]
  prefix = opts[:prefix] || ""
  suffix = opts[:suffix] || ""
  rx     = opts[:rx]

  if rx and not rx.kind_of? Regexp
    raise "rx must be a regular expression for a character class"
  end

  digits = Rbkb::HEXCHARS

  pieces = each_byte.map do |byte|
    if !rx or rx.match(byte.chr)
      prefix + (digits[(byte >> 4)] + digits[(byte & 0xf )]) + suffix
    else
      byte.chr
    end
  end

  pieces.join(delim)
end

#ishex?Boolean

shortcut for hex sanity with regex

Returns:

  • (Boolean)


32
# File 'lib/rbkb/extends.rb', line 32

# Shortcut for a hex sanity check: true when the whole string consists of
# one or more hex digits (either case), false otherwise.
#
# The pattern is anchored to the whole string rather than to individual
# lines, so multi-line input with a single hex-only line is rejected. One
# trailing newline is still tolerated, as it was before.
def ishex?
  !(self =~ /\A[a-f0-9]+\Z/i).nil?
end

#lalign(a, p = ' ') ⇒ Object

left-align to ‘a’ alignment padded with ‘p’



79
80
81
82
83
84
85
# File 'lib/rbkb/extends.rb', line 79

# Left-align the string to alignment +a+: the text stays on the left and is
# padded on the right with +p+.
#
# a:: alignment, handed to Integer#pad to work out how many pad characters
#     are needed for the current length
# p:: pad string (a nil value falls back to a single space)
def lalign(a, p=' ')
  filler = p || ' '
  width = length
  ljust(width.pad(a) + width, filler)
end

#pipe_magick(arg = "") ⇒ Object

This attempts to identify a blob of data using ‘file(1)’ via popen3 (using popen3 because IO.popen blows). Tried doing this with a fmagic ruby extension to libmagic, but it was a whole lot slower.



493
494
495
496
497
498
499
500
501
# File 'lib/rbkb/extends.rb', line 493

# Identify this blob of data by piping it to 'file(1)' through Open3.popen3.
#
# arg:: extra command-line text inserted between 'file' and '-'
#       (default ""). It is interpolated into the command string as-is.
#
# Returns what the command printed on standard output, with a leading
# "/dev/stdin: " label removed when present.
def pipe_magick(arg="")
  ret = ""
  Open3.popen3("file #{arg} -") do |stdin, stdout, _stderr|
    stdin.write(self)
    stdin.close

    ret = stdout.read
    stdout.close

    ret.sub!(/^\/dev\/stdin: /, "")
  end
  ret
end

#ralign(a, p = ' ') ⇒ Object

right-align to ‘a’ alignment padded with ‘p’



70
71
72
73
74
75
76
# File 'lib/rbkb/extends.rb', line 70

# Right-align the string to alignment +a+: the text stays on the right and
# is padded on the left with +p+.
#
# a:: alignment, handed to Integer#pad to work out how many pad characters
#     are needed for the current length
# p:: pad string (a nil value falls back to a single space)
def ralign(a, p=' ')
  filler = p || ' '
  width = length
  rjust(width.pad(a) + width, filler)
end

#randomizeObject

String randomizer



222
# File 'lib/rbkb/extends.rb', line 222

# String randomizer: returns a new string containing this string's
# characters in random order. The receiver is not modified.
def randomize
  # Array#join is required here: on Ruby >= 1.9 Array#to_s returns the
  # inspect form (e.g. '["a", "b"]') instead of the concatenated characters.
  split('').shuffle.join
end

#randomize!Object

In-place string randomizer



225
# File 'lib/rbkb/extends.rb', line 225

# In-place string randomizer: replaces this string's contents with the
# result of randomize and returns the receiver.
def randomize!
  replace(randomize)
end

#rotate_bytes(k = 0) ⇒ Object

Byte rotation as found in lame ciphers. This was cribbed from Timur Duehr with only a minor change.



211
212
213
214
215
216
217
218
219
# File 'lib/rbkb/extends.rb', line 211

# Byte rotation as found in lame ciphers: add +k+ to every byte, wrapping
# around at 256.
#
# k:: amount added to each byte (default 0); may be negative.
#
# Returns a new string with the receiver's encoding; the receiver is not
# modified.
def rotate_bytes(k=0)
  # Wrap modulo 256 so every result is a valid byte value. A larger modulus
  # lets values above 255 through, which Integer#chr rejects.
  rotated = bytes.map { |b| (b + k) % 256 }
  rotated.pack("C*").force_encoding(encoding)
end

#starts_with?(dat) ⇒ Boolean

Does string “start with” dat? No clue whether/when this is faster than a regex, but it is easier to type.

Returns:

  • (Boolean)


459
460
461
# File 'lib/rbkb/extends.rb', line 459

# Does the string "start with" +dat+?
#
# Compares the first dat.size characters of the string against +dat+.
def starts_with?(dat)
  prefix = slice(0, dat.size)
  prefix == dat
end

#strings(opts = {}) ⇒ Object

A ‘strings’ method a-la unix strings utility. Finds printable strings in a binary blob. Supports ASCII and little endian unicode (though only for ASCII printable character.)

Parameters and options:

* Use the :minimum parameter to specify minimum number of characters
  to match. (default = 6)

* Use the :encoding parameter as one of :ascii, :unicode, or :both
  (default = :both)

* The 'strings' method uses Regexp under the hood. Therefore
  you can pass a character class for "valid characters" with :valid
  (default = /[\r\n [:print:]]/)

* Supports an optional block, which will be passed
  |start_offset, end_offset, type, string| for each match.
  The block's boolean return value also determines whether the match 
  passes or fails (true or false/nil) and gets returned by the function.

Return Value:

Returns an array consisting of matches with the following elements:

 [[start_offset, end_offset, string_type, string], ...]

* string_type will be one of :ascii or :unicode
* end_offset will include the terminating null character
* end_offset will include all null bytes in unicode strings (including
  both terminating nulls)

 If strings are null terminated, the trailing null *IS* included
 in the end_offset. Unicode matches will also include null bytes.

Todos?

- better unicode support (i.e. not using half-assed unicode)
- support other encodings such as all those the binutils strings does?


402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
# File 'lib/rbkb/extends.rb', line 402

# A 'strings' method a-la the unix strings utility: finds printable strings
# in a binary blob.
#
# Options:
# :minimum::  minimum number of characters for a match (default 6)
# :encoding:: :ascii, :unicode, or :both (default :both)
# :valid::    Regexp character class of "valid characters"
#             (default /[\r\n [:print:]]/)
# :align::    optional alignment; matches whose start offset needs padding
#             to reach it (per Integer#pad, defined elsewhere in this
#             library) are skipped
#
# When a block is given it is called with
# (start_offset, end_offset, type, string) for each match, and the match is
# kept only if the block returns a true value.
#
# Returns [[start_offset, end_offset, string_type, string], ...] where
# string_type is :ascii or :unicode.
#
# Raises RuntimeError for a non-numeric or non-positive :minimum, or an
# unknown :encoding. The +opts+ hash is not modified.
def strings(opts={})
  # Read the option into a local; writing the default back into opts would
  # alter (or fail on) the caller's hash.
  encoding = opts[:encoding] || :both
  prx = (opts[:valid] || /[\r\n [:print:]]/)
  min = (opts[:minimum] || 6)
  align = opts[:align]

  raise "Minimum must be numeric and > 0" unless min.kind_of? Numeric and min > 0

  arx = /(#{prx}{#{min}}?#{prx}*\x00?)/
  urx = /((?:#{prx}\x00){#{min}}(?:#{prx}\x00)*(?:\x00\x00)?)/

  encoding = encoding.to_sym
  rx = case encoding
       when :ascii
         arx
       when :unicode
         urx
       when :both
         Regexp.union( arx, urx )
       else
         raise "Encoding must be :unicode, :ascii, or :both"
       end

  off = 0
  ret = []

  while mtch = rx.match(self[off..-1])
    # calculate relative offsets
    rel_off = mtch.offset(0)
    startoff = off + rel_off[0]
    endoff   = off + rel_off[1]
    off += rel_off[1]

    if align and (pad=startoff.pad(align)) != 0
      off = startoff + pad
      next
    end

    # Capture group numbers only tell the two kinds apart in the combined
    # (:both) pattern; when a single pattern is used its only capture is
    # group 1, so the type has to come from the selected encoding.
    stype = case encoding
            when :ascii   then :ascii
            when :unicode then :unicode
            else mtch[1] ? :ascii : :unicode
            end

    mret = [startoff, endoff, stype, mtch[0] ]

    # yield to a block for additional criteria
    next if block_given? and not yield( *mret )

    ret << mret
  end

  ret
end

#to_stringioObject

Return a self encapsulated in a StringIO object. This is handy.



562
563
564
# File 'lib/rbkb/extends.rb', line 562

# Wrap this string in a new StringIO object and return it.
def to_stringio
  io = StringIO.new(self)
  io
end

#unhexify(d = /\s*/) ⇒ Object

Convert ASCII hex string to raw.

Parameters:

d = optional 'delimiter' between hex bytes (zero+ spaces by default)


125
126
127
# File 'lib/rbkb/extends.rb', line 125

# Convert an ASCII hex string to raw data.
#
# d:: optional 'delimiter' allowed after each hex byte (zero or more
#     whitespace characters by default); it is interpolated into the
#     matching pattern.
#
# Leading and trailing whitespace is stripped first. Each run of one or two
# hex digits (plus an optional delimiter) is then replaced by the single
# character with that value.
def unhexify(d=/\s*/)
  trimmed = strip
  trimmed.gsub(/([A-Fa-f0-9]{1,2})#{d}?/) { Regexp.last_match(1).hex.chr }
end

#urldec(opts = {}) ⇒ Object

Undo percent-hexified url encoding data



50
51
52
53
54
# File 'lib/rbkb/extends.rb', line 50

# Undo percent-hexified url encoding.
#
# Options:
# :noplus:: when true, '+' characters are left alone; otherwise every '+'
#           is turned into a space before percent sequences are decoded
#
# Returns a new string; the receiver is not modified.
def urldec(opts={})
  # Work on a copy: an in-place substitution here would change the
  # caller's string and fail on frozen strings.
  s = opts[:noplus] ? self : tr('+', ' ')
  s.gsub(/%([A-Fa-f0-9]{2})/) { $1.hex.chr }
end

#urlenc(opts = {}) ⇒ Object

Encode into percent-hexify url encoding format



35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/rbkb/extends.rb', line 35

# Encode into percent-hexified url encoding format.
#
# Options:
# :plus:: when true, spaces are encoded as '+' instead of a percent escape
# :rx::   Regexp character class selecting what gets encoded
#         (default /[^A-Za-z0-9_\.~-]/)
#
# Every byte of each matched character becomes '%' followed by two hex
# digits taken from Rbkb::HEXCHARS.
#
# Raises RuntimeError when :rx is given but is not a Regexp. The +opts+ hash
# is not modified.
def urlenc(opts={})
  plus = opts[:plus]
  rx = opts[:rx] || /[^A-Za-z0-9_\.~-]/
  unless rx.kind_of? Regexp
    raise "rx must be a regular expression for a character class"
  end
  hx = Rbkb::HEXCHARS

  gsub(rx) do |match|
    # Encode byte by byte: indexing a String yields a String (not a byte
    # value) on Ruby >= 1.9, and a single matched character may be made up
    # of several bytes.
    match.bytes.map do |b|
      (plus and b == 32) ? '+' : "%" + (hx[(b >> 4)] + hx[(b & 0xf)])
    end.join
  end
end

#xor(k) ⇒ Object

xor against a key. key will be repeated or truncated to self.size.



193
194
195
196
197
198
199
200
201
# File 'lib/rbkb/extends.rb', line 193

# XOR the string against a key. The key is repeated or truncated to
# self.size.
#
# k:: the key, either a String (its bytes are used) or an Array of byte
#     values.
#
# Returns a new binary string of the same byte length as the receiver.
#
# Raises ArgumentError when the key is empty and there is data to xor.
def xor(k)
  # Always work with byte values: indexing a String key yields one-character
  # Strings on Ruby >= 1.9, which cannot be xor-ed with an Integer.
  key = k.respond_to?(:bytes) ? k.bytes.to_a : k.to_a
  raise ArgumentError, "xor key must not be empty" if key.empty? and not empty?

  klen = key.size
  bytes.each_with_index.map { |b, i| b ^ key[i % klen] }.pack("C*")
end