Class: String

Inherits:
Object show all
Defined in:
lib/rbkb/extends.rb,
lib/rbkb/plug/blit.rb

Overview

of module Plug

Direct Known Subclasses

Rbkb::Http::Body

Instance Method Summary collapse

Instance Method Details

#^(x) ⇒ Object

convert bytes to number then xor against another byte-string or number



209
210
211
212
# File 'lib/rbkb/extends.rb', line 209

# XORs the numeric value of this byte-string with +x+.
#
# x:: a Numeric (used as-is) or any object responding to +dat_to_num+
#     (e.g. another byte-string), which is converted to a number first.
#
# Returns the result as a number; it is not converted back to bytes.
def ^(x)
  operand = x.is_a?(Numeric) ? x : x.dat_to_num
  dat_to_num ^ operand
end

#b64(len = nil) ⇒ Object

Base64 encode



62
63
64
65
66
67
68
69
# File 'lib/rbkb/extends.rb', line 62

# Base64-encodes the string.
#
# len:: optional Numeric line width. When given, the encoded text is broken
#       into lines of at most +len+ characters, each terminated by "\n".
#       Any non-Numeric (or nil/false) value yields a single unbroken line.
def b64(len=nil)
  # pack("m") inserts its own line breaks; strip them so we control wrapping
  encoded = [self].pack("m").delete("\n")
  return encoded unless len and Numeric === len

  encoded.scan(/.{1,#{len}}/).join("\n") + "\n"
end

#bgrep(find, align = nil) ⇒ Object

Binary grep

Parameters:

find  : A Regexp or string to search for in self
align : nil | numeric alignment (matches only made if aligned)


329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
# File 'lib/rbkb/extends.rb', line 329

# Binary grep: finds every occurrence of +find+ in self.
#
# find::  a Regexp, or a string (anything accepted by String#index).
# align:: nil, or an Integer alignment; when given, only matches whose
#         start offset is aligned are reported. Alignment padding is
#         computed with Integer#pad, which is defined elsewhere in this
#         library (not visible here).
#
# If a block is given it is called with [start, end, match] for each
# candidate and the candidate is kept only when the block returns truthy.
#
# Returns an array of [start_offset, end_offset, matched_string].
# Raises RuntimeError when +align+ is not a non-negative Integer.
def bgrep(find, align=nil)
  if align and (not align.is_a?(Integer) or align < 0)
    raise "alignment must be a integer >= 0"
  end

  # Both searchers return [rel_start, rel_end, matched] or nil.
  search =
    if find.kind_of? Regexp
      lambda do |buf|
        m = find.match(buf)
        m ? [*m.offset(0), m[0]] : nil
      end
    else
      lambda do |buf|
        idx = buf.index(find)
        idx ? [idx, idx + find.size, find] : nil
      end
    end

  ret = []
  pos = 0
  # An alignment skip can push pos past the end; stop instead of slicing nil.
  while pos <= size and (res = search.call(self[pos..-1]))
    off, endoff, match = res
    if align and (pad = (pos + off).pad(align)) != 0
      pos += pad
    else
      hit = [pos + off, pos + endoff, match]
      ret << hit if not block_given? or yield(hit.dup)
      # A match ending at relative offset 0 is empty and would never advance
      # the scan; step forward one position so the loop always terminates.
      pos += (endoff > 0 ? endoff : 1)
    end
  end
  ret
end

#blit(idx = 0) ⇒ Object


A Blit sender convenience method for strings



218
219
220
221
# File 'lib/rbkb/plug/blit.rb', line 218

# Convenience sender: passes this string to Plug::Blit.blit_send.
#
# idx:: forwarded unchanged as the second argument of blit_send (default 0).
#
# Raises RuntimeError unless Plug::Blit.initialized? reports true.
def blit(idx=0)
  unless Plug::Blit.initialized?
    raise "blit must be initialized with blit_init"
  end

  Plug::Blit.blit_send(self, idx)
end

#camelizeObject

Converts a ‘_’ delimited string to CamelCase like ‘foo_class’ into ‘FooClass’. See also: camelize_meth, decamelize



511
512
513
# File 'lib/rbkb/extends.rb', line 511

# Converts a '_' delimited string to CamelCase ('foo_class' => 'FooClass').
#
# Every lowercase letter that starts a line or follows an underscore is
# upcased, and the underscore (if any) is dropped.
def camelize
  gsub(/(^|_)([a-z])/) { Regexp.last_match(2).upcase }
end

#camelize_methObject

Converts a ‘_’ delimited string to method style camelCase like ‘foo_method’ into ‘fooMethod’. See also: camelize, decamelize



518
519
520
# File 'lib/rbkb/extends.rb', line 518

# Converts a '_' delimited string to method-style camelCase
# ('foo_method' => 'fooMethod'). Only lowercase letters that directly follow
# an underscore are upcased; the first character is left as it is.
def camelize_meth
  gsub(/_([a-z])/) { Regexp.last_match(1).upcase }
end

#class_nameObject

convert a string to its idiomatic ruby class name



537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
# File 'lib/rbkb/extends.rb', line 537

# Converts a string to an idiomatic Ruby class name.
#
# The string is walked byte by byte:
# * the first character and any character after a single '_' is upcased,
# * a single '_' is dropped,
# * a '_' seen while an upcase is already pending (i.e. a leading '_' or the
#   second of two consecutive '_') emits a '::' namespace separator.
#
# e.g. 'foo_bar' => 'FooBar', 'foo__bar' => 'Foo::Bar'
def class_name
  capitalize_next = true
  each_byte.each_with_object("") do |byte, out|
    if byte != 95 # 95 == '_'
      ch = byte.chr
      out << (capitalize_next ? ch.upcase : ch.to_s)
      capitalize_next = false
    elsif capitalize_next
      out << "::"
    else
      capitalize_next = true
    end
  end
end

#const_lookup(ns = Object) ⇒ Object

Returns a reference to actual constant for a given name in namespace can be used to lookup classes from enums and such



560
561
562
563
564
# File 'lib/rbkb/extends.rb', line 560

# Looks up the constant in +ns+ whose name equals this string's class_name
# form and returns the constant's value, or nil when +ns+ has no such
# constant. Handy for resolving classes from enum-like strings.
#
# ns:: the namespace (Module/Class) to search; defaults to Object.
#
# Constant names are compared as strings because Module#constants returns
# Symbols on Ruby >= 1.9 (Strings on 1.8); comparing them directly against
# a String never matched on modern Ruby.
def const_lookup(ns=Object)
  wanted = class_name
  found = ns.constants.find { |n| n.to_s == wanted }
  ns.const_get(found) if found
end

#crc32Object

returns CRC32 checksum for the string object



480
481
482
483
484
485
486
487
488
489
490
491
492
# File 'lib/rbkb/extends.rb', line 480

# Returns the CRC32 checksum of the string, computed by Zlib.crc32.
#
# For reference, a slower pure-ruby equivalent (found on some forum):
#
#   r = 0xFFFFFFFF
#   self.each_byte do |b|
#     r ^= b
#     8.times do
#       r = (r>>1) ^ (0xEDB88320 * (r & 1))
#     end
#   end
#   r ^ 0xFFFFFFFF
def crc32
  Zlib.crc32(self)
end

#cstring(off = 0) ⇒ Object

Returns a single null-terminated ascii string from beginning of self. This will return the entire string if no null is encountered.

Parameters:

off = specify an optional beginning offset


475
476
477
# File 'lib/rbkb/extends.rb', line 475

# Returns the null-terminated string that starts at offset +off+, without
# the terminating null. If no null byte follows +off+, the remainder of the
# string is returned.
#
# off:: optional beginning offset (default 0); negative values count from
#       the end, as with String#[].
#
# Returns nil when +off+ lies beyond the end of the string.
def cstring(off=0)
  # Search for the terminator relative to +off+; searching from index 0 and
  # using that absolute index as a length gave wrong results for off > 0.
  tail = self[off..-1]
  return nil unless tail

  nul = tail.index("\x00")
  nul ? tail[0, nul] : tail
end

#d64Object

Base64 decode



72
# File 'lib/rbkb/extends.rb', line 72

# Base64-decodes the string and returns the decoded bytes.
def d64
  unpack("m").first
end

#dat_to_num(order = :big) ⇒ Object Also known as: lazy_to_n, lazy_to_num, dat_to_n

A “generalized” lazy bytestring -> numeric converter.

Parameters:

order => :big or :little endian (default is :big)

Bonus: should work seamlessly with really large strings.

>> ("\xFF"*10).dat_to_num
=> 1208925819614629174706175
>> ("\xFF"*20).dat_to_num
=> 1461501637330902918203684832716283019655932542975


167
168
169
170
171
172
173
# File 'lib/rbkb/extends.rb', line 167

# A "generalized" lazy bytestring -> numeric converter.
#
# order:: :little for little-endian; any other value (default :big) is
#         treated as big-endian.
#
# Works with arbitrarily long strings (the result is an arbitrary-precision
# Integer) and returns 0 for an empty string.
#
# The receiver is never modified: byte values are extracted into an array
# and that array is reversed for little-endian, instead of reversing the
# string in place (which mutated the caller's data, failed on frozen
# strings, and reversed characters rather than bytes).
def dat_to_num(order=:big)
  octets = unpack("C*")
  octets.reverse! if order == :little
  octets.inject(0) { |acc, b| (acc << 8) | b }
end

#decamelizeObject

Converts a CamelCase or camelCase string into ‘_’ delimited form like ‘FooBar’ or ‘fooBar’ into ‘foo_bar’.

Note: This method only handles camel humps. Strings with consecutive uppercase chars like ‘FooBAR’ will be converted to ‘foo_bar’

See also: camelize, camelize_meth



530
531
532
533
534
# File 'lib/rbkb/extends.rb', line 530

# Converts a CamelCase or camelCase string into '_' delimited lowercase
# form ('FooBar' / 'fooBar' => 'foo_bar').
#
# Only lower-to-upper "humps" receive an underscore, so runs of capitals
# are simply lowercased: 'FooBAR' => 'foo_bar'.
def decamelize
  underscored = gsub(/(^|[a-z])([A-Z])/) do
    lead, cap = $1, $2
    lead.empty? ? cap : "#{lead}_#{cap}"
  end
  underscored.downcase
end

#dehexdump(opt = {}) ⇒ Object Also known as: dedump, undump, unhexdump

Converts a hexdump back to binary - takes the same options as hexdump(). Fairly flexible. Should work both with ‘xxd’ and ‘hexdump -C’ style dumps.



291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
# File 'lib/rbkb/extends.rb', line 291

# Converts a hexdump back to binary.
#
# Options (all optional):
# :out::        object receiving the decoded bytes via #write
#               (default: a new StringIO)
# :len::        maximum number of hex byte pairs read per line (default 16)
# :start_addr:: offset expected on the first line (default 0)
#
# Each line must begin with a hex offset (optionally followed by ':') equal
# to the number of bytes decoded so far plus :start_addr; the hex pairs that
# follow are decoded with String#unhexify (defined elsewhere in this
# library).
#
# Returns the decoded string when the output object is a StringIO,
# otherwise nil.
# Raises RuntimeError naming the 1-based line number and the offending line
# when a line does not parse or its offset is unexpected.
def dehexdump(opt={})
  out = opt[:out] || StringIO.new
  len = (opt[:len] and opt[:len] > 0) ? opt[:len] : 16

  hcrx = /[A-Fa-f0-9]/
  dumprx = /^(#{hcrx}+):?\s*((?:#{hcrx}{2}\s*){0,#{len}})/
  off = opt[:start_addr] || 0

  split(/\r?\n/).each_with_index do |hl, idx|
    m = dumprx.match(hl)
    unless m and m[1].hex == off
      # Report only the bad line; the message used to embed the whole dump.
      raise "Hexdump parse error on line #{idx + 1} #{hl.inspect}"
    end
    # take the data chunk, unhexify it, and advance by the bytes written
    off += out.write(m[2].unhexify)
  end

  out.string if out.class == StringIO
end

#entropyObject

calculates entropy in string

TQBF’s description: “I also added a chi-squared test to quickly figure out entropy of a string, in ”bits of randomness per byte“. This is useful, so…”



186
187
188
189
190
191
192
193
194
195
# File 'lib/rbkb/extends.rb', line 186

# Calculates the Shannon entropy of the string's bytes, in bits per byte
# (0 for uniform data up to 8.0 for perfectly distributed byte values).
#
# Byte frequencies are tallied in a single pass over the raw bytes, so the
# method works for strings of any encoding (counting via String#count with
# single-byte arguments raised an encoding error on non-ASCII UTF-8 text)
# and only needs the standard Math module.
#
# Returns 0 for an empty string.
def entropy
  counts = Array.new(256, 0)
  total = 0
  each_byte do |b|
    counts[b] += 1
    total += 1
  end

  e = 0
  counts.each do |n|
    next if n == 0
    x = n / total.to_f
    e += - x * (Math.log(x) / Math.log(2))
  end
  e
end

#hex_to_num(order = :big) ⇒ Object

Converts a hex value to numeric.

Parameters:

order => :big or :little endian (default is :big)


140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/rbkb/extends.rb', line 140

# Converts a string of hex digits to its numeric value.
#
# order:: :big (default) reads the digits as written; :little reverses the
#         order of the two-digit byte pairs before converting.
#
# Raises RuntimeError when the string fails ishex? or when +order+ is
# neither :big nor :little.
def hex_to_num(order=:big)
  raise "invalid hex value: '#{self.inspect}'" unless self.ishex?

  digits = case order
           when :little then scan(/.{2}/).reverse.join
           when :big    then self
           else raise "Invalid byte order #{order.inspect}"
           end
  digits.hex
end

#hexdump(opt = {}) ⇒ Object

Returns or prints a hexdump in the style of ‘hexdump -C’

:len => optionally specify a length other than 16 for a wider or thinner dump. If length is an odd number, it will be rounded up.

:out => optionally specify an alternate IO object for output. By default, hexdump writes to a new StringIO object and returns the dump as a string. When any other IO object is passed (e.g. STDERR), the dump is written to it and the return value is nil.

Example:

Here’s the default behavior done explicitly:

>> xxd = dat.hexdump(:len => 16, :out => StringIO.new)
=> <a string containing hexdump>

Here’s how to change it to STDERR

>> xxd = dat.hexdump(:len => 16, :out => STDERR)
<prints hexdump on STDERR>
-> nil # return value is nil!


255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
# File 'lib/rbkb/extends.rb', line 255

# Writes a hexdump of the string in the style of 'hexdump -C'.
#
# Options (all optional):
# :len::        bytes per row (default 16); an odd value is rounded up to
#               the next even number
# :out::        object receiving the dump via #write (default: a new
#               StringIO)
# :start_addr:: offset shown for the first row (default 0)
# :start_len::  width the hex offset column is zero-padded to (default 8)
#
# Returns the dump as a string when the output object is a StringIO,
# otherwise nil.
def hexdump(opt={})
  s=self
  out = opt[:out] || StringIO.new
  # odd lengths are bumped to the next even number so the row splits in half
  len = (opt[:len] and opt[:len] > 0)? opt[:len] + (opt[:len] % 2) : 16

  off = opt[:start_addr] || 0
  offlen = opt[:start_len] || 8

  # an extra space is written after this many bytes to split the hex column
  hlen=len/2

  # consume the string in chunks of up to len characters (newlines included)
  s.scan(/(?:.|\n){1,#{len}}/) do |m|
    # offset column
    out.write(off.to_s(16).rjust(offlen, "0") + '  ')

    # hex column: two hex digits and a space per byte
    i=0
    m.each_byte do |c|
      out.write c.to_s(16).rjust(2,"0") + " "
      out.write(' ') if (i+=1) == hlen
    end

    out.write("   " * (len-i) ) # pad a short final row to full width
    out.write(" ") if i < hlen  # the mid-row gap was never written

    # text column: bytes \0-\37 and \177-\377 are shown as '.'
    out.write(" |" + m.tr("\0-\37\177-\377", '.') + "|\n")
    off += m.length
  end

  # trailing line holding only the final offset
  out.write(off.to_s(16).rjust(offlen,'0') + "\n")

  # only a StringIO yields a return value; any other output object gives nil
  if out.class == StringIO
    out.string
  end
end

#hexify(opts = {}) ⇒ Object

Convert a string to ASCII hex string. Supports a few options for format:

:delim - delimiter between each hex byte
:prefix - prefix before each hex byte
:suffix - suffix after each hex byte


99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/rbkb/extends.rb', line 99

# Converts the string to an ASCII hex string, byte by byte.
#
# Options:
# :delim::  delimiter placed between the per-byte outputs
# :prefix:: text placed before each hex byte (default "")
# :suffix:: text placed after each hex byte (default "")
# :rx::     optional Regexp; when given, only bytes whose character matches
#           it are hexified and all other bytes are passed through raw
#
# Hex digits are taken from Rbkb::HEXCHARS (defined elsewhere).
# Raises RuntimeError when :rx is given but is not a Regexp.
def hexify(opts={})
  delim = opts[:delim]
  pre = opts[:prefix] || ""
  suf = opts[:suffix] || ""
  rx = opts[:rx]

  if rx and not rx.kind_of? Regexp
    raise "rx must be a regular expression for a character class"
  end

  hx = Rbkb::HEXCHARS
  pieces = []

  each_byte do |c|
    if rx and not rx.match(c.chr)
      pieces << c.chr
    else
      pieces << (pre + (hx[c >> 4] + hx[c & 0xf]) + suf)
    end
  end

  pieces.join(delim)
end

#ishex?Boolean

shortcut for hex sanity with regex

Returns:

  • (Boolean)


37
# File 'lib/rbkb/extends.rb', line 37

# Returns true when the whole string consists of one or more hex digits
# (case-insensitive), false otherwise.
#
# The pattern is anchored with \A and \z so the entire string is checked;
# the line anchors ^ and $ accepted any multi-line string that merely
# contained one all-hex line (e.g. "zz\nab").
def ishex?
  (self =~ /\A[a-f0-9]+\z/i) ? true : false
end

#lalign(a, p = ' ') ⇒ Object

left-align to ‘a’ alignment padded with ‘p’



84
85
86
87
88
89
90
# File 'lib/rbkb/extends.rb', line 84

# Left-aligns the string to 'a' alignment: returns a copy padded on the
# right with 'p' (a space when p is nil or false).
#
# The amount of padding comes from Integer#pad (defined elsewhere in this
# library), called on the string's length with the alignment 'a'.
def lalign(a, p=' ')
  filler = p || ' '
  cur = length
  ljust(cur.pad(a) + cur, filler)
end

#pipe_magick(arg = "") ⇒ Object

This attempts to identify a blob of data using ‘file(1)’ via popen3 (using popen3 because IO.popen blows). Tried doing this with a fmagic ruby extension to libmagic, but it was a whole lot slower.



498
499
500
501
502
503
504
505
506
# File 'lib/rbkb/extends.rb', line 498

# Attempts to identify this blob of data by piping it to 'file(1)' through
# Open3.popen3.
#
# arg:: extra text interpolated into the command line between 'file' and
#       the trailing '-' (read from stdin) argument. NOTE(review): it is
#       placed into a shell command string unescaped, so it must not come
#       from untrusted input.
#
# Returns what the command printed on stdout, with a leading
# "/dev/stdin: " prefix removed when present.
def pipe_magick(arg="")
  ret = ""
  Open3.popen3("file #{arg} -") do |stdin, stdout, _stderr|
    stdin.write(self)
    stdin.close
    ret = stdout.read
    stdout.close
    ret.sub!(/^\/dev\/stdin: /, "")
  end
  ret
end

#ralign(a, p = ' ') ⇒ Object

right-align to ‘a’ alignment padded with ‘p’



75
76
77
78
79
80
81
# File 'lib/rbkb/extends.rb', line 75

# Right-aligns the string to 'a' alignment: returns a copy padded on the
# left with 'p' (a space when p is nil or false).
#
# The amount of padding comes from Integer#pad (defined elsewhere in this
# library), called on the string's length with the alignment 'a'.
def ralign(a, p=' ')
  filler = p || ' '
  cur = length
  rjust(cur.pad(a) + cur, filler)
end

#randomizeObject

String randomizer



227
# File 'lib/rbkb/extends.rb', line 227

# String randomizer: returns a new string with this string's characters in
# random order. The shuffling itself is done by Array#randomize, which is
# defined elsewhere in this library.
#
# The characters are re-assembled with Array#join; Array#to_s only
# concatenates on Ruby 1.8 and returns the inspect form ('["a", "b"]') on
# later versions.
def randomize
  split('').randomize.join
end

#randomize!Object

In-place string randomizer



230
# File 'lib/rbkb/extends.rb', line 230

# In-place string randomizer: replaces the contents of this string with the
# result of #randomize and returns self.
def randomize!
  shuffled = randomize
  replace(shuffled)
end

#rotate_bytes(k = 0) ⇒ Object

Byte rotation as found in lame ciphers. This was cribbed from Timur Duehr with only a minor change.



216
217
218
219
220
221
222
223
224
# File 'lib/rbkb/extends.rb', line 216

# Byte rotation as found in lame ciphers: adds +k+ to every byte, wrapping
# around at 256. This was cribbed from Timur Duehr with only a minor change.
#
# k:: Integer amount to rotate each byte by (default 0; may be negative).
#
# Returns a new string of the same byte length; the receiver is unchanged.
#
# The sum is reduced modulo 256 so every result is a valid byte; the former
# modulus of 384 let values 256..383 through, which raised RangeError when
# converted to a character. The work is done on byte values so multi-byte
# characters are rotated byte-wise as well.
def rotate_bytes(k=0)
  rotated = unpack("C*").map { |b| (b + k) % 256 }.pack("C*")
  # keep the receiver's encoding tag where the runtime has encodings
  rotated.force_encoding(encoding) if rotated.respond_to?(:force_encoding)
  rotated
end

#starts_with?(dat) ⇒ Boolean

Does string “start with” dat? No clue whether/when this is faster than a regex, but it is easier to type.

Returns:

  • (Boolean)


464
465
466
# File 'lib/rbkb/extends.rb', line 464

# Does the string "start with" dat? Compares the leading dat.size
# characters of self against dat.
def starts_with?(dat)
  prefix = self[0, dat.size]
  prefix == dat
end

#strings(opts = {}) ⇒ Object

A ‘strings’ method a-la unix strings utility. Finds printable strings in a binary blob. Supports ASCII and little endian unicode (though only for ASCII printable character.)

Parameters and options:

* Use the :minimum parameter to specify minimum number of characters
  to match. (default = 6)

* Use the :encoding parameter as one of :ascii, :unicode, or :both
  (default = :both)

* The 'strings' method uses Regexp under the hood. Therefore
  you can pass a character class for "valid characters" with :valid
  (default = /[\r\n [:print:]]/)

* Supports an optional block, which will be passed
  |start_offset, end_offset, type, string| for each match.
  The block's boolean return value also determines whether the match 
  passes or fails (true or false/nil) and gets returned by the function.

Return Value:

Returns an array consisting of matches with the following elements:

 [[start_offset, end_offset, string_type, string], ...]

* string_type will be one of :ascii or :unicode
* end_offset will include the terminating null character
* end_offset will include all null bytes in unicode strings (including
  both terminating nulls)

 If strings are null terminated, the trailing null *IS* included
 in the end_offset. Unicode matches will also include null bytes.

Todos?

- better unicode support (i.e. not using half-assed unicode)
- support other encodings such as all those the binutils strings does?


407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
# File 'lib/rbkb/extends.rb', line 407

# A 'strings' method a-la the unix strings utility: finds printable strings
# in a binary blob. Supports ASCII and "little endian unicode" (a valid
# character followed by a null byte).
#
# Options:
# :minimum::  minimum number of characters in a match (default 6)
# :encoding:: one of :ascii, :unicode, or :both (default :both)
# :valid::    Regexp character class of "valid characters"
#             (default /[\r\n [:print:]]/)
# :align::    optional alignment; matches starting at unaligned offsets are
#             skipped. Padding is computed with Integer#pad, defined
#             elsewhere in this library.
#
# An optional block is called with (start_offset, end_offset, type, string)
# for each match; the match is kept only when the block returns truthy.
#
# Returns [[start_offset, end_offset, string_type, string], ...] where
# string_type is :ascii or :unicode and end_offset includes any trailing
# null terminator matched.
#
# Raises RuntimeError for a non-numeric or non-positive :minimum, or for an
# unknown :encoding. The caller's options hash is not modified.
def strings(opts={})
  prx = (opts[:valid] || /[\r\n [:print:]]/)
  min = (opts[:minimum] || 6)
  align = opts[:align]

  raise "Minimum must be numeric and > 0" unless min.kind_of? Numeric and min > 0

  # "{min,}" enforces the minimum length. The former "{min}?" form means
  # "optionally min characters" in Ruby's regexp syntax, so the minimum was
  # ignored and the pattern could match the empty string.
  arx = /(#{prx}{#{min},}\x00?)/
  urx = /((?:#{prx}\x00){#{min},}(?:\x00\x00)?)/

  encoding = (opts[:encoding] || :both).to_sym
  rx = case encoding
       when :ascii   then arx
       when :unicode then urx
       when :both    then Regexp.union(arx, urx)
       else raise "Encoding must be :unicode, :ascii, or :both"
       end

  off = 0
  ret = []

  while off <= size and (mtch = rx.match(self[off..-1]))
    # translate offsets relative to the slice into absolute offsets
    rel_start, rel_end = mtch.offset(0)
    startoff = off + rel_start
    endoff   = off + rel_end

    # always move forward, even if a custom :valid class matched nothing
    off += (rel_end > 0 ? rel_end : 1)
    next if rel_end == rel_start

    if align and (pad = startoff.pad(align)) != 0
      off = startoff + pad
      next
    end

    # With a single-encoding pattern the only capture group is group 1, so
    # the type must come from the requested encoding; only the :both union
    # distinguishes the two by capture group.
    stype = case encoding
            when :ascii   then :ascii
            when :unicode then :unicode
            else mtch[1] ? :ascii : :unicode
            end

    mret = [startoff, endoff, stype, mtch[0]]

    # yield to a block for additional criteria
    next if block_given? and not yield(*mret)

    ret << mret
  end

  ret
end

#to_stringioObject

Return a self encapsulated in a StringIO object. This is handy.



567
568
569
# File 'lib/rbkb/extends.rb', line 567

# Returns a new StringIO wrapping self (the string itself, not a copy).
# This is handy.
def to_stringio
  io = StringIO.new(self)
  io
end

#unhexify(d = /\s*/) ⇒ Object

Convert ASCII hex string to raw.

Parameters:

d = optional 'delimiter' between hex bytes (zero+ spaces by default)


130
131
132
# File 'lib/rbkb/extends.rb', line 130

# Converts an ASCII hex string to raw bytes.
#
# d:: optional 'delimiter' pattern allowed after each hex byte
#     (zero or more whitespace characters by default)
#
# Leading/trailing whitespace is stripped first. Each run of one or two hex
# digits (plus an optional trailing delimiter) is replaced by the
# corresponding character; any other text is left untouched.
def unhexify(d=/\s*/)
  hex_byte = /([A-Fa-f0-9]{1,2})#{d}?/
  strip.gsub(hex_byte) { Regexp.last_match(1).hex.chr }
end

#urldec(opts = {}) ⇒ Object

Undo percent-hexified url encoding data



55
56
57
58
59
# File 'lib/rbkb/extends.rb', line 55

# Undoes percent-hexified url encoding and returns the decoded string.
#
# Options:
# :noplus:: when truthy, '+' characters are left alone; otherwise each '+'
#           is decoded to a space.
#
# The receiver is never modified: the '+' translation works on a copy
# rather than substituting in place (which altered the caller's string and
# raised on frozen strings).
def urldec(opts={})
  src = opts[:noplus] ? self : tr('+', ' ')
  src.gsub(/%([A-Fa-f0-9]{2})/) { $1.hex.chr }
end

#urlenc(opts = {}) ⇒ Object

Encode into percent-hexify url encoding format



40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/rbkb/extends.rb', line 40

# Encodes the string into percent-hexified url encoding format.
#
# Options:
# :plus:: when truthy, spaces are encoded as '+' instead of a percent
#         escape
# :rx::   Regexp character class selecting what to encode
#         (default /[^A-Za-z0-9_\.~-]/)
#
# Every byte of each matched character is emitted as '%' followed by two
# hex digits taken from Rbkb::HEXCHARS (defined elsewhere). Byte values are
# obtained with unpack because String#[] returns a one-character String
# (not an Integer) on Ruby >= 1.9, which made the shift/mask arithmetic
# raise. The caller's options hash is not modified.
#
# Raises RuntimeError when :rx is given but is not a Regexp.
def urlenc(opts={})
  plus = opts[:plus]
  rx = opts[:rx] || /[^A-Za-z0-9_\.~-]/
  unless rx.kind_of? Regexp
    raise "rx must be a regular expression for a character class"
  end
  hx = Rbkb::HEXCHARS

  gsub(rx) do |c|
    c.unpack("C*").map do |b|
      (plus and b == 32) ? '+' : "%" + (hx[(b >> 4)] + hx[(b & 0xf)])
    end.join
  end
end

#xor(k) ⇒ Object

xor against a key. key will be repeated or truncated to self.size.



198
199
200
201
202
203
204
205
206
# File 'lib/rbkb/extends.rb', line 198

# XORs the string against a key. The key is repeated or truncated to the
# byte length of self.
#
# k:: the key: a String (its bytes are used), an Array of byte values
#     (0..255), or a single Integer byte value.
#
# Returns a new string of the same byte length as self.
# Raises ArgumentError when the key is empty.
#
# Key bytes are extracted as Integers up front because String#[] returns a
# one-character String on Ruby >= 1.9, which cannot be XORed with a byte.
def xor(k)
  key = k.respond_to?(:unpack) ? k.unpack("C*") : Array(k)
  raise ArgumentError, "xor key must not be empty" if key.empty?

  klen = key.size
  idx = -1
  unpack("C*").map { |b| b ^ key[(idx += 1) % klen] }.pack("C*")
end