Class: String

Inherits:

Object

Object
String

show all

Defined in:: lib/rbkb/extends.rb,
lib/rbkb/plug/blit.rb

Overview

of module Plug

Direct Known Subclasses

Rbkb::Http::Body

Instance Method Summary collapse

#^(x) ⇒ Object

convert bytes to number then xor against another byte-string or number.
#b64(len = nil) ⇒ Object

Base64 encode.
#bgrep(find, align = nil) ⇒ Object

Binary grep.
#blit(idx = 0) ⇒ Object

A Blit sender convenience method for strings.
#camelize ⇒ Object

Converts a ‘_’ delimited string to CamelCase like ‘foo_class’ into ‘FooClass’.
#camelize_meth ⇒ Object

Converts a ‘_’ delimited string to method style camelCase like ‘foo_method’ into ‘fooMethod’.
#class_name ⇒ Object

convert a string to its idiomatic ruby class name.
#const_lookup(ns = Object) ⇒ Object

Returns a reference to actual constant for a given name in namespace can be used to lookup classes from enums and such.
#crc32 ⇒ Object

returns CRC32 checksum for the string object.
#cstring(off = 0) ⇒ Object

Returns a single null-terminated ascii string from beginning of self.
#d64 ⇒ Object

Base64 decode.
#dat_to_num(order = :big) ⇒ Object (also: #lazy_to_n, #lazy_to_num, #dat_to_n)

A “generalized” lazy bytestring -> numeric converter.
#decamelize ⇒ Object

Converts a CamelCase or camelCase string into ‘_’ delimited form like ‘FooBar’ or ‘fooBar’ into ‘foo_bar’.
#dehexdump(opt = {}) ⇒ Object (also: #dedump, #undump, #unhexdump)

Converts a hexdump back to binary - takes the same options as hexdump().
#entropy ⇒ Object

calculates entropy in string.
#hex_to_num(order = :big) ⇒ Object

Converts a hex value to numeric.
#hexdump(opt = {}) ⇒ Object

Returns or prints a hexdump in the style of ‘hexdump -C’.
#hexify(opts = {}) ⇒ Object

Convert a string to ASCII hex string.
#ishex? ⇒ Boolean

shortcut for hex sanity with regex.
#lalign(a, p = ' ') ⇒ Object

left-align to ‘a’ alignment padded with ‘p’.
#pipe_magick(arg = "") ⇒ Object

This attempts to identify a blob of data using ‘file(1)’ via popen3 (using popen3 because IO.popen blows). Tried doing this with an fmagic ruby extension to libmagic, but it was a whole lot slower.
#ralign(a, p = ' ') ⇒ Object

right-align to ‘a’ alignment padded with ‘p’.
#randomize ⇒ Object

String randomizer.
#randomize! ⇒ Object

In-place string randomizer.
#rotate_bytes(k = 0) ⇒ Object

Byte rotation as found in lame ciphers.
#starts_with?(dat) ⇒ Boolean

Does string “start with” dat? No clue whether/when this is faster than a regex, but it is easier to type.
#strings(opts = {}) ⇒ Object

A ‘strings’ method a-la unix strings utility.
#to_stringio ⇒ Object

Return a self encapsulated in a StringIO object.
#unhexify(d = /\s*/) ⇒ Object

Convert ASCII hex string to raw.
#urldec(opts = {}) ⇒ Object

Undo percent-hexified url encoding data.
#urlenc(opts = {}) ⇒ Object

Encode into percent-hexify url encoding format.
#xor(k) ⇒ Object

xor against a key.

Instance Method Details

#^(x) ⇒ `Object`

convert bytes to number then xor against another byte-string or number

# File 'lib/rbkb/extends.rb', line 209

# XORs the numeric value of this byte-string against +x+.
#
# x:: a Numeric, or any other object (typically another byte-string) that is
#     first converted with its own dat_to_num.
#
# Returns the numeric result of the XOR; it is not converted back to bytes.
def ^(x)
  operand = x.is_a?(Numeric) ? x : x.dat_to_num
  dat_to_num ^ operand
end

#b64(len = nil) ⇒ `Object`

Base64 encode

# File 'lib/rbkb/extends.rb', line 62

# Base64-encodes the receiver.
#
# len:: optional Numeric line width. When given, the encoded text is broken
#       into lines of at most +len+ characters, each line (including the
#       last) terminated by a newline. Any non-Numeric value is ignored.
#
# Without +len+ the result is a single line with no newlines at all.
def b64(len=nil)
  encoded = [self].pack("m").delete("\n")
  return encoded unless len && len.is_a?(Numeric)

  encoded.scan(/.{1,#{len}}/).join("\n") + "\n"
end

#bgrep(find, align = nil) ⇒ `Object`

Binary grep

Parameters:

find  : A Regexp or string to search for in self
align : nil | numeric alignment (matches only made if aligned)

# File 'lib/rbkb/extends.rb', line 329

# Binary grep: finds every occurrence of +find+ in the receiver.
#
# find::  a Regexp or a String to search for.
# align:: nil, or an Integer alignment; when given, a match is only reported
#         if its start offset needs no padding according to Integer#pad
#         (defined elsewhere in the project -- presumably "bytes needed to
#         reach the next multiple of align"; confirm against that helper).
#
# If a block is given it is called with [start, end, match] for each
# candidate and the candidate is kept only when the block returns true.
#
# Returns an array of [start_offset, end_offset, matched_string] triples.
# Raises RuntimeError when +align+ is not a non-negative Integer.
def bgrep(find, align=nil)
  if align and (not align.is_a?(Integer) or align < 0)
    raise "alignment must be a integer >= 0"
  end

  dat = self

  # Both searchers return [relative_start, relative_end, match] or nil.
  search =
    if find.kind_of?(Regexp)
      lambda do |rx, buf|
        m = rx.match(buf)
        m ? [m.begin(0), m.end(0), m[0]] : nil
      end
    else
      lambda do |needle, buf|
        at = buf.index(needle)
        at ? [at, at + needle.size, needle] : nil
      end
    end

  ret = []
  pos = 0
  # The bound keeps dat[pos..-1] from becoming nil once padding or the
  # empty-match step below moves pos past the end of the data.
  while pos <= dat.size and (res = search.call(find, dat[pos..-1]))
    off, endoff, match = res
    if align and (pad = (pos + off).pad(align)) != 0
      # Unaligned hit: skip ahead and search again.
      pos += pad
    else
      hit = [pos + off, pos + endoff, match]
      if not block_given? or yield([pos + off, pos + endoff, match])
        ret << hit
      end
      # A zero-length match must still consume one position, otherwise the
      # same empty match is found forever.
      pos += (endoff > off ? endoff : off + 1)
    end
  end
  ret
end

#blit(idx = 0) ⇒ `Object`

A Blit sender convenience method for strings

# File 'lib/rbkb/plug/blit.rb', line 218

# Sends the receiver through Plug::Blit as a convenience.
#
# idx:: index passed straight through to Plug::Blit.blit_send (default 0).
#
# Raises RuntimeError unless Plug::Blit reports that it has been initialized.
# Returns whatever Plug::Blit.blit_send returns.
def blit(idx=0)
  unless Plug::Blit.initialized?
    raise "blit must be initialized with blit_init"
  end

  Plug::Blit.blit_send(self, idx)
end

#camelize ⇒ `Object`

Converts a ‘_’ delimited string to CamelCase like ‘foo_class’ into ‘FooClass’. See also: camelize_meth, decamelize



511
512
513

# File 'lib/rbkb/extends.rb', line 511

# Turns a '_' delimited string into CamelCase ('foo_class' => 'FooClass').
# Only a lowercase ASCII letter at the start of a line or directly after an
# underscore is upcased; the underscore itself is dropped.
def camelize
  gsub(/(^|_)([a-z])/) do
    letter = Regexp.last_match(2)
    letter.upcase
  end
end

#camelize_meth ⇒ `Object`

Converts a ‘_’ delimited string to method style camelCase like ‘foo_method’ into ‘fooMethod’. See also: camelize, decamelize



518
519
520

# File 'lib/rbkb/extends.rb', line 518

# Turns a '_' delimited string into method-style camelCase
# ('foo_method' => 'fooMethod'). The first letter is left untouched; every
# underscore followed by a lowercase ASCII letter becomes that letter upcased.
def camelize_meth
  gsub(/_([a-z])/) do
    letter = Regexp.last_match(1)
    letter.upcase
  end
end

#class_name ⇒ `Object`

convert a string to its idiomatic ruby class name

# File 'lib/rbkb/extends.rb', line 537

# Converts the receiver to an idiomatic ruby class name, byte by byte:
#
# * the first byte, and any byte following an underscore, is upcased
# * a single underscore is dropped ('foo_bar' => 'FooBar')
# * an underscore seen while the next byte is already due to be upcased
#   (a doubled or leading underscore) emits '::' ('foo__bar' => 'Foo::Bar')
def class_name
  capitalize_next = true
  each_byte.each_with_object("") do |byte, out|
    if byte != 95 # 95 is '_'
      ch = byte.chr
      out << (capitalize_next ? ch.upcase : ch.to_s)
      capitalize_next = false
    elsif capitalize_next
      out << "::"
    else
      capitalize_next = true
    end
  end
end

#const_lookup(ns = Object) ⇒ `Object`

Returns a reference to actual constant for a given name in namespace can be used to lookup classes from enums and such

# File 'lib/rbkb/extends.rb', line 560

# Looks up the constant whose name is the receiver's class_name inside the
# namespace +ns+ (default Object). Handy for resolving classes from enums
# and similar string identifiers.
#
# Constant names are compared as strings because Module#constants returns
# Strings on ruby 1.8 but Symbols on 1.9 and later; comparing a Symbol with a
# String directly never matches.
#
# Returns the constant's value, or nil when +ns+ has no such constant.
def const_lookup(ns=Object)
  wanted = class_name
  found = ns.constants.find { |n| n.to_s == wanted }
  ns.const_get(found) if found
end

#crc32 ⇒ `Object`

returns CRC32 checksum for the string object

# File 'lib/rbkb/extends.rb', line 480

# Returns the CRC32 checksum of the receiver, computed by Zlib.
def crc32
  # A pure-ruby equivalent (slower, found on some forum), kept only as a
  # reference for what the Zlib call below computes:
  #
  #   r = 0xFFFFFFFF
  #   self.each_byte do |b|
  #     r ^= b
  #     8.times do
  #       r = (r>>1) ^ (0xEDB88320 * (r & 1))
  #     end
  #   end
  #   r ^ 0xFFFFFFFF
  checksum = Zlib.crc32(self)
  checksum
end

#cstring(off = 0) ⇒ `Object`

Returns a single null-terminated ascii string from beginning of self. This will return the entire string if no null is encountered.

Parameters:

off = specify an optional beginning offset



475
476
477

# File 'lib/rbkb/extends.rb', line 475

# Returns a single null-terminated string starting at offset +off+, without
# the terminator. If no null is found at or after +off+, everything from
# +off+ to the end of the receiver is returned.
#
# off:: optional starting offset (default 0); negative values count from the
#       end, as with String#[].
#
# Returns nil when +off+ lies outside the receiver.
def cstring(off=0)
  start = off < 0 ? size + off : off
  return nil if start < 0 or start > size

  # Search for the terminator from the starting offset, and turn its absolute
  # position into a length relative to that offset.
  stop = index("\x00", start) || size
  self[start, stop - start]
end

#d64 ⇒ `Object`

Base64 decode

# File 'lib/rbkb/extends.rb', line 72

# Base64-decodes the receiver and returns the decoded data.
def d64
  unpack("m")[0]
end

#dat_to_num(order = :big) ⇒ `Object` Also known as: lazy_to_n, lazy_to_num, dat_to_n

A “generalized” lazy bytestring -> numeric converter.

Parameters:

order => :big or :little endian (default is :big)

Bonus: should work seamlessly with really large strings.

>> ("\xFF"*10).dat_to_num
=> 1208925819614629174706175
>> ("\xFF"*20).dat_to_num
=> 1461501637330902918203684832716283019655932542975

# File 'lib/rbkb/extends.rb', line 167

# A "generalized" lazy bytestring -> numeric converter. The bytes of the
# receiver are read as one unsigned integer of arbitrary size.
#
# order:: :big (default) or :little endian. Any value other than :little is
#         treated as big endian.
#
# The receiver is never modified, so this also works on frozen strings.
def dat_to_num(order=:big)
  octets = unpack("C*")
  # Reverse a private copy of the bytes instead of the receiver itself.
  octets.reverse! if order == :little
  octets.inject(0) { |acc, c| (acc << 8) | c }
end

#decamelize ⇒ `Object`

Converts a CamelCase or camelCase string into ‘_’ delimited form like ‘FooBar’ or ‘fooBar’ into ‘foo_bar’.

Note: This method only handles camel humps. Strings with consecutive uppercase chars like ‘FooBAR’ will be converted to ‘foo_bar’

#dehexdump(opt = {}) ⇒ `Object` Also known as: dedump, undump, unhexdump

Converts a hexdump back to binary - takes the same options as hexdump(). Fairly flexible. Should work both with ‘xxd’ and ‘hexdump -C’ style dumps.

# File 'lib/rbkb/extends.rb', line 291

# Converts a hexdump back to binary.
#
# Options:
# :out::        object the decoded data is written to (default: a new
#               StringIO)
# :len::        maximum number of hex byte pairs read per line (default 16;
#               also used when the given value is not > 0)
# :start_addr:: offset expected on the first line (default 0)
#
# Every line must start with a hex offset (optionally followed by ':') equal
# to the number of bytes decoded so far plus :start_addr, followed by hex
# byte pairs. The pairs are decoded with unhexify.
#
# Returns the decoded string when the output object is a StringIO, otherwise
# nil. Raises RuntimeError on the first line that does not parse or whose
# offset is not the expected one.
def dehexdump(opt={})
  sink = opt[:out] || StringIO.new
  width = opt[:len]
  width = 16 unless width and width > 0

  hexchar = /[A-Fa-f0-9]/
  line_rx = /^(#{hexchar}+):?\s*((?:#{hexchar}{2}\s*){0,#{width}})/
  expected = opt[:start_addr] || 0

  lineno = 1
  self.split(/\r?\n/).each do |line|
    parsed = line_rx.match(line)
    unless parsed and parsed[1].hex == expected
      raise "Hexdump parse error on line #{lineno} #{self}"
    end

    lineno += 1
    # write returns the number of bytes written, which advances the offset
    # the next line is expected to carry.
    expected += sink.write(parsed[2].unhexify)
  end

  sink.string if sink.class == StringIO
end

#entropy ⇒ `Object`

calculates entropy in string

TQBF’s description: “I also added a chi-squared test to quickly figure out entropy of a string, in ”bits of randomness per byte“. This is useful, so…”

# File 'lib/rbkb/extends.rb', line 186

# Calculates the entropy of the receiver in "bits of randomness per byte":
# the sum over all byte values of -p * log2(p), where p is the fraction of
# bytes having that value.
#
# Byte frequencies are tallied in a single pass over each_byte, so the result
# does not depend on the string's encoding.
#
# Returns 0 for an empty string, otherwise a Float between 0.0 and 8.0.
def entropy
  counts = Array.new(256, 0)
  total = 0
  each_byte do |b|
    counts[b] += 1
    total += 1
  end

  e = 0
  counts.each do |n|
    next if n == 0
    x = n / total.to_f
    # log2 computed with core Math only.
    e += -x * (Math.log(x) / Math.log(2))
  end
  e
end

#hex_to_num(order = :big) ⇒ `Object`

Converts a hex value to numeric.

Parameters:

order => :big or :little endian (default is :big)

# File 'lib/rbkb/extends.rb', line 140

# Converts a string of hex digits to its numeric value.
#
# order:: :big (default) or :little endian. In :little mode the hex byte
#         pairs are reversed before conversion; an odd number of digits is
#         first padded with a leading '0' so that no digit is dropped.
#
# Raises RuntimeError when the receiver fails ishex? or when +order+ is
# neither :big nor :little.
def hex_to_num(order=:big)
  raise "invalid hex value: '#{self.inspect}'" unless ishex?

  digits =
    if order == :little
      padded = (size % 2 == 0) ? self : "0" + self
      padded.scan(/.{2}/).reverse.join
    elsif order == :big
      self
    else
      raise "Invalid byte order #{order.inspect}"
    end
  digits.hex
end

#hexdump(opt = {}) ⇒ `Object`

Returns or prints a hexdump in the style of ‘hexdump -C’

:len => optionally specify a length other than 16 for a wider or thinner dump. If length is an odd number, it will be rounded up.

:out => optionally specify an alternate IO object for output. By default, hexdump writes to a new StringIO object and returns the dump as a string. Pass any other IO object (e.g. STDERR) and the dump is written there instead; the return value is then nil.

Example:

Here’s the default behavior done explicitly:

>> xxd = dat.hexdump(:len => 16, :out => StringIO.new)
=> <a string containing hexdump>

Here’s how to change it to STDERR

>> xxd = dat.hexdump(:len => 16, :out => STDERR)
<prints hexdump on STDERR>
-> nil # return value is nil!

# File 'lib/rbkb/extends.rb', line 255

# Produces a hexdump of the receiver in the style of 'hexdump -C'.
#
# Options:
# :len::        bytes per row (default 16). Odd values are rounded up to the
#               next even number; values that are not > 0 fall back to 16.
# :out::        object the dump is written to (default: a new StringIO)
# :start_addr:: offset shown for the first row (default 0)
# :start_len::  minimum width of the zero-padded offset column (default 8)
#
# The dump is byte-oriented: rows are cut every :len bytes and offsets count
# bytes, so binary data and multibyte strings are dumped faithfully. Bytes
# outside the printable ASCII range 0x20..0x7e are shown as '.' in the text
# column. A final line holding only the end offset is always written.
#
# Returns the dump as a String when the output object is a StringIO,
# otherwise nil.
def hexdump(opt={})
  out = opt[:out] || StringIO.new
  len = (opt[:len] and opt[:len] > 0) ? opt[:len] + (opt[:len] % 2) : 16

  off = opt[:start_addr] || 0
  offlen = opt[:start_len] || 8

  hlen = len / 2

  unpack("C*").each_slice(len) do |row|
    out.write(off.to_s(16).rjust(offlen, "0") + '  ')

    row.each_with_index do |c, idx|
      out.write(c.to_s(16).rjust(2, "0") + " ")
      # extra gap between the two halves of a row
      out.write(' ') if idx + 1 == hlen
    end

    out.write("   " * (len - row.size)) # pad a short final row
    out.write(" ") if row.size < hlen   # the half-row gap was never written

    text = row.map { |c| (c < 0x20 or c > 0x7e) ? '.' : c.chr }.join
    out.write(" |" + text + "|\n")
    off += row.size
  end

  out.write(off.to_s(16).rjust(offlen, '0') + "\n")

  out.string if out.class == StringIO
end

#hexify(opts = {}) ⇒ `Object`

Convert a string to ASCII hex string. Supports a few options for format:

:delim - delimiter between each hex byte
:prefix - prefix before each hex byte
:suffix - suffix after each hex byte

# File 'lib/rbkb/extends.rb', line 99

# Converts the receiver to an ASCII hex string, one two-digit group per byte.
#
# Options:
# :delim::  string placed between the per-byte pieces
# :prefix:: string placed before each hex byte (default "")
# :suffix:: string placed after each hex byte (default "")
# :rx::     a Regexp; when given, only bytes whose character matches it are
#           hexified and every other byte is passed through unchanged
#
# Hex digits are looked up in Rbkb::HEXCHARS by nibble value.
# Raises RuntimeError when :rx is given but is not a Regexp.
def hexify(opts={})
  separator = opts[:delim]
  prefix = opts[:prefix] || ""
  suffix = opts[:suffix] || ""

  filter = opts[:rx]
  if filter and not filter.kind_of?(Regexp)
    raise "rx must be a regular expression for a character class"
  end

  digits = Rbkb::HEXCHARS

  pieces = each_byte.map do |byte|
    ch = byte.chr
    if filter and not filter.match(ch)
      ch
    else
      prefix + (digits[byte >> 4] + digits[byte & 0xf]) + suffix
    end
  end
  pieces.join(separator)
end

#ishex? ⇒ `Boolean`

shortcut for hex sanity with regex

Returns:

(Boolean)

# File 'lib/rbkb/extends.rb', line 37

# Shortcut for a hex sanity check: true when the receiver consists solely of
# hex digits (either case), false otherwise, including for the empty string.
#
# The pattern is anchored to the whole string rather than to a line, so a
# multi-line string with one hex-looking line does not pass. A single
# trailing newline is still tolerated.
def ishex?
  (self =~ /\A[a-f0-9]+\Z/i) ? true : false
end

#lalign(a, p = ' ') ⇒ `Object`

left-align to ‘a’ alignment padded with ‘p’

# File 'lib/rbkb/extends.rb', line 84

# Left-aligns the receiver to alignment +a+ by appending the pad string +p+.
#
# a:: alignment, handed to Integer#pad (defined elsewhere in the project --
#     presumably it returns how many characters are missing to reach the next
#     multiple of +a+; confirm against that helper)
# p:: padding string (default ' '; nil also means ' ')
#
# Returns a new string; the receiver is not modified.
def lalign(a, p=' ')
  filler = p || ' '
  missing = length.pad(a)
  ljust(missing + length, filler)
end

#pipe_magick(arg = "") ⇒ `Object`

This attempts to identify a blob of data using ‘file(1)’ via popen3 (using popen3 because IO.popen blows). Tried doing this with an fmagic ruby extension to libmagic, but it was a whole lot slower.

# File 'lib/rbkb/extends.rb', line 498

# Attempts to identify the receiver as a blob of data by piping it to
# 'file(1)' through Open3.popen3.
#
# arg:: extra command-line text inserted verbatim between 'file' and '-'.
#       NOTE(review): it is interpolated into the command string unescaped,
#       so it must never come from untrusted input.
#
# Returns the text 'file' printed on stdout, with a leading '/dev/stdin: '
# removed when present.
def pipe_magick(arg="")
  description = ""
  Open3.popen3("file #{arg} -") do |stdin, stdout, _stderr|
    # Feed the data, then close stdin so 'file' sees end-of-input.
    stdin.write(self)
    stdin.close

    description = stdout.read
    stdout.close
    description.sub!(/^\/dev\/stdin: /, "")
  end
  description
end

#ralign(a, p = ' ') ⇒ `Object`

right-align to ‘a’ alignment padded with ‘p’

# File 'lib/rbkb/extends.rb', line 75

# Right-aligns the receiver to alignment +a+ by prepending the pad string +p+.
#
# a:: alignment, handed to Integer#pad (defined elsewhere in the project --
#     presumably it returns how many characters are missing to reach the next
#     multiple of +a+; confirm against that helper)
# p:: padding string (default ' '; nil also means ' ')
#
# Returns a new string; the receiver is not modified.
def ralign(a, p=' ')
  filler = p || ' '
  missing = length.pad(a)
  rjust(missing + length, filler)
end

#randomize ⇒ `Object`

String randomizer

# File 'lib/rbkb/extends.rb', line 227

# String randomizer: returns a new string made of the receiver's characters
# in the order produced by Array#randomize (defined elsewhere in the project).
#
# The characters are re-joined with Array#join; Array#to_s only concatenates
# on ruby 1.8 and returns the inspect form on later versions.
def randomize
  split('').randomize.join
end

#randomize! ⇒ `Object`

In-place string randomizer

# File 'lib/rbkb/extends.rb', line 230

# In-place string randomizer: replaces the receiver's contents with the
# result of randomize and returns the receiver.
def randomize!
  replace(randomize)
end

#rotate_bytes(k = 0) ⇒ `Object`

Byte rotation as found in lame ciphers. This was cribbed from Timur Duehr with only a minor change.

# File 'lib/rbkb/extends.rb', line 216

# Byte rotation as found in lame ciphers: adds +k+ to every byte of the
# receiver, wrapping around within the byte range 0..255.
#
# k:: amount to rotate by (default 0); may be negative.
#
# Returns a new binary string of the same byte length; the receiver is not
# modified.
def rotate_bytes(k=0)
  # Wrap at 256 so the result is always a valid byte value.
  unpack("C*").map { |b| (b + k) % 256 }.pack("C*")
end

#starts_with?(dat) ⇒ `Boolean`

Does string “start with” dat? No clue whether/when this is faster than a regex, but it is easier to type.

Returns:

(Boolean)



464
465
466

# File 'lib/rbkb/extends.rb', line 464

# Does the receiver "start with" +dat+? Compares the leading dat.size
# characters of the receiver against +dat+.
def starts_with?(dat)
  prefix = self[0, dat.size]
  prefix == dat
end

#strings(opts = {}) ⇒ `Object`

A ‘strings’ method a-la unix strings utility. Finds printable strings in a binary blob. Supports ASCII and little endian unicode (though only for ASCII printable character.)

Parameters and options:

* Use the :minimum parameter to specify minimum number of characters
  to match. (default = 6)

* Use the :encoding parameter as one of :ascii, :unicode, or :both
  (default = :both)

* The 'strings' method uses Regexp under the hood. Therefore
  you can pass a character class for "valid characters" with :valid
  (default = /[\r\n [:print:]]/)

* Supports an optional block, which will be passed
  |start_offset, end_offset, type, string| for each match.
  The block's boolean return value also determines whether the match 
  passes or fails (true or false/nil) and gets returned by the function.

Return Value:

Returns an array consisting of matches with the following elements:

 [[start_offset, end_offset, string_type, string], ...]

* string_type will be one of :ascii or :unicode
* end_offset will include the terminating null character
* end_offset will include all null bytes in unicode strings (including
  both terminating nulls)

 If strings are null terminated, the trailing null *IS* included
 in the end_offset. Unicode matches will also include null bytes.

Todos?

- better unicode support (i.e. not using half-assed unicode)
- support other encodings such as all those the binutils strings does?

# File 'lib/rbkb/extends.rb', line 407

# A 'strings' method a-la the unix strings utility: finds runs of printable
# characters in a binary blob.
#
# Options:
# :encoding:: :ascii, :unicode or :both (default :both). "unicode" means
#             each valid character followed by a null byte.
# :minimum::  minimum number of characters per match (default 6)
# :valid::    Regexp character class of valid characters
#             (default /[\r\n [:print:]]/)
# :align::    when set, a match is skipped unless its start offset needs no
#             padding according to Integer#pad (defined elsewhere in the
#             project)
#
# An optional block is called with (start_offset, end_offset, type, string)
# for each match; the match is kept only when the block returns true.
#
# Returns [[start_offset, end_offset, string_type, string], ...] where
# string_type is :ascii or :unicode. A terminating null (two nulls for
# unicode), when present, is part of the match and of end_offset.
#
# Raises RuntimeError for a non-numeric or non-positive :minimum and for an
# unknown :encoding. The caller's option hash is never modified.
def strings(opts={})
  prx = (opts[:valid] || /[\r\n [:print:]]/)
  min = (opts[:minimum] || 6)
  align = opts[:align]

  raise "Minimum must be numeric and > 0" unless min.kind_of? Numeric and min > 0

  # Plain {min} on purpose: in ruby's regexp syntax "{n}?" means "(?:...{n})?"
  # (an optional group), which would neither enforce the minimum nor prevent
  # empty matches.
  arx = /(#{prx}{#{min}}#{prx}*\x00?)/
  urx = /((?:#{prx}\x00){#{min}}(?:#{prx}\x00)*(?:\x00\x00)?)/

  # For a single encoding the match type is known up front. Only the union
  # needs the capture groups to tell which alternative matched.
  rx, fixed_type =
    case (opts[:encoding] || :both).to_sym
    when :ascii
      [arx, :ascii]
    when :unicode
      [urx, :unicode]
    when :both
      [Regexp.union(arx, urx), nil]
    else
      raise "Encoding must be :unicode, :ascii, or :both"
    end

  off = 0
  ret = []

  while mtch = rx.match(self[off..-1])
    # match offsets are relative to the slice; make them absolute
    rel_start, rel_end = mtch.offset(0)
    startoff = off + rel_start
    endoff   = off + rel_end
    off = endoff

    if align and (pad = startoff.pad(align)) != 0
      off = startoff + pad
      next
    end

    stype = fixed_type || (mtch[1] ? :ascii : :unicode)
    mret = [startoff, endoff, stype, mtch[0]]

    # yield to a block for additional criteria
    next if block_given? and not yield(*mret)

    ret << mret
  end

  ret
end

#to_stringio ⇒ `Object`

Return a self encapsulated in a StringIO object. This is handy.



567
568
569

# File 'lib/rbkb/extends.rb', line 567

# Wraps the receiver in a new StringIO object and returns it.
def to_stringio
  wrapped = StringIO.new(self)
  wrapped
end

#unhexify(d = /\s*/) ⇒ `Object`

Convert ASCII hex string to raw.

Parameters:

d = optional 'delimiter' between hex bytes (zero+ spaces by default)



130
131
132

# File 'lib/rbkb/extends.rb', line 130

# Converts an ASCII hex string to raw data.
#
# d:: optional 'delimiter' allowed after each hex byte (zero or more
#     whitespace characters by default); it is interpolated into the
#     matching pattern.
#
# Surrounding whitespace is stripped first. Each group of one or two hex
# digits, together with a following delimiter if present, is replaced by the
# character with that value; anything else is left in place.
def unhexify(d=/\s*/)
  byte_rx = /([A-Fa-f0-9]{1,2})#{d}?/
  strip.gsub(byte_rx) { Regexp.last_match(1).hex.chr }
end

#urldec(opts = {}) ⇒ `Object`

Undo percent-hexified url encoding data

# File 'lib/rbkb/extends.rb', line 55

# Undoes percent-hexified url encoding.
#
# Options:
# :noplus:: when true, '+' characters are left alone; by default each '+' is
#           turned into a space before the %XX sequences are decoded.
#
# Returns a new string. The receiver is never modified, so this also works
# on frozen strings.
def urldec(opts={})
  # tr returns a copy, so the caller's string keeps its '+' characters.
  src = opts[:noplus] ? self : tr('+', ' ')
  src.gsub(/%([A-Fa-f0-9]{2})/) { $1.hex.chr }
end

#urlenc(opts = {}) ⇒ `Object`

Encode into percent-hexify url encoding format

# File 'lib/rbkb/extends.rb', line 40

# Encodes the receiver into percent-hexified url encoding.
#
# Options:
# :plus:: when true, a space is encoded as '+' instead of a percent escape
# :rx::   Regexp selecting what gets encoded
#         (default /[^A-Za-z0-9_\.~-]/)
#
# Every byte of each matched character is emitted as '%' followed by two
# digits taken from Rbkb::HEXCHARS, so multibyte characters are fully
# encoded. The caller's option hash is not modified.
#
# Raises RuntimeError when :rx is given but is not a Regexp.
def urlenc(opts={})
  plus = opts[:plus]
  rx = opts[:rx] || /[^A-Za-z0-9_\.~-]/
  unless rx.kind_of? Regexp
    raise "rx must be a regular expression for a character class"
  end
  hx = Rbkb::HEXCHARS

  gsub(rx) do |c|
    if plus and c == " "
      '+'
    else
      # Work on byte values; indexing the match yields an Integer only on
      # ruby 1.8.
      c.unpack("C*").map { |b| "%" + hx[b >> 4] + hx[b & 0xf] }.join
    end
  end
end

#xor(k) ⇒ `Object`

xor against a key. key will be repeated or truncated to self.size.

# File 'lib/rbkb/extends.rb', line 198

# XORs the receiver against a key, byte by byte.
#
# k:: key string. It is repeated when shorter than the receiver and its
#     excess is ignored when longer.
#
# Returns a new binary string of the same byte length as the receiver.
# Raises ArgumentError when the key is empty but the receiver is not.
def xor(k)
  # Byte values are used on both sides; indexing a String yields an Integer
  # only on ruby 1.8.
  key = k.unpack("C*")
  klen = key.size
  raise ArgumentError, "xor key must not be empty" if klen == 0 and not empty?

  mixed = []
  unpack("C*").each_with_index { |b, i| mixed << (b ^ key[i % klen]) }
  mixed.pack("C*")
end

Class: String

Overview

Direct Known Subclasses

Instance Method Summary collapse

Instance Method Details

#^(x) ⇒ Object

#b64(len = nil) ⇒ Object

#bgrep(find, align = nil) ⇒ Object

#blit(idx = 0) ⇒ Object

#camelize ⇒ Object

#camelize_meth ⇒ Object

#class_name ⇒ Object

#const_lookup(ns = Object) ⇒ Object

#crc32 ⇒ Object

#cstring(off = 0) ⇒ Object

#d64 ⇒ Object

#dat_to_num(order = :big) ⇒ Object Also known as: lazy_to_n, lazy_to_num, dat_to_n

#decamelize ⇒ Object

#dehexdump(opt = {}) ⇒ Object Also known as: dedump, undump, unhexdump

#entropy ⇒ Object

#hex_to_num(order = :big) ⇒ Object

#hexdump(opt = {}) ⇒ Object

#hexify(opts = {}) ⇒ Object

#ishex? ⇒ Boolean

#lalign(a, p = ' ') ⇒ Object

#pipe_magick(arg = "") ⇒ Object

#ralign(a, p = ' ') ⇒ Object

#randomize ⇒ Object

#randomize! ⇒ Object

#rotate_bytes(k = 0) ⇒ Object

#starts_with?(dat) ⇒ Boolean

#strings(opts = {}) ⇒ Object

Parameters and options:

Return Value:

#to_stringio ⇒ Object

#unhexify(d = /\s*/) ⇒ Object

#urldec(opts = {}) ⇒ Object

#urlenc(opts = {}) ⇒ Object

#xor(k) ⇒ Object

#^(x) ⇒ `Object`

#b64(len = nil) ⇒ `Object`

#bgrep(find, align = nil) ⇒ `Object`

#blit(idx = 0) ⇒ `Object`

#camelize ⇒ `Object`

#camelize_meth ⇒ `Object`

#class_name ⇒ `Object`

#const_lookup(ns = Object) ⇒ `Object`

#crc32 ⇒ `Object`

#cstring(off = 0) ⇒ `Object`

#d64 ⇒ `Object`

#dat_to_num(order = :big) ⇒ `Object` Also known as: lazy_to_n, lazy_to_num, dat_to_n

#decamelize ⇒ `Object`

#dehexdump(opt = {}) ⇒ `Object` Also known as: dedump, undump, unhexdump

#entropy ⇒ `Object`

#hex_to_num(order = :big) ⇒ `Object`

#hexdump(opt = {}) ⇒ `Object`

#hexify(opts = {}) ⇒ `Object`

#ishex? ⇒ `Boolean`

#lalign(a, p = ' ') ⇒ `Object`

#pipe_magick(arg = "") ⇒ `Object`

#ralign(a, p = ' ') ⇒ `Object`

#randomize ⇒ `Object`

#randomize! ⇒ `Object`

#rotate_bytes(k = 0) ⇒ `Object`

#starts_with?(dat) ⇒ `Boolean`

#strings(opts = {}) ⇒ `Object`

#to_stringio ⇒ `Object`

#unhexify(d = /\s*/) ⇒ `Object`

#urldec(opts = {}) ⇒ `Object`

#urlenc(opts = {}) ⇒ `Object`

#xor(k) ⇒ `Object`