Module: Rex::Text

Defined in:
lib/rex/text.rb

Overview

This class formats text in various fashions and also provides a mechanism for wrapping text at a given column.

Defined Under Namespace

Classes: IllegalSequence

Constant Summary collapse

TLDs =

Constants

['com', 'net', 'org', 'gov', 'biz', 'edu']
States =
["AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DE", "FL", "GA", "HI",
"IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME", "MI", "MN",
"MO", "MS", "MT", "NC", "ND", "NE", "NH", "NJ", "NM", "NV", "NY", "OH",
"OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VA", "VT", "WA",
"WI", "WV", "WY"]
UpperAlpha =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
LowerAlpha =
"abcdefghijklmnopqrstuvwxyz"
Numerals =
"0123456789"
Base32 =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
Alpha =
UpperAlpha + LowerAlpha
AlphaNumeric =
Alpha + Numerals
HighAscii =
[*(0x80 .. 0xff)].pack("C*")
LowAscii =
[*(0x00 .. 0x1f)].pack("C*")
DefaultWrap =
60
AllChars =
[*(0x00 .. 0xff)].pack("C*")
Punctuation =
( [*(0x21 .. 0x2f)] + [*(0x3a .. 0x3F)] + [*(0x5b .. 0x60)] + [*(0x7b .. 0x7e)] ).flatten.pack("C*")
DefaultPatternSets =
[ Rex::Text::UpperAlpha, Rex::Text::LowerAlpha, Rex::Text::Numerals ]
Iconv_EBCDIC =

In case Iconv isn't loaded

[
  "\x00", "\x01", "\x02", "\x03", "7", "-", ".", "/", "\x16", "\x05",
  "%", "\v", "\f", "\r", "\x0E", "\x0F", "\x10", "\x11", "\x12", "\x13",
  "<", "=", "2", "&", "\x18", "\x19", "?", "'", "\x1C", "\x1D", "\x1E",
  "\x1F", "@", "Z", "\x7F", "{", "[", "l", "P", "}", "M", "]", "\\",
  "N", "k", "`", "K", "a", "\xF0", "\xF1", "\xF2", "\xF3", "\xF4",
  "\xF5", "\xF6", "\xF7", "\xF8", "\xF9", "z", "^", "L", "~", "n", "o",
  "|", "\xC1", "\xC2", "\xC3", "\xC4", "\xC5", "\xC6", "\xC7", "\xC8",
  "\xC9", "\xD1", "\xD2", "\xD3", "\xD4", "\xD5", "\xD6", "\xD7",
  "\xD8", "\xD9", "\xE2", "\xE3", "\xE4", "\xE5", "\xE6", "\xE7",
  "\xE8", "\xE9", nil, "\xE0", nil, nil, "m", "y", "\x81", "\x82",
  "\x83", "\x84", "\x85", "\x86", "\x87", "\x88", "\x89", "\x91",
  "\x92", "\x93", "\x94", "\x95", "\x96", "\x97", "\x98", "\x99",
  "\xA2", "\xA3", "\xA4", "\xA5", "\xA6", "\xA7", "\xA8", "\xA9",
  "\xC0", "O", "\xD0", "\xA1", "\a", nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil
]
Iconv_ASCII =
[
  "\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\a", "\b",
  "\t", "\n", "\v", "\f", "\r", "\x0E", "\x0F", "\x10", "\x11", "\x12",
  "\x13", "\x14", "\x15", "\x16", "\x17", "\x18", "\x19", "\x1A", "\e",
  "\x1C", "\x1D", "\x1E", "\x1F", " ", "!", "\"", "#", "$", "%", "&",
  "'", "(", ")", "*", "+", ",", "-", ".", "/", "0", "1", "2", "3", "4",
  "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?", "@", "A", "B",
  "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P",
  "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", nil, "\\", nil,
  nil, "_", "`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k",
  "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y",
  "z", "{", "|", "}", "~", "\x7F", nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
  nil, nil, nil, nil, nil, nil, nil, nil, nil
]
Surnames =

The 100 most common surnames and male/female first names in the U.S. (names.mongabay.com/)

[
  "adams", "alexander", "allen", "anderson", "bailey", "baker", "barnes",
  "bell", "bennett", "brooks", "brown", "bryant", "butler", "campbell",
  "carter", "clark", "coleman", "collins", "cook", "cooper", "cox",
  "davis", "diaz", "edwards", "evans", "flores", "foster", "garcia",
  "gonzales", "gonzalez", "gray", "green", "griffin", "hall", "harris",
  "hayes", "henderson", "hernandez", "hill", "howard", "hughes", "jackson",
  "james", "jenkins", "johnson", "jones", "kelly", "king", "lee", "lewis",
  "long", "lopez", "martin", "martinez", "miller", "mitchell", "moore",
  "morgan", "morris", "murphy", "nelson", "parker", "patterson", "perez",
  "perry", "peterson", "phillips", "powell", "price", "ramirez", "reed",
  "richardson", "rivera", "roberts", "robinson", "rodriguez", "rogers",
  "ross", "russell", "sanchez", "sanders", "scott", "simmons", "smith",
  "stewart", "taylor", "thomas", "thompson", "torres", "turner", "walker",
  "ward", "washington", "watson", "white", "williams", "wilson", "wood",
  "wright", "young"
]
Names_Male =
[
  "aaron", "adam", "alan", "albert", "andrew", "anthony", "antonio",
  "arthur", "benjamin", "billy", "bobby", "brandon", "brian", "bruce",
  "carl", "carlos", "charles", "chris", "christopher", "clarence", "craig",
  "daniel", "david", "dennis", "donald", "douglas", "earl", "edward",
  "eric", "ernest", "eugene", "frank", "fred", "gary", "george", "gerald",
  "gregory", "harold", "harry", "henry", "howard", "jack", "james", "jason",
  "jeffrey", "jeremy", "jerry", "jesse", "jimmy", "joe", "john", "johnny",
  "jonathan", "jose", "joseph", "joshua", "juan", "justin", "keith",
  "kenneth", "kevin", "larry", "lawrence", "louis", "mark", "martin",
  "matthew", "michael", "nicholas", "patrick", "paul", "peter", "philip",
  "phillip", "ralph", "randy", "raymond", "richard", "robert", "roger",
  "ronald", "roy", "russell", "ryan", "samuel", "scott", "sean", "shawn",
  "stephen", "steve", "steven", "terry", "thomas", "timothy", "todd",
  "victor", "walter", "wayne", "william", "willie"
]
Names_Female =
[
  "alice", "amanda", "amy", "andrea", "angela", "ann", "anna", "anne",
  "annie", "ashley", "barbara", "betty", "beverly", "bonnie", "brenda",
  "carol", "carolyn", "catherine", "cheryl", "christina", "christine",
  "cynthia", "deborah", "debra", "denise", "diana", "diane", "donna",
  "doris", "dorothy", "elizabeth", "emily", "evelyn", "frances", "gloria",
  "heather", "helen", "irene", "jacqueline", "jane", "janet", "janice",
  "jean", "jennifer", "jessica", "joan", "joyce", "judith", "judy", "julia",
  "julie", "karen", "katherine", "kathleen", "kathryn", "kathy", "kelly",
  "kimberly", "laura", "lillian", "linda", "lisa", "lois", "lori", "louise",
  "margaret", "maria", "marie", "marilyn", "martha", "mary", "melissa",
  "michelle", "mildred", "nancy", "nicole", "norma", "pamela", "patricia",
  "paula", "phyllis", "rachel", "rebecca", "robin", "rose", "ruby", "ruth",
  "sandra", "sara", "sarah", "sharon", "shirley", "stephanie", "susan",
  "tammy", "teresa", "theresa", "tina", "virginia", "wanda"
]
@@codepage_map_cache =
nil

Class Method Summary collapse

Class Method Details

.ascii_safe_hex(str, whitespace = false) ⇒ Object

Turn non-printable chars into hex representations, leaving others alone

If whitespace is true, converts whitespace (0x20, 0x09, etc) to hex as well.


979
980
981
982
983
984
985
# File 'lib/rex/text.rb', line 979

# Replaces non-printable bytes in +str+ with a literal "\xNN" escape and
# leaves every other byte untouched.
#
# Control bytes (except tab, LF and CR) and all bytes >= 0x80 are always
# escaped. When +whitespace+ is true the range 0x00-0x20 is escaped as a
# whole, which additionally covers tab, LF, CR and space.
#
# Returns a new string.
def self.ascii_safe_hex(str, whitespace=false)
  unsafe =
    if whitespace
      /([\x00-\x20\x80-\xFF])/n
    else
      /([\x00-\x08\x0b\x0c\x0e-\x1f\x80-\xFF])/n
    end

  str.gsub(unsafe) { |byte| "\\x%.2x" % byte.unpack("C").first }
end

.b32decode(bytes_in) ⇒ Object

Base32 decoder


1107
1108
1109
1110
1111
1112
1113
# File 'lib/rex/text.rb', line 1107

# Decodes one Base32 group.
#
# +bytes_in+ is an array of byte values (Base32 symbols, optionally followed
# by '=' padding). Returns an array of one-character strings, one per
# decoded byte.
def self.b32decode(bytes_in)
  symbols = bytes_in.take_while { |byte| byte != 61 } # 61 is "=", the padding symbol
  out_len = (symbols.length * 5.0 / 8.0).floor
  # A short group carries surplus low-order bits that are shifted away below.
  surplus = symbols.length < 8 ? 5 - (out_len * 8) % 5 : 0

  acc = 0
  symbols.each { |byte| acc = (acc << 5) + Base32.index(byte.chr) }
  acc >>= surplus

  (out_len - 1).downto(0).map { |pos| ((acc >> pos * 8) & 0xff).chr }
end

.b32encode(bytes_in) ⇒ Object

Base32 encoder


1082
1083
1084
1085
1086
1087
1088
# File 'lib/rex/text.rb', line 1082

# Encodes one group of input bytes as Base32.
#
# +bytes_in+ is an array of byte values. Returns a two-element array: the
# list of Base32 symbols (one-character strings) and the '=' padding string
# that fills the group up to 8 symbols.
def self.b32encode(bytes_in)
  symbol_count = (bytes_in.length * 8.0 / 5.0).ceil
  # Short groups are left-shifted so the bit count is a multiple of 5.
  fill = symbol_count < 8 ? 5 - (bytes_in.length * 8) % 5 : 0

  acc = 0
  bytes_in.each { |byte| acc = (acc << 8) + byte }
  acc <<= fill

  symbols = (symbol_count - 1).downto(0).map { |pos| Base32[(acc >> pos * 5) & 0x1f].chr }
  [symbols, "=" * (8 - symbol_count)]
end

.badchar_index(data, badchars = '') ⇒ Fixnum?

Return the index of the first badchar in data, or nil if data contains no badchar occurrences.


1492
1493
1494
1495
1496
1497
1498
# File 'lib/rex/text.rb', line 1492

# Returns the index of the first bad character in +data+, or nil when
# +data+ contains none of them.
#
# +badchars+ is a string whose bytes are the forbidden characters. Every
# bad character is looked up in +data+ and the smallest index wins, so the
# result is the earliest offending position regardless of the order in
# which the characters are listed in +badchars+.
def self.badchar_index(data, badchars = '')
  positions = badchars.unpack("C*").map { |badchar| data.index(badchar.chr) }
  # compact drops the characters that never occur; min of [] is nil.
  positions.compact.min
end

.charset_exclude(keepers) ⇒ String

Returns all chars that are not in the supplied set


1516
1517
1518
# File 'lib/rex/text.rb', line 1516

# Returns a string made of every byte value (0x00-0xff) that does not
# occur in +keepers+.
#
# The bytes of +keepers+ are taken literally: characters such as "-", "^"
# and "\\" carry no range/negation meaning.
def self.charset_exclude(keepers)
  ((0..255).to_a - keepers.unpack("C*")).pack("C*")
end

.compress(str) ⇒ String

Compresses a string onto a single line: every run of whitespace (including line breaks) is collapsed to one space, and leading and trailing whitespace is removed.


1381
1382
1383
# File 'lib/rex/text.rb', line 1381

# Squeezes +str+ onto one line: every whitespace run (line breaks
# included) becomes a single space, then the leading and trailing space
# left over from that collapse is removed.
def self.compress(str)
  single_spaced = str.gsub(/\s+/, ' ')
  single_spaced.sub(/\A /, '').sub(/ \z/, '')
end

.decode_base32(str) ⇒ Object


1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
# File 'lib/rex/text.rb', line 1115

# Decodes a Base32 string by feeding it to b32decode in groups of
# 8 symbols and concatenating the decoded groups.
def self.decode_base32(str)
  decoded = ''
  str.bytes.each_slice(8) do |group|
    decoded << b32decode(group).flatten.join
  end
  decoded
end

.decode_base64(str) ⇒ Object

Base64 decoder


1139
1140
1141
# File 'lib/rex/text.rb', line 1139

# Decodes Base64 text. +str+ is converted with to_s first, so nil decodes
# to an empty string.
def self.decode_base64(str)
  encoded = str.to_s
  encoded.unpack("m").first
end

.dehex(str) ⇒ Object

Convert hex-encoded characters to literals.

Examples:

Rex::Text.dehex("AA\\x42CC") # => "AABCC"

See Also:


1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
# File 'lib/rex/text.rb', line 1179

# Turns every literal "\xNN" escape (case-insensitive) in +str+ into the
# byte it names and returns the converted copy.
#
# Objects that do not respond to both match and gsub are returned
# untouched, as is a string that contains no escape at all.
def self.dehex(str)
  return str unless str.respond_to?(:match) && str.respond_to?(:gsub)

  escape = /\x5cx[0-9a-f]{2}/nmi
  return str unless str.match(escape)

  str.gsub(escape) { |hit| hit[2,2].to_i(16).chr }
end

.dehex!(str) ⇒ Object

Convert and replace hex-encoded characters to literals.


1194
1195
1196
1197
1198
1199
# File 'lib/rex/text.rb', line 1194

# In-place variant of dehex: rewrites every literal "\xNN" escape inside
# +str+ itself.
#
# Returns whatever String#gsub! returns (nil when nothing was replaced).
# Objects that do not respond to both match and gsub are returned as-is.
def self.dehex!(str)
  return str unless str.respond_to?(:match) && str.respond_to?(:gsub)

  str.gsub!(/\x5cx[0-9a-f]{2}/nmi) { |hit| hit[2,2].to_i(16).chr }
end

.encode_base32(str) ⇒ Object


1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
# File 'lib/rex/text.rb', line 1090

# Encodes +str+ as Base32 by feeding it to b32encode in groups of 5 bytes
# and concatenating the encoded groups.
def self.encode_base32(str)
  encoded = ''
  str.bytes.each_slice(5) do |group|
    encoded << b32encode(group).flatten.join
  end
  encoded
end

.encode_base64(str, delim = '') ⇒ Object

Base64 encoder


1132
1133
1134
# File 'lib/rex/text.rb', line 1132

# Base64-encodes +str+ (after to_s). Every whitespace run produced by the
# encoder, including the trailing newline, is replaced with +delim+.
def self.encode_base64(str, delim='')
  wrapped = [str.to_s].pack("m")
  wrapped.gsub(/\s+/, delim)
end

.from_ebcdic(str) ⇒ Object

Converts EBCDIC to ASCII


449
450
451
452
453
454
455
456
457
# File 'lib/rex/text.rb', line 449

# Converts an EBCDIC string to ASCII.
#
# Uses Iconv when it is loaded and falls back to the native table
# implementation (from_ebcdic_rex) otherwise, or when Iconv fails for any
# reason other than an illegal input sequence.
#
# Raises ::Iconv::IllegalSequence when Iconv rejects the input.
def self.from_ebcdic(str)
  # Without this guard the rescue clause below would itself raise NameError
  # while resolving ::Iconv::IllegalSequence, and the fallback would never run.
  return from_ebcdic_rex(str) unless defined?(::Iconv)

  begin
    ::Iconv.iconv("ASCII", "EBCDIC-US", str).first
  rescue ::Iconv::IllegalSequence
    raise
  rescue
    from_ebcdic_rex(str)
  end
end

.from_ebcdic_rex(str) ⇒ Object

A native implementation of the EBCDIC->ASCII table, used to fall back from using Iconv


424
425
426
427
428
429
430
431
432
433
434
# File 'lib/rex/text.rb', line 424

# Table-driven EBCDIC -> ASCII conversion used when Iconv is unavailable.
#
# Each input byte is looked up in Iconv_EBCDIC and replaced with the entry
# at the same position of Iconv_ASCII.
#
# Raises Rex::Text::IllegalSequence for a byte missing from Iconv_EBCDIC.
def self.from_ebcdic_rex(str)
  converted = str.each_byte.map do |byte|
    slot = Iconv_EBCDIC.index(byte.chr)
    raise Rex::Text::IllegalSequence, ("\\x%x" % byte) unless slot
    Iconv_ASCII[slot]
  end
  converted.join
end

.gzip(str, level = 9) ⇒ String

Compresses a string using gzip

Raises:


1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
# File 'lib/rex/text.rb', line 1456

# Gzip-compresses +str+ and returns the compressed bytes.
#
# +level+ is the compression level and must lie between 1 and 9.
#
# Raises RuntimeError when zlib_present? is false or the level is out of
# range.
def self.gzip(str, level = 9)
  raise RuntimeError, "Gzip support is not present." unless zlib_present?
  raise RuntimeError, "Invalid gzip compression level" if level < 1 || level > 9

  # The writer appends into this buffer through the StringIO wrapper.
  sink = ""
  sink.force_encoding('ASCII-8BIT') if sink.respond_to?(:encoding)

  writer = Zlib::GzipWriter.new(StringIO.new(sink, 'wb'), level)
  writer << str
  writer.close
  sink
end

.gzip_present?Boolean

backwards compat for just a bit…


1412
1413
1414
# File 'lib/rex/text.rb', line 1412

# Backwards-compatible alias: simply reports what zlib_present? reports.
def self.gzip_present?
  zlib_present?
end

.hex_to_raw(str) ⇒ Object

Converts a hex string to a raw string

Examples:

Rex::Text.hex_to_raw("\\x41\\x7f\\x42") # => "A\x7fB"

966
967
968
# File 'lib/rex/text.rb', line 966

# Converts a hex-escaped string such as "\\x41\\x7f" into the raw bytes it
# describes.
#
# The input is lower-cased, single quotes are dropped and every "\xNN" /
# "xNN" escape is reduced to its two hex digits before the digits are
# packed into bytes.
def self.hex_to_raw(str)
  digits = str.downcase.delete("'").gsub(/\\?x([a-f0-9][a-f0-9])/, '\1')
  [digits].pack("H*")
end

.hexify(str, col = DefaultWrap, line_start = '', line_end = '', buf_start = '', buf_end = '') ⇒ Object

Converts a string to a hex version with wrapping support


998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
# File 'lib/rex/text.rb', line 998

# Converts +str+ into "\xNN" escapes, wrapped into lines.
#
# str        - the raw bytes to convert
# col        - column at which a line is closed
# line_start - text emitted at the start of every line
# line_end   - text emitted at the end of every wrapped line
# buf_start  - text emitted once before the first line
# buf_end    - text emitted once after the last byte
#
# Returns a new string; +buf_start+ is copied, never appended to.
def self.hexify(str, col = DefaultWrap, line_start = '', line_end = '', buf_start = '', buf_end = '')
  output   = buf_start.dup # work on a copy so the caller's string is not modified
  total    = str.bytesize  # bytes are iterated below, so count bytes, not characters
  cur      = 0
  count    = 0
  new_line = true

  str.each_byte do |byte|
    count += 1
    append = ''

    # A fresh line starts with the line prefix.
    if new_line
      append << line_start
      new_line = false
    end

    append << sprintf("\\x%.2x", byte)
    cur += append.length

    # Close the line once the column is reached or passed.
    if (cur + line_end.length >= col) || (cur + buf_end.length >= col)
      new_line = true
      cur      = 0

      # The very last byte closes the whole buffer instead of the line.
      append << (count == total ? buf_end : line_end) << "\n"
    end

    output << append
  end

  # A line that was still open gets the buffer terminator.
  output << buf_end + "\n" unless new_line

  output
end

.html_decode(str) ⇒ Object

Decode a string that's html encoded


852
853
854
855
# File 'lib/rex/text.rb', line 852

# Decodes HTML entities in +str+ by delegating to CGI.unescapeHTML.
def self.html_decode(str)
  CGI.unescapeHTML(str)
end

.html_encode(str, mode = 'hex') ⇒ String

Encode every byte of a string as an HTML numeric character reference (hexadecimal, decimal, or zero-padded decimal, depending on mode).

Raises:

  • (TypeError)

    if mode is not one of the three available modes


836
837
838
839
840
841
842
843
844
845
846
847
# File 'lib/rex/text.rb', line 836

# Encodes every byte of +str+ as an HTML numeric character reference.
#
# mode - 'hex' (&#xNN;), 'int' (&#N;) or 'int-wide' (&#NNNNNNN;, zero
#        padded to seven digits)
#
# Raises TypeError for any other mode.
def self.html_encode(str, mode = 'hex')
  entity =
    case mode
    when 'hex'
      lambda { |code| "&#x" + ("%.2x" % code) + ";" }
    when 'int'
      lambda { |code| "&#" + code.to_s + ";" }
    when 'int-wide'
      lambda { |code| "&#" + code.to_s.rjust(7, "0") + ";" }
    else
      raise TypeError, 'invalid mode'
    end

  str.unpack('C*').map(&entity).join
end

.md5(str) ⇒ Object

Hexadecimal MD5 digest of the supplied string


1153
1154
1155
# File 'lib/rex/text.rb', line 1153

# Returns the MD5 digest of +str+ as a lowercase hexadecimal string.
def self.md5(str)
  hasher = Digest::MD5.new
  hasher.update(str)
  hasher.hexdigest
end

.md5_raw(str) ⇒ Object

Raw MD5 digest of the supplied string


1146
1147
1148
# File 'lib/rex/text.rb', line 1146

# Returns the MD5 digest of +str+ as 16 raw bytes.
def self.md5_raw(str)
  hasher = Digest::MD5.new
  hasher.update(str)
  hasher.digest
end

.pack_int64le(val) ⇒ Object

Pack a value as 64 bit little endian; does not exist for Array.pack


1684
1685
1686
# File 'lib/rex/text.rb', line 1684

# Packs +val+ as eight little-endian bytes: the low 32 bits first,
# followed by everything above bit 31.
def self.pack_int64le(val)
  low_dword  = val & 0x00000000ffffffff
  high_dword = val >> 32
  [low_dword, high_dword].pack("VV")
end

.patt2(len, sets = nil) ⇒ Object

Step through an arbitrary number of sets of bytes to build up a findable pattern. This is mostly useful for experimentally determining offset lengths into memory structures. Note that the supplied sets should never contain duplicate bytes, or else it can become impossible to measure the offset accurately.


1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
# File 'lib/rex/text.rb', line 1337

# NOTE(review): this method is an unfinished stub. The loop below never
# appends to +buf+, so the return value is always an empty string,
# whatever +len+ and +sets+ are.
#
# len  - requested pattern length
# sets - array of character-set strings; defaults to upper alpha, lower
#        alpha and numerals
def self.patt2(len, sets = nil)
  buf = ""
  counter = []
  sets ||= [ UpperAlpha, LowerAlpha, Numerals ]
  # Only takes effect when len is nil (nil.to_i is 0); other values are kept.
  len ||= len.to_i
  return "" if len.zero?

  # Break every set into single characters and start one counter per set at 0.
  sets = sets.map {|a| a.split(//)}
  sets.size.times { counter << 0}
  0.upto(len-1) do |i|
    # Index of the set this position would draw from; computed but unused.
    setnum = i % sets.size

    #puts counter.inspect
  end

  return buf
end

.pattern_create(length, sets = nil) ⇒ String

Creates a pattern that can be used for offset calculation purposes. This routine is capable of generating patterns using a supplied set and a supplied number of identifiable characters (slots). The supplied sets should not contain any duplicate characters or the logic will fail.

See Also:


1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
# File 'lib/rex/text.rb', line 1306

# Creates a pattern string of +length+ characters for offset calculation.
#
# length - number of characters wanted; anything below 1 yields ""
# sets   - array of character-set strings; defaults to upper alpha, lower
#          alpha and numerals when nil. The sets should not contain
#          duplicate characters.
#
# Returns the pattern truncated to exactly +length+ characters.
def self.pattern_create(length, sets = nil)
  # Make sure there's something in sets even if we were given an explicit nil
  sets ||= [ UpperAlpha, LowerAlpha, Numerals ]

  # Degenerate requests: nothing to build, or only one possible character.
  return "" if length.to_i < 1
  return sets[0][0].chr * length if sets.size == 1 && sets[0].size == 1

  offsets = Array.new(sets.length, 0)
  buf = ''

  # Keep appending converged output until enough characters exist. The loop
  # only exits once buf is long enough, so no top-up step is needed after it.
  buf << converge_sets(sets, 0, offsets, length) until buf.length >= length

  buf[0, length]
end

.pattern_offset(pattern, value, start = 0) ⇒ Fixnum?

Calculate the offset to a pattern

See Also:


1364
1365
1366
1367
1368
1369
1370
1371
1372
# File 'lib/rex/text.rb', line 1364

# Finds the offset of +value+ inside +pattern+.
#
# pattern - the pattern string to search
# value   - either the substring itself, or an Integer that is packed as a
#           32-bit little-endian dword before searching
# start   - index at which the search begins
#
# Returns the index of the match or nil when there is none.
# Raises ::ArgumentError when +value+ is neither a String nor an Integer.
def self.pattern_offset(pattern, value, start=0)
  case value
  when String
    pattern.index(value, start)
  when Integer
    # Integer covers both former Fixnum and Bignum values.
    pattern.index([ value ].pack('V'), start)
  else
    raise ::ArgumentError, "Invalid class for value: #{value.class}"
  end
end

.permute_case(word, idx = 0) ⇒ Object

Permute the case of a word


1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
# File 'lib/rex/text.rb', line 1553

# Returns every upper/lower-case permutation of +word+, working on the
# characters from position +idx+ onwards.
#
# Characters that are not found in UpperAlpha + LowerAlpha are left as
# they are.
def self.permute_case(word, idx=0)
  # Past the last character: the word is complete.
  return [word] if idx == word.length

  letter = word[idx, 1]
  return permute_case(word, idx + 1) unless (UpperAlpha+LowerAlpha).index(letter)

  # Branch into the upper-case variant first, then the lower-case one.
  [letter.upcase, letter.downcase].flat_map do |variant|
    candidate = word.dup
    candidate[idx, 1] = variant
    permute_case(candidate, idx + 1)
  end
end

.rand_base(len, bad, *foo) ⇒ Object

Base text generator method


1214
1215
1216
1217
1218
1219
1220
# File 'lib/rex/text.rb', line 1214

# Core random text generator.
#
# len - number of bytes to generate
# bad - bytes that must not appear in the output (converted with to_s)
# foo - the allowed characters; all arguments are joined into one pool
#
# Returns +len+ bytes drawn at random from the pool minus the bad bytes,
# or "" when that pool is empty.
def self.rand_base(len, bad, *foo)
  pool = (foo.join.unpack("C*") - bad.to_s.unpack("C*")).uniq
  return "" if pool.empty?

  len.times.map { pool[rand(pool.length)] }.pack("C*")
end

.rand_char(bad, chars = AllChars) ⇒ Object

Generates a random character.


1209
1210
1211
# File 'lib/rex/text.rb', line 1209

# Generates a single random character: a one-byte rand_text drawn from
# +chars+ with the +bad+ characters excluded.
def self.rand_char(bad, chars = AllChars)
  wanted = 1
  rand_text(wanted, bad, chars)
end

.rand_guidString

Generate a random GUID

Examples:

Rex::Text.rand_guid # => "{ca776ced-4ab8-2ed6-6510-aa71e5e2508e}"

1289
1290
1291
# File 'lib/rex/text.rb', line 1289

# Generates a random GUID-shaped string: five groups of random hex digits
# (8-4-4-4-12) joined with dashes and wrapped in braces.
def self.rand_guid
  groups = [8,4,4,4,12].map { |width| rand_text_hex(width) }
  "{" + groups.join("-") + "}"
end

.rand_hostnameString

Generate a random hostname


1580
1581
1582
1583
1584
1585
1586
1587
# File 'lib/rex/text.rb', line 1580

# Generates a random lower-case hostname: one to five random alphanumeric
# labels (1-10 characters each) followed by a TLD sampled from TLDs.
def self.rand_hostname
  labels = Array.new(rand(5) + 1) do
    Rex::Text.rand_text_alphanumeric(rand(10) + 1)
  end
  labels << TLDs.sample
  labels.join('.').downcase
end

.rand_mail_addressObject

Generate a random mail address


1619
1620
1621
1622
1623
1624
1625
1626
# File 'lib/rex/text.rb', line 1619

# Generates a random mail address of the form
# <name>.<surname>@<hostname>, built from Rex::Text.rand_name,
# Rex::Text.rand_surname and Rex::Text.rand_hostname.
def self.rand_mail_address
  first_name = Rex::Text.rand_name
  last_name  = Rex::Text.rand_surname
  host       = Rex::Text.rand_hostname
  "#{first_name}.#{last_name}@#{host}"
end

.rand_nameObject

Generate a name


1600
1601
1602
1603
1604
1605
1606
# File 'lib/rex/text.rb', line 1600

# Picks a random first name: a coin flip (rand(10) being even or odd)
# chooses between the Names_Male and Names_Female lists.
def self.rand_name
  return Names_Male.sample if rand(10).even?

  Names_Female.sample
end

.rand_name_femaleObject

Generate a female name


1614
1615
1616
# File 'lib/rex/text.rb', line 1614

# Returns one randomly sampled entry of the Names_Female list.
def self.rand_name_female
  Names_Female.sample
end

.rand_name_maleObject

Generate a male name


1609
1610
1611
# File 'lib/rex/text.rb', line 1609

# Returns one randomly sampled entry of the Names_Male list.
def self.rand_name_male
  Names_Male.sample
end

.rand_stateObject

Generate a state


1590
1591
1592
# File 'lib/rex/text.rb', line 1590

# Returns one randomly sampled entry of the States list (two-letter
# abbreviations).
def self.rand_state()
  States.sample
end

.rand_surnameObject

Generate a surname


1595
1596
1597
# File 'lib/rex/text.rb', line 1595

# Returns one randomly sampled entry of the Surnames list.
def self.rand_surname
  Surnames.sample
end

.rand_text(len, bad = '', chars = AllChars) ⇒ Object

Generate random bytes of data


1223
1224
1225
1226
# File 'lib/rex/text.rb', line 1223

# Generates +len+ random bytes drawn from +chars+ (every byte value by
# default), never using a byte listed in +bad+.
def self.rand_text(len, bad='', chars = AllChars)
  pool = chars.split('')
  rand_base(len, bad, *pool)
end

.rand_text_alpha(len, bad = '') ⇒ Object

Generate random bytes of alpha data


1229
1230
1231
1232
1233
1234
# File 'lib/rex/text.rb', line 1229

# Generates +len+ random letters (A-Z, a-z), skipping any character
# listed in +bad+.
def self.rand_text_alpha(len, bad='')
  letters = [*('A' .. 'Z'), *('a' .. 'z')]
  rand_base(len, bad, *letters)
end

.rand_text_alpha_lower(len, bad = '') ⇒ Object

Generate random bytes of lowercase alpha data


1237
1238
1239
# File 'lib/rex/text.rb', line 1237

# Generates +len+ random lower-case letters (a-z), skipping any character
# listed in +bad+.
def self.rand_text_alpha_lower(len, bad='')
  lowercase = ('a' .. 'z').to_a
  rand_base(len, bad, *lowercase)
end

.rand_text_alpha_upper(len, bad = '') ⇒ Object

Generate random bytes of uppercase alpha data


1242
1243
1244
# File 'lib/rex/text.rb', line 1242

# Generates +len+ random upper-case letters (A-Z), skipping any character
# listed in +bad+.
def self.rand_text_alpha_upper(len, bad='')
  uppercase = ('A' .. 'Z').to_a
  rand_base(len, bad, *uppercase)
end

.rand_text_alphanumeric(len, bad = '') ⇒ Object

Generate random bytes of alphanumeric data


1247
1248
1249
1250
1251
1252
1253
# File 'lib/rex/text.rb', line 1247

# Generates +len+ random alphanumeric characters (A-Z, a-z, 0-9), skipping
# any character listed in +bad+.
def self.rand_text_alphanumeric(len, bad='')
  alnum = [*('A' .. 'Z'), *('a' .. 'z'), *('0' .. '9')]
  rand_base(len, bad, *alnum)
end

.rand_text_english(len, bad = '') ⇒ Object

Generate random bytes of english-like data


1270
1271
1272
1273
1274
# File 'lib/rex/text.rb', line 1270

# Generates +len+ random printable, non-space ASCII characters
# (0x21-0x7e), skipping any character listed in +bad+.
def self.rand_text_english(len, bad='')
  printable = (0x21 .. 0x7e).map(&:chr)
  rand_base(len, bad, *printable)
end

.rand_text_hex(len, bad = '') ⇒ Object

Generate random bytes of alphanumeric hex.


1256
1257
1258
1259
1260
1261
# File 'lib/rex/text.rb', line 1256

# Generates +len+ random lower-case hex digits (0-9, a-f), skipping any
# character listed in +bad+.
def self.rand_text_hex(len, bad='')
  hex_digits = [*('0' .. '9'), *('a' .. 'f')]
  rand_base(len, bad, *hex_digits)
end

.rand_text_highascii(len, bad = '') ⇒ Object

Generate random bytes of high ascii data


1277
1278
1279
1280
1281
# File 'lib/rex/text.rb', line 1277

# Generates +len+ random high-ASCII bytes (0x80-0xff), skipping any byte
# listed in +bad+.
def self.rand_text_highascii(len, bad='')
  high_bytes = (0x80 .. 0xff).map(&:chr)
  rand_base(len, bad, *high_bytes)
end

.rand_text_numeric(len, bad = '') ⇒ Object

Generate random bytes of numeric data


1264
1265
1266
1267
# File 'lib/rex/text.rb', line 1264

# Generates +len+ random decimal digits (0-9), skipping any digit listed
# in +bad+.
def self.rand_text_numeric(len, bad='')
  rand_base(len, bad, *('0' .. '9').to_a)
end

.randomize_space(str) ⇒ Object

Randomize the whitespace in a string


1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
# File 'lib/rex/text.rb', line 1388

# Replaces every whitespace run in +str+ with a random run of 2 to 51
# whitespace characters (tab, space, CR, LF). Returns a new string.
def self.randomize_space(str)
  blanks = ["\x09", "\x20", "\x0d", "\x0a"]
  str.gsub(/\s+/) do
    width = rand(50) + 2
    Array.new(width) { blanks.sample }.join
  end
end

.refine(str1, str2) ⇒ Object

Removes noise from 2 Strings and return a refined String version.


473
474
475
476
477
478
479
480
481
482
483
484
# File 'lib/rex/text.rb', line 473

# Returns the part of +str1+ that also occurs in +str2+.
#
# Both strings are split with to_words; the words of +str1+ that are
# missing from +str2+ are dropped and the survivors are joined back
# together. Equal strings are returned as they are.
def self.refine( str1, str2 )
  return str1 if str1 == str2

  ours   = to_words( str1 )
  theirs = to_words( str2 )

  # Words unique to str1 are the noise; removing them leaves what the two
  # strings have in common.
  noise = ours - theirs
  (ours - noise).join
end

.remove_badchars(data, badchars = '') ⇒ Object

Removes bad characters from a string.

Returns a copy of data with the bad characters removed; data itself is left unchanged.


1507
1508
1509
# File 'lib/rex/text.rb', line 1507

# Returns a copy of +data+ with every byte listed in +badchars+ removed.
#
# The bytes of +badchars+ are taken literally: characters such as "^", "-"
# and "\\" have no negation/range meaning. +data+ itself is not modified;
# the result keeps the encoding of +data+.
def self.remove_badchars(data, badchars = '')
  # 256-entry lookup table: banned[b] is true for a forbidden byte value.
  banned = Array.new(256, false)
  badchars.each_byte { |byte| banned[byte] = true }

  kept = data.each_byte.reject { |byte| banned[byte] }
  kept.pack("C*").force_encoding(data.encoding)
end

.rol(val, cnt) ⇒ Object

Rotate a 32-bit value to the left by cnt bits


1658
1659
1660
1661
1662
1663
1664
# File 'lib/rex/text.rb', line 1658

# Rotates the 32-bit value +val+ left by +cnt+ bits.
#
# The value is expanded into its 32 binary digits, the digit list is
# rotated, and the digits are packed back into an integer. A +cnt+ below 1
# leaves the value unrotated.
def self.rol(val, cnt)
  digits  = [val].pack("N").unpack("B32").first.split('')
  rotated = digits.rotate([cnt, 0].max)
  [rotated.join].pack("B32").unpack("N").first
end

.ror(val, cnt) ⇒ Object

Rotate a 32-bit value to the right by cnt bits


1644
1645
1646
1647
1648
1649
1650
# File 'lib/rex/text.rb', line 1644

# Rotates the 32-bit value +val+ right by +cnt+ bits.
#
# The value is expanded into its 32 binary digits, the digit list is
# rotated, and the digits are packed back into an integer. A +cnt+ below 1
# leaves the value unrotated.
def self.ror(val, cnt)
  digits  = [val].pack("N").unpack("B32").first.split('')
  rotated = digits.rotate(-[cnt, 0].max)
  [rotated.join].pack("B32").unpack("N").first
end

.ror13_hash(name) ⇒ Fixnum

Calculate the ROR13 hash of a given string


1633
1634
1635
1636
1637
# File 'lib/rex/text.rb', line 1633

# Calculates the ROR13 hash of +name+.
#
# For every byte the running hash is rotated right by 13 bits and the byte
# is added. The sum is masked to 32 bits after every step, so the returned
# value always fits in 32 bits, including after the final addition.
def self.ror13_hash(name)
  name.unpack("C*").inject(0) do |hash, byte|
    (ror(hash, 13) + byte) & 0xffffffff
  end
end

.sha1(str) ⇒ Object

Hexadecimal SHA1 digest of the supplied string


1167
1168
1169
# File 'lib/rex/text.rb', line 1167

# Returns the SHA1 digest of +str+ as a lowercase hexadecimal string.
def self.sha1(str)
  hasher = Digest::SHA1.new
  hasher.update(str)
  hasher.hexdigest
end

.sha1_raw(str) ⇒ Object

Raw SHA1 digest of the supplied string


1160
1161
1162
# File 'lib/rex/text.rb', line 1160

# Returns the SHA1 digest of +str+ as 20 raw bytes.
def self.sha1_raw(str)
  hasher = Digest::SHA1.new
  hasher.update(str)
  hasher.digest
end

.shuffle_a(arr) ⇒ Array

Performs a Fisher-Yates shuffle on an array

Modifies arr in place


1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
# File 'lib/rex/text.rb', line 1537

# Shuffles +arr+ in place with a Fisher-Yates pass and returns it.
#
# Walking the positions front to back, each element is swapped with a
# randomly chosen element at the same or an earlier position.
def self.shuffle_a(arr)
  arr.length.times do |pos|
    partner = rand(pos + 1)
    next if partner == pos
    arr[pos], arr[partner] = arr[partner], arr[pos]
  end
  arr
end

.shuffle_s(str) ⇒ String

Shuffles a byte stream

See Also:


1526
1527
1528
# File 'lib/rex/text.rb', line 1526

# Shuffles the bytes of +str+: the string is unpacked into byte values,
# shuffled with shuffle_a and packed back into a new string.
def self.shuffle_s(str)
  byte_values = str.unpack("C*")
  shuffle_a(byte_values).pack("C*")
end

.split_to_a(str, n) ⇒ Object

Split a string by n character into an array


1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
# File 'lib/rex/text.rb', line 1669

# Splits +str+ into an array of pieces of +n+ characters each (the last
# piece may be shorter).
#
# When +n+ is not positive the string itself is returned; an empty string
# with a positive +n+ yields nil. +str+ is never modified.
def self.split_to_a(str, n)
  return str unless n > 0

  remainder = str.dup
  pieces = remainder.empty? ? nil : []
  pieces << remainder.slice!(0, n) until remainder.empty?
  pieces
end

.to_ascii(str = '', type = 'utf-16le', mode = '', size = '') ⇒ Object

Converts a unicode string to standard ASCII text.


759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
# File 'lib/rex/text.rb', line 759

# Converts a unicode string to plain single-byte text by unpacking its
# code units and re-packing each one as a single byte.
#
# str  - the encoded input; nil/false yields ''
# type - 'utf-16le', 'utf-16be', 'utf-32le' or 'utf-32be'
# mode, size - accepted but not used by this method
#
# Raises TypeError for the recognised-but-unimplemented types ('utf-7',
# 'utf-8', 'uhwtfms', 'uhwtfms-half') and for any unknown type.
def self.to_ascii(str='', type = 'utf-16le', mode = '', size = '')
  return '' unless str

  unpack_directives = {
    'utf-16le' => 'v*',
    'utf-16be' => 'n*',
    'utf-32le' => 'V*',
    'utf-32be' => 'N*'
  }
  directive = unpack_directives[type]
  return str.unpack(directive).pack('C*') if directive

  unimplemented = ['utf-7', 'utf-8', 'uhwtfms', 'uhwtfms-half']
  raise TypeError, 'invalid utf type, not yet implemented' if unimplemented.include?(type)

  raise TypeError, 'invalid utf type'
end

.to_bash(str, wrap = DefaultWrap, name = "buf") ⇒ Object

Converts a raw string into a Bash buffer


277
278
279
# File 'lib/rex/text.rb', line 277

# Renders +str+ as a Bash buffer: an "export <name>=\" header followed by
# $'...' quoted hex lines, produced by hexify.
def self.to_bash(str, wrap = DefaultWrap, name = "buf")
  header = "export #{name}=\\\n"
  hexify(str, wrap, '$\'', '\'\\', header, '\'')
end

.to_bash_comment(str, wrap = DefaultWrap) ⇒ Object

Creates a Bash-style comment


375
376
377
# File 'lib/rex/text.rb', line 375

# Renders +str+ as a Bash-style comment by word-wrapping it with "# " in
# front of every line.
def self.to_bash_comment(str, wrap = DefaultWrap)
  comment_prefix = '# '
  wordwrap(str, 0, wrap, '', comment_prefix)
end

.to_c(str, wrap = DefaultWrap, name = "buf") ⇒ Object

Converts a raw string into a C buffer


231
232
233
# File 'lib/rex/text.rb', line 231

# Renders +str+ as a C buffer: an "unsigned char <name>[] =" declaration
# followed by double-quoted hex lines, produced by hexify.
def self.to_c(str, wrap = DefaultWrap, name = "buf")
  declaration = "unsigned char #{name}[] = \n"
  hexify(str, wrap, '"', '"', declaration, '";')
end

.to_c_comment(str, wrap = DefaultWrap) ⇒ Object

Creates a c-style comment


249
250
251
# File 'lib/rex/text.rb', line 249

# Renders +str+ as a C-style block comment: the text is word-wrapped with
# " * " in front of every line and enclosed in "/*" and " */".
def self.to_c_comment(str, wrap = DefaultWrap)
  body = wordwrap(str, 0, wrap, '', ' * ')
  "/*\n" + body + " */\n"
end

.to_csharp(str, wrap = DefaultWrap, name = "buf") ⇒ Object


235
236
237
238
239
240
241
242
243
244
# File 'lib/rex/text.rb', line 235

# Renders +str+ as a C# byte array initialiser.
#
# str  - the raw bytes
# wrap - output width; wrap / 4 bytes are emitted per line (at least one)
# name - name of the generated array variable
#
# Works on the bytes of +str+, so the declared array size and the emitted
# values agree even for multi-byte characters. An empty string yields an
# empty initialiser with both braces in place.
def self.to_csharp(str, wrap = DefaultWrap, name = "buf")
  bytes = str.unpack("C*")
  # Each "0xNN," entry is 5 characters wide; never fall below one per line.
  per_line = [wrap / 4, 1].max

  rows = bytes.each_slice(per_line).map do |row|
    "\n" + row.map { |byte| "0x%02x" % byte }.join(",")
  end

  "byte[] #{name} = new byte[#{bytes.length}] {" + rows.join(",") + " };\n"
end

.to_dword(str, wrap = DefaultWrap) ⇒ Object

Creates a comma separated list of dwords


196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# File 'lib/rex/text.rb', line 196

# Renders +str+ as a comma separated list of 32-bit little-endian dwords
# ("0xNNNNNNNN"), eight per line, with CRLF line endings.
#
# The input is zero-padded to a multiple of four bytes on a copy; +str+
# itself is never modified. +wrap+ is accepted but not used: the line
# length is fixed at eight dwords.
def self.to_dword(str, wrap = DefaultWrap)
  padding = (4 - str.bytesize % 4) % 4
  padded  = str + ("\x00" * padding)

  rows = padded.unpack('V*').each_slice(8).map do |dwords|
    dwords.map { |dword| sprintf('0x%.8x', dword) }.join(', ')
  end

  # A full row keeps its trailing ", " before the line break; only the
  # very last entry has none.
  rows.join(", \r\n") + "\r\n"
end

.to_ebcdic(str) ⇒ Object


436
437
438
439
440
441
442
443
444
# File 'lib/rex/text.rb', line 436

# Converts an ASCII string to EBCDIC.
#
# Uses Iconv when it is loaded and falls back to the native table
# implementation (to_ebcdic_rex) otherwise, or when Iconv fails for any
# reason other than an illegal input sequence.
#
# Raises ::Iconv::IllegalSequence when Iconv rejects the input.
def self.to_ebcdic(str)
  # Without this guard the rescue clause below would itself raise NameError
  # while resolving ::Iconv::IllegalSequence, and the fallback would never run.
  return to_ebcdic_rex(str) unless defined?(::Iconv)

  begin
    ::Iconv.iconv("EBCDIC-US", "ASCII", str).first
  rescue ::Iconv::IllegalSequence
    raise
  rescue
    to_ebcdic_rex(str)
  end
end

.to_ebcdic_rex(str) ⇒ Object

A native implementation of the ASCII->EBCDIC table, used to fall back from using Iconv


410
411
412
413
414
415
416
417
418
419
420
# File 'lib/rex/text.rb', line 410

# Table-driven ASCII -> EBCDIC conversion used when Iconv is unavailable.
#
# Each input byte is looked up in Iconv_ASCII and replaced with the entry
# at the same position of Iconv_EBCDIC.
#
# Raises Rex::Text::IllegalSequence for a byte missing from Iconv_ASCII.
def self.to_ebcdic_rex(str)
  converted = str.each_byte.map do |byte|
    slot = Iconv_ASCII.index(byte.chr)
    raise Rex::Text::IllegalSequence, ("\\x%x" % byte) unless slot
    Iconv_EBCDIC[slot]
  end
  converted.join
end

.to_hex(str, prefix = "\\x", count = 1) ⇒ String

Returns the escaped hex version of the supplied string

Examples:

Rex::Text.to_hex("asdf") # => "\\x61\\x73\\x64\\x66"

Raises:

  • (::RuntimeError)

536
537
538
539
540
541
542
543
544
# File 'lib/rex/text.rb', line 536

# Returns the escaped hex version of the supplied string.
#
# @example
#   Rex::Text.to_hex("asdf") # => "\\x61\\x73\\x64\\x66"
#
# @param str [String] raw bytes to escape
# @param prefix [String] text emitted before every chunk
# @param count [Integer] number of bytes per chunk
# @return [String] each chunk of +count+ bytes as hex digits preceded by
#   +prefix+
# @raise [::RuntimeError] if the byte size of +str+ is not a multiple of
#   +count+
def self.to_hex(str, prefix = "\\x", count = 1)
  # The hex string below has two digits per *byte*, so the chunk check must
  # use the byte size, not the character count.
  raise ::RuntimeError, "unable to chunk into #{count} byte chunks" if ((str.bytesize % count) > 0)

  # XXX: Regexp.new is used here since using /.{#{count}}/o would compile
  # the regex the first time it is used and never check again.  Since we
  # want to know how many to capture on every instance, we do it this
  # way.  The hex digits are plain ASCII, so no encoding option is needed
  # (and the three-argument form of Regexp.new no longer exists in current
  # Ruby releases).
  str.unpack('H*')[0].gsub(Regexp.new(".{#{count * 2}}")) { |s| prefix + s }
end

.to_hex_ascii(str, prefix = "\\x", count = 1, suffix = nil) ⇒ String

Returns the string with nonprintable characters sanitized to escaped hex. Similar to to_hex, but regular ASCII is not translated if count is 1.

Examples:

Rex::Text.to_hex_ascii("\x7fABC\0") # => "\\x7fABC\\x00"

Raises:

  • (::RuntimeError)

559
560
561
562
563
564
# File 'lib/rex/text.rb', line 559

# Returns the string with nonprintable characters replaced by escaped hex.
# Similar to to_hex, but chunks whose numeric value falls in the printable
# ASCII range (0x20..0x7e) are emitted as that character instead.
#
# @example
#   Rex::Text.to_hex_ascii("\x7fABC\0") # => "\\x7fABC\\x00"
#
# @param str [String] raw bytes to sanitize
# @param prefix [String] text emitted before every escaped chunk
# @param count [Integer] number of bytes per chunk
# @param suffix [String, nil] text emitted after every escaped chunk
# @return [String] the sanitized string
# @raise [::RuntimeError] if the byte size of +str+ is not a multiple of
#   +count+
def self.to_hex_ascii(str, prefix = "\\x", count = 1, suffix=nil)
  # Two hex digits are produced per byte, so validate against the byte size.
  raise ::RuntimeError, "unable to chunk into #{count} byte chunks" if ((str.bytesize % count) > 0)

  # The pattern is rebuilt on every call because +count+ may differ; the
  # two-argument-or-less form of Regexp.new works on every Ruby release.
  str.unpack('H*')[0].gsub(Regexp.new(".{#{count * 2}}")) do |s|
    value = s.to_i(16)
    (0x20..0x7e) === value ? value.chr : prefix + s + suffix.to_s
  end
end

.to_hex_dump(str, width = 16, base = nil) ⇒ Object

Converts a string to a nicely formatted hex dump


920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
# File 'lib/rex/text.rb', line 920

# Converts a string to a nicely formatted hex dump.
#
# Every output line holds the hex bytes of one chunk followed by the same
# bytes as text between "|" markers, with "." standing in for anything
# outside 0x20..0x7e.
#
# @param str [String] data to dump
# @param width [Integer] number of bytes shown per line
# @param base [Integer, nil] when given, each line is prefixed with the
#   zero-padded hex offset (base + position); when nil no offset column
#   is printed
# @return [String] the dump, terminated by an empty line
def self.to_hex_dump(str, width=16, base=nil)
  dump = ''
  first_hex_len = 0
  # The offset column is at least 8 digits wide.
  addr_digits = [(base.to_i + str.length).to_s(16).length, 8].max
  offset = 0

  until offset >= str.length
    piece = str[offset, width]
    address = base ? "%0#{addr_digits}x  " % (base.to_i + offset) : ''
    hex = piece.unpack("H*")[0].scan(/../).join(" ")
    dump << address + hex

    # The first line fixes the hex column width; shorter lines are padded
    # so the text column stays aligned.
    if first_hex_len == 0
      first_hex_len = hex.length
      gap = 4
    else
      gap = ((first_hex_len - hex.length) + 4).abs
    end
    dump << " " * gap

    dump << "|"
    piece.unpack("C*").each do |c|
      dump << ((c > 0x1f and c < 0x7f) ? c.chr : ".")
    end
    dump << "|\n"

    offset += width
  end

  dump << "\n"
end

.to_java(str, name = "shell") ⇒ Object

Converts a raw string into a java byte array


284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
# File 'lib/rex/text.rb', line 284

# Converts a raw string into a java byte array.
#
# @param str [String] raw bytes to emit
# @param name [String] name of the generated Java variable
# @return [String] a "byte <name>[] = new byte[] { ... };" declaration with
#   eight "(byte) 0x.." literals per tab-indented row
def self.to_java(str, name = "shell")
  # Rows are built from the byte array itself, so the ",\n" separator between
  # rows never depends on the character length of +str+ (which differs from
  # the byte count for multibyte strings).
  rows = str.unpack('C*').each_slice(8).map do |row|
    "\t" + row.map { |c| sprintf('(byte) 0x%.2x', c) }.join(", ")
  end

  "byte #{name}[] = new byte[]\n{\n" + rows.join(",\n") + "\n};\n"
end

.to_js_comment(str, wrap = DefaultWrap) ⇒ Object

Creates a javascript-style comment


256
257
258
# File 'lib/rex/text.rb', line 256

# Creates a javascript-style comment: the text is handed to wordwrap with
# no indent, the requested column, no suffix, and "// " as the line prefix.
#
# @param str [String] comment text
# @param wrap [Integer] column passed to wordwrap
# @return [Object] whatever wordwrap returns
def self.to_js_comment(str, wrap = DefaultWrap)
  wordwrap(str, 0, wrap, '', '// ')
end

.to_mixed_case_array(str) ⇒ Array<String>

Takes a string, and returns an array of all mixed case versions.

Examples:

>> Rex::Text.to_mixed_case_array "abc1"
=> ["abc1", "abC1", "aBc1", "aBC1", "Abc1", "AbC1", "ABc1", "ABC1"]

See Also:


899
900
901
902
903
904
905
906
907
908
909
910
911
912
# File 'lib/rex/text.rb', line 899

# Takes a string, and returns an array of all mixed case versions.
#
# @example
#   >> Rex::Text.to_mixed_case_array "abc1"
#   => ["abc1", "abC1", "aBc1", "aBC1", "Abc1", "AbC1", "ABc1", "ABC1"]
#
# Variants are ordered with the first character varying slowest and the
# lower-case form listed before the upper-case form. An empty string yields
# [""], and characters without case (digits, newlines, ...) are kept as-is.
#
# @param str [String] the string to expand
# @return [Array<String>] every distinct case combination of +str+
def self.to_mixed_case_array(str)
  variants = [""]
  # each_char visits every character, including newlines, so the number of
  # positions always matches the string.
  str.each_char do |ch|
    # Caseless characters contribute a single form, which keeps the
    # intermediate list from doubling needlessly.
    forms = [ch.downcase, ch.upcase].uniq
    grown = []
    variants.each do |head|
      forms.each { |form| grown << head + form }
    end
    variants = grown
  end
  variants.uniq
end

.to_num(str, wrap = DefaultWrap) ⇒ Object

Creates a comma separated list of numbers


178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/rex/text.rb', line 178

# Creates a comma separated list of numbers: every byte of the input is
# rendered as "0x%.2x", fifteen values per line.
#
# @param str [String] raw bytes to format
# @param wrap [Integer] not consulted by this formatter
# @return [String] values separated by ", ", lines separated by "\r\n",
#   terminated by "\r\n"
def self.to_num(str, wrap = DefaultWrap)
  rows = str.unpack('C*').each_slice(15).map do |row|
    row.map { |value| sprintf('0x%.2x', value) }.join(', ')
  end
  # A full row keeps its trailing ", " before the line break; only the very
  # last value is left without a separator.
  rows.join(", \r\n") + "\r\n"
end

.to_octal(str, prefix = "\\") ⇒ String

Returns the escaped octal version of the supplied string

Examples:

Rex::Text.to_octal("asdf") # => "\\141\\163\\144\\146"

517
518
519
520
521
522
523
524
# File 'lib/rex/text.rb', line 517

# Returns the escaped octal version of the supplied string.
#
# @example
#   Rex::Text.to_octal("asdf") # => "\\141\\163\\144\\146"
#
# @param str [String] raw bytes to escape
# @param prefix [String] text emitted before every octal value
# @return [String] each byte as +prefix+ followed by its unpadded octal value
def self.to_octal(str, prefix = "\\")
  str.each_byte.inject("") do |escaped, byte|
    escaped << "#{prefix}#{byte.to_s(8)}"
  end
end

.to_perl(str, wrap = DefaultWrap, name = "buf") ⇒ Object

Converts a raw string into a perl buffer


263
264
265
# File 'lib/rex/text.rb', line 263

# Converts a raw string into a perl buffer by delegating to hexify with
# Perl-flavoured quoting: each line is wrapped in double quotes and joined
# with " .", the buffer is introduced by "my $<name> = " and closed by '";'.
#
# @param str [String] raw bytes
# @param wrap [Integer] column passed to hexify
# @param name [String] Perl variable name
# @return [Object] whatever hexify returns
def self.to_perl(str, wrap = DefaultWrap, name = "buf")
  hexify(str, wrap, '"', '" .', "my $#{name} = \n", '";')
end

.to_perl_comment(str, wrap = DefaultWrap) ⇒ Object

Creates a perl-style comment


368
369
370
# File 'lib/rex/text.rb', line 368

# Creates a perl-style comment: the text is handed to wordwrap with no
# indent, the requested column, no suffix, and "# " as the line prefix.
#
# @param str [String] comment text
# @param wrap [Integer] column passed to wordwrap
# @return [Object] whatever wordwrap returns
def self.to_perl_comment(str, wrap = DefaultWrap)
  wordwrap(str, 0, wrap, '', '# ')
end

.to_powershell(str, name = "buf") ⇒ Object

Converts a raw string to a powershell byte array


307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
# File 'lib/rex/text.rb', line 307

# Converts a raw string to a powershell byte array.
#
# The first statement assigns up to ten bytes to [Byte[]]$<name>; every
# further group of ten is appended on its own line with "$<name> += ...".
#
# @param str [String, nil] raw bytes
# @param name [String] PowerShell variable name
# @return [String] the generated statements, or an assignment of '' when
#   +str+ is nil or empty
def self.to_powershell(str, name = "buf")
  return "[Byte[]]$#{name} = ''" if str.nil? or str.empty?

  rows = str.unpack('C*').each_slice(10).map do |row|
    row.map { |value| "0x#{value.to_s(16)}" }.join(",")
  end

  "[Byte[]]$#{name} = " + rows.join("\r\n$#{name} += ")
end

.to_python(str, wrap = DefaultWrap, name = "buf") ⇒ Object

Converts a raw string into a python buffer


270
271
272
# File 'lib/rex/text.rb', line 270

# Converts a raw string into a python buffer by delegating to hexify: the
# buffer starts as an empty string assignment and every line is appended
# with '<name> += "..."'.
#
# @param str [String] raw bytes
# @param wrap [Integer] column passed to hexify
# @param name [String] Python variable name
# @return [Object] whatever hexify returns
def self.to_python(str, wrap = DefaultWrap, name = "buf")
  hexify(str, wrap, "#{name} += \"", '"', "#{name} =  \"\"\n", '"')
end

.to_rand_case(str) ⇒ String

Converts a string to random case

Examples:

Rex::Text.to_rand_case("asdf") # => "asDf"

See Also:


881
882
883
884
885
886
887
# File 'lib/rex/text.rb', line 881

# Converts a string to random case.
#
# @example
#   Rex::Text.to_rand_case("asdf") # => "asDf"
#
# @param str [String] the string to convert; it is not modified
# @return [String] a copy of +str+ in which every character has been
#   independently upper- or lower-cased at random
def self.to_rand_case(str)
  buf = str.dup
  # Visit exactly the characters of the string: indexes 0...length. One
  # random draw is made per character.
  str.length.times do |i|
    ch = str[i, 1]
    buf[i, 1] = rand(2) == 0 ? ch.upcase : ch.downcase
  end
  buf
end

.to_raw(str) ⇒ Object

Returns the raw string


382
383
384
# File 'lib/rex/text.rb', line 382

# Returns the raw string: the argument is handed back untouched (the very
# same object, not a copy).
#
# @param str [Object] value to pass through
# @return [Object] +str+ itself
def self.to_raw(str)
  str
end

.to_ruby(str, wrap = DefaultWrap, name = "buf") ⇒ Object

Converts a raw string into a ruby buffer


171
172
173
# File 'lib/rex/text.rb', line 171

# Converts a raw string into a ruby buffer by delegating to hexify: each
# line is wrapped in double quotes and joined with " +", and the buffer is
# introduced by "<name> = ".
#
# @param str [String] raw bytes
# @param wrap [Integer] column passed to hexify
# @param name [String] Ruby variable name
# @return [Object] whatever hexify returns
def self.to_ruby(str, wrap = DefaultWrap, name = "buf")
  hexify(str, wrap, '"', '" +', "#{name} = \n", '"')
end

.to_ruby_comment(str, wrap = DefaultWrap) ⇒ Object

Creates a ruby-style comment


224
225
226
# File 'lib/rex/text.rb', line 224

# Creates a ruby-style comment: the text is handed to wordwrap with no
# indent, the requested column, no suffix, and "# " as the line prefix.
#
# @param str [String] comment text
# @param wrap [Integer] column passed to wordwrap
# @return [Object] whatever wordwrap returns
def self.to_ruby_comment(str, wrap = DefaultWrap)
  wordwrap(str, 0, wrap, '', '# ')
end

.to_unescape(data, endian = ENDIAN_LITTLE, prefix = '%%u') ⇒ Object

Returns a unicode escaped string for Javascript


489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
# File 'lib/rex/text.rb', line 489

# Returns a unicode escaped string for Javascript.
#
# The input is consumed two bytes at a time and each pair is rendered as
# +prefix+ followed by four hex digits. Odd-sized input is padded with a
# single 0x41 ("A") byte on a private copy, so the caller's string is
# never modified.
#
# @param data [String] raw bytes to escape
# @param endian [Object] ENDIAN_LITTLE swaps the two bytes of every pair;
#   any other value keeps them in input order
# @param prefix [String] text emitted before every pair. It is used as part
#   of a sprintf format, hence the doubled percent sign in the default.
# @return [String] the escaped string
def self.to_unescape(data, endian=ENDIAN_LITTLE, prefix='%%u')
  bytes = data.unpack("C*")
  # Pad the unpacked copy rather than appending to +data+ itself.
  bytes << 0x41 if (bytes.length % 2 != 0)

  buff = ''
  bytes.each_slice(2) do |c1, c2|
    if (endian == ENDIAN_LITTLE)
      buff << sprintf("#{prefix}%.2x%.2x", c2, c1)
    else
      buff << sprintf("#{prefix}%.2x%.2x", c1, c2)
    end
  end
  buff
end

.to_unicode(str = '', type = 'utf-16le', mode = '', size = '') ⇒ Object

Converts standard ASCII text to a unicode string.

Supported unicode types include: utf-16le, utf-16be, utf-32le, utf-32be, utf-7, and utf-8

Providing 'mode' provides hints to the actual encoder as to how it should encode the string.

Only UTF-7 and UTF-8 use “mode”.

utf-7 by default does not encode alphanumeric and a few other characters. By specifying the mode of “all”, then all of the characters are encoded, not just the non-alphanumeric set. to_unicode(str, 'utf-7', 'all')

utf-8 specifies that alphanumeric characters are used directly, e.g. “a” is just “a”. However, there exist 6 different overlong encodings of “a” that are technically not valid, but parse just fine in most utf-8 parsers (0xC1A1, 0xE081A1, 0xF08081A1, 0xF8808081A1, 0xFC80808081A1, 0xFE8080808081A1). How many bytes to use for the overlong encoding is specified by providing 'size', e.g. to_unicode(str, 'utf-8', 'overlong', 2)

Many utf-8 parsers also allow invalid overlong encodings, where bits that are unused when encoding a single byte are modified. Many parsers will ignore these bits, rendering simple string matching to be ineffective for dealing with UTF-8 strings. There are many more invalid overlong encodings possible for “a”. For example, three encodings are available for an invalid 2 byte encoding of “a”. (0xC1E1 0xC161 0xC121).

By specifying “invalid”, a random invalid encoding is chosen for the given byte size. to_unicode(str, 'utf-8', 'invalid', 2)

utf-7 defaults to 'normal' utf-7 encoding; utf-8 defaults to 2 byte 'normal' encoding.


603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
# File 'lib/rex/text.rb', line 603

# Converts a string, byte by byte, into one of several unicode-style
# encodings selected by +type+.
#
# type values handled below: 'utf-16le', 'utf-16be', 'utf-32le',
# 'utf-32be', 'utf-7', 'utf-8', 'uhwtfms' and 'uhwtfms-half'.
#
# mode is only consulted for 'utf-7' ('all' or anything else), 'utf-8'
# ('', 'overlong' or 'invalid') and the two 'uhwtfms' types, where it is
# reinterpreted as a codepage number. size is only consulted for 'utf-8'.
#
# Returns '' when str is nil/false. Raises TypeError for an unknown type,
# an out-of-range utf-8 size, an unknown utf-8 mode, an unknown codepage,
# or a byte the codepage has no mapping for.
def self.to_unicode(str='', type = 'utf-16le', mode = '', size = '')
  return '' if not str
  case type
  # The four fixed-width types widen every input byte to a 16- or 32-bit
  # unit in the requested byte order.
  when 'utf-16le'
    return str.unpack('C*').pack('v*')
  when 'utf-16be'
    return str.unpack('C*').pack('n*')
  when 'utf-32le'
    return str.unpack('C*').pack('V*')
  when 'utf-32be'
    return str.unpack('C*').pack('N*')
  when 'utf-7'
    case mode
    when 'all'
      # Every character matched by /./ is wrapped as '+' <base64 of its
      # utf-16be form, padding and newlines stripped> '-'.
      return str.gsub(/./){ |a|
        out = ''
        # NOTE(review): this compares the string literal 'a' (not the block
        # variable a) with '+', so the condition is always true. Confirm
        # whether `a != '+'` was intended, as in the branch below.
        if 'a' != '+'
          out = encode_base64(to_unicode(a, 'utf-16be')).gsub(/[=\r\n]/, '')
        end
        '+' + out + '-'
      }
    else
      # Only characters outside the listed set are encoded; a literal '+'
      # gets an empty payload and therefore becomes "+-".
      return str.gsub(/[^\n\r\t\ A-Za-z0-9\'\(\),-.\/\:\?]/){ |a|
        out = ''
        if a != '+'
          out = encode_base64(to_unicode(a, 'utf-16be')).gsub(/[=\r\n]/, '')
        end
        '+' + out + '-'
      }
    end
  when 'utf-8'
    # size is the number of output bytes per encoded input byte; default 2.
    if size == ''
      size = 2
    end

    if size >= 2 and size <= 7
      string = ''
      str.each_byte { |a|
        # Bytes below 21 or above 0x7f are always expanded; every byte is
        # expanded when a mode is given. Everything else is copied as-is.
        # NOTE(review): 21 is decimal here; confirm whether 0x21 was meant.
        if (a < 21 || a > 0x7f) || mode != ''
          # Turn the single byte into an array of its eight bit characters
          # ("0"/"1"), most significant bit first.
          bin = [a].pack('C').unpack('B8')[0].split(//)

          # Convert those characters to the integers 0 and 1.
          bin.collect!{|a_| a_.to_i}

          # Bit array for the whole multi-byte sequence, initially all zero.
          out = Array.new(8 * size, 0)

          # Set the first `size` bits of the leading byte (the length marker)
          # and the top bit of every byte in the sequence.
          0.upto(size - 1) { |i|
            out[i] = 1
            out[i * 8] = 1
          }

          # Distribute the input bits starting from the least significant
          # one: six bits go into the last byte, then the next six into the
          # byte before it, and so on.
          i = 0
          byte = 0
          bin.reverse.each { |bit|
            if i < 6
              mod = (((size * 8) - 1) - byte * 8) - i
              out[mod] = bit
            else
              # Current byte is full: move to the previous byte and run the
              # block again for the same bit.
              byte = byte + 1
              i = 0
              redo
            end
            i = i + 1
          }

          if mode != ''
            case mode
            when 'overlong'
              # do nothing, since we already handle this as above...
            when 'invalid'
              # Keep assigning random values to the candidate bits until at
              # least one of them actually changed.
              done = 0
              while done == 0
                # Offsets counted back from the end of the bit array; an
                # offset is only used when the resulting index is above 1.
                bits = [7, 8, 15, 16, 23, 24, 31, 32, 41]
                bits.each { |bit|
                  bit = (size * 8) - bit
                  if bit > 1
                    set = rand(2)
                    if out[bit] != set
                      out[bit] = set
                      done = 1
                    end
                  end
                }
              end
            else
              raise TypeError, 'Invalid mode.  Only "overlong" and "invalid" are acceptable modes for utf-8'
            end
          end
          # Pack the bit array back into `size` raw bytes.
          string << [out.join('')].pack('B*')
        else
          string << [a].pack('C')
        end
      }
      return string
    else
      raise TypeError, 'invalid utf-8 size'
    end
  when 'uhwtfms' # suggested name from HD :P
    load_codepage()

    string = ''
    # overloading mode as codepage
    if mode == ''
      mode = 1252 # ANSI - Latin 1, default for US installs of MS products
    else
      mode = mode.to_i
    end
    if @@codepage_map_cache[mode].nil?
      raise TypeError, "Invalid codepage #{mode}"
    end
    # Each byte is replaced by one randomly chosen entry from the list the
    # codepage map holds for it.
    str.each_byte {|byte|
      char = [byte].pack('C*')
      possible = @@codepage_map_cache[mode]['data'][char]
      if possible.nil?
        raise TypeError, "codepage #{mode} does not provide an encoding for 0x#{char.unpack('H*')[0]}"
      end
      string << possible[ rand(possible.length) ]
    }
    return string
  when 'uhwtfms-half' # suggested name from HD :P
    load_codepage()
    string = ''
    # overloading mode as codepage
    if mode == ''
      mode = 1252 # ANSI - Latin 1, default for US installs of MS products
    else
      mode = mode.to_i
    end
    if mode != 1252
      raise TypeError, "Invalid codepage #{mode}, only 1252 supported for uhwtfms_half"
    end
    str.each_byte {|byte|
      # Bytes 33..63 and 96..126 become "\xFF" plus the byte XOR 32, bytes
      # 64..95 become "\xFF" plus the byte XOR 96; all other bytes go
      # through the codepage map as in 'uhwtfms'.
      if ((byte >= 33 && byte <= 63) || (byte >= 96 && byte <= 126))
        string << "\xFF" + [byte ^ 32].pack('C')
      elsif (byte >= 64 && byte <= 95)
        string << "\xFF" + [byte ^ 96].pack('C')
      else
        char = [byte].pack('C')
        possible = @@codepage_map_cache[mode]['data'][char]
        if possible.nil?
          raise TypeError, "codepage #{mode} does not provide an encoding for 0x#{char.unpack('H*')[0]}"
        end
        string << possible[ rand(possible.length) ]
      end
    }
    return string
  else
    raise TypeError, 'invalid utf type'
  end
end

.to_utf8(str) ⇒ Object

Converts ISO-8859-1 to UTF-8


389
390
391
392
393
394
395
396
397
398
399
400
401
# File 'lib/rex/text.rb', line 389

# Converts a string to UTF-8.
#
# When the string supports String#encode, it is transcoded from its own
# encoding and any byte that is invalid or has no UTF-8 equivalent is
# dropped. Otherwise Iconv is used to convert from ISO-8859-1.
#
# @param str [String] the string to convert
# @return [String] the UTF-8 version of +str+
# @raise [::RuntimeError] when String#encode is unavailable and the Iconv
#   conversion fails
def self.to_utf8(str)

  if str.respond_to?(:encode)
    # Skip over any bytes that fail to convert to UTF-8.
    # The options must be passed as trailing keyword-style arguments: a
    # braced Hash is a positional argument on current Ruby and would be
    # taken for the source encoding.
    return str.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '')
  end

  begin
    Iconv.iconv("utf-8","iso-8859-1", str).join(" ")
  rescue
    raise ::RuntimeError, "Your installation does not support iconv (needed for utf8 conversion)"
  end
end

.to_vbapplication(str, name = "buf") ⇒ Object

Converts a raw string into a vba buffer


347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
# File 'lib/rex/text.rb', line 347

# Converts a raw string into a vba buffer.
#
# @param str [String, nil] raw bytes
# @param name [String] VBA variable name
# @return [String] "<name> = Array(b0,b1,...)" followed by "\r\n", with a
#   " _\r\n" line continuation after every 20 values; "<name> = Array()"
#   (no line break) when +str+ is nil or empty
def self.to_vbapplication(str, name = "buf")
  return "#{name} = Array()" if str.nil? or str.empty?

  code  = str.unpack('C*')
  buff = "#{name} = Array("
  maxbytes = 20

  # Walk every byte, starting with the first one (index 0) and stopping at
  # the last; +emitted+ is the 1-based count used for separators.
  code.each_with_index do |byte, idx|
    emitted = idx + 1
    more = emitted < code.length
    buff << byte.to_s
    buff << "," if more
    # Continue on a new line after each full group, but only when values
    # remain to be written.
    buff << " _\r\n" if more and (emitted % maxbytes) == 0
  end

  buff << ")\r\n"

  buff
end

.to_vbscript(str, name = "buf") ⇒ Object

Converts a raw string to a vbscript byte array


326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
# File 'lib/rex/text.rb', line 326

# Converts a raw string to a vbscript byte array built from Chr() calls.
#
# The first statement assigns up to 100 concatenated Chr(n) terms to
# <name>; each further group of 100 is appended on its own line as
# "<name>=<name>&Chr(..)&...".
#
# @param str [String, nil] raw bytes
# @param name [String] VBScript variable name
# @return [String] the generated statements, or just the name when +str+
#   is nil or empty
def self.to_vbscript(str, name = "buf")
  return "#{name}" if str.nil? or str.empty?

  # unpack yields an Array of integers, so indexing differences between
  # String implementations do not matter here.
  groups = str.unpack('C*').each_slice(100).map do |row|
    row.map { |value| "Chr(#{value})" }.join("&")
  end

  "#{name}=" + groups.join("\r\n#{name}=#{name}&")
end

.to_words(str, strict = false) ⇒ Object

Returns the words in str as an Array.

strict - include only words, no boundary characters (like spaces, etc.)


464
465
466
467
468
# File 'lib/rex/text.rb', line 464

# Returns the words in str as an Array.
#
# The string is split on word boundaries, so by default the separators
# (spaces, punctuation, ...) are returned as elements too.
#
# @param str [String] text to split
# @param strict [Boolean] when true, keep only the elements that contain at
#   least one word character
# @return [Array<String>] the pieces of +str+
def self.to_words( str, strict = false )
  pieces = str.split( /\b/ )
  return pieces unless strict

  pieces.select { |piece| piece =~ /\w/ }
end

.ungzip(str) ⇒ String

Uncompresses a string using gzip

Raises:


1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
# File 'lib/rex/text.rb', line 1473

# Uncompresses a string using gzip.
#
# @param str [String] gzip-compressed data
# @return [String] the decompressed data, accumulated in a binary
#   (ASCII-8BIT) string where encodings are supported
# @raise [RuntimeError] when zlib_present? reports that zlib is unavailable
def self.ungzip(str)
  raise RuntimeError, "Gzip support is not present." unless zlib_present?

  plain = ""
  plain.force_encoding('ASCII-8BIT') if plain.respond_to?(:encoding)

  reader = Zlib::GzipReader.new(StringIO.new(str, 'rb'))
  plain << reader.read
  reader.close

  plain
end

.unicode_filter_decode(str) ⇒ Object


1701
1702
1703
# File 'lib/rex/text.rb', line 1701

# Reverses unicode_filter_encode: every "$U$<printable text>-0x<hex>"
# sequence in the string is replaced with the raw bytes described by the
# hex digits; the printable part is discarded.
#
# @param str [#to_s] text that may contain encoded sequences
# @return [String] the text with all sequences expanded
def self.unicode_filter_decode(str)
  str.to_s.gsub( /\$U\$([\x20-\x2c\x2e-\x7E]*)\-0x([A-Fa-f0-9]+)/n ) do
    [Regexp.last_match(2)].pack("H*")
  end
end

.unicode_filter_encode(str) ⇒ Object

A custom unicode filter for dealing with multi-byte strings on an 8-bit console. Punycode would have been more “standard”, but it requires valid Unicode chars.


1693
1694
1695
1696
1697
1698
1699
# File 'lib/rex/text.rb', line 1693

# A custom unicode filter for dealing with multi-byte strings on an 8-bit
# console.
#
# Strings made only of bytes 0x20..0x7e are returned unchanged. Anything
# else becomes "$U$" + the printable bytes of the input (minus "-") +
# "-0x" + the hex form of the complete input.
#
# @param str [String] text to filter
# @return [String] the original or the encoded string
def self.unicode_filter_encode(str)
  unsafe = ( LowAscii + HighAscii + "\x7f" ).unpack("C*")
  return str if (str.to_s.unpack("C*") & unsafe).empty?

  # 0x2d ("-") is left out so the "-0x" marker stays unambiguous.
  readable = str.unpack("C*").select{|c| c < 0x7f and c > 0x1f and c != 0x2d}.pack("C*")
  "$U$" + readable + "-0x" + str.unpack("H*")[0]
end

.uri_decode(str) ⇒ Object

Decode a URI encoded string


867
868
869
# File 'lib/rex/text.rb', line 867

# Decode a URI encoded string.
#
# Only a "%" followed by exactly two hexadecimal digits (either case) is
# treated as an escape; anything else, such as "%zz", is left untouched.
#
# @param str [String] the URI encoded text
# @return [String] the text with every valid %XX escape replaced by the
#   byte it denotes
def self.uri_decode(str)
  str.gsub(/%[0-9a-f]{2}/i) { |escaped| [escaped[1, 2]].pack("H*") }
end

.uri_encode(str, mode = 'hex-normal') ⇒ Object

Encode a string in a manner useful for HTTP URIs and URI Parameters.


786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
# File 'lib/rex/text.rb', line 786

# Encode a string in a manner useful for HTTP URIs and URI parameters.
#
# Modes:
#   'none'        return the string untouched
#   'hex-normal'  %XX-escape everything except letters, digits and / \ . - _ ~
#   'hex-all'     %XX-escape everything except / and \
#   'hex-random'  per byte, randomly pick the 'all' or 'normal' rule
#   'u-normal', 'u-all', 'u-random'
#                 same selection rules, but the matched text is converted
#                 with to_unicode(..., 'uhwtfms') and written as %uXXXX
#   'u-half'      'all' rule, converted with 'uhwtfms-half'
#
# @param str [String, nil] text to encode
# @param mode [String] one of the modes listed above
# @return [String] the encoded text; "" when +str+ is nil
# @raise [TypeError] for an unknown mode
def self.uri_encode(str, mode = 'hex-normal')
  return "" if str == nil

  return str if mode == 'none' # fast track no encoding

  all = /[^\/\\]+/
  # http://tools.ietf.org/html/rfc3986#section-2.3
  normal = /[^a-zA-Z0-9\/\\\.\-_~]+/

  hex_escape = lambda { |s| Rex::Text.to_hex(s, '%') }
  u_escape   = lambda { |s| Rex::Text.to_hex(Rex::Text.to_unicode(s, 'uhwtfms'), '%u', 2) }

  # Encodes byte by byte, flipping a coin for each byte to decide which of
  # the two patterns applies.
  per_byte = lambda do |escape|
    encoded = str.each_byte.map do |c|
      pattern = (rand(2) == 0) ? all : normal
      c.chr.gsub(pattern) { |s| escape.call(s) }
    end
    encoded.join
  end

  case mode
  when 'hex-normal'
    str.gsub(normal) { |s| hex_escape.call(s) }
  when 'hex-all'
    str.gsub(all) { |s| hex_escape.call(s) }
  when 'hex-random'
    per_byte.call(hex_escape)
  when 'u-normal'
    str.gsub(normal) { |s| u_escape.call(s) }
  when 'u-all'
    str.gsub(all) { |s| u_escape.call(s) }
  when 'u-random'
    per_byte.call(u_escape)
  when 'u-half'
    str.gsub(all) { |s| Rex::Text.to_hex(Rex::Text.to_unicode(s, 'uhwtfms-half'), '%u', 2) }
  else
    raise TypeError, "invalid mode #{mode.inspect}"
  end
end

.wordwrap(str, indent = 0, col = DefaultWrap, append = '', prepend = '') ⇒ Object

Wraps text at a given column using a supplied indentation


990
991
992
993
# File 'lib/rex/text.rb', line 990

# Wraps text at a given column using a supplied indentation.
#
# The text is cut into segments of at most (col - indent) characters that
# end on whitespace or at the end of the string. Every segment is emitted
# as: indent spaces + prepend + segment + append + newline.
#
# @param str [String] text to wrap
# @param indent [Integer] number of spaces placed before every line
# @param col [Integer] target column
# @param append [String] text added after every segment
# @param prepend [String] text added before every segment
# @return [String] the wrapped text
def self.wordwrap(str, indent = 0, col = DefaultWrap, append = '', prepend = '')
  # 0x05 is a temporary end-of-line marker: when the segment already ends
  # with a newline the marker is simply dropped, otherwise it becomes one.
  marker = 5.chr
  str.gsub(/.{1,#{col - indent}}(?:\s|\Z)/) do |segment|
    line = (" " * indent) + prepend + segment + append + marker
    line.gsub(/\n\005/,"\n").gsub(/\005/,"\n")
  end
end

.xml_char_encode(str) ⇒ Object

Encode an ASCII string so it's safe for XML. It's a wrapper for to_hex_ascii.


860
861
862
# File 'lib/rex/text.rb', line 860

# Encode an ASCII string so it's safe for XML. It's a wrapper for
# to_hex_ascii that asks for one byte per chunk, wrapped as "&#x..;".
#
# @param str [String] text to encode
# @return [Object] whatever to_hex_ascii returns
def self.xml_char_encode(str)
  to_hex_ascii(str, "&#x", 1, ";")
end

.zlib_deflate(str, level = Zlib::BEST_COMPRESSION) ⇒ String

Compresses a string using zlib


1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
# File 'lib/rex/text.rb', line 1422

# Compresses a string using zlib.
#
# @param str [String] data to compress
# @param level [Integer] compression level handed to Zlib::Deflate.new
# @return [String] the complete deflated stream
# @raise [RuntimeError] when zlib_present? reports that zlib is unavailable
def self.zlib_deflate(str, level = Zlib::BEST_COMPRESSION)
  raise RuntimeError, "Gzip support is not present." unless self.zlib_present?

  deflater = Zlib::Deflate.new(level)
  # FINISH flushes everything, so a single call yields the whole stream.
  packed = deflater.deflate(str, Zlib::FINISH)
  deflater.close
  packed
end

.zlib_inflate(str) ⇒ String

Uncompresses a string using zlib


1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
# File 'lib/rex/text.rb', line 1438

# Uncompresses a string using zlib.
#
# @param str [String] zlib-deflated data
# @return [String] the inflated data
# @raise [RuntimeError] when zlib_present? reports that zlib is unavailable
def self.zlib_inflate(str)
  raise RuntimeError, "Gzip support is not present." unless self.zlib_present?

  inflater = Zlib::Inflate.new
  plain = inflater.inflate(str)
  inflater.finish
  inflater.close
  plain
end

.zlib_present?Boolean

Returns true if zlib can be used.


1402
1403
1404
1405
1406
1407
1408
1409
# File 'lib/rex/text.rb', line 1402

# Returns true if zlib can be used.
#
# The check simply references the Zlib constant; if that reference raises
# (for example because the library was never loaded) the answer is false.
#
# @return [Boolean]
def self.zlib_present?
  Zlib && true
rescue
  false
end