Class: String

Inherits:

Object

Object
String

show all

Defined in:: lib/butler/irc/string.rb,
lib/ruby/string/chunks.rb,
lib/ruby/string/arguments.rb,
lib/ruby/string/camelcase.rb,
lib/ruby/string/unescaped.rb,
lib/ruby/string/post_arguments.rb,
lib/string.rb

Overview

provides IRC-related methods for string-class

Direct Known Subclasses

ASCII, MBEncoded, SBEncoded, UTF8

Defined Under Namespace

Classes: ASCII, MBEncoded, SBEncoded, SingleQuoteException, UTF8

Constant Summary collapse

COLORS = Colors Lookup-table for mirc_translated_color

{
	'white'    => 0,
	'black'    => 1,
	'blue'     => 2,
	'green'    => 3,
	'red'      => 4,
	'brown'    => 5,
	'purple'   => 6,
	'orange'   => 7,
	'yellow'   => 8,
	'ltgreen'  => 9,
	'teal'     => 10,
	'ltcyan'   => 11,
	'ltblue'   => 12,
	'pink'     => 13,
	'grey'     => 14,
	'ltgrey'   => 15
}

Escapes = lookup table for unescaping: converts \" to " and similar (unknown sequences are returned unchanged)

Hash.new{|h,k|k}.update({
	'\\\\' => '\\',
	'\\"'  => '"',
	"\\'"  => "'",
	'\e'   => "\e",
	'\r'   => "\r",
	'\n'   => "\n",
	'\f'   => "\f",
	'\t'   => "\t",
	'\ '   => " ",
})

Encodings = encoding -> singlebyte

# Maps an encoding name to the String subclass used for strings in that
# encoding (see #to_s, which instantiates the class found here).
{
	'binary'         => ASCII, # only with collation, else it's ::String
	'ascii'          => ASCII,
	'utf-8'          => UTF8,

	# single-byte encodings
	'iso-8859-1'     => SBEncoded,
	'iso-8859-2'     => SBEncoded,
	'iso-8859-3'     => SBEncoded,
	'iso-8859-4'     => SBEncoded,
	'iso-8859-5'     => SBEncoded,
	'iso-8859-6'     => SBEncoded,
	'iso-8859-7'     => SBEncoded,
	'iso-8859-8'     => SBEncoded,
	'iso-8859-9'     => SBEncoded,
	'iso-8859-10'    => SBEncoded,
	'iso-8859-11'    => SBEncoded,
	'iso-8859-12'    => SBEncoded,
	'iso-8859-13'    => SBEncoded,
	'iso-8859-14'    => SBEncoded,
	'iso-8859-15'    => SBEncoded,
	# 'macroman' used to be listed twice; a duplicated literal key is
	# redundant (the later entry silently wins) and triggers a Ruby warning.
	'macroman'       => SBEncoded,

	# multi-byte encodings
	'utf-8-mac'      => MBEncoded,
	'utf-16'         => MBEncoded,
	'utf-16be'       => MBEncoded,
	'utf-16le'       => MBEncoded,
	'utf-32'         => MBEncoded,
	'utf-32be'       => MBEncoded,
	'utf-32le'       => MBEncoded,
	'ucs-2be'        => MBEncoded,
	'ucs-2le'        => MBEncoded,
	'ucs-4be'        => MBEncoded,
	'ucs-4le'        => MBEncoded,
	'ucs-2-internal' => MBEncoded,
	'ucs-2-swapped'  => MBEncoded,
	'ucs-4-internal' => MBEncoded,
	'ucs-4-swapped'  => MBEncoded,
}

UNICODE_WHITESPACE = Unicode whitespace code points; used for strip/strip!

[
	(0x0009..0x000D).to_a,  # White_Space # Cc   [5] <control-0009>..<control-000D>
	0x0020,          # White_Space # Zs       SPACE
	0x0085,          # White_Space # Cc       <control-0085>
	0x00A0,          # White_Space # Zs       NO-BREAK SPACE
	0x1680,          # White_Space # Zs       OGHAM SPACE MARK
	0x180E,          # White_Space # Zs       MONGOLIAN VOWEL SEPARATOR
	(0x2000..0x200A).to_a, # White_Space # Zs  [11] EN QUAD..HAIR SPACE
	0x2028,          # White_Space # Zl       LINE SEPARATOR
	0x2029,          # White_Space # Zp       PARAGRAPH SEPARATOR
	0x202F,          # White_Space # Zs       NARROW NO-BREAK SPACE
	0x205F,          # White_Space # Zs       MEDIUM MATHEMATICAL SPACE
	0x3000,          # White_Space # Zs       IDEOGRAPHIC SPACE
].flatten

UNICODE_LEADERS_AND_TRAILERS = :nodoc:

UNICODE_WHITESPACE + [65279]

UNICODE_LT_PAT =

/#{UNICODE_LEADERS_AND_TRAILERS.pack("U*").gsub(/.(?=.)/u, '\0|')}/u

UNICODE_T_PAT =

/#{UNICODE_LT_PAT}+$/

UNICODE_L_PAT =

/^#{UNICODE_LT_PAT}+/

Binary =

'binary'.freeze

Class Method Summary collapse

.encodings ⇒ Object

Instance Method Summary collapse

#arguments(unescape = true) ⇒ Object

parse a string of single/double/unquoted arguments (or any mix of) raises an exception if an odd number of quotes is matched examples: ‘hello hello “ruby world”’.arguments # => [“hello”, “hello”, “ruby world”] ‘hello hello ruby\ world’.arguments # => [“hello”, “hello”, “ruby world”] ‘hello hello ruby\ world’.arguments(false) # => [“hello”, “hello”, “ruby\ world”].
#ascii(collation = nil) ⇒ Object
#binary ⇒ Object
#camelcase ⇒ Object

CamelCase a string, e.g.
#chunks(chunk_length) ⇒ Object

splits a string into chunks of at most ‘chunk_length’ characters.
#collation ⇒ Object
#encoding ⇒ Object
#mirc_formatted ⇒ Object

provides mirc formatting: ![b]: bold ![i]: italic ![u]: underline ![r]: reverse ![c]: reset color ![cm]: set color, m=0-15 or (COLOR_CONSTANT) ![cm,n]: set color, m is foreground, n is background, m,n=0-15 or (COLOR_CONSTANT) ![o]: reset all effects Samples: ![bc(blue)]Bold blue text! and normal again ![bc2]Bold blue text! and normal again (same as above) ![bc(white),(black)]White bold text on black background! ![bi]Bold italic text!.
#mirc_stripped ⇒ Object

returns a string with formatting codes in mirc-format stripped.
#mirc_translated_color(i, s) ⇒ Object

helper method for mirc_formatted, extracts color portions from ![c(color)] statements.
#post_arguments(unescaped = true) ⇒ Object
#strip_user_prefixes ⇒ Object

removes indicators from nicknames and channelnames.
#to_flags ⇒ Object

Converts string representation of user-prefixes to binary flags.
#to_s(encoding = nil, collation = nil) ⇒ Object
#unescaped ⇒ Object
#user_prefixes ⇒ Object

returns prefixes found in front of a nickname Sequential parsing since @@nickname is not valid.
#utf8(collation = nil) ⇒ Object
#valid_channelname? ⇒ Boolean

returns whether or not a string represents a valid channelname.
#valid_nickname? ⇒ Boolean

returns whether the string represents a valid nickname. This method does not take care of prefixes like “@”, “+”, “-”; see valid_user? for this functionality, or strip_user_prefixes.
#valid_user? ⇒ Boolean

the same as valid_nickname? except that preceding @, + or - are ignored.

Class Method Details

.encodings ⇒ `Object`



66
67
68

# File 'lib/string.rb', line 66

# Returns the keys of the Encodings table, i.e. the names of all encodings
# that have an entry there.
def self.encodings
	Encodings.keys
end

Instance Method Details

#arguments(unescape = true) ⇒ `Object`

parse a string of single/double/unquoted arguments (or any mix of) raises an exception if an odd number of quotes is matched examples:

'hello hello "ruby world"'.arguments # => ["hello", "hello", "ruby world"]
'hello hello ruby\ world'.arguments #  => ["hello", "hello", "ruby world"]
'hello hello ruby\ world'.arguments(false) #  => ["hello", "hello", "ruby\\ world"]

# File 'lib/ruby/string/arguments.rb', line 30

# Splits the string into a list of arguments.
#
# A token is a double quoted string, a single quoted string or a bare word;
# a backslash escapes the character that follows it (so ruby\ world is one
# word). The enclosing quotes of double quoted tokens are removed.
#
# unescape:: when true (the default) every token is additionally passed
#            through #unescaped; when false escape sequences are kept.
#
# Returns the array of arguments.
# Raises SingleQuoteException (constructed with the argument list and the
# index of the offending token) if a quote has no matching partner.
def arguments(unescape=true)
	args = scan(
		/
			# match a double quoted string
			"(?:\\.|[^\\"])*" |

			# match a single quoted string
			'(?:\\.|[^\\'])*' |

			# match non-quoted words (may contain escaped spaces)
			(?:\\.|[^\\'"\s])+ |

			# match a single " or '
			["']
		/x
	)
	# Look for an unmatched quote on the raw tokens. Checking after the quotes
	# have been stripped/unescaped would mistake legitimate arguments such as
	# "'" or \" for a dangling quote.
	lone_quote = args.index('"') || args.index("'")
	# NOTE(review): single quoted tokens keep their quotes, only double quotes
	# are stripped - behavior preserved from the original, confirm it is wanted.
	args.map! { |arg|
		# a complete double quoted token is at least two characters long,
		# a lone " (length 1) is left untouched
		arg = arg[1..-2] if arg[0,1] == '"' && arg.length > 1
		unescape ? arg.unescaped : arg
	}
	raise SingleQuoteException.new(args, lone_quote) if lone_quote
	args
end

#ascii(collation = nil) ⇒ `Object`



165
166
167

# File 'lib/string.rb', line 165

# Wraps this string in a String::ASCII instance, passing along the
# String::ASCII::ASCII encoding constant and the given collation (nil by default).
def ascii(collation=nil)
	String::ASCII.new(self, String::ASCII::ASCII, collation)
end

#binary ⇒ `Object`



173
174
175

# File 'lib/string.rb', line 173

# Returns a new plain String built from this string via String.new(self).
def binary
	String.new(self)
end

#camelcase ⇒ `Object`

CamelCase a string, e.g. “foo_bar” becomes “FooBar”



11
12
13

# File 'lib/ruby/string/camelcase.rb', line 11

# CamelCases the string: it is cut at underscores, every piece is
# capitalized (first character upper case, the rest lower case) and the
# pieces are glued together again, e.g. "foo_bar" becomes "FooBar".
def camelcase
	pieces = scan(/[^_]+/)
	pieces.collect(&:capitalize).join("")
end

#chunks(chunk_length) ⇒ `Object`

splits a string into chunks of at most ‘chunk_length’ characters



11
12
13

# File 'lib/ruby/string/chunks.rb', line 11

# Cuts the string into pieces of at most chunk_length characters and returns
# them as an array; the last piece may be shorter.
# The pattern uses "." without the multiline flag, so newlines are never part
# of a chunk: they end the current chunk and are dropped from the result.
# chunk_length is interpolated into the pattern (presumably a positive
# Integer - confirm against callers).
def chunks(chunk_length)
	piece = /.{1,#{chunk_length}}/
	scan(piece)
end

#collation ⇒ `Object`



149
150
151

# File 'lib/string.rb', line 149

# Returns the collation of a plain String, which is always the Binary constant.
def collation
	Binary
end

#encoding ⇒ `Object`



145
146
147

# File 'lib/string.rb', line 145

# Returns the encoding of a plain String, which is always the Binary constant.
def encoding
	Binary
end

#mirc_formatted ⇒ `Object`

provides mirc formatting: ![b]: bold ![i]: italic ![u]: underline ![r]: reverse ![c]: reset color ![cm]: set color, m=0-15 or (COLOR_CONSTANT) ![cm,n]: set color, m is foreground, n is background, m,n=0-15 or (COLOR_CONSTANT) ![o]: reset all effects Samples: ![bc(blue)]Bold blue text! and normal again ![bc2]Bold blue text! and normal again (same as above) ![bc(white),(black)]White bold text on black background! ![bi]Bold italic text!

Valid colors are: white (mirc-code: 0) black (mirc-code: 1) blue (mirc-code: 2) green (mirc-code: 3) red (mirc-code: 4) brown (mirc-code: 5) purple (mirc-code: 6) orange (mirc-code: 7) yellow (mirc-code: 8) ltgreen (mirc-code: 9) teal (mirc-code: 10) ltcyan (mirc-code: 11) ltblue (mirc-code: 12) pink (mirc-code: 13) grey (mirc-code: 14) ltgrey (mirc-code: 15)

Note: not every font/size combination displays bold/italic text.

# File 'lib/butler/irc/string.rb', line 125

# Returns a copy of the string in which every ![...] tag is replaced by the
# corresponding mIRC control characters. The letters inside a tag are
# processed one by one (case-insensitive):
#
#   b  bold (chr 2)        o  reset all (chr 15)   r  reverse (chr 18)
#   u  underline (chr 31)  i  italic (chr 29)      |  tab (chr 9)
#   c  color: Ctrl-C (chr 3), optionally followed by a foreground and a
#      ",background" color, each given as digits or as "(name)" - the
#      parsing is done by mirc_translated_color
#
# Characters that are not listed above are ignored.
def mirc_formatted
	gsub( /!\[(.*?)\]/ ) do
		codes = $1.downcase
		repl  = ""
		i     = 0
		while i < codes.length
			case codes[i].chr
				when 'b'
					repl << 2.chr
				when 'o'
					repl << 15.chr
				when 'r'
					repl << 18.chr
				when 'u'
					repl << 31.chr
				when 'i'
					repl << 29.chr
				when '|'
					repl << 9.chr
				when 'c'
					bg = nil

					# mirc_translated_color returns the index behind the
					# color it consumed along with the two-digit color code
					i, fg = mirc_translated_color( i+1, codes )
					i, bg = mirc_translated_color( i+1, codes ) if i < codes.length && codes[i].chr == ','

					# Ctrl-C (0x03) introduces a color code. It is written as
					# 3.chr rather than as a raw control character inside a
					# literal, which is invisible and easily lost; without it
					# the color digits would show up as plain text.
					repl << 3.chr << ( fg || "" )
					repl << "," << bg if bg

					# i already points behind the color; compensate for the
					# unconditional increment below
					i -= 1
			end

			i += 1
		end
		repl
	end
end

#mirc_stripped ⇒ `Object`

returns a string with formatting codes in mirc-format stripped



83
84
85

# File 'lib/butler/irc/string.rb', line 83

# Returns a copy of the string with mIRC formatting codes removed: the
# control characters 0x02, 0x0f, 0x12, 0x1f, 0x1d and 0x09 as well as Ctrl-C
# (0x03) color codes together with their optional "fg" / "fg,bg" digits.
def mirc_stripped
	# The digits after Ctrl-C are optional: a bare Ctrl-C (color reset) is a
	# formatting code as well and must not be left behind in the text.
	gsub(/(?:[\x02\x0f\x12\x1f\x1d\x09]|\cc(?:\d{1,2}(?:,\d{1,2})?)?)/, "")
end

#mirc_translated_color(i, s) ⇒ `Object`

helper method for mirc_formatted, extracts color portions from ![c(color)] statements

# File 'lib/butler/irc/string.rb', line 163

# Helper for mirc_formatted: reads one color specification from s, starting
# at index i. A color is either a run of digits ("4", "12") or a name from
# COLORS in parentheses ("(blue)").
#
# Returns [next_index, color]: next_index is the position behind the
# consumed text, color is the two-digit color code as a String, or nil if
# there is no color at position i.
# An unknown color name makes the COLORS lookup return nil, which raises a
# TypeError when it is formatted.
def mirc_translated_color( i, s )
	return [ i, nil ] if i >= s.length

	if s[i].chr == '('
		# tolerate a missing ")": the name then runs to the end of s
		# (previously this crashed with a NoMethodError on nil)
		j = s.index( ')', i ) || s.length
		return [ j+1, "%02d" % COLORS[ s[i+1..j-1].downcase ] ]
	end

	j = i
	j += 1 while j < s.length && s[j].chr =~ /[0-9]/
	# No digits at all means no color was given (e.g. the "c" in "cb" is a
	# plain color reset); do not invent color 00 (white) for it.
	return [ i, nil ] if j == i
	# NOTE(review): purpose of stepping one past the end is unclear; kept
	# as-is so the returned index stays what callers have always received.
	j += 1 if j == s.length
	return [ j, "%02d" % s[i..j-1].to_i ]
end

#post_arguments(unescaped = true) ⇒ `Object`

# File 'lib/ruby/string/post_arguments.rb', line 12

# Returns, for every argument of the string (as split by #arguments), the
# part of the string that starts one character after the end of the previous
# argument and runs to the end of the string. The first entry is the whole string.
#
# unescaped:: when true (the default) each entry is passed through
#             String#unescaped, otherwise it is returned verbatim.
def post_arguments(unescaped=true)
	post   = []
	offset = -1
	found  = 0
	# Arguments are fetched with escapes intact (false) so that they can be
	# located in the receiver again below. If the quotes are unbalanced, the
	# list is rebuilt from the exception: ex.pre plus ex.post joined by single
	# spaces (presumably the arguments before / from the dangling quote on -
	# confirm against SingleQuoteException).
	arguments = begin
		arguments(false)
	rescue SingleQuoteException => ex
		ex.pre+[ex.post.join(" ")]
	end
	arguments.each { |argument|
		# the remainder is recorded *before* offset is advanced past the
		# current argument, so entry n starts at argument n
		post  << (unescaped ? self[offset+1..-1].unescaped : self[offset+1..-1])
		# NOTE(review): index returns nil if the argument text is not found
		# verbatim in the receiver; the next line would then fail - confirm
		# this cannot happen (e.g. for the joined ex.post).
		found  = index(argument, offset+1)
		offset = found+argument.length
	}
	post
end

#strip_user_prefixes ⇒ `Object`

removes indicators from nicknames and channelnames

# File 'lib/butler/irc/string.rb', line 50

# Returns the string without its leading user prefixes (the keys of
# Butler::IRC::User::PREFIXES). Every prefix is accepted only once: the
# table is copied and each prefix found is removed from the copy, so
# a repeated prefix character ends the prefix run.
def strip_user_prefixes
	remaining = Butler::IRC::User::PREFIXES.dup
	pos       = 0
	loop do
		char = self[pos,1]
		break unless remaining.has_key?(char)
		remaining.delete(char)
		pos += 1
	end
	self[pos..-1]
end

#to_flags ⇒ `Object`

Converts string representation of user-prefixes to binary flags

# File 'lib/butler/irc/string.rb', line 74

# Converts a string of user prefixes to binary flags: the values found in
# Butler::IRC::User::PREFIXES for each character of the string are OR-ed
# together. Returns 0 for the empty string.
def to_flags
	result = 0
	# Exclusive range: the former 0.upto(length) also visited index == length,
	# where self[index,1] is "" and therefore looked up a non-existing prefix.
	(0...length).each { |index|
		result |= Butler::IRC::User::PREFIXES[self[index,1]]
	}
	result
end

#to_s(encoding = nil, collation = nil) ⇒ `Object`

# File 'lib/string.rb', line 153

# Converts the string to the given encoding/collation.
#
# Without arguments (or with both set to Binary) a plain String copy is
# returned. Otherwise the data is transcoded with Iconv if necessary and
# wrapped in the class that the Encodings table lists for the encoding.
def to_s(encoding=nil, collation=nil)
	# Binary is treated like "not specified"
	encoding  = nil if encoding  == Binary
	collation = nil if collation == Binary
	return String.new(self) unless (encoding || collation)
	# No transcoding when the target equals the current encoding or the
	# current encoding is Binary or ASCII::ASCII; the bytes are used as they are.
	data = if encoding == self.encoding || self.encoding == Binary || self.encoding == ASCII::ASCII then
		self
	else
		Iconv.iconv(encoding, self.encoding, self).first
	end
	# NOTE(review): when only a collation is given, encoding is nil here and
	# the Encodings table shown for this class has no nil key, so this lookup
	# would yield nil - confirm whether 'binary' was meant to be used.
	Encodings[encoding].new(data, encoding, collation)
end

#unescaped ⇒ `Object`



14
15
16

# File 'lib/ruby/string/unescaped.rb', line 14

# Returns a copy of the string with escape sequences resolved. The string is
# walked token by token - a token being either a backslash plus the
# character after it, or a single character that is not a backslash - and
# each token is replaced by whatever the Escapes table returns for it.
def unescaped
	gsub(/\\.|[^\\]/) do |token|
		Escapes[token]
	end
end

#user_prefixes ⇒ `Object`

returns prefixes found in front of a nickname Sequential parsing since @@nickname is not valid.

# File 'lib/butler/irc/string.rb', line 62

# Returns the flags of the user prefixes found at the start of the string,
# OR-ed together (0 if there are none). The string is parsed sequentially
# and every prefix counts only once: the prefix table is copied and each
# prefix found is removed from the copy, so a repeated prefix character
# (as in "@@nickname") ends the prefix run.
def user_prefixes
	remaining = Butler::IRC::User::PREFIXES.dup
	flags     = 0
	pos       = 0
	loop do
		char = self[pos,1]
		break unless remaining.has_key?(char)
		flags |= remaining.delete(char)
		pos   += 1
	end
	flags
end

#utf8(collation = nil) ⇒ `Object`



169
170
171

# File 'lib/string.rb', line 169

# Wraps this string in a String::UTF8 instance, passing along the
# String::UTF8::UTF8 encoding constant and the given collation (nil by default).
def utf8(collation=nil)
	String::UTF8.new(self, String::UTF8::UTF8, collation)
end

#valid_channelname? ⇒ `Boolean`

returns whether or not a string represents a valid channelname

Returns:

(Boolean)



32
33
34

# File 'lib/butler/irc/string.rb', line 32

# Tells whether the string is a valid channel name: one of the channel
# prefixes & # ! + followed by 1 to 50 characters, none of which may be
# BEL (0x07), LF, CR, a comma, a colon or a space.
# Returns 0 (truthy, the match position) if valid, nil otherwise.
def valid_channelname?
	channel_pattern = /\A[&#!\+][^\x07\x0A\x0D,: ]{1,50}\z/
	channel_pattern =~ self
end

#valid_nickname? ⇒ `Boolean`

returns whether the string represents a valid nickname. This method does not take care of prefixes like “@”, “+”, “-”; see valid_user? for this functionality, or strip_user_prefixes

Returns:

(Boolean)

# File 'lib/butler/irc/string.rb', line 39

# Tells whether the string is a valid nickname: one or more characters out
# of digits, ASCII letters and _ - | \ [ ] { } ^ `
# User prefixes are not handled here.
# Returns 0 (truthy, the match position) if valid, nil otherwise.
def valid_nickname?
	# former, stricter variant (first character had to be a letter, digit or _):
	#self =~ /\A[0-9A-Za-z_][0-9A-Za-z_\-\|\\\[\]\{\}\^\`]*\z/
	nick_pattern = /\A[0-9A-Za-z_\-\|\\\[\]\{\}\^\`]+\z/
	nick_pattern =~ self
end

#valid_user? ⇒ `Boolean`

the same as valid_nickname? except that preceding @, + or - are ignored

Returns:

(Boolean)



45
46
47

# File 'lib/butler/irc/string.rb', line 45

# Like valid_nickname?, but leading user prefixes are removed with
# strip_user_prefixes before the check; returns what valid_nickname? returns
# for the remainder.
def valid_user?
	strip_user_prefixes.valid_nickname?
end

Class: String

Overview

Direct Known Subclasses

Defined Under Namespace

Constant Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.encodings ⇒ Object

Instance Method Details

#arguments(unescape = true) ⇒ Object

#ascii(collation = nil) ⇒ Object

#binary ⇒ Object

#camelcase ⇒ Object

#chunks(chunk_length) ⇒ Object

#collation ⇒ Object

#encoding ⇒ Object

#mirc_formatted ⇒ Object

#mirc_stripped ⇒ Object

#mirc_translated_color(i, s) ⇒ Object

#post_arguments(unescaped = true) ⇒ Object

#strip_user_prefixes ⇒ Object

#to_flags ⇒ Object

#to_s(encoding = nil, collation = nil) ⇒ Object

#unescaped ⇒ Object

#user_prefixes ⇒ Object

#utf8(collation = nil) ⇒ Object

#valid_channelname? ⇒ Boolean

#valid_nickname? ⇒ Boolean

#valid_user? ⇒ Boolean

.encodings ⇒ `Object`

#arguments(unescape = true) ⇒ `Object`

#ascii(collation = nil) ⇒ `Object`

#binary ⇒ `Object`

#camelcase ⇒ `Object`

#chunks(chunk_length) ⇒ `Object`

#collation ⇒ `Object`

#encoding ⇒ `Object`

#mirc_formatted ⇒ `Object`

#mirc_stripped ⇒ `Object`

#mirc_translated_color(i, s) ⇒ `Object`

#post_arguments(unescaped = true) ⇒ `Object`

#strip_user_prefixes ⇒ `Object`

#to_flags ⇒ `Object`

#to_s(encoding = nil, collation = nil) ⇒ `Object`

#unescaped ⇒ `Object`

#user_prefixes ⇒ `Object`

#utf8(collation = nil) ⇒ `Object`

#valid_channelname? ⇒ `Boolean`

#valid_nickname? ⇒ `Boolean`

#valid_user? ⇒ `Boolean`