Class: String

Inherits:
Object show all
Includes:
RMTools::Cyrillic
Defined in:
lib/rmtools/conversions/string.rb,
lib/rmtools/core/b.rb,
lib/rmtools/core/js.rb,
lib/rmtools/lang/ansi.rb,
lib/rmtools/xml/string.rb,
lib/rmtools/rand/string.rb,
lib/rmtools/core/aliases.rb,
lib/rmtools/time/russian.rb,
lib/rmtools/lang/cyrillic.rb,
lib/rmtools/conversions/ip.rb,
lib/rmtools/text/string_rtl.rb,
lib/rmtools/console/highlight.rb,
lib/rmtools/text/string_parse.rb,
lib/rmtools/text/string_split.rb,
lib/rmtools/text/string_simple.rb,
lib/rmtools/core/string_compliance.rb

Overview

require ‘cgi’

Constant Summary collapse

XML_CHARSET_RE =
/(?:encoding|charset)=(.+?)"/
CALLER_RE =

( path ( file ) )

%r{^(.*?([^/\\]+?))#{	    # ( path ( file ) ) 
  }:(\d+)(?::in #{	      # :( line )[ :in
  }`(block (?:\((\d+) levels\) )?in )?(.+?)'#{   # `[ block in ] ( closure )' ]
})?$}
SIMPLE_CALLER_RE =

( path ( file ) )

%r{^(.*?([^/\\]+?))#{	    # ( path ( file ) ) 
  }:(\d+)(?::in #{	      # :( line )[ :in
  }`(.+?)'#{   # `( closure )' ]
})?$}
JS_CALLER_RE =

( func ) (protocol

%r{^(?:(\S*)[\s@])?\(?(?:[^:]+://#{ #  ( func ) (protocol
   }[^/:]*(?::\d+)?)?#{	               #  root[:port]
   }/([^?#]*?\/(\w+(?:\.\w+)?)#{     #  ( path/( file[ .fileext ] )
   }(?:\?.*?)?)#{	                           #  [ ?query ] )
   }:(\d+)?(?::(\d+))?#{	               #  :( line ):( char ))
}\)?$}
URL_RE =

( protocol

%r{^((?:([^:]+)://)#{	            #  ( protocol
  }([^/:]*(?::(\d+))?))?#{	  #  root[:port] )
  }((/[^?#]*?(?:\.(\w+))?)#{	#  ( path[.( fileext )]
  }(?:\?(.*?))?)?#{	              #  [?( query params )] )   
  }(?:#(.+))?#{	                  #  [ #( anchor ) ]
}$}
IP_RE =
/\d+\.\d+\.\d+\.\d+(?::\d+)?/
IP_RANGE_RE =
/(\d+\.\d+\.\d+\.\d+)\s*-\s*(\d+\.\d+\.\d+\.\d+)/

Constants included from RMTools::Cyrillic

RMTools::Cyrillic::ANSI_ENCODING, RMTools::Cyrillic::ANSI_LETTERS_DC, RMTools::Cyrillic::ANSI_LETTERS_UC, RMTools::Cyrillic::ANSI_YOYE, RMTools::Cyrillic::RU_LETTERS

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.rand(*args) ⇒ Object



85
86
87
# File 'lib/rmtools/rand/string.rb', line 85

# Class-level shortcut for random string generation: forwards every
# argument untouched to RMTools.randstr and returns its result.
def self.rand(*args)        
  RMTools.randstr(*args)
end

Instance Method Details

#+(str) ⇒ Object

Immutable concatenation that accepts any object: ‘123’ + 95 # => ‘12395’, whereas the aliased built-in ‘123’.plus 95 raises TypeError.



51
52
53
# File 'lib/rmtools/core/js.rb', line 51

# Non-mutating concatenation that accepts any object: the argument is
# converted with #to_s and handed to #plus (the alias taken from the
# built-in String#+ via `alias :plus :+`).
def +(str)
  plus str.to_s
end

#-(pattern) ⇒ Object



4
5
6
# File 'lib/rmtools/text/string_simple.rb', line 4

# Returns a copy of the string with every match of +pattern+ removed
# (the pattern is passed straight to #gsub with an empty replacement).
def -(pattern)
  gsub pattern, ''
end

#<<(str) ⇒ Object

Mutable concatenation that accepts any object: ‘123’ << 95 # => ‘12395’, whereas the built-in ‘123’.concat 95 # => ‘123_’ (appends the character with code 95).



58
59
60
# File 'lib/rmtools/core/js.rb', line 58

# In-place append that accepts any object: the argument is converted with
# #to_s before being passed to #concat, so integers are appended as their
# decimal text rather than as a character code.
def <<(str)
  concat str.to_s
end

#>>(str) ⇒ Object

%blah

wall of text in the interpreter

Oh, it’s too bulky; maybe we should save this text into a variable: blah >> (str = ‘’) — saved!



42
43
44
# File 'lib/rmtools/text/string_simple.rb', line 42

# Prepends the receiver to +str+ in place: +str+ is overwritten with
# self + str and returned (String#replace returns its receiver).
def >>(str)
  str.replace(self + str)
end

#after(splitter = $/) ⇒ Object



32
33
34
# File 'lib/rmtools/text/string_simple.rb', line 32

# Returns everything that follows the first occurrence of +splitter+
# (default: the input record separator), or nil when there is no such
# occurrence.
def after(splitter=$/)
  _head, tail = split(splitter, 2)
  tail
end

#ansi(from_encoding = "UTF-16") ⇒ Object



48
49
50
51
# File 'lib/rmtools/lang/ansi.rb', line 48

# Returns the string converted to WINDOWS-1251 through Iconv.
# +from_encoding+ defaults to the receiver's own encoding name, upcased.
# "//IGNORE" is appended to both the source and target encoding names.
# Converters are memoized in ICONVS under the key
# "WINDOWS-1251<SOURCE//IGNORE", so each source encoding builds one Iconv.
def ansi(from_encoding=encoding.name.upcase)
  from_encoding += "//IGNORE"
  (ICONVS["WINDOWS-1251<#{from_encoding}"] ||= Iconv.new('WINDOWS-1251//IGNORE', from_encoding)).iconv(self)
end

#ansi!(from_encoding = "UTF-16") ⇒ Object



57
58
59
# File 'lib/rmtools/lang/ansi.rb', line 57

# In-place variant of #ansi: replaces the receiver's content with the
# converted string and returns the receiver.
def ansi!(from_encoding=encoding.name.upcase)
  replace ansi from_encoding
end

#bObject



17
# File 'lib/rmtools/core/b.rb', line 17

# Returns false for an empty receiver and the receiver itself otherwise,
# so `value.b || fallback` substitutes a default for blank values.
def b
  empty? ? false : self
end

#bump!(splt = '.') ⇒ Object

‘filename.txt’.bump!.bump!

> “filename.txt.2”

‘filename.txt’.bump!.bump!.bump!(‘_’)

> “filename.txt.2_1”

‘filename.txt’.bump!.bump!.bump!(‘_’).bump!

> “filename.txt.2_1.1”



60
61
62
# File 'lib/rmtools/text/string_simple.rb', line 60

# In-place variant of #bump_version: replaces the receiver with the bumped
# name and returns the receiver, which is why calls can be chained.
def bump!(splt='.')
  replace bump_version splt
end

#bump_version(splt = '.') ⇒ Object Also known as: next_version



64
65
66
67
68
69
70
# File 'lib/rmtools/text/string_simple.rb', line 64

# Returns a copy of the path with its trailing version counter bumped.
#
# The counter is the run of digits that follows the last +splt+ at the very
# end of the file name. When the name does not end with "<splt><digits>",
# a fresh counter "<splt>1" is appended instead.
#
#   'filename.txt'.bump_version         # => "filename.txt.1"
#   'filename.txt.9'.bump_version       # => "filename.txt.10"
#   'filename.txt.2'.bump_version('_')  # => "filename.txt.2_1"
#
# Only the base name is touched; a directory part (anything File.split
# reports other than '.') is re-attached with File.join.
def bump_version(splt='.')
  dir, name = File.split self
  # Match the suffix from the front instead of reversing the string:
  # reversing also reverses the digits, which turned ".9" into ".01".
  suffix_re = /(?:#{Regexp.escape splt}(\d*))?\z/
  bumped = name.sub(suffix_re) { "#{splt}#{Regexp.last_match(1).to_i + 1}" }
  dir == '.' ? bumped : File.join(dir, bumped)
end

#caps?Boolean

Returns:

  • (Boolean)


12
13
14
# File 'lib/rmtools/lang/cyrillic.rb', line 12

# Truthy (the match offset) when some line of the string consists only of
# uppercase Cyrillic letters, digits and spaces, starts with a letter and
# is at least two characters long; nil otherwise.
def caps?
  /^[А-ЯЁ][А-ЯЁ\d ]+$/ =~ self
end

#ccap(encode = 1) ⇒ Object



80
81
82
# File 'lib/rmtools/lang/cyrillic.rb', line 80

# Capitalizes the first character with #cupcase (Cyrillic-aware upcase)
# and leaves the rest of the string as is.
def ccap
  head = self[0]
  rest = self[1..-1]
  head.cupcase + rest
end

#cdowncaseObject



31
32
33
34
35
# File 'lib/rmtools/lang/cyrillic.rb', line 31

# Returns a copy with Cyrillic letters downcased.
# A string already in ANSI_ENCODING is translated directly with the
# ANSI_LETTERS_DC tr-tables; any other string is converted with UTF2ANSI,
# translated, and converted back with ANSI2UTF.
def cdowncase
  return tr(*ANSI_LETTERS_DC) if encoding == ANSI_ENCODING
  ansi_copy = UTF2ANSI[self]
  ANSI2UTF[ansi_copy.tr(*ANSI_LETTERS_DC)]
end

#cdowncase!Object



36
37
38
39
40
# File 'lib/rmtools/lang/cyrillic.rb', line 36

# In-place Cyrillic downcase.
# Returns the receiver when something changed and nil otherwise, matching
# the tr!/downcase! convention that #fdowncase! relies on.
#
# For strings that are not in ANSI_ENCODING the conversion goes through a
# temporary ANSI copy, so the result has to be written back explicitly;
# calling tr! on the temporary would neither touch the receiver nor survive
# a nil return.
def cdowncase!
  return tr!(*ANSI_LETTERS_DC) if encoding == ANSI_ENCODING
  lowered = ANSI2UTF[UTF2ANSI[self].tr(*ANSI_LETTERS_DC)]
  lowered == self ? nil : replace(lowered)
end

#ciObject



98
# File 'lib/rmtools/lang/cyrillic.rb', line 98

# Returns the receiver unchanged.
# NOTE(review): presumably a no-op counterpart of a conversion defined on
# another class — confirm against lib/rmtools/lang/cyrillic.rb.
def ci; self end

#conj(str) ⇒ Object

“ @@@@@@”.conj “ @@@ @@@”

> “ @@@”



173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'ext/rmtools.cpp', line 173

/*
 * String#conj: byte-wise "AND" of two equally long mask strings.
 * Raises IndexError when the byte lengths differ. The result is a new
 * string of the same length holding '@' at every position where both
 * inputs hold '@', and ' ' everywhere else.
 */
static VALUE rb_str_conjunction(VALUE self, VALUE str)
{
  if (RSTRING_LEN(self) != RSTRING_LEN(str))
    rb_raise(rb_eIndexError, "strings sizes differs (%ld and %ld)",
                     RSTRING_LEN(self), RSTRING_LEN(str));
  VALUE new_str = rb_str_new("", 0);
  /* NOTE(review): i is int while RSTRING_LEN is formatted as long above. */
  int i;
  const char *selfptr = RSTRING_PTR(self), *strptr = RSTRING_PTR(str);
  for (i=0;i<RSTRING_LEN(str);i++) {
    if (strptr[i] == '@' && selfptr[i] == '@')
      rb_str_buf_cat(new_str, "@", 1);
    else
      rb_str_buf_cat(new_str, " ", 1);
  }
  return new_str;
}

#crjust(*args) ⇒ Object



114
115
116
# File 'lib/rmtools/lang/cyrillic.rb', line 114

# Right-justifies via an ANSI round-trip: the string is converted with
# UTF2ANSI, padded with String#rjust (same arguments), and converted back
# with ANSI2UTF.
def crjust(*args)
  ANSI2UTF[UTF2ANSI[self].rjust(*args)]
end

#cuncap(encode = 1) ⇒ Object



84
85
86
# File 'lib/rmtools/lang/cyrillic.rb', line 84

# Lowercases the first character with #cdowncase (Cyrillic-aware downcase)
# and leaves the rest of the string as is.
def cuncap
  head = self[0]
  rest = self[1..-1]
  head.cdowncase + rest
end

#cupcaseObject



20
21
22
23
24
# File 'lib/rmtools/lang/cyrillic.rb', line 20

# Returns a copy with Cyrillic letters upcased.
# A string already in ANSI_ENCODING is translated directly with the
# ANSI_LETTERS_UC tr-tables; any other string is converted with UTF2ANSI,
# translated, and converted back with ANSI2UTF.
def cupcase
  return tr(*ANSI_LETTERS_UC) if encoding == ANSI_ENCODING
  ansi_copy = UTF2ANSI[self]
  ANSI2UTF[ansi_copy.tr(*ANSI_LETTERS_UC)]
end

#cupcase!Object



25
26
27
28
29
# File 'lib/rmtools/lang/cyrillic.rb', line 25

# In-place Cyrillic upcase.
# Returns the receiver when something changed and nil otherwise, matching
# the tr!/upcase! convention that #fupcase! relies on.
#
# For strings that are not in ANSI_ENCODING the conversion goes through a
# temporary ANSI copy, so the result has to be written back explicitly;
# calling tr! on the temporary would neither touch the receiver nor survive
# a nil return.
def cupcase!
  return tr!(*ANSI_LETTERS_UC) if encoding == ANSI_ENCODING
  raised = ANSI2UTF[UTF2ANSI[self].tr(*ANSI_LETTERS_UC)]
  raised == self ? nil : replace(raised)
end

#cut_line(maxlen, terminator = nil) ⇒ Object

@ maxlen : make the string shorter than this value, @ terminator :

- default = nil : without cutting words
- :syntax : so that the result is the most complete phrase possible


148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/rmtools/text/string_split.rb', line 148

# Shortens the string to at most +maxlen+ characters, cutting on a block
# boundary produced by #split_to_blocks rather than at a fixed offset.
#
# Trailing arguments are decoded by Array#fetch_opts into a +terminator+
# (e.g. :syntax) and an options hash. opts[:charsize] defaults to 2 when
# the whole string is Cyrillic (#cyr?) and to 1 otherwise.
#
# Returns the receiver itself when it is already short enough, otherwise a
# new string.
def cut_line(maxlen, *opts)
  terminator, opts = opts.fetch_opts [:syntax, :flags]
  opts[:charsize] ||= cyr? ? 2 : 1
  return self if size <= maxlen
  # Only the first block is needed (:lines => 1); 3 is subtracted from the
  # budget before splitting.
  blocks = split_to_blocks(maxlen*opts[:charsize]-3, terminator, :strips => true, :strict_overhead => false, :lines => 1)
  cuted = (blocks[0] || self)[0, maxlen]
  # Drop the trailing punctuation mark left at the cut point.
  if terminator == :syntax
    cuted.gsub!(/[.!?,;]$/, '')
  else cuted.chomp!('.')
  end
  # NOTE(review): appending '' is a no-op apart from returning a copy; the
  # 3 reserved characters suggest an ellipsis was intended here — confirm.
  cuted + ''
end

#cyr?Boolean

Returns:

  • (Boolean)


16
17
18
# File 'lib/rmtools/lang/cyrillic.rb', line 16

# True when the string contains no character outside the Cyrillic ranges
# А-п, р-ё and Ё (so an empty string counts as Cyrillic too).
def cyr?
  (self =~ /[^А-пр-ёЁ]/).nil?
end

#digit_dateObject Also known as: digitize



5
6
7
8
9
10
11
12
13
14
15
16
17
18
# File 'lib/rmtools/time/russian.rb', line 5

# Returns a copy in which English and Russian month names (full or
# abbreviated, any case) are replaced by their two-digit month number.
# Substitutions are applied one after another, January through December.
def digit_date
  [
    [/jan(?:uary)?|[яЯ][нН][вВ](?:[аА][рР][яЯьЬ]?)?/i, '01'],
    [/feb(?:ruary)?|[фФ][еЕ][вВ](?:[рР][аА][лЛ][яЯьЬ]?)?/i, '02'],
    [/mar(?:ch)?|[мМ][аА][рР](?:[тТ][аА]?)?/i, '03'],
    [/apr(?:il)?|[аА][пП][рР](?:[еЕ][лЛ][яЯьЬ]?)?/i, '04'],
    [/may|[мМ][аА][яЯйЙ]/i, '05'],
    [/june?|[иИ][юЮ][нН][яЯьЬ]?/i, '06'],
    [/july?|[иИ][юЮ][лЛ][яЯьЬ]?/i, '07'],
    [/aug(?:ust)?|[аА][вВ][гГ](?:[уУ][сС][тТ][аА]?)?/i, '08'],
    [/sep(?:tember)?|[сС][еЕ][нН](?:[тТ][яЯ][бБ][рР][яЯьЬ]?)?/i, '09'],
    [/oct(?:ober)?|[оО][кК][тТ](?:[яЯ][бБ][рР][яЯьЬ]?)?/i, '10'],
    [/nov(?:ember)?|[нН][оО][яЯ](?:[бБ][рР][яЯьЬ]?)?/i, '11'],
    [/dec(?:ember)?|[дД][еЕ][кК](?:[аА][бБ][рР][яЯьЬ]?)?/i, '12']
  ].inject(self) { |text, (month_re, number)| text.gsub(month_re, number) }
end

#digit_numsObject



35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/rmtools/time/russian.rb', line 35

# Returns a copy in which Russian number words for 0..9 (cardinal forms
# such as "восемь" and noun forms such as "восьмерка") are replaced by the
# corresponding digit.
#
# Substitutions run in table order. Eight is listed before seven on
# purpose: "восемь" contains "семь", so replacing seven first would turn
# it into "во7".
def digit_nums
  [
    [/[оО][дД][иИ][нН]|[еЕ][дД][иИ][нН][иИ][цЦ][аА]/, '1'],
    [/[дД][вВ](?:[оО][йЙ][кК])?[аА]/, '2'],
    [/[тТ][рР](?:[иИ]|[оО][йЙ][кК][аА])/, '3'],
    [/[чЧ][еЕ][тТ](?:[ыЫ][рР][еЕ]|[вВ][еЕёЁ][рР][кК][аА])/, '4'],
    [/[пП][яЯ][тТ](?:[ьЬ]|[еЕёЁ][рР][кК][аА])/, '5'],
    [/[шШ][еЕ][сС][тТ](?:[ьЬ]|[еЕёЁ][рР][кК][аА])/, '6'],
    [/[вВ][оО][сС](?:[еЕ][мМ][ьЬ]|[ьЬ][мМ][еЕёЁ][рР][кК][аА])/i, '8'],
    [/[сС][еЕ][мМ](?:[ьЬ]|[еЕёЁ][рР][кК][аА])/, '7'],
    [/[дД][еЕ][вВ][яЯ][тТ](?:[ьЬ]|[кК][аА])/, '9'],
    [/[нН][оОуУ][лЛ][ьЬ]/, '0']
  ].inject(self) { |text, (word_re, digit)| text.gsub(word_re, digit) }
end

#disj(str) ⇒ Object

“ @@@@@@”.disj “ @@@ @@@”

> “ @@@@@@@@@”



152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'ext/rmtools.cpp', line 152

/*
 * String#disj: byte-wise "OR" of two equally long mask strings.
 * Raises IndexError when the byte lengths differ. The result is a new
 * string of the same length holding ' ' only where both inputs hold ' ',
 * and '@' everywhere else (any non-space byte counts as set).
 */
static VALUE rb_str_disjunction(VALUE self, VALUE str)
{
  if (RSTRING_LEN(self) != RSTRING_LEN(str))
    rb_raise(rb_eIndexError, "strings sizes differs (%ld and %ld)",
                     RSTRING_LEN(self), RSTRING_LEN(str));
  VALUE new_str = rb_str_new("", 0);
  /* NOTE(review): i is int while RSTRING_LEN is formatted as long above. */
  int i;
  const char *selfptr = RSTRING_PTR(self), *strptr = RSTRING_PTR(str);
  for (i=0;i<RSTRING_LEN(str);i++) {
    if (strptr[i] != ' ' || selfptr[i] != ' ')
      rb_str_buf_cat(new_str, "@", 1);
    else
      rb_str_buf_cat(new_str, " ", 1);
  }
  return new_str;
}

#div(len) ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/rmtools/text/string_split.rb', line 45

# Cuts the string into consecutive pieces of +len+ characters (the last
# piece may be shorter) and returns them as an array.
#
# * A non-positive +len+ returns the receiver wrapped in an array.
# * A non-integer +len+ is the deprecated calling form: it emits a
#   deprecation notice and delegates to #sharp_split.
#
# Extra positional arguments are accepted and ignored.
def div(len, *)
  # Integer instead of Fixnum: Fixnum was folded into Integer in Ruby 2.4
  # and removed in 3.2.
  unless len.is_a?(Integer)
    deprecate_method "Use #sharp_split instead."
    return sharp_split len
  end
  return [self] if len <= 0
  (0...size).step(len).map { |offset| self[offset, len] }
end

#en2ruObject



76
77
78
# File 'lib/rmtools/lang/cyrillic.rb', line 76

# Re-types text entered in the Latin keyboard layout as if it had been
# typed in the Russian one: a character-for-character String#tr from the
# first table (Latin keys) to the second (the Cyrillic symbol on the same
# key). Returns a new string.
def en2ru
  tr "`qwertyuiop[]asdfghjkl;:'zxcvbnm,./|?\"@\#$^&~QWERTYUIOP{}ASDFGHJKLZXCVBNM<>", "ёйцукенгшщзхъфывапролджЖэячсмитьбю./,Э\"№;:?ЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЯЧСМИТЬБЮ"
end

#fdowncaseObject

full downcase, because cdowncase doesn’t convert non-cyrillic



58
59
60
# File 'lib/rmtools/lang/cyrillic.rb', line 58

# Full downcase: applies the built-in #downcase first and #cdowncase (the
# Cyrillic-only downcase) to its result. Returns a new string.
def fdowncase
  downcase.cdowncase
end

#fdowncase!Object



61
62
63
64
# File 'lib/rmtools/lang/cyrillic.rb', line 61

# In-place full downcase: runs #downcase! and then #cdowncase!.
# Returns the result of #cdowncase! when it is truthy, otherwise the result
# of #downcase! — i.e. nil only when neither step reported a change.
def fdowncase!
  res = downcase!
  cdowncase! or res
end

#find_all_hl(pat, range = 1000) ⇒ Object



16
17
18
19
20
21
22
23
# File 'lib/rmtools/console/highlight.rb', line 16

# Prints every occurrence of +pat+ together with +range+ characters of
# context on each side, highlighted via #ghls, and returns the number of
# occurrences found.
#
# +pat+ may be a String or a Regexp; a Regexp contributes its source and
# options to the combined pattern. The context is matched with ".{range}",
# i.e. exactly +range+ non-newline characters are required on both sides.
# NOTE(review): `Regexp(...)` is a method call, not Regexp.new — presumably
# a Kernel-level helper defined elsewhere in rmtools; confirm.
def find_all_hl(pat, range=1000)
  target = Regexp(
    ".{#{range}}#{pat.is(Regexp) ? pat.source : pat}.{#{range}}", 
    pat.is(Regexp) ? pat.options : 0)
  matches = scan(target)
  puts matches.ghls(pat)
  matches.size
end

#find_compatible_encodingObject



94
95
96
97
98
99
100
101
102
103
104
# File 'lib/rmtools/lang/ansi.rb', line 94

# Probes the encodings listed in ENCODINGS_PATTERNS and returns the first
# one whose pattern matches the string.
#
# Returns nil when the string is already valid UTF-8 (#utf?), the matching
# encoding entry on success, and false when nothing matched.
# Side effect: the receiver is re-tagged with force_encoding for every
# candidate tried and keeps the last one.
def find_compatible_encoding
  # UTF-8 is the default, nothing to detect
  return if utf?
  ENCODINGS_PATTERNS.each do |enc, pattern|
    force_encoding(enc)
    return enc if self =~ pattern
  end
  false
end

#find_hl(pat, range = 1000) ⇒ Object



6
7
8
9
10
11
12
13
14
# File 'lib/rmtools/console/highlight.rb', line 6

# Prints the first occurrence of +pat+ with up to +range+ characters of
# context before it, highlighted via #ghl, and returns the match offset.
#
# pat::   String or Regexp; anything else raises TypeError.
# range:: context size; the printed window starts at most +range+
#         characters before the match and is 2*range characters long.
#
# Returns nil without printing anything when the pattern does not occur.
def find_hl(pat, range=1000)
  idx = case pat
    when String then index pat
    when Regexp then self =~ pat
    else raise TypeError, "pattern must be string or regexp"
  end
  # No match: there is nothing to highlight, and idx-range would raise on nil.
  return nil unless idx
  puts self[[idx-range, 0].max, 2*range].ghl(pat)
  idx
end

#find_with_offsets(text, offset) ⇒ Object

Fast search for highlighting purposes



47
48
49
50
51
52
# File 'lib/rmtools/text/string_simple.rb', line 47

# Fast search for highlighting purposes.
# Locates the first occurrence of +text+ and returns a three-element array:
# up to +offset+ characters before the match, +text+ itself, and up to
# +offset+ characters after it. Returns nil when +text+ does not occur.
def find_with_offsets(text, offset)
  found_at = index(text)
  return nil unless found_at
  start = [0, found_at - offset].max
  tail_start = found_at + text.size
  [self[start...found_at], text, self[tail_start, offset]]
end

#from_ip(range = nil) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/rmtools/conversions/ip.rb', line 6

# Converts a dotted-quad IPv4 string to its 32-bit integer value.
#
# * A string that is already a plain integer ("3232235777") is returned as
#   that Integer.
# * With a truthy +range+, the string is split on ' - ', each side is
#   converted, and the String "ip between A and B" is returned.
#   NOTE(review): this looks like an SQL condition fragment — confirm with
#   callers.
# * Otherwise the Integer value of the single address is returned.
#
# Parsing relies on String#scanf with '%d.%d.%d.%d'.
def from_ip(range=nil)
  int = to_i
  return int if int.to_s == self
  if range
    arr = []
    split(' - ').each {|s|
      res = s.scanf '%d.%d.%d.%d'
      arr << (res[0] << 24) + (res[1] << 16) + (res[2] << 8) + res[3]
    }
    "ip between #{arr[0]} and #{arr[1]}"
  else
    res = scanf '%d.%d.%d.%d'
    (res[0] << 24) + (res[1] << 16) + (res[2] << 8) + res[3]
  end
end

#funcapObject



88
89
90
# File 'lib/rmtools/lang/cyrillic.rb', line 88

# Lowercases the first character with #fdowncase (full Latin + Cyrillic
# downcase) and leaves the rest of the string as is.
def funcap
  head = self[0]
  rest = self[1..-1]
  head.fdowncase + rest
end

#fupcaseObject

full upcase, because cupcase doesn’t convert non-cyrillic



49
50
51
# File 'lib/rmtools/lang/cyrillic.rb', line 49

# Full upcase: applies the built-in #upcase first and #cupcase (the
# Cyrillic-only upcase) to its result. Returns a new string.
def fupcase
  upcase.cupcase
end

#fupcase!Object



52
53
54
55
# File 'lib/rmtools/lang/cyrillic.rb', line 52

# In-place full upcase: runs #upcase! and then #cupcase!.
# Returns the result of #cupcase! when it is truthy, otherwise the result
# of #upcase! — i.e. nil only when neither step reported a change.
def fupcase!
  res = upcase!
  cupcase! or res
end

#inlineObject



8
9
10
# File 'lib/rmtools/text/string_simple.rb', line 8

# True when the string fits on one line, i.e. contains no "\n".
def inline
  !include?("\n")
end

#is_utf!(utf = 'UTF-8') ⇒ Object



90
91
92
# File 'lib/rmtools/lang/ansi.rb', line 90

def is_utf!(utf='UTF-8')
  force_encoding utf
end

#lchomp(match = /\r\n?/) ⇒ Object



12
13
14
15
16
17
18
# File 'lib/rmtools/text/string_simple.rb', line 12

# Left-hand counterpart of #chomp: returns a copy of the string without the
# leading +match+.
#
# match:: String or Regexp that must occur at offset 0 to be removed
#         (default: "\r" optionally followed by "\n").
#
# When the string does not start with +match+, an unchanged copy is
# returned.
def lchomp(match=/\r\n?/)
  return dup unless index(match) == 0
  # self[match] is the text actually matched at offset 0; Regexp itself has
  # no #size, so the length must come from the matched text.
  self[self[match].size..-1]
end

#lchomp!(match = /\r\n?/) ⇒ Object



20
21
22
23
24
25
# File 'lib/rmtools/text/string_simple.rb', line 20

# In-place #lchomp: removes a leading +match+ (String or Regexp) from the
# receiver.
# Returns the receiver when something was removed and nil otherwise.
def lchomp!(match=/\r\n?/)
  return nil unless index(match) == 0
  # Length of the text matched at offset 0 (Regexp itself has no #size).
  self[0...self[match].size] = ''
  self
end

#mask_ip(val) ⇒ Object



26
27
28
29
30
31
32
33
34
35
# File 'lib/rmtools/conversions/ip.rb', line 26

# Returns the network that contains this address, in CIDR notation
# ("a.b.c.d/prefix").
#
# val:: a positive value is the prefix length (24 => "/24"); a negative
#       value is the negated number of host bits (-8 => "/24").
#
# The address is read with #from_ip, the host bits are cleared, and the
# result is formatted with Integer#to_ip.
def mask_ip(val)
  int = from_ip
  if val < 0
    maskv = 32 + val
    # Host-bit count must be positive; a negative exponent below would
    # produce a non-integer mask.
    host_bits = -val
  else
    maskv = val
    host_bits = 32 - val
  end
  "#{(int - (int & 2**host_bits - 1)).to_ip}/#{maskv}"
end

#ordObject



5
# File 'lib/rmtools/core/string_compliance.rb', line 5

# Returns self[0].
# NOTE(review): the file name (string_compliance.rb) suggests this is a
# shim for Ruby 1.8, where String#[] with an Integer yields the character
# code — confirm that it is only loaded there.
def ord; self[0] end

#parse(as) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/rmtools/text/string_parse.rb', line 28

# Parses the string as one of several textual formats selected by +as+.
#
# :uri::       matches URL_RE and returns a Hash with the string keys
#              'href', 'root', 'protocol', 'host', 'port' (Integer, 80 when
#              absent), 'fullpath', 'pathname', 'path', 'ext', 'query'
#              (decoded with #to_params(false)) and 'anchor'. When the
#              pattern does not match, only 'href' is returned.
# :caller::    matches CALLER_RE (a Ruby backtrace line) and returns a Hash
#              with 'path', 'file', 'line', 'block_level', 'func' and
#              'fullpath', or nil.
# :js_caller:: matches JS_CALLER_RE (a JavaScript stack line) and returns a
#              Hash with 'func', 'fullpath', 'file', 'line', 'char' and
#              'from', or nil.
# :ip::        returns the first substring matching IP_RE, or nil.
# :ip_range::  returns a Range of the two address strings matched by
#              IP_RANGE_RE, or nil.
#
# Raises ArgumentError for any other flag.
# NOTE(review): the error message does not list :js_caller, and 'fullpath'
# and 'pathname' are both filled from the same capture group.
def parse(as)
  case as
    when :uri
      m = match URL_RE
      !m || m[0].empty? ?
        {  'href'	        => self  } : 
        {	'href'	      => self,
            'root'	      => m[1],
            'protocol'	=> m[2],
            'host'	      => m[3], 
            'port'	      => m[4] ? m[4].to_i : 80,
            'fullpath'	=> m[5] || '/',
            'pathname'	=> m[5] || '/',
            'path'	      => m[6] || '',
            'ext'	        => m[7],
            'query'	      => m[8] && m[8].to_params(false),
            'anchor'	    => m[9] }
    when :caller
      m = match CALLER_RE
      !m || m[0].empty? ? nil : 
        {  'path' => m[1],
            'file' => m[2],
            'line' => m[3].to_i,
            'block_level' => m[4] && (m[5] || 1).to_i, # > 1.9
            'func' => m[6],
            # Pseudo-paths containing "(" or "[" are kept verbatim instead
            # of being expanded.
            'fullpath' => m[1] =~ /[\(\[]/ ? 
              m[1] : 
              File.expand_path(m[1])     }
    when :js_caller
      m = match JS_CALLER_RE
      !m || m[0].empty? ? nil : 
        {  'func' => m[1],
            'fullpath' => m[2],
            'file' => m[3],
            'line' => m[4] && m[4].to_i,
            'char' => m[5] && m[5].to_i,
            'from' => m[3..5].compact*':' }
    when :ip;          self[IP_RE]
    when :ip_range; (m = match IP_RANGE_RE) && m[1]..m[2]
    else raise ArgumentError, "Incorrect flag. Correct flags: :uri, :caller, :ip, :ip_range"
  end
end

#parseip(range = nil) ⇒ Object



76
77
78
79
# File 'lib/rmtools/text/string_parse.rb', line 76

# Deprecated: use String#parse(:ip) or String#parse(:ip_range).
# Emits a deprecation notice, then delegates to #parse with :ip, or with
# :ip_range when +range+ is truthy.
def parseip(range=nil)
  deprecate_method "Use String#parse(:ip#{'_range' if range}) instead."
  parse :"ip#{'_range' if range}"
end

#parseipsObject



41
42
43
44
# File 'lib/rmtools/conversions/ip.rb', line 41

# Deprecated alias of #scan_ip: emits a deprecation notice and delegates.
# NOTE(review): this calls `deprecation` while the other deprecated
# wrappers in this class call `deprecate_method` — confirm both helpers
# exist.
def parseips
  deprecation "Use #scan_ip"
  scan_ip
end

#parseuriObject



71
72
73
74
# File 'lib/rmtools/text/string_parse.rb', line 71

# Deprecated: use String#parse(:uri).
# Emits a deprecation notice, then delegates to #parse(:uri).
def parseuri
  deprecate_method "Use String#parse(:uri) instead."
  parse :uri
end

#plusObject



46
# File 'lib/rmtools/core/js.rb', line 46

# Keeps the built-in String#+ reachable as #plus; the #+ defined later in
# this file converts its argument with #to_s and delegates here.
alias :plus :+

#rand(chsize = 1) ⇒ Object



89
90
91
# File 'lib/rmtools/rand/string.rb', line 89

# Returns a random slice of +chsize+ characters: the start offset is drawn
# with Kernel.rand(size*chsize) and the slice is taken with self[start, chsize].
# NOTE(review): multiplying the offset range by chsize looks like a
# byte-indexed (Ruby 1.8) leftover; with character indexing the offset can
# land past the end and yield nil — confirm.
def rand(chsize=1)
  self[Kernel.rand(size*chsize), chsize]
end

#randsample(qty = Kernel.rand(size)) ⇒ Object



97
98
99
# File 'lib/rmtools/rand/string.rb', line 97

# Splits the string into single characters and delegates to
# Array#randsample with +qty+ (default: a random number below the string
# size). Returns whatever Array#randsample returns.
def randsample(qty=Kernel.rand(size))
  split('').randsample(qty)
end

#randsubstr(chsize = 1) ⇒ Object



93
94
95
# File 'lib/rmtools/rand/string.rb', line 93

# Returns a random substring: two offsets are drawn with
# Kernel.rand(size*chsize) and the inclusive range between the smaller and
# the larger one is sliced out.
def randsubstr(chsize=1)
  first = Kernel.rand(size*chsize)
  second = Kernel.rand(size*chsize)
  low, high = first > second ? [second, first] : [first, second]
  self[low..high]
end

#recordize(whiny = true) ⇒ Object Also known as: to_record

polymorphic helper



114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/rmtools/text/string_simple.rb', line 114

# Polymorphic helper: resolves a "ClassName#id" reference to a record.
#
# The part before '#' is turned into a class with #constantize; the record
# is fetched with model.get(id) when the model responds to :get, otherwise
# with model.where(:id => id).first.
#
# whiny:: when the string has no '#id' part, raise ArgumentError (true,
#         the default) or return false.
def recordize(whiny=true)
  classname, id = split('#')
  if !id
    return false unless whiny
    raise ArgumentError, "Could not find record by string: #{inspect}"
  end
  model = classname.constantize
  return model.get(id) if model.respond_to?(:get)
  model.where(:id => id).first
end

#rmumlautObject



42
43
44
45
46
# File 'lib/rmtools/lang/cyrillic.rb', line 42

# Returns a copy translated through the ANSI_YOYE tr-tables.
# A string already in ANSI_ENCODING is translated directly; any other
# string is converted with UTF2ANSI, translated, and converted back with
# ANSI2UTF.
def rmumlaut
  return tr(*ANSI_YOYE) if encoding == ANSI_ENCODING
  ansi_copy = UTF2ANSI[self]
  ANSI2UTF[ansi_copy.tr(*ANSI_YOYE)]
end

#rsplit(splitter = $/, qty = 0) ⇒ Object

lookbehind #split



24
25
26
# File 'lib/rmtools/text/string_rtl.rb', line 24

# Right-to-left #split: the string and the splitter are both reversed,
# split with the given +qty+ limit, and the pieces are put back in original
# order and orientation (Array#reverses is expected to reverse each piece).
# With a limit, the unsplit remainder is therefore the leftmost piece.
# +splitter+ must respond to #reverse.
def rsplit(splitter=$/, qty=0)
  reverse.split(splitter.reverse, qty).reverse.reverses
end

#rsub(from, to = nil, &block) ⇒ Object

rightmost #sub



6
7
8
9
10
11
12
13
14
15
# File 'lib/rmtools/text/string_rtl.rb', line 6

# Rightmost #sub: replaces the last occurrence of +from+ instead of the
# first one, by performing the substitution on the reversed string.
#
# from::  the pattern; it is reversed too, so it must respond to #reverse.
# to::    replacement text (required when no block is given).
# block:: receives the matched text in original orientation; its result is
#         reversed back before insertion.
#
# Returns a new string.
def rsub(from, to=nil, &block)
  if block
    reverse.sub(from.reverse) {|m| block[m.reverse].reverse}.reverse
  else
    # q is the number of \N back-references found in the replacement, plus 1.
    q = to.scan(/\\\d(\D|$)/).size+1
    to = to.reverse
    # In the reversed replacement each "\N" reads "N\"; restore the
    # backslash-first form and renumber N to q-N.
    to.gsub!(/(^|\D)(\d)\\/) {"#$1\\#{q-$2.to_i}"} if q > 1
    reverse.sub(from.reverse, to).reverse
  end
end

#rsub!(from, to = nil, &block) ⇒ Object

in-place #rsub



18
19
20
21
# File 'lib/rmtools/text/string_rtl.rb', line 18

# In-place #rsub. Returns the receiver when the substitution changed it
# and nil when the result equals the current content.
def rsub!(from, to=nil, &block)
  replaced = rsub(from, to, &block)
  return nil if replaced == self
  replace(replaced)
end

#ru2enObject



72
73
74
# File 'lib/rmtools/lang/cyrillic.rb', line 72

# Re-types text entered in the Russian keyboard layout as if it had been
# typed in the Latin one: a character-for-character String#tr from the
# first table (Cyrillic keys) to the second (the Latin symbol on the same
# key). Returns a new string.
def ru2en
  tr "ёйцукенгшщзхъфывапролдэячсмить/.ю?,б\"№;:жЁЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИВТЬБЮ", "`qwertyuiop[]asdfghjkl'zxcvbnm|/.&?,@\#$^;~QWERTYUIOP{}ASDFGHJKL:\"ZXCVBDNM<>"
end

#scan_ipObject



37
38
39
# File 'lib/rmtools/conversions/ip.rb', line 37

# Finds every dotted-quad address in the string.
# Returns an array of [address, port] pairs (String#scan with two groups);
# port is nil when the address is not followed by ":digits".
def scan_ip
  scan(/(\d+\.\d+\.\d+\.\d+)(?::(\d+))?/)
end

#sharp_split(splitter, *args) ⇒ Object

Same as #split, but unless :include_splitter is turned off it keeps each splitter attached to the left of the part that follows it; with the :report_headers option it also returns all matched splitters along with the result array.



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/rmtools/text/string_split.rb', line 9

# #split variant that can keep the splitters.
#
# Trailing arguments are decoded by Array#fetch_opts into a split +count+
# (default 0) and an options hash:
# :include_splitter:: (default true) glue each splitter onto the start of
#                     the part that follows it.
# :report_headers::   return [parts, splitters] instead of just the parts.
def sharp_split(splitter, *args)
  count, opts = args.fetch_opts [0, :flags], :include_splitter => true
  # Fast path: a zero-width lookahead split keeps every splitter at the
  # head of its part without any post-processing.
  if !opts[:report_headers] and opts[:include_splitter] and splitter.is Regexp
    return split(/(?=#{splitter.source})/u, count)
  end
  a = split(splitter, count)
  return a if !opts[:include_splitter] and !opts[:report_headers]
  skan = nil
  case splitter
    when String
      # The first part has no splitter in front of it, hence the leading ''.
      skan = ([splitter]*a.size).unshift ''
      a = (1...a.size).map {|i| splitter+a[i]}.unshift a[0] if opts[:include_splitter]
    when Regexp
      skan = scan(splitter).unshift ''
      a = (0...a.size).map {|i| skan[i].to_s+a[i]} if opts[:include_splitter]
  end
  opts[:report_headers] ? [a, skan] : a
end

#sharp_splitr(splitter, *args) ⇒ Object

Same as sharp_split, but keeps splitters on the right of parts



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/rmtools/text/string_split.rb', line 29

# Same as #sharp_split, but each splitter stays attached to the end of the
# part that precedes it.
#
# Trailing arguments are decoded by Array#fetch_opts into a split +count+
# (default 0) and an options hash:
# :include_splitter:: (default true) glue each splitter onto the end of
#                     the part that precedes it.
# :report_headers::   return [parts, splitters] instead of just the parts.
def sharp_splitr(splitter, *args)
  count, opts = args.fetch_opts [0, :flags], :include_splitter => true
  a = split(splitter, count)
  return a if !opts[:include_splitter] and !opts[:report_headers]
  skan = nil
  case splitter
    when String
      skan = [splitter]*a.size << ''
      # The last part has no splitter after it and is appended untouched.
      # (It used to be read as a[i] outside the block, where i is undefined.)
      if opts[:include_splitter] and !a.empty?
        a = a[0...-1].map {|part| part+splitter} << a[-1]
      end
    when Regexp
      skan = scan(splitter) << ''
      a = (0...a.size).map {|i| a[i]+skan[i]} if opts[:include_splitter]
  end
  opts[:report_headers] ? [a, skan] : a
end

#split_to_blocks(maxlen, *opts) ⇒ Object

Base smart-split method

Keep in mind that Cyrillic characters in Ruby 1.8 are 2 bytes long, as this method doesn't use the cyrillic lib in order not to decrease speed.

Raises:



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/rmtools/text/string_split.rb', line 84

# Base smart-split method: breaks the text into blocks no longer than
# +maxlen+, cutting only between words.
#
# Trailing arguments are decoded by Array#fetch_opts into a +terminator+
# and an options hash (defaults: :strict_overhead => true,
# :no_blanks => true):
# terminator::        nil, :syntax (boundaries chosen by #split_by_syntax)
#                     or a character-class body; a block is then cut back
#                     to its last terminator character and the remainder is
#                     carried into the next block.
# :strict_overhead::  when set, forces :strips on.
# :strips::           split on whitespace runs (' ') instead of on single
#                     spaces (/ /).
# :lines::            stop as soon as this many blocks have been produced.
#
# Returns [self] when the text already fits; otherwise the blocks after
# post-processing by #sanitize_blocks!.
# Raises Exception when maxlen < 1.
def split_to_blocks(maxlen, *opts)
  raise Exception, "Can't split text with maxlen = #{maxlen}" if maxlen < 1
  return [self] if size <= maxlen
  terminator, opts = opts.fetch_opts [nil, :flags], :strict_overhead => true, :no_blanks => true
  if opts[:strict_overhead]
    opts[:strips] = true
  end
  blocks = []
  # Matches the tail of a block that follows its last terminator character.
  term_re = /[^#{terminator}]+\z/ if terminator and terminator != :syntax
  # buf holds the word (or carried-over tail) that did not fit the current block.
  words, buf = split(opts[:strips] ? ' ' : / /), nil
  while !words.empty? or (buf and !buf.empty?)
    if terminator and !blocks.empty?
      # Move the unterminated tail of the previous block into the next one.
      buf_add = if terminator == :syntax
        split_by_syntax blocks[-1], maxlen, buf.size
      else
        blocks[-1][term_re]
      end
      if buf_add and !buf_add.empty?
        if buf_add == blocks[-1]
          blocks.pop
        else
          blocks[-1] = blocks[-1][0...-buf_add.size]
        end
        buf = buf_add + buf
      end
    end
    if blocks.size == opts[:lines]
      return sanitize_blocks! blocks, maxlen, terminator, opts
    end
    blocks << ''
    if buf
      blocks[-1] << buf
      buf = nil
    end
    # Fill the current block word by word; the trailing space appended to
    # each word is not counted against maxlen.
    until words.empty?
      buf = words.shift + ' '
      break if blocks[-1].size + buf.size - 1 > maxlen
      blocks[-1] << buf
      buf = nil
    end
  end
  sanitize_blocks! blocks, maxlen, terminator, opts
end

#split_to_lines(maxlen, *opts) ⇒ Object

Raises:



168
169
170
171
172
173
# File 'lib/rmtools/text/string_split.rb', line 168

# Re-wraps the text so that no line is longer than +maxlen+ characters.
#
# Every existing line is stripped and broken with #split_to_blocks; the
# resulting pieces are joined with "\n" and returned as one string.
# Options are decoded by Array#fetch_opts (default :strips => true);
# :strict_overhead is always forced to false. opts[:charsize] defaults to 2
# when the whole string is Cyrillic (#cyr?) and to 1 otherwise, the same
# rule #cut_line uses.
#
# Raises Exception when maxlen < 1.
def split_to_lines(maxlen, *opts)
  raise Exception, "Can't break text with maxlen = #{maxlen}" if maxlen < 1
  opts = opts.fetch_opts(:flags, :strips => true)[0].merge(:strict_overhead => false)
  # Was `a[0].cyr?`, which referenced an undefined `a`.
  opts[:charsize] ||= cyr? ? 2 : 1
  split("\n").map {|string| string.strip.split_to_blocks(maxlen*opts[:charsize], opts)}.flatten*"\n"
end

#squeeze_newlinesObject

remove empty strings from html output



109
110
111
# File 'lib/rmtools/text/string_simple.rb', line 109

# Removes blank lines: any whitespace run that ends in newlines collapses
# to a single "\n", and remaining runs of newlines are squeezed to one.
def squeeze_newlines
  without_trailing_space = gsub(/\s+\n+/, "\n")
  without_trailing_space.squeeze("\n")
end

#swapObject



7
8
9
10
# File 'lib/rmtools/lang/cyrillic.rb', line 7

# Switches the keyboard layout of the text, choosing the direction by the
# first letter found: a Latin letter means the text is converted with
# #en2ru, a Cyrillic one with #ru2en. A string without letters is returned
# as is (the receiver itself).
def swap
  first_letter = /([a-zA-Z])|([А-пр-ёЁ])/.match(self)
  return self unless first_letter
  first_letter[1] ? en2ru : ru2en
end

#symbols_countObject



93
94
95
# File 'lib/rmtools/text/string_simple.rb', line 93

# Counts characters rather than bytes by decoding the string as UTF-8
# code points (unpack 'U*') and returning how many there are.
def symbols_count
  unpack('U*').size
end

#to_html(forceutf = nil) ⇒ Object Also known as: to_doc



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/rmtools/xml/string.rb', line 60

# Parses the string as HTML with LibXML and returns the parsed document.
# An empty string is parsed as "<html/>" (#b yields false for it).
#
# forceutf:: when truthy, the text is first converted with #xml_to_utf and
#            parsed as UTF-8.
#
# Otherwise the parser is first tried with the encoding named by
# __ENCODING__ (on Ruby newer than 1.9) or with no explicit encoding. If
# that raises, the charset declared inside the document (#xml_charset) is
# tried, and as a last resort the method calls itself with forceutf set.
#
# order_elements! is invoked on the document before it is returned.
# NOTE(review): :options => 97 is a LibXML parser-option bitmask; its exact
# flags are not visible here.
def to_html(forceutf=nil)
  str = b || "<html/>"
  doc = if forceutf
      LibXML::XML::HTMLParser.string(str.xml_to_utf, :options => 97,
        :encoding => LibXML::XML::Encoding::UTF_8).parse
    else
      begin
        if RUBY_VERSION > '1.9'
          LibXML::XML::HTMLParser.string(str, :options => 97, 
            :encoding => LibXML::XML::Encoding.const_get(__ENCODING__.to_s.tr('-','_').to_sym)).parse
        else
          LibXML::XML::HTMLParser.string(str, :options => 97).parse
        end
      rescue
        if enc = xml_charset
          LibXML::XML::HTMLParser.string(str, :options => 97, 
            :encoding => LibXML::XML::Encoding.const_get(enc.upcase.tr('-','_').to_sym)).parse
        else to_html :forceutf
        end
      end   
    end
  doc.order_elements!
  doc
end

#to_ipObject



22
23
24
# File 'lib/rmtools/conversions/ip.rb', line 22

# Normalizes an address: converts the string to its integer form with
# #from_ip and formats that integer back with Integer#to_ip.
def to_ip
  from_ip.to_ip
end

#to_params(unscp = true, params_delim = '&', k_v_delim = '=') ⇒ Object

with default delimiters - the opposite of #urlencode



7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/rmtools/conversions/string.rb', line 7

# Parses a query-string-like text into a Hash; with the default delimiters
# this is the opposite of #urlencode.
#
# unscp::        URL-decode keys and values with CGI.unescape (default true).
# params_delim:: separator between pairs (default '&').
# k_v_delim:: separator between a key and its value (default '='); only
#                the first occurrence inside a pair is significant.
#
# With +unscp+ a missing key or value becomes ''; without it they are
# stored as found (possibly nil). Later pairs overwrite earlier ones that
# share a key.
def to_params(unscp=true, params_delim='&', k_v_delim='=')
  h = {}
  split(params_delim).each {|par|
    key, value = par.split(k_v_delim, 2)
    if unscp
      # Default to '' before unescaping: an empty pair ("a=1&&b=2") yields
      # a nil key, which CGI.unescape cannot handle.
      h[CGI.unescape(key || '')] = CGI.unescape(value || '')
    else
      h[key] = value
    end
  }
  h
end

#to_re(esc = false) ⇒ Object



78
79
80
# File 'lib/rmtools/text/string_simple.rb', line 78

# Builds a Regexp from the string. With a truthy +esc+ the text is escaped
# first, so it matches itself literally; otherwise it is used as a pattern
# source.
def to_re(esc=false)
  source = esc ? Regexp.escape(self) : self
  Regexp.new(source)
end

#to_searchObject

make simple strings readable by FTS engines and make results more cacheable by key-value dbs



104
105
106
# File 'lib/rmtools/text/string_simple.rb', line 104

# Normalizes text for full-text search and cache keys: every character
# from the listed ASCII control/punctuation ranges and every whitespace
# character becomes a space; the result is stripped, runs of spaces are
# squeezed to one, and everything is lowercased with #fdowncase.
def to_search
  gsub(/[\0-\/:-@\[-`{-~ \s]/, ' ').strip.squeeze(' ').fdowncase
end

#to_xml(forceutf = nil) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/rmtools/xml/string.rb', line 86

# Parses the string as XML with LibXML and returns the document.
#
# forceutf:: when truthy, the text is first converted with #xml_to_utf and
#            parsed as UTF-8.
#
# Otherwise the parser is first tried with the encoding named by
# __ENCODING__ (on Ruby newer than 1.9) or with no explicit encoding. If
# that raises, the charset declared inside the document (#xml_charset) is
# tried, and as a last resort the method calls itself with forceutf set.
#
# order_elements! is invoked on the document before it is returned.
# NOTE(review): :options => 97 is a LibXML parser-option bitmask; its exact
# flags are not visible here.
def to_xml(forceutf=nil)
  doc = if forceutf
      LibXML::XML::Document.string(xml_to_utf, :options => 97,
        :encoding => LibXML::XML::Encoding::UTF_8)
    else
      begin
        if RUBY_VERSION > '1.9'
          LibXML::XML::Document.string(self, :options => 97, 
            :encoding => LibXML::XML::Encoding.const_get(__ENCODING__.to_s.tr('-','_').to_sym))
        else
          LibXML::XML::Document.string(self, :options => 97)
        end
      rescue
        if enc = xml_charset
          LibXML::XML::Document.string(self, :options => 97, 
            :encoding => LibXML::XML::Encoding.const_get(enc.upcase.tr('-','_').to_sym))
        else to_xml :forceutf
        end
      end   
    end
  doc.order_elements!
  doc
end

#tr_dateObject



20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/rmtools/time/russian.rb', line 20

# Returns a copy in which Russian month names (full or abbreviated) are
# replaced by three-letter English abbreviations ('jan' .. 'dec').
# Substitutions are applied one after another, January through December.
def tr_date
  [
    [/[яЯ][нН][вВ](?:[аА][рР][яЯьЬ]?)?/, 'jan'],
    [/[фФ][еЕ][вВ](?:[рР][аА][лЛ][яЯьЬ]?)?/, 'feb'],
    [/[мМ][аА][рР](?:[тТ][аА]?)?/, 'mar'],
    [/[аА][пП][рР](?:[еЕ][лЛ][яЯьЬ]?)?/, 'apr'],
    [/[мМ][аА][яЯйЙ]?/, 'may'],
    [/[иИ][юЮ][нН][яЯьЬ]?/, 'jun'],
    [/[иИ][юЮ][лЛ][яЯьЬ]?/, 'jul'],
    [/[аА][вВ][гГ](?:[уУ][сС][тТ][аА]?)?/, 'aug'],
    [/[сС][еЕ][нН](?:[тТ][яЯ][бБ][рР][яЯьЬ]?)?/, 'sep'],
    [/[оО][кК][тТ](?:[яЯ][бБ][рР][яЯьЬ]?)?/, 'oct'],
    [/[нН][оО][яЯ](?:[бБ][рР][яЯьЬ]?)?/, 'nov'],
    [/[дД][еЕ][кК](?:[аА][бБ][рР][яЯьЬ]?)?/, 'dec']
  ].inject(self) { |text, (month_re, abbr)| text.gsub(month_re, abbr) }
end

#translitObject



68
69
70
# File 'lib/rmtools/lang/cyrillic.rb', line 68

# Transliterates Cyrillic letters to lowercase Latin text. Matching is
# case-insensitive, so upper- and lowercase letters map to the same
# lowercase output; all other characters are left alone.
def translit
  [
    [/ё/i, 'yo'], [/й/i, 'y'], [/ц/i, 'c'], [/у/i, 'u'], [/к/i, 'k'],
    [/е/i, 'e'], [/н/i, 'n'], [/г/i, 'g'], [/ш/i, 'sh'], [/щ/i, 'sch'],
    [/з/i, 'z'], [/х/i, 'h'], [/[ьъ]/i, "'"], [/ф/i, 'f'], [/[иы]/i, 'i'],
    [/в/i, 'v'], [/а/i, 'a'], [/п/i, 'p'], [/р/i, 'r'], [/о/i, 'o'],
    [/л/i, 'l'], [/д/i, 'd'], [/ж/i, 'j'], [/э/i, 'e'], [/я/i, 'ya'],
    [/ч/i, 'ch'], [/с/i, 's'], [/м/i, 'm'], [/т/i, 't'], [/б/i, 'b'],
    [/ю/i, 'yu']
  ].inject(self) { |text, (letter_re, latin)| text.gsub(letter_re, latin) }
end

#uncapObject Also known as: decapitalize



73
74
75
# File 'lib/rmtools/text/string_simple.rb', line 73

# Returns a copy with only the first character downcased; the opposite of
# #capitalize that leaves the rest of the string untouched.
def uncap
  first_char = self[0]
  first_char.downcase + self[1..-1]
end

#until(splitter = $/) ⇒ Object Also known as: till



27
28
29
# File 'lib/rmtools/text/string_simple.rb', line 27

# Returns the part of the string that precedes the first occurrence of
# +splitter+ (default: the input record separator); the whole string when
# the splitter does not occur, and nil for an empty string.
def until(splitter=$/)
  split(splitter, 2).first
end

#utf(from_encoding = "UTF-16") ⇒ Object



43
44
45
46
# File 'lib/rmtools/lang/ansi.rb', line 43

# Returns the string converted to UTF-8 through Iconv.
# +from_encoding+ defaults to the receiver's own encoding name, upcased.
# "//IGNORE" is appended to both the source and target encoding names.
# Converters are memoized in ICONVS under the key "UTF-8<SOURCE//IGNORE",
# so each source encoding builds one Iconv.
def utf(from_encoding=encoding.name.upcase)
  from_encoding += "//IGNORE"
  (ICONVS["UTF-8<#{from_encoding}"] ||= Iconv.new('UTF-8//IGNORE', from_encoding)).iconv(self)
end

#utf!(from_encoding = "UTF-16") ⇒ Object



53
54
55
# File 'lib/rmtools/lang/ansi.rb', line 53

# In-place variant of #utf: replaces the receiver's content with the
# converted string and returns the receiver.
def utf!(from_encoding=encoding.name.upcase)
  replace utf from_encoding
end

#utf?Boolean

Returns:

  • (Boolean)


82
83
84
85
86
87
88
# File 'lib/rmtools/lang/ansi.rb', line 82

# Truthy when the string is tagged UTF-8 and a unicode pattern can match
# it: the result is the match offset (0) for non-empty valid text, nil for
# an empty string and false when the encoding is different or the match
# raises Encoding::CompatibilityError or ArgumentError.
def utf?
  encoding == Encoding::UTF_8 && self =~ /./u
rescue Encoding::CompatibilityError, ArgumentError
  false
end

#xml_charsetObject



9
10
11
12
13
14
15
16
# File 'lib/rmtools/xml/string.rb', line 9

# Detects the charset declared inside the document and converts the
# receiver to UTF-8 in place.
#
# The first 2000 characters are searched with XML_CHARSET_RE
# (encoding=... / charset=...); the captured name is upcased, and 'UTF8' is
# assumed when nothing is declared. Unless the name is exactly 'UTF-8', the
# receiver is converted with #utf!.
#
# Returns the charset name, or nil when the conversion raised.
# NOTE(review): the fallback 'UTF8' never equals 'UTF-8', so documents
# without a declaration always go through utf!('UTF8') — confirm this is
# intended.
def xml_charset
  charset = (charset = self[0,2000].match(XML_CHARSET_RE)) ? 
    charset[1].upcase : 'UTF8'
  if charset and charset != 'UTF-8'
    utf!(charset) rescue(charset = nil) 
  end
  charset
end

#xml_to_utfObject



18
19
20
21
22
23
24
25
# File 'lib/rmtools/xml/string.rb', line 18

# Converts the receiver to UTF-8 in place according to the charset declared
# inside the document, and returns the receiver.
#
# The first 2000 characters are searched with XML_CHARSET_RE; the captured
# name is upcased, and 'UTF8' is assumed when nothing is declared. Unless
# the name is exactly 'UTF-8', the receiver is converted with #utf!; any
# error raised by the conversion is swallowed and the text is left as is.
# NOTE(review): the fallback 'UTF8' never equals 'UTF-8', so documents
# without a declaration always go through utf!('UTF8') — confirm this is
# intended.
def xml_to_utf
  charset = (charset = self[0,2000].match(XML_CHARSET_RE)) ? 
    charset[1].upcase : 'UTF8'
  if charset and charset != 'UTF-8'
    utf!(charset) rescue() 
  end
  self
end