Class: RegexpTree::CharClass

Inherits:
Elt show all
Defined in:
lib/regexptree.rb

Constant Summary collapse

None =
NatSet.empty
Any =
NatSet.universal
NL =
NatSet.new(?\n)
NonNL =
~NL
Word =
NatSet.new(?0..?9, ?A..?Z, ?_, ?a..?z)
NonWord =
~Word
Space =
NatSet.new(?\t, ?\n, ?\f, ?\r, ?\s)
NonSpace =
~Space
Digit =
NatSet.new(?0..?9)
NonDigit =
~Digit
UpAlpha =
NatSet.new(?A..?Z)
LowAlpha =
NatSet.new(?a..?z)

Constants inherited from RegexpTree

EmptySequence, EmptySet

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from RegexpTree

#*, #+, alt, backref, charclass, #closure, #empty_sequence?, #group, inherited, #inspect, linebeg, lineend, #lookahead, #negative_lookahead, non_word_boundary, #nongreedy_closure, #nongreedy_ntimes, #nongreedy_optional, #nongreedy_positive_closure, #nongreedy_rep, #ntimes, #optional, #paren, #parenthesize, #positive_closure, #pretty_print, previous_match, #regexp, #rep, rep, seq, str, strbeg, strend, strlineend, #to_s, word_boundary, #|

Constructor Details

#initialize(natset) ⇒ CharClass

Returns a new instance of CharClass.



368
369
370
# File 'lib/regexptree.rb', line 368

# Builds a character class around +natset+, stored as-is in @natset.
# NOTE(review): presumably a NatSet of character codes, as the other
# methods call NatSet-style operations on it -- confirm with callers.
def initialize(natset)
  @natset = natset
end

Instance Attribute Details

#natset ⇒ Object (readonly)

Returns the value of attribute natset.



371
372
373
# File 'lib/regexptree.rb', line 371

# Returns the object held in @natset (the value handed to the constructor).
def natset
  @natset
end

Instance Method Details

#case_insensitive? ⇒ Boolean

Returns:

  • (Boolean)


377
378
379
380
381
382
383
384
385
# File 'lib/regexptree.rb', line 377

# Reports whether the upper-case letters in this class mirror its
# lower-case letters exactly, i.e. the A-Z part, shifted down to a-z,
# equals the a-z part.
#
# Returns false early when the two parts have a different number of
# boundary entries in +es+.
def case_insensitive?
  upper = @natset & UpAlpha
  lower = @natset & LowAlpha
  return false unless upper.es.length == lower.es.length

  # Distance from ?A (0x41) to ?a (0x61); applied in place to every
  # boundary of the upper-case part so it can be compared with the
  # lower-case part directly.
  case_offset = 0x61 - 0x41
  upper.es.map! { |boundary| boundary + case_offset }
  upper == lower
end

#downcase ⇒ Object



391
392
393
394
395
396
397
# File 'lib/regexptree.rb', line 391

# Returns a new CharClass in which every upper-case letter (A-Z) of this
# class has been replaced by its lower-case counterpart; all other
# members are kept unchanged.
def downcase
  lowered = @natset & UpAlpha
  # Distance from ?A (0x41) to ?a (0x61); shifts each boundary of the
  # upper-case part into the a-z region, in place.
  case_offset = 0x61 - 0x41
  lowered.es.map! { |boundary| boundary + case_offset }
  without_upper = @natset - UpAlpha
  CharClass.new(without_upper | lowered)
end

#empty_set? ⇒ Boolean

Returns:

  • (Boolean)


373
374
375
# File 'lib/regexptree.rb', line 373

# True when the underlying set reports itself empty (delegates to
# @natset.empty?), i.e. this class matches no character.
def empty_set?
  @natset.empty?
end

#encode_elt(e) ⇒ Object



439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
# File 'lib/regexptree.rb', line 439

# Renders the character code +e+ as text suitable for a regexp source.
#
# * Common control characters become their backslash mnemonic
#   (the two-character strings '\t', '\n', ...).
# * A whitelist of printable ASCII characters is emitted literally.
# * Everything else is written as a hexadecimal escape: a backslash,
#   "x", then the code formatted with "%02x".
def encode_elt(e)
  case e
  when 0x07 then '\a'
  when 0x09 then '\t'
  when 0x0a then '\n'
  when 0x0b then '\v'
  when 0x0c then '\f'
  when 0x0d then '\r'
  when 0x1b then '\e'
  # Literal-safe characters:
  #   ! " % & ' , : ; < = > / 0-9 @ A-Z _ ` a-z ~
  when 0x21, 0x22, 0x25, 0x26, 0x27, 0x2c,
       0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x2f,
       0x30..0x39, 0x40, 0x41..0x5a,
       0x5f, 0x60, 0x61..0x7a, 0x7e
    format("%c", e)
  else
    format("\\x%02x", e)
  end
end

#multiline_insensitive? ⇒ Boolean

Returns:

  • (Boolean)


387
388
389
# File 'lib/regexptree.rb', line 387

# True unless this class is exactly NonNL (every character except newline).
# NOTE(review): NonNL is the one class pretty_format prints as '.', whose
# meaning changes under the regexp multiline option -- presumably that is
# why it alone counts as multiline-sensitive; confirm against callers.
def multiline_insensitive?
  @natset != NonNL
end

#pretty_format(out) ⇒ Object



399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
# File 'lib/regexptree.rb', line 399

# Writes the regexp source for this character class to +out+ with a
# single out.text call and returns whatever that call returns.
#
# Well-known sets are printed with their shorthand ('\w', '\d', '.', ...),
# a one-element set is printed as that single encoded character, and any
# other set becomes a bracket expression. When @natset.open? is true the
# complement is listed instead and the bracket is negated with '^'.
def pretty_format(out)
  source =
    case @natset
    when None then '(?!)'
    when Any then '[\s\S]'
    when NL then '\n'
    when NonNL then '.'
    when Word then '\w'
    when NonWord then '\W'
    when Space then '\s'
    when NonSpace then '\S'
    when Digit then '\d'
    when NonDigit then '\D'
    else
      if (single = @natset.singleton?)
        encode_elt(single)
      else
        if @natset.open?
          negation = '^'
          boundaries = (~@natset).es
        else
          negation = ''
          boundaries = @natset.es
        end

        # +es+ is read as consecutive (start, exclusive end) pairs.
        members = ''
        boundaries.each_slice(2) do |first, stop|
          if first + 1 == stop
            # One character in the range.
            members << encode_elt(first)
          elsif first + 2 == stop
            # Two characters: list both rather than writing "a-b".
            members << encode_elt(first) << encode_elt(stop - 1)
          else
            members << encode_elt(first) << '-' << encode_elt(stop - 1)
          end
        end
        "[#{negation}#{members}]"
      end
    end
  out.text source
end