Class: ObjectRegex

Inherits:
Object
  • Object
show all
Defined in:
lib/object_regex/implementation.rb

Overview

Provides general-purpose regex searching over any sequence of objects that implement #reg_desc. See design_docs/object_regex for the mini-paper explaining the approach. With any luck, this will make it into Ripper so I won’t have to do this here.

Constant Summary collapse

TOKEN_MATCHER =

Mapping generation. Matches a single token name inside a pattern: one letter followed by any number of word characters.

/[A-Za-z][\w]*/
MAPPING_CHARS =
('a'..'z').to_a + ('A'..'Z').to_a + ('0'..'9').to_a

Instance Method Summary collapse

Constructor Details

#initialize(pattern) ⇒ ObjectRegex

Returns a new instance of ObjectRegex.



5
6
7
8
# File 'lib/object_regex/implementation.rb', line 5

# Builds the token-to-code table and the translated pattern for +pattern+.
#
# @param pattern [String] pattern text; it is scanned for tokens and rewritten
#   with gsub by the helpers called below
def initialize(pattern)
  # Must run first: generate_map also assigns @item_size, which fix_dots
  # reads, and its return value is the table mapped_value looks tokens up in.
  @map = generate_map(pattern)
  # Depends on @map and @item_size having been set by the line above.
  @pattern = generate_pattern(pattern)
end

Instance Method Details

#all_matches(input) ⇒ Object



76
77
78
79
80
81
82
83
84
85
# File 'lib/object_regex/implementation.rb', line 76

# Collects every successive, non-overlapping match of the pattern in +input+.
#
# The input is encoded with mapped_input, searched repeatedly, and each match's
# character offsets are converted back to element indices by dividing by
# @item_size.
#
# A zero-width match (possible with patterns such as a lone starred token)
# contributes an empty slice and the search then resumes one item further on.
# Without that step the search position would never advance and the loop
# would not terminate.
#
# @param input [Array] elements responding to #reg_desc
# @return [Array] one slice of +input+ per match, in order of appearance
def all_matches(input)
  new_input = mapped_input(input)
  result = []
  pos = 0
  while (match = new_input.match(@pattern, pos))
    first_char = match.begin(0)
    last_char = match.end(0)
    result << input[(first_char / @item_size)...(last_char / @item_size)]
    # Always make progress: an empty match would otherwise be found again
    # at the same position forever.
    pos = last_char > first_char ? last_char : last_char + @item_size
  end
  result
end

#convert_to_mapping_radix(repr_size, num) ⇒ Object



35
36
37
38
39
40
41
42
# File 'lib/object_regex/implementation.rb', line 35

# Expands +num+ into exactly +repr_size+ digits in base MAPPING_CHARS.size,
# most significant digit first. Digits beyond +repr_size+ are discarded.
#
# @param repr_size [Integer] number of digits to produce
# @param num [Integer] value to convert
# @return [Array<Integer>] digit values in most-significant-first order
def convert_to_mapping_radix(repr_size, num)
  base = MAPPING_CHARS.size
  remaining = num
  # Digits come out least significant first, so flip them at the end.
  least_significant_first = Array.new(repr_size) do
    digit = remaining % base
    remaining /= base
    digit
  end
  least_significant_first.reverse
end

#fix_dots(pattern) ⇒ Object



56
57
58
# File 'lib/object_regex/implementation.rb', line 56

# Widens every '.' in +pattern+ so that it spans one whole encoded item:
# (@item_size - 1) wildcard characters followed by the ';' terminator.
#
# @param pattern [String] pattern text
# @return [String] a new string with each dot replaced
def fix_dots(pattern)
  any_item = "#{'.' * (@item_size - 1)};"
  pattern.gsub('.', any_item)
end

#generate_map(pattern) ⇒ Object



18
19
20
21
22
23
24
25
26
27
# File 'lib/object_regex/implementation.rb', line 18

# Builds the lookup table that assigns each distinct token in +pattern+ a
# fixed-width code, plus a :FAILBOAT entry (the fallback that mapped_value
# uses for descriptions not present in the table).
#
# Side effect: sets @item_size to the code width plus one.
#
# @param pattern [String] pattern text to scan for tokens
# @return [Hash] token => code, with :FAILBOAT => code as the last entry
def generate_map(pattern)
  tokens = pattern.scan(TOKEN_MATCHER).uniq
  # One extra code is needed for the :FAILBOAT entry.
  code_count = tokens.size + 1
  repr_size = Math.log(code_count, MAPPING_CHARS.size).ceil
  @item_size = repr_size + 1

  table = {}
  tokens.each_with_index do |token, position|
    table[token] = mapping_for_idx(repr_size, position)
  end
  table[:FAILBOAT] = mapping_for_idx(repr_size, table.size)
  table
end

#generate_pattern(pattern) ⇒ Object

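Pattern transformation. Rewrites the pattern by expanding bracketed ranges, widening each dot to span one encoded item, and replacing each token with its encoded form.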



46
47
48
# File 'lib/object_regex/implementation.rb', line 46

# Translates +pattern+ into the form used for searching encoded input by
# running it through three rewriting passes, in this order: bracketed ranges,
# dots, then tokens.
#
# @param pattern [String] pattern text
# @return [Object] whatever replace_tokens returns for the rewritten text
def generate_pattern(pattern)
  without_ranges = remove_ranges(pattern)
  with_wide_dots = fix_dots(without_ranges)
  replace_tokens(with_wide_dots)
end

#mapped_input(input) ⇒ Object



87
88
89
# File 'lib/object_regex/implementation.rb', line 87

# Encodes +input+ as one string: each element's #reg_desc is looked up with
# mapped_value and the resulting codes are concatenated in order.
#
# @param input [Enumerable] elements responding to #reg_desc
# @return [String] concatenation of the per-element codes
def mapped_input(input)
  descriptions = input.map { |item| item.reg_desc }
  encoded = descriptions.map { |description| mapped_value(description) }
  encoded.join
end

#mapped_value(reg_desc) ⇒ Object



10
11
12
# File 'lib/object_regex/implementation.rb', line 10

# Looks up the code stored in @map for +reg_desc+, falling back to the
# :FAILBOAT entry when the table has no (truthy) value for it.
#
# @param reg_desc [Object] key to look up in @map
# @return [Object] the stored code, or the :FAILBOAT code
def mapped_value(reg_desc)
  encoded = @map[reg_desc]
  return encoded if encoded

  @map[:FAILBOAT]
end

#mapping_for_idx(repr_size, idx) ⇒ Object



29
30
31
32
33
# File 'lib/object_regex/implementation.rb', line 29

# Produces the code for table position +idx+: +repr_size+ characters drawn
# from MAPPING_CHARS, followed by a ';' terminator.
#
# @param repr_size [Integer] number of characters before the terminator
# @param idx [Integer] position to encode
# @return [String] the code, repr_size + 1 characters long
def mapping_for_idx(repr_size, idx)
  digits = convert_to_mapping_radix(repr_size, idx)
  letters = MAPPING_CHARS.values_at(*digits)
  "#{letters.join};"
end

#match(input, pos = 0) ⇒ Object

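Matching. Returns the slice of input covered by the first match found at or after character offset pos of the encoded input string, or nil when nothing matches.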



68
69
70
71
72
73
74
# File 'lib/object_regex/implementation.rb', line 68

# Finds the first match of the pattern in +input+.
#
# @param input [Array] elements responding to #reg_desc
# @param pos [Integer] offset at which the search starts; it is handed
#   unchanged to String#match on the encoded string, so it counts encoded
#   characters rather than elements of +input+
# @return [Array, nil] the matched slice of +input+, or nil when nothing matches
def match(input, pos=0)
  found = mapped_input(input).match(@pattern, pos)
  return nil unless found

  # Convert character offsets in the encoded string back to element indices.
  first_item = found.begin(0) / @item_size
  last_item = found.end(0) / @item_size
  input[first_item...last_item]
end

#remove_ranges(pattern) ⇒ Object



50
51
52
53
54
# File 'lib/object_regex/implementation.rb', line 50

# Rewrites each bracketed list of whitespace-separated names, such as
# "[a b c]", as a non-capturing alternation "(?:a|b|c)".
#
# Whitespace just inside the brackets is ignored. Splitting on /\s+/ would
# turn "[ a b ]" into "(?:|a|b)", whose empty first alternative matches
# without consuming anything.
#
# @param pattern [String] pattern text
# @return [String] a new string with every bracketed list rewritten
def remove_ranges(pattern)
  pattern.gsub(/\[([\w\t ]*)\]/) do
    # split(' ') is the whitespace-aware form: it collapses runs of
    # whitespace and drops leading/trailing whitespace.
    alternatives = Regexp.last_match(1).split(' ')
    "(?:#{alternatives.join('|')})"
  end
end

#replace_tokens(pattern) ⇒ Object



60
61
62
63
64
# File 'lib/object_regex/implementation.rb', line 60

# Replaces every token in +pattern+ with a non-capturing group around the
# code returned by mapped_value, then strips all whitespace from the result.
#
# @param pattern [String] pattern text
# @return [String] the rewritten pattern text, free of whitespace
def replace_tokens(pattern)
  substituted = pattern.gsub(TOKEN_MATCHER) { |token| "(?:#{mapped_value(token)})" }
  substituted.gsub(/\s/, '')
end