Class: RDoc::Markup::AttributeManager

Inherits:
Object
  • Object
show all
Defined in:
lib/rdoc/markup/attribute_manager.rb

Overview

Manages changes of attributes in a block of text

Constant Summary collapse

NULL =

The NUL character

"\000".freeze
A_PROTECT =

We work by substituting non-printing characters into the text. For now I’m assuming that I can substitute a character in the range 0..8 for a 7 bit character without damaging the encoded string, but this might be optimistic.

004
PROTECT_ATTR =

Special mask character to prevent inline markup handling

A_PROTECT.chr
NON_PRINTING_START =

:nodoc:

"\1"
NON_PRINTING_END =

:nodoc:

"\2"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeAttributeManager

Creates a new attribute manager that understands bold, emphasized and teletype text.



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/rdoc/markup/attribute_manager.rb', line 80

def initialize
  @html_tags = {}
  @matching_word_pairs = {}
  @protectable = %w[<]
  @regexp_handlings = []
  @word_pair_map = {}
  @exclusive_bitmap = 0
  @attributes = RDoc::Markup::Attributes.new

  add_word_pair "*", "*", :BOLD, true
  add_word_pair "_", "_", :EM, true
  add_word_pair "+", "+", :TT, true
  add_word_pair "`", "`", :TT, true

  add_html "em", :EM, true
  add_html "i",  :EM, true
  add_html "b",  :BOLD, true
  add_html "tt",   :TT, true
  add_html "code", :TT, true
  add_html "s",   :STRIKE, true
  add_html "del", :STRIKE, true

  @word_pair_chars = @matching_word_pairs.keys.join

  # Matches a word pair delimiter (*, _, +, `) that is NOT already protected.
  # Used by #protect_code_markup to escape delimiters inside <code>/<tt> tags.
  @unprotected_word_pair_regexp = /([#{@word_pair_chars}])(?!#{PROTECT_ATTR})/
end

Instance Attribute Details

#attributesObject (readonly)

The attributes enabled for this markup object.



40
41
42
# File 'lib/rdoc/markup/attribute_manager.rb', line 40

# Returns the attributes object stored in @attributes (the attributes
# enabled for this markup object).
def attributes = @attributes

#exclusive_bitmapObject (readonly)

Bitmap of the attributes that have been registered as exclusive



74
75
76
# File 'lib/rdoc/markup/attribute_manager.rb', line 74

# Returns @exclusive_bitmap, the OR of every bitmap registered as exclusive.
def exclusive_bitmap = @exclusive_bitmap

#html_tagsObject (readonly)

This maps HTML tags to the corresponding attribute char



58
59
60
# File 'lib/rdoc/markup/attribute_manager.rb', line 58

# Returns the hash that maps (downcased) HTML tag names to attribute bitmaps.
def html_tags = @html_tags

#matching_word_pairsObject (readonly)

This maps delimiters that occur around words (such as bold or tt) where the start and end delimiters are the same. This lets us optimize the regexp



47
48
49
# File 'lib/rdoc/markup/attribute_manager.rb', line 47

# Returns the hash that maps a delimiter used on both sides of a word to its
# attribute bitmap.
def matching_word_pairs = @matching_word_pairs

#protectableObject (readonly)

A \ in front of a character that would normally be processed turns off processing. We do this by turning \< into <#{PROTECT_ATTR}



64
65
66
# File 'lib/rdoc/markup/attribute_manager.rb', line 64

# Returns the list of characters that can be escaped with a leading
# backslash.
def protectable = @protectable

#regexp_handlingsObject (readonly)

And this maps _regexp handling_ sequences to a name. A regexp handling sequence is something like a WikiWord



70
71
72
# File 'lib/rdoc/markup/attribute_manager.rb', line 70

# Returns the list of registered regexp handlings, each stored as a
# [pattern, bitmap] pair.
def regexp_handlings = @regexp_handlings

#word_pair_mapObject (readonly)

And this is used when the delimiters aren’t the same. In this case the hash maps a pattern to the attribute character



53
54
55
# File 'lib/rdoc/markup/attribute_manager.rb', line 53

# Returns the hash that maps a start/stop delimiter pattern to its attribute
# bitmap (used when the two delimiters differ).
def word_pair_map = @word_pair_map

Instance Method Details

#add_html(tag, name, exclusive = false) ⇒ Object

Adds a markup class with name for words surrounded by HTML tag tag. To process emphasis tags:

am.add_html 'em', :EM


312
313
314
315
316
# File 'lib/rdoc/markup/attribute_manager.rb', line 312

##
# Registers the HTML tag +tag+ for the attribute called +name+.
#
# The tag is stored downcased. When +exclusive+ is true the attribute's
# bitmap is also merged into the exclusive bitmap.
#
# Returns the updated exclusive bitmap when +exclusive+ is true, nil
# otherwise.
def add_html(tag, name, exclusive = false)
  bits = @attributes.bitmap_for(name)
  @html_tags[tag.downcase] = bits
  return unless exclusive

  @exclusive_bitmap |= bits
end

#add_regexp_handling(pattern, name, exclusive = false) ⇒ Object

Adds a regexp handling for pattern with name. A simple URL handler would be:

@am.add_regexp_handling(/((https?:)\S+\w)/, :HYPERLINK)


324
325
326
327
328
# File 'lib/rdoc/markup/attribute_manager.rb', line 324

##
# Registers +pattern+ as a regexp handling for the attribute called +name+.
#
# The pattern is appended to the list of handlings together with the
# attribute's bitmap. When +exclusive+ is true the bitmap is also merged into
# the exclusive bitmap.
#
# Returns the updated exclusive bitmap when +exclusive+ is true, nil
# otherwise.
def add_regexp_handling(pattern, name, exclusive = false)
  bits = @attributes.bitmap_for(name)
  @regexp_handlings.push([pattern, bits])
  return unless exclusive

  @exclusive_bitmap |= bits
end

#add_word_pair(start, stop, name, exclusive = false) ⇒ Object

Adds a markup class with name for words wrapped in the start and stop character. To make words wrapped with “*” bold:

am.add_word_pair '*', '*', :BOLD

Raises:

  • (ArgumentError)


287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
# File 'lib/rdoc/markup/attribute_manager.rb', line 287

##
# Registers a word pair: text wrapped in +start+ and +stop+ receives the
# attribute called +name+.
#
# Pairs whose two delimiters are equal are stored by delimiter; other pairs
# are stored by a regexp that captures the start delimiter, a run of
# non-whitespace characters, and the stop delimiter. The first character of
# +start+ also becomes protectable. When +exclusive+ is true the attribute's
# bitmap is merged into the exclusive bitmap.
#
# Raises ArgumentError when +start+ begins with "<".
def add_word_pair(start, stop, name, exclusive = false)
  if start[0, 1] == '<'
    raise ArgumentError, "Word flags may not start with '<'"
  end

  bits = @attributes.bitmap_for(name)

  if start != stop
    matcher = /(#{Regexp.escape start})(\S+)(#{Regexp.escape stop})/
    @word_pair_map[matcher] = bits
  else
    @matching_word_pairs[start] = bits
  end

  # Keep the list of protectable characters free of duplicates.
  @protectable << start[0, 1]
  @protectable.uniq!

  @exclusive_bitmap |= bits if exclusive
end

#attribute(turn_on, turn_off) ⇒ Object

Return an attribute object with the given turn_on and turn_off bits set



112
113
114
# File 'lib/rdoc/markup/attribute_manager.rb', line 112

##
# Builds an RDoc::Markup::AttrChanger from the +turn_on+ and +turn_off+ bits.
def attribute(turn_on, turn_off)
  RDoc::Markup::AttrChanger.new(turn_on, turn_off)
end

#change_attribute(current, new) ⇒ Object

Changes the current attribute from current to new



119
120
121
122
# File 'lib/rdoc/markup/attribute_manager.rb', line 119

##
# Describes the transition from the attribute bits +current+ to the bits
# +new+.
#
# Only bits that differ between the two are reported: those set in +new+ are
# turned on, those set in +current+ are turned off. The result comes from
# #attribute.
def change_attribute(current, new)
  changed = current ^ new
  turned_on = new & changed
  turned_off = current & changed
  attribute(turned_on, turned_off)
end

#changed_attribute_by_name(current_set, new_set) ⇒ Object

Used by the tests to change attributes by name from current_set to new_set



128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/rdoc/markup/attribute_manager.rb', line 128

##
# Same as #change_attribute, but the two attribute sets are given as
# collections of attribute names.
#
# The bitmaps of +current_set+ are looked up before those of +new_set+.
def changed_attribute_by_name(current_set, new_set)
  # ORs together the bitmap of every name in +names+.
  combine = lambda do |names|
    bits = 0
    names.each { |name| bits |= @attributes.bitmap_for(name) }
    bits
  end

  from_bits = combine.call(current_set)
  to_bits = combine.call(new_set)

  change_attribute(from_bits, to_bits)
end

#convert_attrs(str, attrs, exclusive = false) ⇒ Object

Map attributes like <b>text</b> to the sequence \001\002<char>\001\003<char>, where <char> is a per-attribute specific character



163
164
165
166
# File 'lib/rdoc/markup/attribute_manager.rb', line 163

##
# Applies the word pair attributes to +str+, recording them in +attrs+.
#
# Pairs whose delimiters match are handled first, then pairs with distinct
# start and stop delimiters. +exclusive+ is forwarded unchanged to both
# steps. Returns whatever the second step returns.
def convert_attrs(str, attrs, exclusive = false)
  convert_attrs_matching_word_pairs str, attrs, exclusive
  convert_attrs_word_pair_map str, attrs, exclusive
end

#convert_attrs_matching_word_pairs(str, attrs, exclusive) ⇒ Object

:nodoc:



169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/rdoc/markup/attribute_manager.rb', line 169

# Converts word pairs whose start and stop delimiters are the same character.
#
# Only delimiters whose exclusivity (per #exclusive?) equals +exclusive+ are
# considered; returns early when there are none. +str+ is modified in place
# and the attribute bits are recorded through +attrs.set_attrs+.
def convert_attrs_matching_word_pairs(str, attrs, exclusive)
  # first do matching ones
  tags = @matching_word_pairs.select { |start, bitmap|
    exclusive == exclusive?(bitmap)
  }.keys
  return if tags.empty?
  # One of the selected delimiters, not followed by PROTECT_ATTR.
  tags = "[#{tags.join("")}](?!#{PROTECT_ATTR})"
  # Any character of @word_pair_chars, not followed by PROTECT_ATTR.
  all_tags = "[#{@word_pair_chars}](?!#{PROTECT_ATTR})"

  # Prefix (start of line, a non-word character or any delimiter; dropped
  # from the match by \K), then the delimiter (group 1), the wrapped text
  # (group 2) and the same delimiter again, not repeated. The lookahead
  # requires a delimiter, a non-word character or end of line to follow.
  re = /(?:^|\W|#{all_tags})\K(#{tags})(\1*[#\\]?[\w:#{PROTECT_ATTR}.\/\[\]-]+?\S?)\1(?!\1)(?=#{all_tags}|\W|$)/

  # Repeat until a pass performs no substitution.
  1 while str.gsub!(re) { |orig|
    a, w = (m = $~).values_at(1, 2)
    attr = @matching_word_pairs[a]
    if attrs.set_attrs(m.begin(2), w.length, attr)
      # Attribute recorded: blank out the delimiters, keeping the length.
      a = NULL * a.length
    else
      # Not recorded: wrap the delimiter in the non-printing markers
      # (presumably so this text is not matched again on the next pass).
      a = NON_PRINTING_START + a + NON_PRINTING_END
    end
    a + w + a
  }
  # Strip every non-printing marker character from the string.
  str.delete!(NON_PRINTING_START + NON_PRINTING_END)
end

#convert_attrs_word_pair_map(str, attrs, exclusive) ⇒ Object

:nodoc:



194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# File 'lib/rdoc/markup/attribute_manager.rb', line 194

##
# Converts word pairs whose start and stop delimiters differ.
#
# Only patterns whose exclusivity (per #exclusive?) equals +exclusive+ are
# applied. For every match the wrapped text (capture 2) is offered to
# +attrs.set_attrs+; when that reports an update the two delimiters are
# replaced by NUL characters of the same length, otherwise the match is left
# as it was. +str+ is modified in place.
#
# Each pattern is re-applied until a pass records no update. A pass whose
# matches are all rejected therefore ends the loop; looping on the return
# value of String#gsub! would never end in that case, because gsub! reports a
# substitution even when a match is replaced by itself.
#
# Returns nil when no patterns are registered, the pattern map otherwise.
def convert_attrs_word_pair_map(str, attrs, exclusive)
  # then non-matching
  return if @word_pair_map.empty?

  @word_pair_map.each do |regexp, attr|
    next unless exclusive == exclusive?(attr)

    loop do
      applied = false

      str.gsub!(regexp) do |orig|
        m = Regexp.last_match
        w = m[2]

        if attrs.set_attrs(m.begin(2), w.length, attr)
          applied = true
          NULL * m.match_length(1) + w + NULL * m.match_length(3)
        else
          orig
        end
      end

      # Nothing was converted, so another pass would see the same matches.
      break unless applied
    end
  end
end

#convert_html(str, attrs, exclusive = false) ⇒ Object

Converts HTML tags to RDoc attributes



215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/rdoc/markup/attribute_manager.rb', line 215

##
# Converts HTML tags to RDoc attributes.
#
# Only tags whose exclusivity (per #exclusive?) equals +exclusive+ are
# handled. For each <tag>text</tag> the text is offered to +attrs.set_attrs+
# and both tags are replaced by NUL characters of the same length. +str+ is
# modified in place.
#
# Returns nil. Does nothing when no tag qualifies: an empty alternation
# would otherwise match "<>...</>" and look up an attribute for the empty
# tag name, which is nil.
def convert_html(str, attrs, exclusive = false)
  tag_names = @html_tags.select { |_tag, bitmap|
    exclusive == exclusive?(bitmap)
  }.keys
  return if tag_names.empty?

  tags = tag_names.join '|'

  # Repeat until a pass performs no substitution.
  1 while str.gsub!(/<(#{tags})>(.*?)<\/\1>/i) {
    m = Regexp.last_match
    attr = @html_tags[m[1].downcase]
    html_length = m.match_length(1) + 2 # "<>".length
    seq = NULL * html_length
    attrs.set_attrs(m.begin(2), m.match_length(2), attr)
    # The closing tag is one character longer than the opening tag ("/").
    seq + m[2] + seq + NULL
  }
end

#convert_regexp_handlings(str, attrs, exclusive = false) ⇒ Object

Converts regexp handling sequences to RDoc attributes



232
233
234
235
236
237
238
239
240
241
242
243
# File 'lib/rdoc/markup/attribute_manager.rb', line 232

##
# Marks every regexp handling sequence found in +str+ in +attrs+.
#
# Only handlings whose exclusivity (per #exclusive?) equals +exclusive+ are
# applied. For each match the span of the first capture group (or of the
# whole match when the pattern has no groups) is passed to +attrs.set_attrs+
# with the handling's bits combined with the regexp handling bit. +str+ is
# only read.
def convert_regexp_handlings(str, attrs, exclusive = false)
  @regexp_handlings.each do |pattern, bits|
    next if exclusive != exclusive?(bits)

    str.scan(pattern) do
      match = Regexp.last_match
      group = match.size == 1 ? 0 : 1
      first, last = match.offset(group)

      attrs.set_attrs(first, last - first, bits | @attributes.regexp_handling)
    end
  end
end

#copy_string(start_pos, end_pos) ⇒ Object

Copies start_pos to end_pos from the current string



144
145
146
147
148
# File 'lib/rdoc/markup/attribute_manager.rb', line 144

##
# Returns the part of the current string from +start_pos+ up to, but not
# including, +end_pos+, with all NUL characters removed.
def copy_string(start_pos, end_pos)
  @str[start_pos...end_pos].gsub(/\000/, '')
end

#display_attributesObject

Debug method that prints a string along with its attributes



356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
# File 'lib/rdoc/markup/attribute_manager.rb', line 356

##
# Debug method that prints the current string along with its attributes.
#
# Prints an empty line, then the string with NUL characters shown as "!",
# then one row per attribute bit (the 16 lowest bits). A row has one entry
# per character: a space when the bit is clear, otherwise "S" for bit 0 or
# the bit's 1-based number for the other bits. Rows without any set bit are
# not printed.
#
# Rows are built without appending to a string literal, so the method also
# works when string literals are frozen.
def display_attributes
  puts
  puts @str.tr(NULL, "!")
  bit = 1
  16.times do |bno|
    # Labels for bits 10 and up are two characters wide.
    label = bno.zero? ? "S" : (bno + 1).to_s
    line = @str.length.times.map { |i|
      (@attrs[i] & bit) == 0 ? " " : label
    }.join
    puts(line) unless line =~ /^ *$/
    bit <<= 1
  end
end

#exclusive?(attr) ⇒ Boolean

:nodoc:

Returns:

  • (Boolean)


151
152
153
# File 'lib/rdoc/markup/attribute_manager.rb', line 151

##
# Returns true when +attr+ shares at least one bit with the exclusive bitmap.
def exclusive?(attr)
  !(attr & @exclusive_bitmap).zero?
end

#flow(str) ⇒ Object

Processes str converting attributes, HTML and regexp handlings



333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
# File 'lib/rdoc/markup/attribute_manager.rb', line 333

##
# Processes +str+, converting attributes, HTML and regexp handlings, and
# returns the result of #split_into_flow.
#
# Works on a copy of +str+. Escaped sequences are masked and code markup is
# protected before the attribute span is created; the conversions then run
# twice, first for exclusive attributes and then for the others; finally the
# masks are removed and the string is split.
def flow(str)
  @str = str.dup

  mask_protected_sequences
  protect_code_markup

  @attrs = RDoc::Markup::AttrSpan.new @str.length, @exclusive_bitmap

  # Exclusive pass first, non-exclusive pass second.
  [true, false].each do |exclusive|
    convert_attrs            @str, @attrs, exclusive
    convert_html             @str, @attrs, exclusive
    convert_regexp_handlings @str, @attrs, exclusive
  end

  unmask_protected_sequences

  split_into_flow
end

#mask_protected_sequencesObject

Escapes regexp handling sequences of text to prevent conversion to RDoc



248
249
250
251
252
253
254
255
# File 'lib/rdoc/markup/attribute_manager.rb', line 248

##
# Masks sequences of the current string that must not be treated as markup,
# by placing PROTECT_ATTR after the characters concerned.
#
# Three rewrites run in order:
# 1. each underscore of a double-underscore word is followed by PROTECT_ATTR;
# 2. a backslash before a protectable character is dropped and the character
#    is followed by PROTECT_ATTR, unless the backslash is itself preceded by
#    a backslash;
# 3. a doubled backslash before a protectable character loses one backslash.
def mask_protected_sequences
  # protect __send__, __FILE__, etc.
  guarded_underscores = "_#{PROTECT_ATTR}_#{PROTECT_ATTR}"
  @str.gsub!(/__([a-z]+)__/i,
             "#{guarded_underscores}\\1#{guarded_underscores}")

  escapable = Regexp.escape(@protectable.join)
  @str.gsub!(/(\A|[^\\])\\([#{escapable}])/m, "\\1\\2#{PROTECT_ATTR}")
  @str.gsub!(/\\(\\[#{escapable}])/m, "\\1")
end

#protect_code_markupObject

Protects word pair delimiters (*, _, +) inside <code> and <tt> tags from being processed as inline formatting. For example, *bold* in <code>*bold*</code> will NOT be rendered as bold.



262
263
264
265
266
267
268
269
270
271
272
# File 'lib/rdoc/markup/attribute_manager.rb', line 262

##
# Protects markup that appears between <code> or <tt> tags in the current
# string so that it is not processed as inline formatting.
#
# Inside each tag pair, PROTECT_ATTR is inserted after every word pair
# delimiter and after every "<" that is not already followed by it. The
# surrounding tags themselves are kept as written.
def protect_code_markup
  @str.gsub!(/<(code|tt)>(.*?)<\/\1>/im) do
    # Grab the match first: the substitutions below replace the last match.
    match = Regexp.last_match
    tag_name = match[1]
    inner = match[2]

    # Word pair delimiters first, then HTML-like tags (e.g. <del>).
    shielded = inner
      .gsub(@unprotected_word_pair_regexp, "\\1#{PROTECT_ATTR}")
      .gsub(/<(?!#{PROTECT_ATTR})/, "<#{PROTECT_ATTR}")

    "<#{tag_name}>#{shielded}</#{tag_name}>"
  end
end

#split_into_flowObject

Splits the string into chunks by attribute change



381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
# File 'lib/rdoc/markup/attribute_manager.rb', line 381

# Splits the current string into chunks wherever the attribute bits change.
#
# Returns an array that interleaves text chunks (from #copy_string),
# attribute change objects (from #change_attribute) and
# RDoc::Markup::RegexpHandling objects, in string order.
def split_into_flow
  res = []
  current_attr = 0

  str_len = @str.length

  # skip leading invisible text
  i = 0
  i += 1 while i < str_len and @str[i].chr == "\0"
  start_pos = i

  # then scan the string, chunking it on attribute changes
  while i < str_len
    new_attr = @attrs[i]
    if new_attr != current_attr
      # Flush the text collected under the previous attributes.
      if i > start_pos
        res << copy_string(start_pos, i)
        start_pos = i
      end

      res << change_attribute(current_attr, new_attr)
      current_attr = new_attr

      if (current_attr & @attributes.regexp_handling) != 0 then
        # Consume the whole run carrying the regexp handling bit and emit it
        # as a single RegexpHandling object.
        i += 1 while
          i < str_len and (@attrs[i] & @attributes.regexp_handling) != 0

        res << RDoc::Markup::RegexpHandling.new(current_attr,
                                                copy_string(start_pos, i))
        start_pos = i
        # i already points past the run, so skip the increment below.
        next
      end
    end

    # move on, skipping any invisible characters
    begin
      i += 1
    end while i < str_len and @str[i].chr == "\0"
  end

  # tidy up trailing text
  if start_pos < str_len
    res << copy_string(start_pos, str_len)
  end

  # and reset to all attributes off
  res << change_attribute(current_attr, 0) if current_attr != 0

  res
end

#unmask_protected_sequencesObject

Unescapes regexp handling sequences of text



277
278
279
# File 'lib/rdoc/markup/attribute_manager.rb', line 277

##
# Removes the masks from the current string: every PROTECT_ATTR that follows
# a character (other than a newline) is replaced by a NUL character.
def unmask_protected_sequences
  masked = /(.)#{PROTECT_ATTR}/
  @str.gsub!(masked, "\\1\000")
end