Class: RDoc::Markup::ToHtml

Inherits:
Formatter show all
Includes:
Text
Defined in:
lib/rdoc/markup/to_html.rb

Overview

Outputs RDoc markup as HTML.

Direct Known Subclasses

LinkLabelToHtml, ToHtmlCrossref, ToHtmlSnippet

Constant Summary collapse

LIST_TYPE_TO_HTML =

Maps RDoc::Markup::Parser::LIST_TOKENS types to HTML tags

# Each entry maps a list type to an [opening tag, closing tag] pair.
{
  :BULLET => ['<ul>',                                      '</ul>'],
  :LABEL  => ['<dl class="rdoc-list label-list">',         '</dl>'],
  :LALPHA => ['<ol style="list-style-type: lower-alpha">', '</ol>'],
  :NOTE   => ['<dl class="rdoc-list note-list">',          '</dl>'],
  :NUMBER => ['<ol>',                                      '</ol>'],
  :UALPHA => ['<ol style="list-style-type: upper-alpha">', '</ol>'],
}
URL_CHARACTERS_REGEXP_STR =

:nodoc:

# Source text of a single-character class, kept as a String so it can be
# interpolated into larger regexps (init_regexp_handlings does this).
/[A-Za-z0-9\-._~:\/\?#\[\]@!$&'\(\)*+,;%=]/.source

Constants included from Text

Text::MARKUP_FORMAT, Text::SPACE_SEPARATED_LETTER_CLASS, Text::TO_HTML_CHARACTERS

Instance Attribute Summary collapse

Attributes included from Text

#language

Instance Method Summary collapse

Methods included from Text

decode_legacy_label, encode_fallback, expand_tabs, #flush_left, #markup, #normalize_comment, #parse, #snippet, #strip_hashes, #strip_newlines, #strip_stars, to_anchor, #to_html_characters, #wrap

Methods inherited from Formatter

#accept_document, #add_regexp_handling_RDOCLINK, #annotate, #apply_regexp_handling, #convert, gen_relative_url, #handle_TEXT, #ignore, #parse_url, #traverse_inline_nodes, #tt?

Constructor Details

#initialize(options, markup = nil) ⇒ ToHtml

Creates a new formatter that will output HTML



46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/rdoc/markup/to_html.rb', line 46

# Builds a formatter that renders HTML.
#
# Both arguments are forwarded untouched to the superclass initializer. The
# remaining lines reset this formatter's own state and then register the
# regexp handlings.
def initialize(options, markup = nil)
  super

  # Context used when generating links.
  @code_object = nil
  @from_path = ''

  # Block-level state; start_accepting replaces @list and @in_list_entry
  # with fresh arrays before a document is rendered.
  @list = nil
  @in_list_entry = nil
  @th = nil

  # Inline-level state.
  @hard_break = "<br>\n"
  @in_tidylink_label = false

  init_regexp_handlings
end

Instance Attribute Details

#code_object ⇒ Object

The RDoc::CodeObject HTML is being generated for. This is used to generate namespaced URI fragments.



34
35
36
# File 'lib/rdoc/markup/to_html.rb', line 34

# Reader for @code_object, which accept_heading passes to the heading's
# label methods when building anchor ids.
def code_object
  @code_object
end

#from_path ⇒ Object

Path to this document for relative links



39
40
41
# File 'lib/rdoc/markup/to_html.rb', line 39

# Reader for @from_path (initialized to an empty string by the constructor).
def from_path
  @from_path
end

#in_list_entry ⇒ Object (readonly)

:nodoc:



27
28
29
# File 'lib/rdoc/markup/to_html.rb', line 27

# Reader for @in_list_entry: one slot per open list, holding the closing tag
# of the current item (or false when no item is open yet).
def in_list_entry
  @in_list_entry
end

#list ⇒ Object (readonly)

:nodoc:



28
29
30
# File 'lib/rdoc/markup/to_html.rb', line 28

# Reader for @list, the stack of list types currently being rendered.
def list
  @list
end

#res ⇒ Object (readonly)

:nodoc:



26
27
28
# File 'lib/rdoc/markup/to_html.rb', line 26

# Reader for @res, the array of output fragments that end_accepting joins.
def res
  @res
end

Instance Method Details

#accept_blank_line(blank_line) ⇒ Object

Adds blank_line to the output



405
406
407
# File 'lib/rdoc/markup/to_html.rb', line 405

# Visitor hook for a blank line. Deliberately emits nothing: the former
# "<p />" output survives below only as commented-out code.
def accept_blank_line(blank_line)
  # @res << annotate("<p />") << "\n"
end

#accept_block_quote(block_quote) ⇒ Object

Adds block_quote to the output



298
299
300
301
302
303
304
305
306
# File 'lib/rdoc/markup/to_html.rb', line 298

# Renders +block_quote+ as a <blockquote> element. Every part of the quote
# is asked to render itself through this visitor, in order.
def accept_block_quote(block_quote)
  @res << "\n<blockquote>"
  block_quote.parts.each { |child| child.accept(self) }
  @res << "</blockquote>\n"
end

#accept_heading(heading) ⇒ Object

Adds heading to the output. Heading levels greater than 6 are clamped to level 6.



413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
# File 'lib/rdoc/markup/to_html.rb', line 413

# Renders +heading+ as an <hN> element, with N capped at 6.
#
# Both the label and the legacy label are passed through
# deduplicate_heading_id. With output decoration enabled the heading gets an
# +id+, and (outside pipe mode) an empty legacy-anchor <span> is emitted just
# before it. Outside pipe mode the heading text is wrapped in a self-link.
def accept_heading(heading)
  requested = heading.level
  level = requested > 6 ? 6 : requested

  label = deduplicate_heading_id(heading.label(@code_object))
  legacy_label = deduplicate_heading_id(heading.legacy_label(@code_object))

  decorated = @options.output_decoration
  piped = @options.pipe

  # Legacy anchor kept in front of the heading so that old links using the
  # label- prefix continue to resolve.
  @res << "\n<span id=\"#{legacy_label}\" class=\"legacy-anchor\"></span>" if decorated && !piped

  id_attribute = decorated ? " id=\"#{label}\"" : ''
  @res << "\n<h#{level}#{id_attribute}>"

  inner = to_html(heading.text)
  @res << (piped ? inner : "<a href=\"##{label}\">#{inner}</a>")

  @res << "</h#{level}>\n"
end

#accept_list_end(list) ⇒ Object

Finishes consumption of list



376
377
378
379
380
381
382
# File 'lib/rdoc/markup/to_html.rb', line 376

# Closes +list+: drops it from the list stack, emits the closing tag of its
# last item if one is pending, then emits the list's own closing tag.
def accept_list_end(list)
  @list.pop
  pending_close = @in_list_entry.pop
  @res << pending_close if pending_close
  @res << html_list_name(list.type, false) << "\n"
end

#accept_list_item_end(list_item) ⇒ Object

Finishes consumption of list_item



398
399
400
# File 'lib/rdoc/markup/to_html.rb', line 398

# Records the closing tag for the item that just ended in the innermost
# list's slot; it is written out later, when the next item or the list end
# is visited.
def accept_list_item_end(list_item)
  closing_tag = list_end_for(@list.last)
  @in_list_entry[-1] = closing_tag
end

#accept_list_item_start(list_item) ⇒ Object

Prepares the visitor for consuming list_item



387
388
389
390
391
392
393
# File 'lib/rdoc/markup/to_html.rb', line 387

# Opens +list_item+ in the innermost list, first writing the closing tag of
# the previous item when one is pending.
def accept_list_item_start(list_item)
  previous_close = @in_list_entry.last
  @res << previous_close if previous_close
  @res << list_item_start(list_item, @list.last)
end

#accept_list_start(list) ⇒ Object

Prepares the visitor for consuming list



367
368
369
370
371
# File 'lib/rdoc/markup/to_html.rb', line 367

# Opens +list+: pushes its type onto the list stack, emits the opening tag
# and adds a slot (initially false) for the closing tag of its items.
def accept_list_start(list)
  type = list.type
  @list << type
  @res << html_list_name(type, true)
  @in_list_entry.push false
end

#accept_paragraph(paragraph) ⇒ Object

Adds paragraph to the output



311
312
313
314
315
316
317
318
319
# File 'lib/rdoc/markup/to_html.rb', line 311

# Renders +paragraph+ as a <p> element.
#
# The paragraph text is requested with @hard_break as its hard-break string,
# its line breaks are folded (see below) and the result is converted with
# to_html.
def accept_paragraph(paragraph)
  @res << "\n<p>"
  text = paragraph.text @hard_break
  # Every (CR)LF is replaced. Group 1 optionally captures the character
  # before the break; only when it matched does the lookahead try to capture
  # the following character as group 2. A break therefore becomes a single
  # space only when the characters on both sides belong to
  # SPACE_SEPARATED_LETTER_CLASS; otherwise the block yields nil and the
  # break is removed.
  text = text.gsub(/(#{SPACE_SEPARATED_LETTER_CLASS})?\K\r?\n(?=(?(1)(#{SPACE_SEPARATED_LETTER_CLASS})?))/o) {
    defined?($2) && ' '
  }
  @res << to_html(text)
  @res << "</p>\n"
end

#accept_raw(raw) ⇒ Object

Adds raw to the output



443
444
445
# File 'lib/rdoc/markup/to_html.rb', line 443

# Appends the parts of +raw+, joined with newlines, to the output.
# No escaping is applied here.
def accept_raw(raw)
  @res << raw.parts.join("\n")
end

#accept_rule(rule) ⇒ Object

Adds rule to the output



360
361
362
# File 'lib/rdoc/markup/to_html.rb', line 360

# Emits a horizontal rule. +rule+ itself is not inspected.
def accept_rule(rule)
  @res << "<hr>\n"
end

#accept_table(header, body, aligns) ⇒ Object

Adds table to the output



450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
# File 'lib/rdoc/markup/to_html.rb', line 450

# Renders a table.
#
# +header+ is one row of cell texts, +body+ a list of such rows and +aligns+
# the per-column alignment values. A cell gets an +align+ attribute only
# when its column has a truthy alignment; cell texts go through to_html.
def accept_table(header, body, aligns)
  # Writes one <th>/<td> cell, pushing the same fragments for either tag.
  emit_cell = lambda do |tag, text, align|
    @res << "<#{tag}"
    @res << ' align="' << align << '"' if align
    @res << '>' << to_html(text) << "</#{tag}>\n"
  end

  @res << "\n<table role=\"table\">\n<thead>\n<tr>\n"
  header.zip(aligns) { |text, align| emit_cell.call('th', text, align) }
  @res << "</tr>\n</thead>\n<tbody>\n"

  body.each do |row|
    @res << "<tr>\n"
    row.zip(aligns) { |text, align| emit_cell.call('td', text, align) }
    @res << "</tr>\n"
  end

  @res << "</tbody>\n</table>\n"
end

#accept_verbatim(verbatim) ⇒ Object

Adds verbatim to the output



324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
# File 'lib/rdoc/markup/to_html.rb', line 324

# Renders +verbatim+ as a <pre> block.
#
# In pipe mode the text is only HTML-escaped and wrapped in <pre><code>; no
# parse check or tokenizing is attempted because its result would not be
# used. Otherwise Ruby syntax highlighting is applied when the block is
# - explicitly marked as Ruby (verbatim.ruby?), or
# - has no format and the text parses as Ruby.
# Any other block is escaped and, when a format is given, tagged with that
# format as its CSS class (HTML-escaped, since it lands in an attribute).
def accept_verbatim(verbatim)
  text = verbatim.text.rstrip

  if @options.pipe
    return @res << "\n<pre><code>#{CGI.escapeHTML text}\n</code></pre>\n"
  end

  format = verbatim.format
  klass = nil

  content = if verbatim.ruby? || (format.nil? && parseable?(text))
              begin
                tokens = RDoc::Parser::RipperStateLex.parse text
                klass  = ' class="ruby"'

                result = RDoc::TokenStream.to_html tokens
                result += "\n" unless result.end_with?("\n")
                result
              rescue
                # Best effort: if highlighting fails for any reason, fall
                # back to the plain escaped text.
                CGI.escapeHTML text
              end
            else
              klass = " class=\"#{CGI.escapeHTML format.to_s}\"" if format
              CGI.escapeHTML text
            end

  @res << "\n<pre#{klass}>#{content}</pre>\n"
end

Special handling for tidy link labels. When a tidy link is {rdoc-image:path/to/image.jpg:alt text}[http://example.com], the label would normally be split into an RDOCLINK (rdoc-image:path/to/image.jpg:alt) followed by the plain text " text", but RDoc’s test code expects the whole label to be treated as a single RDOCLINK when it appears inside a tidy link label. When a tidy link is {^1}[url] or {*1}[url], the leading * or ^ is dropped from the label. TODO: reconsider this workaround.



184
185
186
187
188
189
190
# File 'lib/rdoc/markup/to_html.rb', line 184

# Returns replacement HTML/text for the two tidy-link label forms that need
# special treatment, or nil when +label+ is an ordinary label.
#
# * "^1" / "*1" (a marker followed only by digits) lose the leading marker.
# * Labels starting with "rdoc-image:" are handed whole to handle_RDOCLINK,
#   which allows spaces in the alt text.
def apply_tidylink_label_special_handling(label, url)
  if label.match?(/\A[*^]\d+\z/)
    label[1..]
  elsif label.start_with?('rdoc-image:')
    handle_RDOCLINK(label)
  end
end

#convert_string(text) ⇒ Object

CGI-escapes text



489
490
491
# File 'lib/rdoc/markup/to_html.rb', line 489

# Returns +text+ with HTML special characters escaped via CGI.escapeHTML.
def convert_string(text)
  CGI.escapeHTML text
end

#deduplicate_heading_id(id) ⇒ Object

Returns a unique heading ID, appending -1, -2, etc. for duplicates. Matches GitHub’s behavior for duplicate heading anchors.



476
477
478
479
480
481
482
483
484
# File 'lib/rdoc/markup/to_html.rb', line 476

# Returns +id+ unchanged the first time it is seen and "id-N" afterwards,
# where N counts the earlier occurrences (1, 2, ...). Counts are kept in
# @heading_ids.
def deduplicate_heading_id(id)
  unless @heading_ids.key?(id)
    @heading_ids[id] = 0
    return id
  end

  occurrence = (@heading_ids[id] += 1)
  "#{id}-#{occurrence}"
end

#emit_inline(text) ⇒ Object



167
168
169
# File 'lib/rdoc/markup/to_html.rb', line 167

# Appends +text+ to @inline_output, the buffer handle_inline sets up for the
# duration of an inline conversion.
def emit_inline(text)
  @inline_output << text
end

#end_accepting ⇒ Object

Returns the generated output



291
292
293
# File 'lib/rdoc/markup/to_html.rb', line 291

# Returns the accumulated output fragments in @res joined into one string.
def end_accepting
  @res.join
end

#gen_url(url, text) ⇒ Object

Generates an HTML link or image tag for the given url and text.

  • Image URLs (http/https/link ending in .gif, .png, .jpg, .jpeg, .bmp) become <img> tags

  • File references (.rb, .rdoc, .md) are converted to .html paths

  • Anchor URLs (#foo) pass through unchanged for GitHub-style header linking

  • Footnote links get wrapped in <sup> tags



502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
# File 'lib/rdoc/markup/to_html.rb', line 502

# Builds the HTML for a link to +url+ labelled +text+.
#
# Returns an <img> tag for http/https/link URLs ending in an image
# extension, otherwise an <a> tag, wrapped in <sup> when the id string
# contains "foot.
def gen_url(url, text)
  # parse_url is defined outside this method. Its third value is spliced
  # verbatim right after "<a" below -- presumably a ready-made id attribute
  # with a leading space; confirm against parse_url.
  scheme, url, id = parse_url url

  if %w[http https link].include?(scheme) && url =~ /\.(gif|png|jpg|jpeg|bmp)\z/
    # Image target: +text+ is not used.
    "<img src=\"#{url}\" />"
  else
    # For non-link: URLs that do not start with http:/https: and whose last
    # path segment ends in .rb/.rdoc/.md, rewrite "dir/name.ext" to
    # "dir/name_ext.html": $1 is the directory part, $2 the base name (its
    # dots become underscores), $3 the extension and $' any "#fragment".
    if scheme != 'link' and %r%\A((?!https?:)(?:[^/#]*/)*+)([^/#]+)\.(rb|rdoc|md)(?=\z|#)%i =~ url
      url = "#$1#{$2.tr('.', '_')}_#$3.html#$'"
    end

    # Drop a leading "scheme:" plus any slashes from the label, then a
    # leading * or ^ from a label that is otherwise only digits.
    text = text.sub %r%^#{scheme}:/*%i, ''
    text = text.sub %r%^[*\^](\d+)$%,   '\1'

    link = "<a#{id} href=\"#{url}\">#{text}</a>"

    if /"foot/.match?(id)
      "<sup>#{link}</sup>"
    else
      link
    end
  end
end

#handle_BOLD(nodes) ⇒ Object



127
128
129
130
131
# File 'lib/rdoc/markup/to_html.rb', line 127

# Wraps whatever the superclass emits for +nodes+ in <strong>...</strong>.
def handle_BOLD(nodes)
  open_tag, close_tag = '<strong>', '</strong>'
  emit_inline(open_tag)
  super
  emit_inline(close_tag)
end

#handle_BOLD_WORD(word) ⇒ Object



139
140
141
142
143
# File 'lib/rdoc/markup/to_html.rb', line 139

# Wraps whatever the superclass emits for +word+ in <strong>...</strong>.
def handle_BOLD_WORD(word)
  open_tag, close_tag = '<strong>', '</strong>'
  emit_inline(open_tag)
  super
  emit_inline(close_tag)
end

#handle_EM(nodes) ⇒ Object



133
134
135
136
137
# File 'lib/rdoc/markup/to_html.rb', line 133

# Wraps whatever the superclass emits for +nodes+ in <em>...</em>.
def handle_EM(nodes)
  open_tag, close_tag = '<em>', '</em>'
  emit_inline(open_tag)
  super
  emit_inline(close_tag)
end

#handle_EM_WORD(word) ⇒ Object



145
146
147
148
149
# File 'lib/rdoc/markup/to_html.rb', line 145

# Wraps whatever the superclass emits for +word+ in <em>...</em>.
def handle_EM_WORD(word)
  open_tag, close_tag = '<em>', '</em>'
  emit_inline(open_tag)
  super
  emit_inline(close_tag)
end

#handle_HARD_BREAK ⇒ Object



163
164
165
# File 'lib/rdoc/markup/to_html.rb', line 163

# Emits a <br> tag into the inline output.
def handle_HARD_BREAK
  emit_inline('<br>')
end

#handle_inline(text) ⇒ Object

:nodoc:



228
229
230
231
232
233
234
# File 'lib/rdoc/markup/to_html.rb', line 228

# Converts inline +text+ and returns what was emitted.
#
# A fresh, mutable buffer is installed in @inline_output, the superclass
# implementation runs (the emit_inline calls made meanwhile append to that
# buffer), and the buffer is then detached and returned.
def handle_inline(text) # :nodoc:
  @inline_output = +''
  super
  # Read the ivar again instead of holding the buffer in a local, so the
  # value returned is whatever @inline_output refers to once super is done.
  out = @inline_output
  @inline_output = nil
  out
end

#handle_PLAIN_TEXT(text) ⇒ Object



119
120
121
# File 'lib/rdoc/markup/to_html.rb', line 119

# Emits +text+ after escaping it with convert_string.
def handle_PLAIN_TEXT(text)
  emit_inline(convert_string(text))
end

:nodoc:



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/rdoc/markup/to_html.rb', line 88

# Converts an rdoc-schemed +url+ to HTML. Returns nil when +url+ does not
# start with an "rdoc-<name>:" scheme.
#
# rdoc-ref::   the escaped reference text, without a link.
# rdoc-label:: inside a tidy link label the escaped +url+ itself; otherwise
#              a link built by gen_url whose text has one leading "label-",
#              "footmark-" or "foottext-" prefix removed.
# rdoc-image:: an <img> tag, with an +alt+ attribute when alt text follows
#              the path.
# other::      the escaped text after the scheme.
#
# Every scheme test is anchored with \A so that only the very start of +url+
# selects the branch. With ^ a later line of a multi-line value (possible for
# tidy link labels, where alt text may contain arbitrary characters) could
# pick the wrong branch.
def handle_RDOCLINK(url) # :nodoc:
  case url
  when /\Ardoc-ref:/
    CGI.escapeHTML($')
  when /\Ardoc-label:/
    return CGI.escapeHTML(url) if in_tidylink_label?

    text = $'.sub(/\A(?:label|footmark|foottext)-/, '')

    gen_url CGI.escapeHTML(url), CGI.escapeHTML(text)
  when /\Ardoc-image:/
    # Split the string after "rdoc-image:" into url and alt. A colon that is
    # followed by "/" (as in "http://") does not split.
    #   "path/to/image.jpg:alt text" => ["path/to/image.jpg", "alt text"]
    #   "http://example.com/path/to/image.jpg:alt text" => ["http://example.com/path/to/image.jpg", "alt text"]
    url, alt = $'.split(/:(?!\/)/, 2)
    if alt && !alt.empty?
      %[<img src="#{CGI.escapeHTML(url)}" alt="#{CGI.escapeHTML(alt)}">]
    else
      %[<img src="#{CGI.escapeHTML(url)}">]
    end
  when /\Ardoc-[a-z]+:/
    CGI.escapeHTML($')
  end
end

#handle_REGEXP_HANDLING_TEXT(text) ⇒ Object



123
124
125
# File 'lib/rdoc/markup/to_html.rb', line 123

# Emits +text+ as-is; unlike handle_PLAIN_TEXT no escaping is applied here.
def handle_REGEXP_HANDLING_TEXT(text)
  emit_inline(text)
end

target is a potential link. The following schemes are handled:

mailto:

Inserted as-is.

http:

Links are checked to see if they reference an image. If so, that image gets inserted using an <img> tag. Otherwise a conventional <a href> is used.

link:

Reference to a local file relative to the output directory.



254
255
256
257
258
259
# File 'lib/rdoc/markup/to_html.rb', line 254

# Converts a bare URL found in the text.
#
# Inside a tidy link label the URL is only escaped (the surrounding link
# already exists). Anywhere else it becomes a link whose target and label
# are both the escaped URL.
def handle_regexp_HYPERLINK(text)
  if in_tidylink_label?
    convert_string(text)
  else
    escaped = CGI.escapeHTML(text)
    gen_url(escaped, escaped)
  end
end

target is an rdoc-schemed link that will be converted into a hyperlink.

For the rdoc-ref scheme the named reference will be returned without creating a link.

For the rdoc-label scheme the footnote and label prefixes are stripped when creating a link. All other contents will be linked verbatim.



270
271
272
# File 'lib/rdoc/markup/to_html.rb', line 270

# Regexp-handling entry point for rdoc-schemed links; delegates to
# handle_RDOCLINK and returns its result.
def handle_regexp_RDOCLINK(text)
  handle_RDOCLINK text
end

#handle_regexp_SUPPRESSED_CROSSREF(text) ⇒ Object

Converts suppressed cross-reference text to HTML by removing the leading backslash.



238
239
240
# File 'lib/rdoc/markup/to_html.rb', line 238

# Renders a suppressed cross-reference: one leading backslash, if present,
# is removed and the remainder is escaped with convert_string.
def handle_regexp_SUPPRESSED_CROSSREF(text)
  unescaped = text.delete_prefix('\\')
  convert_string(unescaped)
end

#handle_STRIKE(nodes) ⇒ Object



157
158
159
160
161
# File 'lib/rdoc/markup/to_html.rb', line 157

# Wraps whatever the superclass emits for +nodes+ in <del>...</del>.
def handle_STRIKE(nodes)
  open_tag, close_tag = '<del>', '</del>'
  emit_inline(open_tag)
  super
  emit_inline(close_tag)
end


192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
# File 'lib/rdoc/markup/to_html.rb', line 192

# Emits the HTML for a tidy link whose label is +label_part+ (a list of
# inline nodes) and whose target is +url+.
#
# Three paths, in order:
# 1. image targets produce only an image tag;
# 2. a label consisting of one string that needs special handling produces
#    a link around the specially handled label;
# 3. otherwise the link is opened, the label nodes are rendered with the
#    tidy-link flag set, and the link is closed.
def handle_TIDYLINK(label_part, url)
  # When url is an image, ignore label part (maybe bug?) and just generate img tag.
  if url.match?(/\Ahttps?:\/\/.+\.(png|gif|jpg|jpeg|bmp)\z/)
    emit_inline("<img src=\"#{CGI.escapeHTML(url)}\" />")
    return
  elsif url.match?(/\Ardoc-image:/)
    emit_inline(handle_RDOCLINK(url))
    return
  end

  if label_part.size == 1 && String === label_part[0]
    raw_label = label_part[0]

    # The flag is raised only around the special handling so that
    # handle_RDOCLINK, if reached, sees that it runs inside a label.
    @in_tidylink_label = true
    special = apply_tidylink_label_special_handling(raw_label, url)
    @in_tidylink_label = false

    if special
      tag = gen_url(CGI.escapeHTML(url), special)
      # An empty tag falls through to the generic path below.
      unless tag.empty?
        emit_inline(tag)
        return
      end
    end
  end

  # Generate the link with an empty label, then split it just before "</a>"
  # so the rendered label nodes can be placed between the two halves. If the
  # generated tag contains no "</a>", close_tag is nil and only the label is
  # emitted.
  tag = gen_url(CGI.escapeHTML(url), '')
  open_tag, close_tag = tag.split(/(?=<\/a>)/, 2)
  valid_tag = open_tag && close_tag
  emit_inline(open_tag) if valid_tag
  @in_tidylink_label = true
  traverse_inline_nodes(label_part)
  @in_tidylink_label = false
  emit_inline(close_tag) if valid_tag
end

#handle_TT(code) ⇒ Object



151
152
153
154
155
# File 'lib/rdoc/markup/to_html.rb', line 151

# Wraps whatever the superclass emits for +code+ in <code>...</code>.
def handle_TT(code)
  open_tag, close_tag = '<code>', '</code>'
  emit_inline(open_tag)
  super
  emit_inline(close_tag)
end

#html_list_name(list_type, open_tag) ⇒ Object

Determines the HTML list element for list_type and open_tag

Raises:

(RDoc::Error) — if list_type has no entry in LIST_TYPE_TO_HTML



528
529
530
531
532
# File 'lib/rdoc/markup/to_html.rb', line 528

# Looks up the HTML tag for +list_type+ in LIST_TYPE_TO_HTML and returns the
# opening tag when +open_tag+ is truthy, the closing tag otherwise.
#
# Raises RDoc::Error when +list_type+ has no entry.
def html_list_name(list_type, open_tag)
  pair = LIST_TYPE_TO_HTML[list_type] ||
         raise(RDoc::Error, "Invalid list type: #{list_type.inspect}")
  opening, closing = pair
  open_tag ? opening : closing
end

Returns true if we are processing inside a tidy link label.



173
174
175
# File 'lib/rdoc/markup/to_html.rb', line 173

# Returns the @in_tidylink_label flag, which handle_TIDYLINK sets to true
# while a tidy link label is being processed and resets to false afterwards.
def in_tidylink_label?
  @in_tidylink_label
end

Adds regexp handlings about link notations.



84
85
86
# File 'lib/rdoc/markup/to_html.rb', line 84

# Registers the link-notation regexp handlings. Here that is only the
# RDOCLINK handling, added via add_regexp_handling_RDOCLINK.
def init_link_notation_regexp_handlings
  add_regexp_handling_RDOCLINK
end

#init_regexp_handlings ⇒ Object

Adds regexp handlings.



70
71
72
73
74
75
76
77
78
79
# File 'lib/rdoc/markup/to_html.rb', line 70

# Registers this formatter's regexp handlings on @markup: external
# hyperlinks, backslash-suppressed cross-references, and finally the link
# notations.
def init_regexp_handlings
  # External links: a known scheme (or "www.") followed by URL characters and
  # ending in a word character.
  hyperlink = /(?:link:|https?:|mailto:|ftp:|irc:|www\.)#{URL_CHARACTERS_REGEXP_STR}+\w/
  @markup.add_regexp_handling(hyperlink, :HYPERLINK)

  # Suppressed crossref: \#method \::method \ClassName \method_with_underscores
  suppressed_crossref = /\\(?:[#:A-Z]|[a-z]+_[a-z0-9])/
  @markup.add_regexp_handling(suppressed_crossref, :SUPPRESSED_CROSSREF)

  init_link_notation_regexp_handlings
end

#list_end_for(list_type) ⇒ Object

Returns the HTML end-tag for list_type



554
555
556
557
558
559
560
561
562
563
# File 'lib/rdoc/markup/to_html.rb', line 554

# Returns the closing tag for an item of a +list_type+ list: "</li>" for
# bullet, numbered and alphabetic lists, "</dd>" for label and note lists.
#
# Raises RDoc::Error for any other type.
def list_end_for(list_type)
  return "</li>" if %i[BULLET LALPHA NUMBER UALPHA].include?(list_type)
  return "</dd>" if %i[LABEL NOTE].include?(list_type)

  raise RDoc::Error, "Invalid list type: #{list_type.inspect}"
end

#list_item_start(list_item, list_type) ⇒ Object

Returns the HTML tag for list_type, possibly using a label from list_item



538
539
540
541
542
543
544
545
546
547
548
549
# File 'lib/rdoc/markup/to_html.rb', line 538

# Returns the opening markup for an item of a +list_type+ list.
#
# Bullet, numbered and alphabetic lists open with "<li>". Label and note
# lists emit one <dt> line per label of +list_item+ (converted with to_html)
# followed by "<dd>".
#
# Raises RDoc::Error for any other type.
def list_item_start(list_item, list_type)
  case list_type
  when :BULLET, :LALPHA, :NUMBER, :UALPHA
    "<li>"
  when :LABEL, :NOTE
    terms = Array(list_item.label).map { |label| "<dt>#{to_html label}</dt>\n" }
    terms.join << "<dd>"
  else
    raise RDoc::Error, "Invalid list type: #{list_type.inspect}"
  end
end

#parseable?(text) ⇒ Boolean

Returns true if text is valid ruby syntax



568
569
570
571
572
573
574
575
576
577
# File 'lib/rdoc/markup/to_html.rb', line 568

# Returns true when +text+ is syntactically valid Ruby, false otherwise.
#
# The text is handed to eval behind a BEGIN block that throws :valid
# immediately, so a successful parse ends in the throw before any of +text+
# runs; a SyntaxError means it did not parse. Warnings are silenced for the
# duration and $VERBOSE is always restored.
#
# NOTE(review): this relies on eval of documentation text; it is safe only
# as long as the BEGIN/throw guard keeps the text from executing.
def parseable?(text)
  saved_verbose = $VERBOSE
  $VERBOSE = nil
  catch(:valid) { eval("BEGIN { throw :valid, true }\n#{text}") }
rescue SyntaxError
  false
ensure
  $VERBOSE = saved_verbose
end

#start_accepting ⇒ Object

Prepares the visitor for HTML generation



281
282
283
284
285
286
# File 'lib/rdoc/markup/to_html.rb', line 281

# Resets the per-document state before a document is visited: the output
# fragments, the list-type stack with its per-list closing-tag slots, and
# the table of heading ids already handed out.
def start_accepting
  @res = []
  @list = []
  @in_list_entry = []
  @heading_ids = {}
end

#to_html(item) ⇒ Object

Converts item to HTML using RDoc::Text#to_html



582
583
584
585
586
587
# File 'lib/rdoc/markup/to_html.rb', line 582

# Converts +item+ to HTML: inline markup is rendered by handle_inline and the
# result is then passed through to_html_characters.
def to_html(item)
  inline_html = handle_inline(item)

  # Ideally HTML characters would be converted in handle_PLAIN_TEXT or
  # similar, but for now it has to happen here because to_html_characters
  # converts a pair of backticks to ’‘ and a pair of double backticks to ”“.
  # Known bugs: `...` in `<code>def f(...); end</code>` and `(c)` in
  # `<a href="(c)">` will be wrongly converted.
  to_html_characters(inline_html)
end