Module: RDoc::Text
- Included in:
- CodeObject, Comment, Context::Section, Generator::JsonIndex, Markup::Parser, Markup::ToHtml, Parser::C, Stats
- Defined in:
- lib/rdoc/text.rb
Overview
Methods for manipulating comment text
Constant Summary collapse
- MARKUP_FORMAT =
Maps markup formats to classes that can parse them. If the format is unknown, “rdoc” format is used.
{ 'markdown' => RDoc::Markdown, 'rdoc' => RDoc::Markup, 'rd' => RDoc::RD, 'tomdoc' => RDoc::TomDoc, }
- TO_HTML_CHARACTERS =
Maps an encoding to a Hash of characters properly transcoded for that encoding.
See also encode_fallback.
Hash.new do |h, encoding| h[encoding] = { :close_dquote => encode_fallback('”', encoding, '"'), :close_squote => encode_fallback('’', encoding, '\''), :copyright => encode_fallback('©', encoding, '(c)'), :ellipsis => encode_fallback('…', encoding, '...'), :em_dash => encode_fallback('—', encoding, '---'), :en_dash => encode_fallback('–', encoding, '--'), :open_dquote => encode_fallback('“', encoding, '"'), :open_squote => encode_fallback('‘', encoding, '\''), :trademark => encode_fallback('®', encoding, '(r)'), } end
- SPACE_SEPARATED_LETTER_CLASS =
Character class to be separated by a space when concatenating lines.
/[\p{Nd}\p{Lc}\p{Pc}]|[!-~&&\W]/
Instance Attribute Summary collapse
-
#language ⇒ Object
The language for this text.
Class Method Summary collapse
-
.encode_fallback(character, encoding, fallback) ⇒ Object
Transcodes
character
to encoding
with a fallback
character.
Instance Method Summary collapse
-
#expand_tabs(text) ⇒ Object
Expands tab characters in
text
to eight spaces. -
#flush_left(text) ⇒ Object
Flush
text
left based on the shortest line. -
#markup(text) ⇒ Object
Convert a string in markup format into HTML.
-
#normalize_comment(text) ⇒ Object
Strips hashes, expands tabs then flushes
text
to the left. -
#parse(text, format = 'rdoc') ⇒ Object
Normalizes
text
then builds a RDoc::Markup::Document from it. -
#snippet(text, limit = 100) ⇒ Object
The first
limit
characters of text
as HTML. -
#strip_hashes(text) ⇒ Object
Strips leading # characters from
text
. -
#strip_newlines(text) ⇒ Object
Strips leading and trailing \n characters from
text
. -
#strip_stars(text) ⇒ Object
Strips /* */ style comments.
-
#to_html(text) ⇒ Object
Converts ampersand, dashes, ellipsis, quotes, copyright and registered trademark symbols in
text
to properly encoded characters. -
#wrap(txt, line_len = 76) ⇒ Object
Wraps
txt
to line_len
.
Instance Attribute Details
#language ⇒ Object
The language for this text. This affects stripping comments markers.
17 18 19 |
# File 'lib/rdoc/text.rb', line 17
##
# The language for this text. normalize_comment reads it to decide which
# comment markers (+:ruby+ hashes or +:c+ stars) to strip.
def language
  @language
end
Class Method Details
.encode_fallback(character, encoding, fallback) ⇒ Object
Transcodes character
to encoding
with a fallback
character.
55 56 57 58 |
# File 'lib/rdoc/text.rb', line 55
##
# Transcodes +character+ to +encoding+, substituting +fallback+ when
# +character+ has no representation in +encoding+.
#
# Both the <tt>:fallback</tt> table and the <tt>:undef</tt>/<tt>:replace</tt>
# options are passed to String#encode, and both point at the same +fallback+
# string.
def self.encode_fallback character, encoding, fallback
  character.encode(encoding,
                   :fallback => { character => fallback },
                   :undef    => :replace,
                   :replace  => fallback)
end
Instance Method Details
#expand_tabs(text) ⇒ Object
Expands tab characters in text
to eight spaces
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/rdoc/text.rb', line 63
##
# Expands tab characters in +text+ to spaces, padding each tab out to the
# next multiple of eight columns.
#
# Returns a new String built from the expanded lines.
#
# NOTE(review): the extracted listing had lost the method name and the
# accumulator name ("def text = []"). Both are restored here from the
# documented signature <tt>expand_tabs(text)</tt> -- confirm against
# lib/rdoc/text.rb.
def expand_tabs text
  expanded = []

  text.each_line do |line|
    # gsub! returns nil once no tab is left to expand, which ends the loop.
    nil while line.gsub!(/(?:\G|\r)((?:.{8})*?)([^\t\r\n]{0,7})\t/) do
      # $1 is whole 8-column groups, $2 the partial group before the tab.
      r = "#{$1}#{$2}#{' ' * (8 - $2.size)}"
      r = RDoc::Encoding.change_encoding r, text.encoding
      r
    end

    expanded << line
  end

  expanded.join
end
#flush_left(text) ⇒ Object
Flush text
left based on the shortest line
82 83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'lib/rdoc/text.rb', line 82
##
# Flushes +text+ left by removing, from every line, up to as many leading
# spaces as the least-indented non-blank line has.
#
# Returns a new String; +text+ itself is not modified.
def flush_left text
  # 9999 acts as "no indentation seen yet"; blank lines also report 9999 so
  # they never lower the minimum.
  indent = 9999

  text.each_line do |line|
    line_indent = line =~ /\S/ || 9999
    indent = line_indent if indent > line_indent
  end

  empty = ''
  empty = RDoc::Encoding.change_encoding empty, text.encoding

  text.gsub(/^ {0,#{indent}}/, empty)
end
#markup(text) ⇒ Object
Convert a string in markup format into HTML.
Requires the including class to implement #formatter
101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/rdoc/text.rb', line 101
##
# Convert a string in markup format into HTML.
#
# Requires the including class to implement #formatter. Also reads
# <tt>@store</tt> to look up a locale; when one is set, +text+ is translated
# through RDoc::I18n::Text before being parsed.
#
# NOTE(review): the extracted listing had lost the receiver between
# "@store.rdoc." and ".locale". It is restored here as +options+ -- confirm
# against lib/rdoc/text.rb.
def markup text
  if @store.rdoc.options
    locale = @store.rdoc.options.locale
  else
    locale = nil
  end

  if locale
    i18n_text = RDoc::I18n::Text.new(text)
    text = i18n_text.translate(locale)
  end

  parse(text).accept formatter
end
#normalize_comment(text) ⇒ Object
Strips hashes, expands tabs then flushes text
to the left
117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/rdoc/text.rb', line 117
##
# Strips hashes, expands tabs then flushes +text+ to the left.
#
# Which comment markers are stripped depends on #language: +:ruby+ strips
# leading hashes, +:c+ strips <tt>/* */</tt> stars, anything else strips
# nothing. Leading and trailing newlines are removed last. Empty +text+ is
# returned untouched.
#
# NOTE(review): the extracted listing read "text = text" where the
# documented behaviour expands tabs. The +expand_tabs+ call is restored
# here -- confirm against lib/rdoc/text.rb.
def normalize_comment text
  return text if text.empty?

  case language
  when :ruby
    text = strip_hashes text
  when :c
    text = strip_stars text
  end

  text = expand_tabs text
  text = flush_left text
  text = strip_newlines text
  text
end
#parse(text, format = 'rdoc') ⇒ Object
Normalizes text
then builds a RDoc::Markup::Document from it
135 136 137 138 139 140 141 142 143 144 |
# File 'lib/rdoc/text.rb', line 135
##
# Normalizes +text+ then builds a RDoc::Markup::Document from it.
#
# An RDoc::Markup::Document is returned unchanged and an RDoc::Comment is
# asked to parse itself. Text that is empty or only newlines after
# normalization yields an empty document. Otherwise the parser registered
# under +format+ in MARKUP_FORMAT does the work.
def parse text, format = 'rdoc'
  return text if RDoc::Markup::Document === text
  return text.parse if RDoc::Comment === text

  text = normalize_comment text # TODO remove, should not be necessary

  return RDoc::Markup::Document.new if text =~ /\A\n*\z/

  MARKUP_FORMAT[format].parse text
end
#snippet(text, limit = 100) ⇒ Object
The first limit
characters of text
as HTML
149 150 151 152 153 |
# File 'lib/rdoc/text.rb', line 149
##
# The first +limit+ characters of +text+ as HTML.
#
# +text+ is parsed with #parse and the resulting document is converted by
# RDoc::Markup::ToHtmlSnippet.
#
# NOTE(review): the extracted listing had lost the first constructor
# argument ("new(, limit)"). It is restored here as +options+, which the
# including class presumably provides -- confirm against lib/rdoc/text.rb.
def snippet text, limit = 100
  document = parse text

  RDoc::Markup::ToHtmlSnippet.new(options, limit).convert document
end
#strip_hashes(text) ⇒ Object
Strips leading # characters from text
158 159 160 161 162 163 164 165 |
# File 'lib/rdoc/text.rb', line 158
##
# Strips leading # characters from +text+.
#
# Each run of leading hashes is replaced by the same number of spaces, and
# lines left with only whitespace are emptied. If any line starts with
# something other than a hash (after optional whitespace), +text+ is
# returned unchanged.
def strip_hashes text
  # The atomic group stops \s* from backtracking, so a line whose first
  # non-space character is '#' cannot satisfy [^\#].
  return text if text =~ /^(?>\s*)[^\#]/

  empty = ''
  empty = RDoc::Encoding.change_encoding empty, text.encoding

  text.gsub(/^\s*(#+)/) { $1.tr '#', ' ' }.gsub(/^\s+$/, empty)
end
#strip_newlines(text) ⇒ Object
Strips leading and trailing n characters from text
170 171 172 |
# File 'lib/rdoc/text.rb', line 170
##
# Strips leading and trailing \n characters from +text+.
#
# Newlines inside the text are kept. Returns a new String.
def strip_newlines text
  text.gsub(/\A\n*(.*?)\n*\z/m) do $1 end # block preserves String encoding
end
#strip_stars(text) ⇒ Object
Strips /* */ style comments
177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
# File 'lib/rdoc/text.rb', line 177
##
# Strips /* */ style comments.
#
# The comment opener, the comment closer and each line's leading star are
# replaced by spaces of the same length. Any "Document-method: name"
# directive is removed and lines left with only whitespace are emptied.
# Text without a <tt>/* ... */</tt> pair is returned unchanged.
def strip_stars text
  return text unless text =~ %r%/\*.*\*/%m

  encoding = text.encoding

  # gsub returns a copy, so the bang methods below never touch the caller's
  # string.
  text = text.gsub %r%Document-method:\s+[\w:.#=!?|^&<>~+\-/*\%@`\[\]]+%, ''

  space = ' '
  space = RDoc::Encoding.change_encoding space, encoding if encoding

  text.sub!  %r%/\*+%         do space * $&.length end
  text.sub!  %r%\*+/%         do space * $&.length end
  text.gsub! %r%^[ \t]*\*%m   do space * $&.length end

  empty = ''
  empty = RDoc::Encoding.change_encoding empty, encoding if encoding
  text.gsub(/^\s+$/, empty)
end
#to_html(text) ⇒ Object
Converts ampersand, dashes, ellipsis, quotes, copyright and registered trademark symbols in text
to properly encoded characters.
200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 |
# File 'lib/rdoc/text.rb', line 200
##
# Converts dashes, ellipsis, quotes, copyright and registered trademark
# symbols in +text+ to properly encoded characters.
#
# The replacement characters come from
# <tt>RDoc::Text::TO_HTML_CHARACTERS[text.encoding]</tt>. Complete
# <tt><tt></tt>/<tt><code></tt> spans and other HTML tags are copied through
# without conversion. An unclosed <tt><tt></tt>/<tt><code></tt> emits a
# warning.
#
# NOTE(review): the extracted listing had the quote patterns as
# <tt>/"|"/</tt> and <tt>/'|'/</tt>, which is what <tt>&quot;</tt> and
# <tt>&#39;</tt> look like after HTML entity decoding. The entity
# alternations are restored here; the fallback scanner already stops at
# <tt>&</tt> -- confirm against lib/rdoc/text.rb.
def to_html text
  html = (''.encode text.encoding).dup

  encoded = RDoc::Text::TO_HTML_CHARACTERS[text.encoding]

  s = StringScanner.new text
  insquotes = false  # inside a single-quoted span
  indquotes = false  # inside a double-quoted span
  after_word = nil   # truthy when the previous chunk ended in a word character

  until s.eos? do
    case
    when s.scan(/<(tt|code)>.*?<\/\1>/) then # skip contents of tt
      html << s.matched.gsub('\\\\', '\\')
    when s.scan(/<(tt|code)>.*?/) then
      warn "mismatched <#{s[1]}> tag" # TODO signal file/line
      html << s.matched
    when s.scan(/<[^>]+\/?s*>/) then # skip HTML tags
      html << s.matched
    when s.scan(/\\(\S)/) then # unhandled suppressed crossref
      html << s[1]
      after_word = nil
    when s.scan(/\.\.\.(\.?)/) then
      # a fourth dot is kept literally in front of the ellipsis
      html << s[1] << encoded[:ellipsis]
      after_word = nil
    when s.scan(/\(c\)/i) then
      html << encoded[:copyright]
      after_word = nil
    when s.scan(/\(r\)/i) then
      html << encoded[:trademark]
      after_word = nil
    when s.scan(/---/) then
      html << encoded[:em_dash]
      after_word = nil
    when s.scan(/--/) then
      html << encoded[:en_dash]
      after_word = nil
    when s.scan(/&quot;|"/) then
      html << encoded[indquotes ? :close_dquote : :open_dquote]
      indquotes = !indquotes
      after_word = nil
    when s.scan(/``/) then # backtick double quote
      html << encoded[:open_dquote]
      after_word = nil
    when s.scan(/(?:&#39;|'){2}/) then # tick double quote
      html << encoded[:close_dquote]
      after_word = nil
    when s.scan(/`/) then # backtick
      if insquotes or after_word
        html << '`'
        after_word = nil
      else
        html << encoded[:open_squote]
        insquotes = true
      end
    when s.scan(/&#39;|'/) then # single quote
      if insquotes
        html << encoded[:close_squote]
        insquotes = false
      elsif after_word
        # Mary's dog, my parents' house: do not start paired quotes
        html << encoded[:close_squote]
      else
        html << encoded[:open_squote]
        insquotes = true
      end

      after_word = nil
    else # advance to the next potentially significant character
      match = s.scan(/.+?(?=[<\\.("'`&-])/) #"

      if match then
        html << match
        after_word = match =~ /\w$/
      else
        # nothing significant remains: copy the tail and stop
        html << s.rest
        break
      end
    end
  end

  html
end
#wrap(txt, line_len = 76) ⇒ Object
Wraps txt
to line_len
287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 |
# File 'lib/rdoc/text.rb', line 287 def wrap(txt, line_len = 76) res = [] sp = 0 ep = txt.length while sp < ep # scan back for a space p = sp + line_len - 1 if p >= ep p = ep else while p > sp and txt[p] != ?\s p -= 1 end if p <= sp p = sp + line_len while p < ep and txt[p] != ?\s p += 1 end end end res << txt[sp...p] << "\n" sp = p sp += 1 while sp < ep and txt[sp] == ?\s end res.join.strip end |