Class: RubyPants

Inherits:
String
  • Object
show all
Extended by:
RubyPantsVersion
Defined in:
lib/rubypants.rb

Constant Summary collapse

SPECIAL_HTML_TAGS =
%r!\A<(/?)(pre|code|kbd|script|style|math)[\s>]!
NON_WHITESPACE_CHARS =
/\S/

Constants included from RubyPantsVersion

RubyPantsVersion::VERSION

Instance Method Summary collapse

Constructor Details

#initialize(string, options = [2], entities = {}) ⇒ RubyPants

Create a new RubyPants instance with the text in string.

Allowed elements in the options array:

0

do nothing

1

enable all, using only em-dash shortcuts

2

enable all, using old school en- and em-dash shortcuts (default)

3

enable all, using inverted old school en- and em-dash shortcuts

-1

stupefy (translate HTML entities to their ASCII-counterparts)

If you don’t like any of these defaults, you can pass symbols to change RubyPants’ behavior:

:quotes

quotes

:backticks

backtick quotes (“double” only)

:allbackticks

backtick quotes (“double” and ‘single’)

:dashes

dashes

:oldschool

old school dashes

:inverted

inverted old school dashes

:ellipses

ellipses

:prevent_breaks

use nbsp and word-joiner to avoid breaking before dashes and ellipses

:named_entities

use named entities instead of the default decimal entities (see below)

:convertquotes

convert &quot; entities to "

:stupefy

translate RubyPants HTML entities to their ASCII counterparts.

In addition, you can customize the HTML entities that will be injected by passing in a hash for the final argument. The defaults for these entities are as follows:

:single_left_quote

&#8216;

:double_left_quote

&#8220;

:single_right_quote

&#8217;

:double_right_quote

&#8221;

:em_dash

&#8212;

:en_dash

&#8211;

:ellipsis

&#8230;

:non_breaking_space

&nbsp;

:word_joiner

&#8288;

If the :named_entities option is used, the default entities are as follows:

:single_left_quote

&lsquo;

:double_left_quote

&ldquo;

:single_right_quote

&rsquo;

:double_right_quote

&rdquo;

:em_dash

&mdash;

:en_dash

&ndash;

:ellipsis

&hellip;

:non_breaking_space

&nbsp;

:word_joiner

&#8288;

If the :character_entities option is used, RubyPants will emit Unicode characters directly, rather than HTML entities. By default this excludes the space characters (non-breaking space and word-joiner). To additionally emit Unicode space characters, use the :character_spaces option.



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/rubypants.rb', line 68

# Build a RubyPants string from +string+.
#
# +options+ is a single option or an array of options; it is always stored
# as an array in @options. +entities+ is a hash of caller-supplied
# replacement entities that is merged last, so it overrides every default.
def initialize(string, options=[2], entities = {})
  super(string)

  @options = [*options]

  # Start from the default table, then layer each optional table on top of
  # it, in this fixed order, when its option of the same name was requested.
  @entities = default_entities
  [:named_entities, :character_entities, :character_spaces].each do |layer|
    @entities.merge!(send(layer)) if @options.include?(layer)
  end

  # Caller-supplied entities are merged last and therefore win.
  @entities.merge!(entities)

  # Keep the frequently used entities in instance variables of the same name.
  [
    :single_left_quote,
    :single_right_quote,
    :double_left_quote,
    :double_right_quote,
    :ellipsis,
    :em_dash,
    :en_dash
  ].each do |entity_name|
    instance_variable_set("@#{entity_name}", @entities[entity_name])
  end
end

Instance Method Details

#to_html ⇒ Object

Apply SmartyPants transformations.



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# File 'lib/rubypants.rb', line 91

# Apply SmartyPants transformations to the text and return the result.
#
# Numeric entries in @options select a preset. They are tested in the order
# 0, 1, 2, 3, -1 and only the first one present takes effect. Symbol entries
# are applied afterwards and enable (or re-select) individual transformations
# on top of the preset.
#
# Tags are copied to the output verbatim. Text tokens that occur between an
# opening and a closing tag matched by SPECIAL_HTML_TAGS are copied verbatim
# as well.
#
# Returns self, untouched, when the options include 0; otherwise a new String.
def to_html
  # Preset 0: do nothing.
  return self if @options.include?(0)

  do_quotes = do_backticks = do_dashes = do_ellipses = do_stupefy = nil
  convert_quotes = prevent_breaks = nil

  if @options.include?(1)
    # Do everything, turn all options on.
    do_quotes = do_backticks = do_ellipses = true
    do_dashes = :normal
  elsif @options.include?(2)
    # Do everything, turn all options on, use old school dash shorthand.
    do_quotes = do_backticks = do_ellipses = true
    do_dashes = :oldschool
  elsif @options.include?(3)
    # Do everything, turn all options on, use inverted old school
    # dash shorthand.
    do_quotes = do_backticks = do_ellipses = true
    do_dashes = :inverted
  elsif @options.include?(-1)
    do_stupefy = true
  end

  # Explicit flags override numeric flag groups.
  do_quotes      = true       if @options.include?(:quotes)
  do_backticks   = true       if @options.include?(:backticks)
  do_backticks   = :both      if @options.include?(:allbackticks)
  do_dashes      = :normal    if @options.include?(:dashes)
  do_dashes      = :oldschool if @options.include?(:oldschool)
  do_dashes      = :inverted  if @options.include?(:inverted)
  prevent_breaks = true       if @options.include?(:prevent_breaks)
  do_ellipses    = true       if @options.include?(:ellipses)
  convert_quotes = true       if @options.include?(:convertquotes)
  do_stupefy     = true       if @options.include?(:stupefy)

  # Parse the HTML.
  tokens = tokenize

  # Name of the special tag we are currently inside, or nil.
  in_pre = nil

  # The output is accumulated here. Use an unfrozen copy so that appending
  # works even when string literals are frozen.
  result = ''.dup

  # This is a cheat, used to get some context for one-character
  # tokens that consist of just a quote char. What we do is remember
  # the last character of the previous text token, to use as context
  # to curl single-character quote tokens correctly.
  prev_token_last_char = nil

  tokens.each do |token|
    kind = token.first
    text = token[1]

    if kind == :tag
      result << text

      # Self-closing tags neither open nor close a special region.
      next if text.end_with?('/>')

      special = SPECIAL_HTML_TAGS.match(text)
      next unless special

      closing  = special[1] == '/'
      tag_name = special[2]

      if !closing && !in_pre
        # The outermost special tag opens the protected region ...
        in_pre = tag_name
      elsif closing && tag_name == in_pre
        # ... and only its own closing tag ends it.
        in_pre = nil
      end

      next
    end

    # An empty text token adds nothing to the output and has no last
    # character to remember, so it must not disturb the quote context.
    next if text.empty?

    # Remember last char of this token before processing.
    last_char = text[-1].chr

    t = text

    unless in_pre
      t = process_escapes(t)

      # Non-destructive on purpose: never modify a string that may still be
      # shared with the token list.
      t = t.gsub('&quot;', '"') if convert_quotes

      case do_dashes
      when :normal    then t = educate_dashes(t, prevent_breaks)
      when :oldschool then t = educate_dashes_oldschool(t, prevent_breaks)
      when :inverted  then t = educate_dashes_inverted(t, prevent_breaks)
      end

      t = educate_ellipses(t, prevent_breaks) if do_ellipses

      # Note: backticks need to be processed before quotes.
      if do_backticks
        t = educate_backticks(t)
        t = educate_single_backticks(t) if do_backticks == :both
      end

      if do_quotes
        # A token that is nothing but a quote char is curled using the last
        # character of the previous text token as context: it becomes a
        # closing quote after a non-whitespace character and an opening
        # quote otherwise.
        after_text = prev_token_last_char =~ NON_WHITESPACE_CHARS

        if t == "'"
          t = after_text ? @single_right_quote : @single_left_quote
        elsif t == '"'
          t = after_text ? @double_right_quote : @double_left_quote
        else
          # Normal case:
          t = educate_quotes(t)
        end
      end

      t = stupefy_entities(t) if do_stupefy
    end

    prev_token_last_char = last_char
    result << t
  end

  # Done
  result
end