Class: RubyPants

Inherits:

String

Object
String
RubyPants

show all

Extended by:: RubyPantsVersion

Defined in:: lib/rubypants.rb

Constant Summary collapse

SPECIAL_HTML_TAGS =

%r!\A<(/?)(pre|code|kbd|script|style|math)[\s>]!

NON_WHITESPACE_CHARS =

/\S/

Constants included from RubyPantsVersion

RubyPantsVersion::VERSION

Instance Method Summary collapse

#initialize(string, options = [2], entities = {}) ⇒ RubyPants constructor

Create a new RubyPants instance with the text in string.
#to_html ⇒ Object

Apply SmartyPants transformations.

Constructor Details

#initialize(string, options = [2], entities = {}) ⇒ `RubyPants`

Create a new RubyPants instance with the text in string.

Allowed elements in the options array:

0: do nothing
1: enable all, using only em-dash shortcuts
2: enable all, using old school en- and em-dash shortcuts (default)
3: enable all, using inverted old school en and em-dash shortcuts
-1: stupefy (translate HTML entities to their ASCII-counterparts)

If you don’t like any of these defaults, you can pass symbols to change RubyPants’ behavior:

:quotes: quotes
:backticks: backtick quotes (“double” only)
:allbackticks: backtick quotes (“double” and ‘single’)
:dashes: dashes
:oldschool: old school dashes
:inverted: inverted old school dashes
:ellipses: ellipses
:prevent_breaks: use nbsp and word-joiner to avoid breaking before dashes and ellipses
:named_entities: use named entities instead of the default decimal entities (see below)
:convertquotes: convert &quot; entities to "
:stupefy: translate RubyPants HTML entities to their ASCII counterparts.

In addition, you can customize the HTML entities that will be injected by passing in a hash for the final argument. The defaults for these entities are as follows:

:single_left_quote: &#8216;
:double_left_quote: &#8220;
:single_right_quote: &#8217;
:double_right_quote: &#8221;
:em_dash: &#8212;
:en_dash: &#8211;
:ellipsis: &#8230;
:non_breaking_space: &#160;
:word_joiner: &#8288;

If the :named_entities option is used, the default entities are as follows:

:single_left_quote: &lsquo;
:double_left_quote: &ldquo;
:single_right_quote: &rsquo;
:double_right_quote: &rdquo;
:em_dash: &mdash;
:en_dash: &ndash;
:ellipsis: &hellip;
:non_breaking_space: &nbsp;
:word_joiner: &#8288;

If the :character_entities option is used, RubyPants will emit Unicode characters directly, rather than HTML entities. By default this excludes the space characters (non-breaking space and word-joiner). To additionally emit Unicode space characters, use the :character_spaces option.

# File 'lib/rubypants.rb', line 68

# Create a new RubyPants instance with the text in +string+.
#
# +options+ may be a single value or an array; it is always stored as an
# array in @options. +entities+ is a hash of replacement strings that is
# merged on top of whichever entity tables the options select.
def initialize(string, options=[2], entities = {})
  super string

  @options = [*options]

  # Begin with the table from default_entities, then layer on each optional
  # table whose flag was passed. Each flag symbol is also the name of the
  # method that supplies the corresponding table.
  @entities = default_entities
  [:named_entities, :character_entities, :character_spaces].each do |flag|
    @entities.merge!(send(flag)) if @options.include?(flag)
  end

  # Caller-supplied entries are merged last, so they override everything above.
  @entities.merge!(entities)

  # Keep the most frequently used replacements in their own instance variables.
  @single_left_quote, @single_right_quote,
  @double_left_quote, @double_right_quote,
  @ellipsis, @em_dash, @en_dash = @entities.values_at(
    :single_left_quote, :single_right_quote,
    :double_left_quote, :double_right_quote,
    :ellipsis, :em_dash, :en_dash
  )
end

Instance Method Details

#to_html ⇒ `Object`

Apply SmartyPants transformations.

# File 'lib/rubypants.rb', line 91

# Apply SmartyPants transformations.
#
# The transformations to run are chosen from @options: first by the numeric
# presets (0, 1, 2, 3, -1; the first one found in that order wins), then by
# the symbol flags, which switch individual features on in addition to the
# preset.
#
# Returns +self+ untouched when the options include 0; otherwise returns a
# new string built from the processed tokens. Text inside the tags matched
# by SPECIAL_HTML_TAGS is copied through unmodified.
def to_html
  do_quotes = do_backticks = do_dashes = do_ellipses = do_stupefy = nil
  convert_quotes = prevent_breaks = nil

  if @options.include?(0)
    # Do nothing.
    return self
  elsif @options.include?(1)
    # Do everything, turn all options on.
    do_quotes = do_backticks = do_ellipses = true
    do_dashes = :normal
  elsif @options.include?(2)
    # Do everything, turn all options on, use old school dash shorthand.
    do_quotes = do_backticks = do_ellipses = true
    do_dashes = :oldschool
  elsif @options.include?(3)
    # Do everything, turn all options on, use inverted old school
    # dash shorthand.
    do_quotes = do_backticks = do_ellipses = true
    do_dashes = :inverted
  elsif @options.include?(-1)
    do_stupefy = true
  end

  # Explicit flags override numeric flag groups.
  do_quotes      = true       if @options.include?(:quotes)
  do_backticks   = true       if @options.include?(:backticks)
  do_backticks   = :both      if @options.include?(:allbackticks)
  do_dashes      = :normal    if @options.include?(:dashes)
  do_dashes      = :oldschool if @options.include?(:oldschool)
  do_dashes      = :inverted  if @options.include?(:inverted)
  prevent_breaks = true       if @options.include?(:prevent_breaks)
  do_ellipses    = true       if @options.include?(:ellipses)
  convert_quotes = true       if @options.include?(:convertquotes)
  do_stupefy     = true       if @options.include?(:stupefy)

  # Parse the HTML into [:tag, string] / [other, string] pairs.
  tokens = tokenize

  # Name of the special tag (see SPECIAL_HTML_TAGS) we are currently
  # inside, or nil when text should be processed.
  in_pre = nil

  # The output is accumulated here.
  result = ""

  # This is a cheat, used to get some context for one-character
  # tokens that consist of just a quote char. What we do is remember
  # the last character of the previous text token, to use as context
  # to curl single-character quote tokens correctly.
  prev_token_last_char = nil

  tokens.each do |token|
    if token.first == :tag
      tag = token[1]
      result << tag

      # Self-closing tags never open or close a protected region.
      if !tag.end_with?('/>') && tag =~ SPECIAL_HTML_TAGS
        slash, name = $1, $2
        if slash == '' && !in_pre
          # Only the outermost special tag is tracked.
          in_pre = name
        elsif slash == '/' && name == in_pre
          in_pre = nil
        end
      end
    else
      t = token[1]

      # Remember last char of this token before processing.
      last_char = t[-1].chr

      unless in_pre
        t = process_escapes(t)

        t.gsub!('&quot;', '"') if convert_quotes

        case do_dashes
        when :normal    then t = educate_dashes(t, prevent_breaks)
        when :oldschool then t = educate_dashes_oldschool(t, prevent_breaks)
        when :inverted  then t = educate_dashes_inverted(t, prevent_breaks)
        end

        t = educate_ellipses(t, prevent_breaks) if do_ellipses

        # Note: backticks need to be processed before quotes.
        if do_backticks
          t = educate_backticks(t)
          t = educate_single_backticks(t) if do_backticks == :both
        end

        if do_quotes
          if t == "'"
            # Special case: single-character ' token. It closes when the
            # previous text token ended in a non-whitespace character and
            # opens otherwise (including at the very start).
            t = (prev_token_last_char =~ NON_WHITESPACE_CHARS) ? @single_right_quote : @single_left_quote
          elsif t == '"'
            # Special case: single-character " token, same rule as above.
            t = (prev_token_last_char =~ NON_WHITESPACE_CHARS) ? @double_right_quote : @double_left_quote
          else
            # Normal case:
            t = educate_quotes(t)
          end
        end

        t = stupefy_entities(t) if do_stupefy
      end

      # The context character is recorded even for text inside special tags.
      prev_token_last_char = last_char
      result << t
    end
  end

  # Done
  result
end