Module: WordpressFormatting::Wpautop

Defined in:: lib/wordpress_formatting/wpautop.rb

Constant Summary collapse

MULTIPLE_BR =

%r{<br\s*/?>\s*<br\s*/?>}

ALLBLOCKS_OPEN =

%r{(?<block><(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)[\s/>])}

ALLBLOCKS_CLOSE =

%r{(?<block></(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)>)}

HR =

%r{(?<hr><hr\s*?/?>)}

CRLF =

%r{(\r\n|\r)}

MANY_NL =

%r{\n\n+}

SPLIT_NL =

%r{\n\s*\n}

EMPTY_P =

%r{<p>\s*</p>}

CLOSE_P =

%r{<p>(?<content>[^<]+)</(?<tag>div|address|form)>}

UNWRAP_P =

%r{<p>\s*(?<unwrap></?(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)[^>]*>)\s*</p>}

UNWRAP_LI =

%r{<p>(?<unwrap><li.+?)</p>}

UNWRAP_BLOCKQUOTE =

%r{<p><blockquote(?<unwrap>[^>]*)>}i

REMOVE_BLOCK_OPENING =

%r{<p>\s*(?<remove></?(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)[^>]*>)}

REMOVE_BLOCK_CLOSING =

%r{(?<remove></?(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)[^>]*>)\s*</p>}

NLBR =

%r{(?<!<br />)\s*\n}

REMOVE_BR =

%r{(?<remove></?(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)[^>]*>)\s*<br />}

REMOVE_BR_SUBSET =

%r{<br />(?<remove>\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)}

PRESERVE_NL =

%r{<(script|style|svg).*?<\/\1>}

HTML_SPLIT = XXX: Simplified because the RegExp with comments and CDATA doesn’t compile

%r{(<[^>]*>?)}

Class Method Summary collapse

.included(base) ⇒ Object

Allows a class (such as String) to be extended with a `wpautop` instance method that performs WP formatting.
.wpautop(pee, br = true) ⇒ String

Replaces double line breaks with paragraph elements.

Class Method Details

.included(base) ⇒ `Object`

Allows a class (such as String) to be extended with a `wpautop` instance method that performs WP formatting.

# File 'lib/wordpress_formatting/wpautop.rb', line 45

# Hook that equips +base+ (for example String) with an instance-level
# +wpautop+ method.
#
# The generated method formats the receiver's +to_s+ representation by
# delegating to WordpressFormatting::Wpautop.wpautop.
#
# @param base [Class, Module] the class or module receiving the method.
def included(base)
  base.class_eval do
    # br: forwarded unchanged as the second argument of Wpautop.wpautop.
    define_method(:wpautop) do |br = true|
      WordpressFormatting::Wpautop.wpautop(to_s, br)
    end
  end
end

.wpautop(pee, br = true) ⇒ `String`

Replaces double line breaks with paragraph elements.

A group of regex replaces used to identify text formatted with newlines and replace double line breaks with HTML paragraph tags. The remaining line breaks after conversion become `<br />` tags, unless `br` is `false` or `nil`.

Parameters:

pee (String) —

The text which has to be formatted.
br (Boolean) —

Optional. If set, this will convert all remaining line breaks after paragraphing. Line breaks within `<script>`, `<style>`, and `<svg>` tags are not affected. Default true.

Returns:

(String) —

Text which has been converted into correct paragraph tags.

# File 'lib/wordpress_formatting/wpautop.rb', line 67

# Replaces double line breaks with paragraph elements.
#
# Runs a fixed sequence of regex replacements: blocks of text separated by
# blank lines are wrapped in <p> tags and, when +br+ is truthy, the remaining
# single line breaks become <br /> tags. The content of <pre> elements is
# swapped out for placeholders before processing and put back verbatim at the
# end. Line breaks inside <script>, <style> and <svg> elements, and line
# breaks inside HTML tags themselves, are left untouched.
#
# @param pee [String] the text which has to be formatted.
# @param br [Boolean] when truthy (default), convert the line breaks that
#   remain after paragraphing into <br /> tags.
# @return [String] the formatted text, or '' when +pee+ is blank.
def wpautop(pee, br = true)
  return '' if pee.blank?

  # Work on a copy so the caller's string is never mutated.
  pee = pee.dup
  pre_tags = {}

  # Just to make things a little easier, pad the end.
  pee << "\n"

  # Pre tags shouldn't be touched by autop: replace them with placeholders
  # and bring them back after autop.
  if pee.include? '<pre'
    pee_parts = pee.split '</pre>'
    last_pee = pee_parts.pop
    pee = ''.dup

    pee_parts.each_with_index do |pee_part, i|
      start = pee_part.index('<pre')

      # Malformed HTML?
      unless start
        pee << pee_part
        next
      end

      name = "<pre wp-pre-tag-#{i}></pre>"

      pre_tags[name] = "#{pee_part[start..]}</pre>"
      # Exclusive range: with `0..start-1` a <pre> at index 0 produced
      # `0..-1`, i.e. the whole part, duplicating the <pre> content.
      pee << pee_part[0...start]
      pee << name
    end

    pee << last_pee
  end

  # Change multiple <br>'s into two line breaks, which will turn into paragraphs.
  pee.gsub! MULTIPLE_BR, "\n\n"

  # Add a double line break above block-level opening tags.
  pee.gsub! ALLBLOCKS_OPEN, "\n\n\\k<block>"

  # Add a double line break below block-level closing tags.
  pee.gsub! ALLBLOCKS_CLOSE, "\\k<block>\n\n"

  # Add a double line break after hr tags, which are self closing.
  pee.gsub! HR, "\\k<hr>\n\n"

  # Standardize newline characters to "\n".
  pee.gsub! CRLF, "\n"

  # Find newlines inside HTML tags and replace them with placeholders, so a
  # tag spanning several lines is neither split into paragraphs nor given
  # <br /> tags. Text between tags is left alone.
  pee.gsub!(HTML_SPLIT) { |tag| tag.gsub("\n", ' <!-- wpnl --> ') }

  # Remove more than two contiguous line breaks.
  pee.gsub! MANY_NL, "\n\n"

  # Split up the contents into an array of strings, separated by double line breaks.
  pees = pee.split(SPLIT_NL).reject(&:empty?)

  # Rebuild the content as a string, wrapping every bit with a <p>.
  pee = pees.each_with_object(''.dup) do |tinkle, rebuilt|
    rebuilt << "<p>#{tinkle.strip}</p>"
  end

  # Under certain strange conditions it could create a P of entirely whitespace.
  pee.gsub! EMPTY_P, ''

  # Add a closing <p> inside <div>, <address>, or <form> tag if missing.
  pee.gsub! CLOSE_P, '<p>\\k<content></p></\\k<tag>>'

  # If an opening or closing block element tag is wrapped in a <p>, unwrap it.
  pee.gsub! UNWRAP_P, '\\k<unwrap>'

  # In some cases <li> may get wrapped in <p>, fix them.
  pee.gsub! UNWRAP_LI, '\\k<unwrap>'

  # If a <blockquote> is wrapped with a <p>, move it inside the <blockquote>.
  pee.gsub! UNWRAP_BLOCKQUOTE, '<blockquote\\k<unwrap>><p>'
  pee.gsub! '</blockquote></p>', '</p></blockquote>'

  # If an opening or closing block element tag is preceded by an opening <p> tag, remove it.
  pee.gsub! REMOVE_BLOCK_OPENING, '\\k<remove>'

  # If an opening or closing block element tag is followed by a closing <p> tag, remove it.
  pee.gsub! REMOVE_BLOCK_CLOSING, '\\k<remove>'

  # Optionally insert line breaks.
  if br
    # Replace newlines that shouldn't be touched with a placeholder.
    # PRESERVE_NL has a capture group, so String#scan would yield arrays of
    # groups rather than the matched text; the block form of gsub! receives
    # the whole match. MULTILINE lets `.` cross the very newlines that have
    # to be protected.
    preserve_nl = Regexp.new(PRESERVE_NL.source, PRESERVE_NL.options | Regexp::MULTILINE)
    pee.gsub!(preserve_nl) { |element| element.gsub("\n", '<WPPreserveNewline />') }

    # Normalize <br>
    pee.gsub! '<br>', '<br />'
    pee.gsub! '<br/>', '<br />'

    # Replace any new line characters that aren't preceded by a <br /> with a <br />.
    pee.gsub! NLBR, "<br />\n"

    # Replace newline placeholders with newlines.
    pee.gsub! '<WPPreserveNewline />', "\n"
  end

  # If a <br /> tag is after an opening or closing block tag, remove it.
  pee.gsub! REMOVE_BR, '\\k<remove>'

  # If a <br /> tag is before a subset of opening or closing block tags, remove it.
  pee.gsub! REMOVE_BR_SUBSET, '\\k<remove>'

  # Drop a newline left directly before the final </p>. This must be a
  # Regexp: as a String pattern the "$" was matched literally.
  pee.gsub! %r{\n</p>\Z}, '</p>'

  # Replace placeholder <pre> tags with their original content. The block
  # form inserts the content verbatim; a replacement String would interpret
  # backslash sequences such as "\1" found inside the <pre> body.
  pre_tags.each_pair do |key, value|
    pee.gsub!(key) { value }
  end

  # Restore newlines in all elements.
  pee.gsub! ' <!-- wpnl --> ', "\n"
  pee.gsub! '<!-- wpnl -->', "\n"

  pee
end