Module: WordpressFormatting::Wpautop

Defined in:
lib/wordpress_formatting/wpautop.rb

Constant Summary collapse

# Two <br> tags (optionally self-closing) separated only by optional whitespace.
MULTIPLE_BR =
%r{<br\s*/?>\s*<br\s*/?>}
# Start of a block-level opening tag: "<name" followed by whitespace, "/" or ">".
# The matched text is captured as "block".
ALLBLOCKS_OPEN =
%r{(?<block><(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)[\s/>])}
# A block-level closing tag ("</name>"), captured as "block".
ALLBLOCKS_CLOSE =
%r{(?<block></(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)>)}
# An attribute-less <hr> tag, optionally self-closing, captured as "hr".
HR =
%r{(?<hr><hr\s*?/?>)}
# A Windows ("\r\n") or lone carriage-return ("\r") line ending.
CRLF =
%r{(\r\n|\r)}
# Two or more consecutive newlines.
MANY_NL =
%r{\n\n+}
# Two newlines separated only by optional whitespace (a paragraph boundary).
SPLIT_NL =
%r{\n\s*\n}
# A paragraph element containing nothing but whitespace.
EMPTY_P =
%r{<p>\s*</p>}
# "<p>" followed by tag-free text ("content") and then directly by a closing
# div, address or form tag ("tag"), i.e. a paragraph that was never closed.
CLOSE_P =
%r{<p>(?<content>[^<]+)</(?<tag>div|address|form)>}
# A single opening or closing block-level tag ("unwrap") that is the only
# content of a <p>...</p> pair.
UNWRAP_P =
%r{<p>\s*(?<unwrap></?(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)[^>]*>)\s*</p>}
# A <p>...</p> pair whose content ("unwrap") starts with "<li".
UNWRAP_LI =
%r{<p>(?<unwrap><li.+?)</p>}
# "<p>" immediately followed by an opening <blockquote>; "unwrap" holds the
# blockquote's attribute text. Case-insensitive.
UNWRAP_BLOCKQUOTE =
%r{<p><blockquote(?<unwrap>[^>]*)>}i
# "<p>" (plus optional whitespace) directly before an opening or closing
# block-level tag; the tag itself is captured as "remove".
REMOVE_BLOCK_OPENING =
%r{<p>\s*(?<remove></?(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)[^>]*>)}
# An opening or closing block-level tag ("remove") directly followed by
# optional whitespace and "</p>".
REMOVE_BLOCK_CLOSING =
%r{(?<remove></?(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)[^>]*>)\s*</p>}
# A newline, together with any whitespace before it, that is not preceded by "<br />".
NLBR =
%r{(?<!<br />)\s*\n}
# An opening or closing block-level tag ("remove") followed by optional
# whitespace and "<br />".
REMOVE_BR =
%r{(?<remove></?(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)[^>]*>)\s*<br />}
# "<br />" followed by optional whitespace and an opening or closing tag from
# a smaller tag list; the whitespace and tag are captured as "remove".
REMOVE_BR_SUBSET =
%r{<br />(?<remove>\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)}
# A <script>, <style> or <svg> element from its opening tag up to the matching
# closing tag (back-reference \1).
# NOTE(review): there is no /m flag, so "." does not match newlines and an
# element spanning several lines is not matched by this pattern as written.
PRESERVE_NL =
%r{<(script|style|svg).*?<\/\1>}
HTML_SPLIT =

XXX: Simplified because the RegExp with comments and CDATA doesn’t compile

%r{(<[^>]*>?)}

Class Method Summary collapse

Class Method Details

.included(base) ⇒ Object

Allows a class (like String) to be extended so that its instances can perform WP formatting.



45
46
47
48
49
50
51
# File 'lib/wordpress_formatting/wpautop.rb', line 45

# Adds a +wpautop+ instance method to +base+ (for example String).
#
# The generated method formats the receiver's +to_s+ value by delegating to
# WordpressFormatting::Wpautop.wpautop.
#
# @param base [Class, Module] the class that receives the +wpautop+ method
def included(base)
  base.class_eval do
    # +br+ is handed through unchanged and defaults to true.
    define_method(:wpautop) do |br = true|
      WordpressFormatting::Wpautop.wpautop(to_s, br)
    end
  end
end

.wpautop(pee, br = true) ⇒ String

Replaces double line breaks with paragraph elements.

A group of regex replaces used to identify text formatted with newlines and replace double line breaks with HTML paragraph tags. The remaining line breaks after conversion become `<br />` tags, unless `br` is false or nil.

If `br` is set, all remaining line breaks after paragraphing are converted. Line breaks within `<script>`, `<style>`, and `<svg>` tags are not affected. Default true. The return value is the text converted into correct paragraph tags.

Parameters:

  • pee (String)

    The text which has to be formatted.

  • br (Boolean) (defaults to: true)

    Optional. If set, this will convert all remaining line breaks after paragraphing.

Returns:

  • (String)

    Text which has been converted into correct paragraph tags.



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/wordpress_formatting/wpautop.rb', line 67

# Replaces double line breaks with paragraph elements.
#
# Runs a fixed sequence of regex replacements: text separated by blank lines
# is wrapped in <p> tags, stray <p> wrappers around block-level tags are
# removed, and (when +br+ is truthy) the remaining single newlines become
# "<br />\n". The contents of <pre> elements are swapped for placeholders
# first and restored verbatim at the end.
#
# @param pee [String, nil] the text to format; it is duplicated, not mutated
# @param br [Boolean] when truthy, remaining newlines are converted to
#   <br /> tags, except inside <script>, <style> and <svg> elements
# @return [String] the formatted text, or '' when +pee+ is nil or contains
#   only whitespace
def wpautop(pee, br = true)
  # Core-Ruby equivalent of ActiveSupport's String#blank? / NilClass#blank?.
  return '' if pee.nil? || pee.match?(/\A[[:space:]]*\z/)

  pee = pee.dup
  pre_tags = {}

  # Just to make things a little easier, pad the end.
  pee << "\n"

  # Swap every <pre>...</pre> for a placeholder so its content is left untouched.
  if pee.include? '<pre'
    pee_parts = pee.split '</pre>'
    last_pee = pee_parts.pop
    pee = ''.dup

    pee_parts.each_with_index do |pee_part, i|
      start = pee_part.index('<pre')

      # Malformed HTML?
      unless start
        pee << pee_part
        next
      end

      name = "<pre wp-pre-tag-#{i}></pre>"

      pre_tags[name] = "#{pee_part[start..]}</pre>"
      # Exclusive range: `0..start-1` turns into `0..-1` when the part begins
      # with <pre (start == 0) and would copy the whole part, <pre> included.
      pee << pee_part[0...start]
      pee << name
    end

    pee << last_pee
  end

  # Change multiple <br>'s into two line breaks, which will turn into paragraphs.
  pee.gsub! MULTIPLE_BR, "\n\n"

  # Add a double line break above block-level opening tags.
  pee.gsub! ALLBLOCKS_OPEN, "\n\n\\k<block>"

  # Add a double line break below block-level closing tags.
  pee.gsub! ALLBLOCKS_CLOSE, "\\k<block>\n\n"

  # Add a double line break after hr tags, which are self closing.
  pee.gsub! HR, "\\k<hr>\n\n"

  # Standardize newline characters to "\n".
  pee.gsub! CRLF, "\n"

  # Replace every fragment between tags that is exactly one newline with a placeholder.
  pee = pee.split(HTML_SPLIT).reduce(''.dup) do |new_pee, part|
    new_pee << (part == "\n" ? ' <!-- wpnl --> ' : part)
  end

  # Remove more than two contiguous line breaks.
  pee.gsub! MANY_NL, "\n\n"

  # Split up the contents into an array of strings, separated by double line breaks.
  pees = pee.split(SPLIT_NL).reject(&:empty?)

  # Rebuild the content as a string, wrapping every bit with a <p>.
  pee = pees.each_with_object(''.dup) do |tinkle, rebuilt|
    rebuilt << "<p>#{tinkle.strip}</p>"
  end

  # Under certain strange conditions it could create a P of entirely whitespace.
  pee.gsub! EMPTY_P, ''

  # Add a closing <p> inside <div>, <address>, or <form> tag if missing.
  pee.gsub! CLOSE_P, '<p>\\k<content></p></\\k<tag>>'

  # If an opening or closing block element tag is wrapped in a <p>, unwrap it.
  pee.gsub! UNWRAP_P, '\\k<unwrap>'

  # In some cases <li> may get wrapped in <p>, fix them.
  pee.gsub! UNWRAP_LI, '\\k<unwrap>'

  # If a <blockquote> is wrapped with a <p>, move it inside the <blockquote>.
  pee.gsub! UNWRAP_BLOCKQUOTE, '<blockquote\\k<unwrap>><p>'
  pee.gsub! '</blockquote></p>', '</p></blockquote>'

  # If an opening or closing block element tag is preceded by an opening <p> tag, remove it.
  pee.gsub! REMOVE_BLOCK_OPENING, '\\k<remove>'

  # If an opening or closing block element tag is followed by a closing <p> tag, remove it.
  pee.gsub! REMOVE_BLOCK_CLOSING, '\\k<remove>'

  # Optionally insert line breaks.
  if br
    # Replace newlines that shouldn't be touched with a placeholder.
    # PRESERVE_NL is recompiled with MULTILINE so "." also matches the
    # newlines inside the element. The block form is used because the pattern
    # has a capture group, so String#scan would yield ["script"]-style arrays
    # rather than the matched text.
    preserve_nl = Regexp.new(PRESERVE_NL.source, PRESERVE_NL.options | Regexp::MULTILINE)
    pee.gsub!(preserve_nl) { |element| element.gsub("\n", '<WPPreserveNewline />') }

    # Normalize <br>
    pee.gsub! '<br>', '<br />'
    pee.gsub! '<br/>', '<br />'

    # Replace any new line characters that aren't preceded by a <br /> with a <br />.
    pee.gsub! NLBR, "<br />\n"

    # Replace newline placeholders with newlines.
    pee.gsub! '<WPPreserveNewline />', "\n"
  end

  # If a <br /> tag is after an opening or closing block tag, remove it.
  pee.gsub! REMOVE_BR, '\\k<remove>'

  # If a <br /> tag is before a subset of opening or closing block tags, remove it.
  pee.gsub! REMOVE_BR_SUBSET, '\\k<remove>'

  # Drop a newline left directly before the final </p>. This must be a Regexp:
  # in a String pattern the "$" is a literal character and never matches.
  pee.sub!(%r{\n</p>\Z}, '</p>')

  # Replace placeholder <pre> tags with their original content. The block form
  # inserts the value verbatim; a replacement String would have its backslash
  # sequences (\\, \0, \1 ...) interpreted and corrupt the <pre> content.
  pre_tags.each_pair do |key, value|
    pee.gsub!(key) { value }
  end

  # Turn the single-newline placeholders into <br /> tags.
  pee.gsub! ' <!-- wpnl --> ', "<br />"
  pee.gsub! '<!-- wpnl -->', "<br />"

  pee
end