Module: WordpressFormatting::Wpautop
- Defined in:
- lib/wordpress_formatting/wpautop.rb
Constant Summary collapse
- MULTIPLE_BR =
%r{<br\s*/?>\s*<br\s*/?>}
- ALLBLOCKS_OPEN =
%r{(?<block><(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)[\s/>])}
- ALLBLOCKS_CLOSE =
%r{(?<block></(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)>)}
- HR =
%r{(?<hr><hr\s*?/?>)}
- CRLF =
%r{(\r\n|\r)}
- MANY_NL =
%r{\n\n+}
- SPLIT_NL =
%r{\n\s*\n}
- EMPTY_P =
%r{<p>\s*</p>}
- CLOSE_P =
%r{<p>(?<content>[^<]+)</(?<tag>div|address|form)>}
- UNWRAP_P =
%r{<p>\s*(?<unwrap></?(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)[^>]*>)\s*</p>}
- UNWRAP_LI =
%r{<p>(?<unwrap><li.+?)</p>}
- UNWRAP_BLOCKQUOTE =
%r{<p><blockquote(?<unwrap>[^>]*)>}i
- REMOVE_BLOCK_OPENING =
%r{<p>\s*(?<remove></?(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)[^>]*>)}
- REMOVE_BLOCK_CLOSING =
%r{(?<remove></?(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)[^>]*>)\s*</p>}
- NLBR =
%r{(?<!<br />)\s*\n}
- REMOVE_BR =
%r{(?<remove></?(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)[^>]*>)\s*<br />}
- REMOVE_BR_SUBSET =
%r{<br />(?<remove>\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)}
- PRESERVE_NL =
%r{<(script|style|svg).*?<\/\1>}
- HTML_SPLIT =
XXX: Simplified because the RegExp with comments and CDATA doesn’t compile
%r{(<[^>]*>?)}
Class Method Summary collapse
-
.included(base) ⇒ Object
Allows a class (such as String) to be extended so that its instances can perform WP formatting.
-
.wpautop(pee, br = true) ⇒ String
Replaces double line breaks with paragraph elements.
Class Method Details
.included(base) ⇒ Object
Allows a class (such as String) to be extended so that its instances can perform WP formatting.
45 46 47 48 49 50 51 |
# File 'lib/wordpress_formatting/wpautop.rb', line 45
#
# Adds an instance-level +wpautop+ method to +base+ so that its instances
# (for example Strings) can be formatted directly.
#
# @param base [Class, Module] the class or module receiving the new method
# @return [Object] whatever +base.class_eval+ returns
def included(base)
  base.class_eval do
    # Formats the receiver's string representation with
    # WordpressFormatting::Wpautop.wpautop.
    #
    # @param br [Boolean] forwarded unchanged as the +br+ argument
    # @return [String] the formatted text
    def wpautop(br = true)
      WordpressFormatting::Wpautop.wpautop(to_s, br)
    end
  end
end
.wpautop(pee, br = true) ⇒ String
Replaces double line breaks with paragraph elements.
A group of regex replacements used to identify text formatted with newlines and replace double line breaks with HTML paragraph tags. The remaining line breaks after conversion become `<br />` tags, unless `br` is `false` or `nil`.
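Parameters:
- pee (String) — the text to be formatted.
- br (Boolean) — optional. If set, this will convert all remaining line breaks after paragraphing. Line breaks within `<script>`, `<style>`, and `<svg>` tags are not affected. Default true.
Returns:
- (String) — text which has been converted into correct paragraph tags.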
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
# File 'lib/wordpress_formatting/wpautop.rb', line 67
#
# Replaces double line breaks with paragraph elements.
#
# Runs a fixed sequence of regex substitutions: text separated by blank
# lines is wrapped in <p> tags, stray <p> wrappers around block-level tags
# are removed, and (when +br+ is truthy) the remaining single newlines are
# turned into <br /> tags. The contents of <pre> elements are set aside
# before processing and restored verbatim at the end.
#
# @param pee [String] the text to format
# @param br [Boolean] when truthy, convert the line breaks that remain
#   after paragraphing into <br /> tags
# @return [String] the formatted text, or '' when +pee+ is blank
def wpautop(pee, br = true)
  # NOTE(review): blank? is not core Ruby; presumably ActiveSupport (or an
  # equivalent extension) is loaded by the surrounding project -- confirm.
  return '' if pee.blank?

  # Work on a copy so the caller's string is never mutated.
  pee = pee.dup
  # Maps each placeholder tag to the original <pre>...</pre> markup it stands for.
  pre_tags = {}

  # Just to make things a little easier, pad the end.
  pee << "\n"

  if pee.include?('<pre')
    pee_parts = pee.split('</pre>')
    last_pee = pee_parts.pop
    pee = ''.dup

    pee_parts.each_with_index do |pee_part, i|
      start = pee_part.index('<pre')

      # Malformed HTML? Keep the part as it is.
      unless start
        pee << pee_part
        next
      end

      name = "<pre wp-pre-tag-#{i}></pre>"
      pre_tags[name] = "#{pee_part[start..]}</pre>"

      # Exclusive range: with an inclusive `0..start - 1` a <pre> at index 0
      # would produce `0..-1` and copy the whole part, <pre> content included.
      pee << pee_part[0...start]
      pee << name
    end

    pee << last_pee
  end

  # Change multiple <br>'s into two line breaks, which will turn into paragraphs.
  pee.gsub!(MULTIPLE_BR, "\n\n")

  # Add a double line break above block-level opening tags.
  pee.gsub!(ALLBLOCKS_OPEN, "\n\n\\k<block>")

  # Add a double line break below block-level closing tags.
  pee.gsub!(ALLBLOCKS_CLOSE, "\\k<block>\n\n")

  # Add a double line break after hr tags, which are self closing.
  pee.gsub!(HR, "\\k<hr>\n\n")

  # Standardize newline characters to "\n".
  pee.gsub!(CRLF, "\n")

  # Swap every split part that is exactly one newline for a placeholder.
  pee = pee.split(HTML_SPLIT).each_with_object(''.dup) do |part, rebuilt|
    rebuilt << (part == "\n" ? ' <!-- wpnl --> ' : part)
  end

  # Remove more than two contiguous line breaks.
  pee.gsub!(MANY_NL, "\n\n")

  # Split up the contents into an array of strings, separated by double line breaks.
  pees = pee.split(SPLIT_NL).reject(&:empty?)

  # Rebuild the content as a string, wrapping every bit with a <p>.
  pee = pees.each_with_object(''.dup) do |tinkle, rebuilt|
    rebuilt << "<p>#{tinkle.strip}</p>"
  end

  # Under certain strange conditions it could create a P of entirely whitespace.
  pee.gsub!(EMPTY_P, '')

  # Add a closing <p> inside <div>, <address>, or <form> tag if missing.
  pee.gsub!(CLOSE_P, '<p>\\k<content></p></\\k<tag>>')

  # If an opening or closing block element tag is wrapped in a <p>, unwrap it.
  pee.gsub!(UNWRAP_P, '\\k<unwrap>')

  # In some cases <li> may get wrapped in <p>, fix them.
  pee.gsub!(UNWRAP_LI, '\\k<unwrap>')

  # If a <blockquote> is wrapped with a <p>, move it inside the <blockquote>.
  pee.gsub!(UNWRAP_BLOCKQUOTE, '<blockquote\\k<unwrap>><p>')
  pee.gsub!('</blockquote></p>', '</p></blockquote>')

  # If an opening or closing block element tag is preceded by an opening <p> tag, remove it.
  pee.gsub!(REMOVE_BLOCK_OPENING, '\\k<remove>')

  # If an opening or closing block element tag is followed by a closing <p> tag, remove it.
  pee.gsub!(REMOVE_BLOCK_CLOSING, '\\k<remove>')

  # Optionally insert line breaks.
  if br
    # Replace newlines that shouldn't be touched with a placeholder. The
    # block form receives the whole match; String#scan would instead yield
    # only the capture group (an Array) because PRESERVE_NL contains one.
    # NOTE(review): PRESERVE_NL carries no /m flag, so `.` does not cross
    # newlines and multi-line blocks are not matched -- confirm the intent.
    pee.gsub!(PRESERVE_NL) { |preserved| preserved.gsub("\n", '<WPPreserveNewline />') }

    # Normalize <br>
    pee.gsub!('<br>', '<br />')
    pee.gsub!('<br/>', '<br />')

    # Replace any new line characters that aren't preceded by a <br /> with a <br />.
    pee.gsub!(NLBR, "<br />\n")

    # Replace newline placeholders with newlines.
    pee.gsub!('<WPPreserveNewline />', "\n")
  end

  # If a <br /> tag is after an opening or closing block tag, remove it.
  pee.gsub!(REMOVE_BR, '\\k<remove>')

  # If a <br /> tag is before a subset of opening or closing block tags, remove it.
  pee.gsub!(REMOVE_BR_SUBSET, '\\k<remove>')

  # Drop a newline sitting directly before a line-ending </p>. This has to be
  # a Regexp: as a String pattern the `$` is a literal character and the
  # substitution can never match.
  pee.gsub!(%r{\n</p>$}, '</p>')

  # Replace placeholder <pre> tags with their original content. The block
  # form inserts the saved markup verbatim; a String replacement would
  # interpret backslash sequences (e.g. "\0") found inside the <pre> content.
  pre_tags.each_pair do |placeholder, original|
    pee.gsub!(placeholder) { original }
  end

  # Turn the remaining newline placeholders into <br /> tags.
  pee.gsub!(' <!-- wpnl --> ', '<br />')
  pee.gsub!('<!-- wpnl -->', '<br />')

  pee
end