Class: HtmlCompressor::Compressor

Inherits:
Object
  • Object
show all
Defined in:
lib/middleman-minify-html/vendor/htmlcompressor-0.0.6/lib/htmlcompressor/compressor.rb

Constant Summary collapse

JS_COMPRESSOR_YUI =
"yui"
JS_COMPRESSOR_CLOSURE =
"closure"
PHP_TAG_PATTERN =

Predefined pattern that matches <?php ... ?> tags. Can be passed inside a list to the #setPreservePatterns(List) method.

/<\?php.*?\?>/im
SERVER_SCRIPT_TAG_PATTERN =

Predefined pattern that matches <% ... %> tags. Can be passed inside a list to the #setPreservePatterns(List) method.

/<%.*?%>/m
SERVER_SIDE_INCLUDE_PATTERN =

Predefined pattern that matches <!--# ... --> tags. Can be passed inside a list to the #setPreservePatterns(List) method.

/<!--\s*#.*?-->/m
BLOCK_TAGS_MIN =

Predefined list of tags that are very likely to be block-level. Can be passed to the #setRemoveSurroundingSpaces(String) method.

"html,head,body,br,p"
BLOCK_TAGS_MAX =

Predefined list of tags that are block-level by default, excluding <div> and <li> tags. Table tags are also included. Can be passed to the #setRemoveSurroundingSpaces(String) method.

BLOCK_TAGS_MIN + ",h1,h2,h3,h4,h5,h6,blockquote,center,dl,fieldset,form,frame,frameset,hr,noframes,ol,table,tbody,tr,td,th,tfoot,thead,ul"
ALL_TAGS =

Can be passed to the #setRemoveSurroundingSpaces(String) method to remove all surrounding spaces (not recommended).

"all"
TEMP_COND_COMMENT_BLOCK =

temp replacements for preserved blocks

"%%%~COMPRESS~COND~{0,number,#}~%%%"
TEMP_PRE_BLOCK =
"%%%~COMPRESS~PRE~{0,number,#}~%%%"
TEMP_TEXT_AREA_BLOCK =
"%%%~COMPRESS~TEXTAREA~{0,number,#}~%%%"
TEMP_SCRIPT_BLOCK =
"%%%~COMPRESS~SCRIPT~{0,number,#}~%%%"
TEMP_STYLE_BLOCK =
"%%%~COMPRESS~STYLE~{0,number,#}~%%%"
TEMP_EVENT_BLOCK =
"%%%~COMPRESS~EVENT~{0,number,#}~%%%"
TEMP_LINE_BREAK_BLOCK =
"%%%~COMPRESS~LT~{0,number,#}~%%%"
TEMP_SKIP_BLOCK =
"%%%~COMPRESS~SKIP~{0,number,#}~%%%"
TEMP_USER_BLOCK =
"%%%~COMPRESS~USER{0,number,#}~{1,number,#}~%%%"
EMPTY_PATTERN =

compiled regex patterns

Regexp.new("\\s")
SKIP_PATTERN =
Regexp.new("<!--\\s*\\{\\{\\{\\s*-->(.*?)<!--\\s*\\}\\}\\}\\s*-->", Regexp::MULTILINE | Regexp::IGNORECASE)
COND_COMMENT_PATTERN =
Regexp.new("(<!(?:--)?\\[[^\\]]+?\\]>)(.*?)(<!\\[[^\\]]+\\]-->)", Regexp::MULTILINE | Regexp::IGNORECASE)
COMMENT_PATTERN =
Regexp.new("<!---->|<!--[^\\[].*?-->", Regexp::MULTILINE | Regexp::IGNORECASE)
INTERTAG_PATTERN_TAG_TAG =
Regexp.new(">\\s+<", Regexp::MULTILINE | Regexp::IGNORECASE)
INTERTAG_PATTERN_TAG_CUSTOM =
Regexp.new(">\\s+%%%~", Regexp::MULTILINE | Regexp::IGNORECASE)
INTERTAG_PATTERN_CUSTOM_TAG =
Regexp.new("~%%%\\s+<", Regexp::MULTILINE | Regexp::IGNORECASE)
INTERTAG_PATTERN_CUSTOM_CUSTOM =
Regexp.new("~%%%\\s+%%%~", Regexp::MULTILINE | Regexp::IGNORECASE)
MULTISPACE_PATTERN =
Regexp.new("\\s+", Regexp::MULTILINE | Regexp::IGNORECASE)
TAG_END_SPACE_PATTERN =
Regexp.new("(<(?:[^>]+?))(?:\\s+?)(/?>)", Regexp::MULTILINE | Regexp::IGNORECASE)
TAG_LAST_UNQUOTED_VALUE_PATTERN =
Regexp.new("=\\s*[a-z0-9\\-_]+$", Regexp::IGNORECASE)
TAG_QUOTE_PATTERN =
Regexp.new("\\s*=\\s*([\"'])([a-z0-9\\-_]+?)\\1(/?)(?=[^<]*?>)", Regexp::IGNORECASE)
PRE_PATTERN =
Regexp.new("(<pre[^>]*?>)(.*?)(</pre>)", Regexp::MULTILINE | Regexp::IGNORECASE)
TA_PATTERN =
Regexp.new("(<textarea[^>]*?>)(.*?)(</textarea>)", Regexp::MULTILINE | Regexp::IGNORECASE)
SCRIPT_PATTERN =
Regexp.new("(<script[^>]*?>)(.*?)(</script>)", Regexp::MULTILINE | Regexp::IGNORECASE)
STYLE_PATTERN =
Regexp.new("(<style[^>]*?>)(.*?)(</style>)", Regexp::MULTILINE | Regexp::IGNORECASE)
TAG_PROPERTY_PATTERN =
Regexp.new("(\\s\\w+)\\s*=\\s*(?=[^<]*?>)", Regexp::IGNORECASE)
CDATA_PATTERN =
Regexp.new("\\s*<!\\[CDATA\\[(.*?)\\]\\]>\\s*", Regexp::MULTILINE | Regexp::IGNORECASE)
DOCTYPE_PATTERN =
Regexp.new("<!DOCTYPE[^>]*>", Regexp::MULTILINE | Regexp::IGNORECASE)
TYPE_ATTR_PATTERN =
Regexp.new("type\\s*=\\s*([\\\"']*)(.+?)\\1", Regexp::MULTILINE | Regexp::IGNORECASE)
JS_TYPE_ATTR_PATTERN =
Regexp.new("(<script[^>]*)type\\s*=\\s*([\"']*)(?:text|application)\/javascript\\2([^>]*>)", Regexp::MULTILINE | Regexp::IGNORECASE)
JS_LANG_ATTR_PATTERN =
Regexp.new("(<script[^>]*)language\\s*=\\s*([\"']*)javascript\\2([^>]*>)", Regexp::MULTILINE | Regexp::IGNORECASE)
STYLE_TYPE_ATTR_PATTERN =
Regexp.new("(<style[^>]*)type\\s*=\\s*([\"']*)text/style\\2([^>]*>)", Regexp::MULTILINE | Regexp::IGNORECASE)
Regexp.new("(<link[^>]*)type\\s*=\\s*([\"']*)text/(?:css|plain)\\2([^>]*>)", Regexp::MULTILINE | Regexp::IGNORECASE)
Regexp.new("<link(?:[^>]*)rel\\s*=\\s*([\"']*)(?:alternate\\s+)?stylesheet\\1(?:[^>]*)>", Regexp::MULTILINE | Regexp::IGNORECASE)
FORM_METHOD_ATTR_PATTERN =
Regexp.new("(<form[^>]*)method\\s*=\\s*([\"']*)get\\2([^>]*>)", Regexp::MULTILINE | Regexp::IGNORECASE)
INPUT_TYPE_ATTR_PATTERN =
Regexp.new("(<input[^>]*)type\\s*=\\s*([\"']*)text\\2([^>]*>)", Regexp::MULTILINE | Regexp::IGNORECASE)
BOOLEAN_ATTR_PATTERN =
Regexp.new("(<\\w+[^>]*)(checked|selected|disabled|readonly)\\s*=\\s*([\"']*)\\w*\\3([^>]*>)", Regexp::MULTILINE | Regexp::IGNORECASE)
EVENT_JS_PROTOCOL_PATTERN =
Regexp.new("^javascript:\\s*(.+)", Regexp::MULTILINE | Regexp::IGNORECASE)
HTTP_PROTOCOL_PATTERN =
Regexp.new("(<[^>]+?(?:href|src|cite|action)\\s*=\\s*['\"])http:(//[^>]+?>)", Regexp::MULTILINE | Regexp::IGNORECASE)
HTTPS_PROTOCOL_PATTERN =
Regexp.new("(<[^>]+?(?:href|src|cite|action)\\s*=\\s*['\"])https:(//[^>]+?>)", Regexp::MULTILINE | Regexp::IGNORECASE)
REL_EXTERNAL_PATTERN =
Regexp.new("<(?:[^>]*)rel\\s*=\\s*([\"']*)(?:alternate\\s+)?external\\1(?:[^>]*)>", Regexp::MULTILINE | Regexp::IGNORECASE)
EVENT_PATTERN1 =

unmasked: \son[a-z]+\s*=\s*"[^"\\\r\n]*(?:\\.[^"\\\r\n]*)*"

Regexp.new("(\\son[a-z]+\\s*=\\s*\")([^\"\\\\\\r\\n]*(?:\\\\.[^\"\\\\\\r\\n]*)*)(\")", Regexp::IGNORECASE)
EVENT_PATTERN2 =
Regexp.new("(\\son[a-z]+\\s*=\\s*')([^'\\\\\\r\\n]*(?:\\\\.[^'\\\\\\r\\n]*)*)(')", Regexp::IGNORECASE)
LINE_BREAK_PATTERN =
Regexp.new("(?:[[:blank:]]*(\\r?\\n)[[:blank:]]*)+")
SURROUNDING_SPACES_MIN_PATTERN =
Regexp.new("\\s*(</?(?:" + BLOCK_TAGS_MIN.gsub(",", "|") + ")(?:>|[\\s/][^>]*>))\\s*", Regexp::MULTILINE | Regexp::IGNORECASE)
SURROUNDING_SPACES_MAX_PATTERN =
Regexp.new("\\s*(</?(?:" + BLOCK_TAGS_MAX.gsub(",", "|") + ")(?:>|[\\s/][^>]*>))\\s*", Regexp::MULTILINE | Regexp::IGNORECASE)
SURROUNDING_SPACES_ALL_PATTERN =
Regexp.new("\\s*(<[^>]+>)\\s*", Regexp::MULTILINE | Regexp::IGNORECASE)
TEMP_COND_COMMENT_PATTERN =

patterns for searching for temporary replacements

Regexp.new("%%%~COMPRESS~COND~(\\d+?)~%%%")
TEMP_PRE_PATTERN =
Regexp.new("%%%~COMPRESS~PRE~(\\d+?)~%%%")
TEMP_TEXT_AREA_PATTERN =
Regexp.new("%%%~COMPRESS~TEXTAREA~(\\d+?)~%%%")
TEMP_SCRIPT_PATTERN =
Regexp.new("%%%~COMPRESS~SCRIPT~(\\d+?)~%%%")
TEMP_STYLE_PATTERN =
Regexp.new("%%%~COMPRESS~STYLE~(\\d+?)~%%%")
TEMP_EVENT_PATTERN =
Regexp.new("%%%~COMPRESS~EVENT~(\\d+?)~%%%")
TEMP_SKIP_PATTERN =
Regexp.new("%%%~COMPRESS~SKIP~(\\d+?)~%%%")
TEMP_LINE_BREAK_PATTERN =
Regexp.new("%%%~COMPRESS~LT~(\\d+?)~%%%")
# Baseline settings for a Compressor. Grouped below by their default value.
DEFAULT_OPTIONS = {
  # switched on unless the caller overrides them
  :enabled                    => true,
  :remove_comments            => true,
  :remove_multi_spaces        => true,

  # switched off unless the caller opts in
  :remove_intertag_spaces     => false,
  :remove_quotes              => false,
  :compress_javascript        => false,
  :compress_css               => false,
  :simple_doctype             => false,
  :remove_script_attributes   => false,
  :remove_style_attributes    => false,
  :remove_link_attributes     => false,
  :remove_form_attributes     => false,
  :remove_input_attributes    => false,
  :simple_boolean_attributes  => false,
  :remove_javascript_protocol => false,
  :remove_http_protocol       => false,
  :remove_https_protocol      => false,
  :preserve_line_breaks       => false,

  # no value supplied by default
  :remove_surrounding_spaces  => nil,
  :preserve_patterns          => nil,
  :javascript_compressor      => nil,
  :css_compressor             => nil
}

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Compressor

Returns a new instance of Compressor.



125
126
127
128
129
130
131
132
133
134
135
# File 'lib/middleman-minify-html/vendor/htmlcompressor-0.0.6/lib/htmlcompressor/compressor.rb', line 125

# Builds a compressor whose settings are DEFAULT_OPTIONS overlaid with the
# caller's overrides.
#
# @param options [Hash] setting overrides; on a key collision the caller's
#   value replaces the default
def initialize(options = {})
  # Hash#merge returns a new hash, so DEFAULT_OPTIONS itself is not modified.
  @options = DEFAULT_OPTIONS.merge(options)

  # YUICompressor settings, assigned pairwise (left to right)
  @yuiCssLineBreak, @yuiJsNoMunge = -1, false
  @yuiJsPreserveAllSemiColons, @yuiJsDisableOptimizations = false, false
end

Instance Method Details

#compress(html) ⇒ Object



137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/middleman-minify-html/vendor/htmlcompressor-0.0.6/lib/htmlcompressor/compressor.rb', line 137

# Runs the compression pipeline over +html+.
#
# The input is handed back untouched when the :enabled option is falsy, or
# when +html+ is nil or has zero length. Otherwise the work happens in four
# steps: preserve blocks, process the remaining html, process the preserved
# blocks, and put the preserved blocks back.
#
# @param html the markup to compress
# @return the result of the final return_blocks call, or +html+ itself on
#   the early-exit path
def compress(html)
  return html if !@options[:enabled] || html.nil? || html.length.zero?

  # One fresh container per kind of preserved block.
  pre_blocks = []
  textarea_blocks = []
  script_blocks = []
  style_blocks = []
  event_blocks = []
  cond_comment_blocks = []
  skip_blocks = []
  line_break_blocks = []
  user_blocks = []

  # The helpers all take the containers in this exact positional order.
  containers = [
    pre_blocks, textarea_blocks, script_blocks, style_blocks, event_blocks,
    cond_comment_blocks, skip_blocks, line_break_blocks, user_blocks
  ]

  # preserve blocks, then process what is left as pure html
  stripped = preserve_blocks(html, *containers)
  processed = process_html(stripped)

  # process preserved blocks (return value is not used)
  process_preserved_blocks(*containers)

  # put preserved blocks back
  return_blocks(processed, *containers)
end