Module: HTML5
- Included in:
- HTMLInputStreamTest, Html5EncodingTestCase, Html5ParserTestCase, SanitizeTest, TestFeedTypeSniffer
- Defined in:
- lib/feed_tools/vendor/html5/lib/html5/treebuilders/base.rb,
lib/feed_tools/vendor/html5/lib/html5.rb,
lib/feed_tools/vendor/html5/tests/preamble.rb,
lib/feed_tools/vendor/html5/lib/html5/sniffer.rb,
lib/feed_tools/vendor/html5/lib/html5/version.rb,
lib/feed_tools/vendor/html5/lib/html5/constants.rb,
lib/feed_tools/vendor/html5/lib/html5/tokenizer.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser.rb,
lib/feed_tools/vendor/html5/lib/html5/treewalkers.rb,
lib/feed_tools/vendor/html5/lib/html5/filters/base.rb,
lib/feed_tools/vendor/html5/lib/html5/treebuilders.rb,
lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb,
lib/feed_tools/vendor/html5/lib/html5/treewalkers/base.rb,
lib/feed_tools/vendor/html5/lib/html5/filters/sanitizer.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/phase.rb,
lib/feed_tools/vendor/html5/lib/html5/treewalkers/rexml.rb,
lib/feed_tools/vendor/html5/lib/html5/filters/whitespace.rb,
lib/feed_tools/vendor/html5/lib/html5/treebuilders/rexml.rb,
lib/feed_tools/vendor/html5/lib/html5/treewalkers/hpricot.rb,
lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb,
lib/feed_tools/vendor/html5/lib/html5/treebuilders/hpricot.rb,
lib/feed_tools/vendor/html5/lib/html5/treewalkers/simpletree.rb,
lib/feed_tools/vendor/html5/lib/html5/treebuilders/simpletree.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/in_row_phase.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/in_body_phase.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/in_cell_phase.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/in_head_phase.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/initial_phase.rb,
lib/feed_tools/vendor/html5/lib/html5/serializer/htmlserializer.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_phase.rb,
lib/feed_tools/vendor/html5/lib/html5/serializer/xhtmlserializer.rb,
lib/feed_tools/vendor/html5/lib/html5/filters/inject_meta_charset.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/in_select_phase.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/after_body_phase.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/after_head_phase.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/in_caption_phase.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/before_head_phase.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/in_frameset_phase.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/root_element_phase.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/trailing_end_phase.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/in_table_body_phase.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/after_frameset_phase.rb,
lib/feed_tools/vendor/html5/lib/html5/html5parser/in_column_group_phase.rb
Overview
XXX - TODO; make the default interface more ElementTree-like rather than DOM-like
Defined Under Namespace
Modules: Filters, Sniffer, TestSupport, TreeBuilders, TreeWalkers Classes: AfterBodyPhase, AfterFramesetPhase, AfterHeadPhase, AssertionError, BeforeHeadPhase, EOF, HTMLParser, HTMLSerializer, HTMLTokenizer, InBodyPhase, InCaptionPhase, InCellPhase, InColumnGroupPhase, InFramesetPhase, InHeadPhase, InRowPhase, InSelectPhase, InTableBodyPhase, InTablePhase, InitialPhase, ParseError, Phase, RootElementPhase, SerializeError, TrailingEndPhase, XHTMLParser, XHTMLSerializer, XMLParser, XhmlRootPhase, XmlElementPhase, XmlRootPhase
Constant Summary collapse
- VERSION =
'0.10.0'
- CONTENT_MODEL_FLAGS =
[ :PCDATA, :RCDATA, :CDATA, :PLAINTEXT ]
- SCOPING_ELEMENTS =
%w[ button caption html marquee object table td th ]
- FORMATTING_ELEMENTS =
%w[ a b big em font i nobr s small strike strong tt u ]
- SPECIAL_ELEMENTS =
%w[ address area base basefont bgsound blockquote body br center col colgroup dd dir div dl dt embed fieldset form frame frameset h1 h2 h3 h4 h5 h6 head hr iframe image img input isindex li link listing menu meta noembed noframes noscript ol optgroup option p param plaintext pre script select spacer style tbody textarea tfoot thead title tr ul wbr ]
- SPACE_CHARACTERS =
%W[ \t \n \x0B \x0C \x20 \r ]
- TABLE_INSERT_MODE_ELEMENTS =
%w[ table tbody tfoot thead tr ]
- ASCII_LOWERCASE =
('a'..'z').to_a.join('')
- ASCII_UPPERCASE =
('A'..'Z').to_a.join('')
- ASCII_LETTERS =
ASCII_LOWERCASE + ASCII_UPPERCASE
- DIGITS =
'0'..'9'
- HEX_DIGITS =
DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a
- HEADING_ELEMENTS =
Heading elements need to be ordered
%w[ h1 h2 h3 h4 h5 h6 ]
- VOID_ELEMENTS =
XXX What about event-source and command?
%w[ base link meta hr br img embed param area col input ]
- CDATA_ELEMENTS =
%w[title textarea]
- RCDATA_ELEMENTS =
%w[ style script xmp iframe noembed noframes noscript ]
- BOOLEAN_ATTRIBUTES =
{ :global => %w[irrelevant], 'style' => %w[scoped], 'img' => %w[ismap], 'audio' => %w[autoplay controls], 'video' => %w[autoplay controls], 'script' => %w[defer async], 'details' => %w[open], 'datagrid' => %w[multiple disabled], 'command' => %w[hidden disabled checked default], 'menu' => %w[autosubmit], 'fieldset' => %w[disabled readonly], 'option' => %w[disabled readonly selected], 'optgroup' => %w[disabled readonly], 'button' => %w[disabled autofocus], 'input' => %w[disabled readonly required autofocus checked ismap], 'select' => %w[disabled readonly autofocus multiple], 'output' => %w[disabled readonly] }
- ENTITIES_WINDOWS1252 =
entitiesWindows1252 has to be ordered and needs to have an index.
[ 8364, # 0x80 0x20AC EURO SIGN 65533, # 0x81 UNDEFINED 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS 8224, # 0x86 0x2020 DAGGER 8225, # 0x87 0x2021 DOUBLE DAGGER 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT 8240, # 0x89 0x2030 PER MILLE SIGN 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE 65533, # 0x8D UNDEFINED 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON 65533, # 0x8F UNDEFINED 65533, # 0x90 UNDEFINED 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK 8226, # 0x95 0x2022 BULLET 8211, # 0x96 0x2013 EN DASH 8212, # 0x97 0x2014 EM DASH 732, # 0x98 0x02DC SMALL TILDE 8482, # 0x99 0x2122 TRADE MARK SIGN 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE 65533, # 0x9D UNDEFINED 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS ]
- ENTITIES =
ENTITIES was generated from Python using the following code:
import constants entities = constants.entities.items() entities.sort() list = [ ‘ ’.join([repr(entity), ‘=>’, ord(value)<128 and
repr(str(value)) or repr(value.encode('utf-8')).replace("'",'"')]) for entity, value in entities] print ' ENTITIES = {\n ' + ',\n '.join(list) + '\n }'
{ 'AElig' => "\xc3\x86", 'AElig;' => "\xc3\x86", 'AMP' => '&', 'AMP;' => '&', 'Aacute' => "\xc3\x81", 'Aacute;' => "\xc3\x81", 'Acirc' => "\xc3\x82", 'Acirc;' => "\xc3\x82", 'Agrave' => "\xc3\x80", 'Agrave;' => "\xc3\x80", 'Alpha;' => "\xce\x91", 'Aring' => "\xc3\x85", 'Aring;' => "\xc3\x85", 'Atilde' => "\xc3\x83", 'Atilde;' => "\xc3\x83", 'Auml' => "\xc3\x84", 'Auml;' => "\xc3\x84", 'Beta;' => "\xce\x92", 'COPY' => "\xc2\xa9", 'COPY;' => "\xc2\xa9", 'Ccedil' => "\xc3\x87", 'Ccedil;' => "\xc3\x87", 'Chi;' => "\xce\xa7", 'Dagger;' => "\xe2\x80\xa1", 'Delta;' => "\xce\x94", 'ETH' => "\xc3\x90", 'ETH;' => "\xc3\x90", 'Eacute' => "\xc3\x89", 'Eacute;' => "\xc3\x89", 'Ecirc' => "\xc3\x8a", 'Ecirc;' => "\xc3\x8a", 'Egrave' => "\xc3\x88", 'Egrave;' => "\xc3\x88", 'Epsilon;' => "\xce\x95", 'Eta;' => "\xce\x97", 'Euml' => "\xc3\x8b", 'Euml;' => "\xc3\x8b", 'GT' => '>', 'GT;' => '>', 'Gamma;' => "\xce\x93", 'Iacute' => "\xc3\x8d", 'Iacute;' => "\xc3\x8d", 'Icirc' => "\xc3\x8e", 'Icirc;' => "\xc3\x8e", 'Igrave' => "\xc3\x8c", 'Igrave;' => "\xc3\x8c", 'Iota;' => "\xce\x99", 'Iuml' => "\xc3\x8f", 'Iuml;' => "\xc3\x8f", 'Kappa;' => "\xce\x9a", 'LT' => '<', 'LT;' => '<', 'Lambda;' => "\xce\x9b", 'Mu;' => "\xce\x9c", 'Ntilde' => "\xc3\x91", 'Ntilde;' => "\xc3\x91", 'Nu;' => "\xce\x9d", 'OElig;' => "\xc5\x92", 'Oacute' => "\xc3\x93", 'Oacute;' => "\xc3\x93", 'Ocirc' => "\xc3\x94", 'Ocirc;' => "\xc3\x94", 'Ograve' => "\xc3\x92", 'Ograve;' => "\xc3\x92", 'Omega;' => "\xce\xa9", 'Omicron;' => "\xce\x9f", 'Oslash' => "\xc3\x98", 'Oslash;' => "\xc3\x98", 'Otilde' => "\xc3\x95", 'Otilde;' => "\xc3\x95", 'Ouml' => "\xc3\x96", 'Ouml;' => "\xc3\x96", 'Phi;' => "\xce\xa6", 'Pi;' => "\xce\xa0", 'Prime;' => "\xe2\x80\xb3", 'Psi;' => "\xce\xa8", 'QUOT' => '"', 'QUOT;' => '"', 'REG' => "\xc2\xae", 'REG;' => "\xc2\xae", 'Rho;' => "\xce\xa1", 'Scaron;' => "\xc5\xa0", 'Sigma;' => "\xce\xa3", 'THORN' => "\xc3\x9e", 'THORN;' => "\xc3\x9e", 'TRADE;' => "\xe2\x84\xa2", 'Tau;' => "\xce\xa4", 'Theta;' => "\xce\x98", 'Uacute' => "\xc3\x9a", 'Uacute;' => "\xc3\x9a", 'Ucirc' => "\xc3\x9b", 'Ucirc;' => "\xc3\x9b", 'Ugrave' => "\xc3\x99", 'Ugrave;' => "\xc3\x99", 'Upsilon;' => "\xce\xa5", 'Uuml' => "\xc3\x9c", 'Uuml;' => "\xc3\x9c", 'Xi;' => "\xce\x9e", 'Yacute' => "\xc3\x9d", 'Yacute;' => "\xc3\x9d", 'Yuml;' => "\xc5\xb8", 'Zeta;' => "\xce\x96", 'aacute' => "\xc3\xa1", 'aacute;' => "\xc3\xa1", 'acirc' => "\xc3\xa2", 'acirc;' => "\xc3\xa2", 'acute' => "\xc2\xb4", 'acute;' => "\xc2\xb4", 'aelig' => "\xc3\xa6", 'aelig;' => "\xc3\xa6", 'agrave' => "\xc3\xa0", 'agrave;' => "\xc3\xa0", 'alefsym;' => "\xe2\x84\xb5", 'alpha;' => "\xce\xb1", 'amp' => '&', 'amp;' => '&', 'and;' => "\xe2\x88\xa7", 'ang;' => "\xe2\x88\xa0", 'apos;' => "'", 'aring' => "\xc3\xa5", 'aring;' => "\xc3\xa5", 'asymp;' => "\xe2\x89\x88", 'atilde' => "\xc3\xa3", 'atilde;' => "\xc3\xa3", 'auml' => "\xc3\xa4", 'auml;' => "\xc3\xa4", 'bdquo;' => "\xe2\x80\x9e", 'beta;' => "\xce\xb2", 'brvbar' => "\xc2\xa6", 'brvbar;' => "\xc2\xa6", 'bull;' => "\xe2\x80\xa2", 'cap;' => "\xe2\x88\xa9", 'ccedil' => "\xc3\xa7", 'ccedil;' => "\xc3\xa7", 'cedil' => "\xc2\xb8", 'cedil;' => "\xc2\xb8", 'cent' => "\xc2\xa2", 'cent;' => "\xc2\xa2", 'chi;' => "\xcf\x87", 'circ;' => "\xcb\x86", 'clubs;' => "\xe2\x99\xa3", 'cong;' => "\xe2\x89\x85", 'copy' => "\xc2\xa9", 'copy;' => "\xc2\xa9", 'crarr;' => "\xe2\x86\xb5", 'cup;' => "\xe2\x88\xaa", 'curren' => "\xc2\xa4", 'curren;' => "\xc2\xa4", 'dArr;' => "\xe2\x87\x93", 'dagger;' => "\xe2\x80\xa0", 'darr;' => "\xe2\x86\x93", 'deg' => "\xc2\xb0", 'deg;' => "\xc2\xb0", 'delta;' => "\xce\xb4", 'diams;' => "\xe2\x99\xa6", 'divide' => "\xc3\xb7", 'divide;' => "\xc3\xb7", 'eacute' => "\xc3\xa9", 'eacute;' => "\xc3\xa9", 'ecirc' => "\xc3\xaa", 'ecirc;' => "\xc3\xaa", 'egrave' => "\xc3\xa8", 'egrave;' => "\xc3\xa8", 'empty;' => "\xe2\x88\x85", 'emsp;' => "\xe2\x80\x83", 'ensp;' => "\xe2\x80\x82", 'epsilon;' => "\xce\xb5", 'equiv;' => "\xe2\x89\xa1", 'eta;' => "\xce\xb7", 'eth' => "\xc3\xb0", 'eth;' => "\xc3\xb0", 'euml' => "\xc3\xab", 'euml;' => "\xc3\xab", 'euro;' => "\xe2\x82\xac", 'exist;' => "\xe2\x88\x83", 'fnof;' => "\xc6\x92", 'forall;' => "\xe2\x88\x80", 'frac12' => "\xc2\xbd", 'frac12;' => "\xc2\xbd", 'frac14' => "\xc2\xbc", 'frac14;' => "\xc2\xbc", 'frac34' => "\xc2\xbe", 'frac34;' => "\xc2\xbe", 'frasl;' => "\xe2\x81\x84", 'gamma;' => "\xce\xb3", 'ge;' => "\xe2\x89\xa5", 'gt' => '>', 'gt;' => '>', 'hArr;' => "\xe2\x87\x94", 'harr;' => "\xe2\x86\x94", 'hearts;' => "\xe2\x99\xa5", 'hellip;' => "\xe2\x80\xa6", 'iacute' => "\xc3\xad", 'iacute;' => "\xc3\xad", 'icirc' => "\xc3\xae", 'icirc;' => "\xc3\xae", 'iexcl' => "\xc2\xa1", 'iexcl;' => "\xc2\xa1", 'igrave' => "\xc3\xac", 'igrave;' => "\xc3\xac", 'image;' => "\xe2\x84\x91", 'infin;' => "\xe2\x88\x9e", 'int;' => "\xe2\x88\xab", 'iota;' => "\xce\xb9", 'iquest' => "\xc2\xbf", 'iquest;' => "\xc2\xbf", 'isin;' => "\xe2\x88\x88", 'iuml' => "\xc3\xaf", 'iuml;' => "\xc3\xaf", 'kappa;' => "\xce\xba", 'lArr;' => "\xe2\x87\x90", 'lambda;' => "\xce\xbb", 'lang;' => "\xe3\x80\x88", 'laquo' => "\xc2\xab", 'laquo;' => "\xc2\xab", 'larr;' => "\xe2\x86\x90", 'lceil;' => "\xe2\x8c\x88", 'ldquo;' => "\xe2\x80\x9c", 'le;' => "\xe2\x89\xa4", 'lfloor;' => "\xe2\x8c\x8a", 'lowast;' => "\xe2\x88\x97", 'loz;' => "\xe2\x97\x8a", 'lrm;' => "\xe2\x80\x8e", 'lsaquo;' => "\xe2\x80\xb9", 'lsquo;' => "\xe2\x80\x98", 'lt' => '<', 'lt;' => '<', 'macr' => "\xc2\xaf", 'macr;' => "\xc2\xaf", 'mdash;' => "\xe2\x80\x94", 'micro' => "\xc2\xb5", 'micro;' => "\xc2\xb5", 'middot' => "\xc2\xb7", 'middot;' => "\xc2\xb7", 'minus;' => "\xe2\x88\x92", 'mu;' => "\xce\xbc", 'nabla;' => "\xe2\x88\x87", 'nbsp' => "\xc2\xa0", 'nbsp;' => "\xc2\xa0", 'ndash;' => "\xe2\x80\x93", 'ne;' => "\xe2\x89\xa0", 'ni;' => "\xe2\x88\x8b", 'not' => "\xc2\xac", 'not;' => "\xc2\xac", 'notin;' => "\xe2\x88\x89", 'nsub;' => "\xe2\x8a\x84", 'ntilde' => "\xc3\xb1", 'ntilde;' => "\xc3\xb1", 'nu;' => "\xce\xbd", 'oacute' => "\xc3\xb3", 'oacute;' => "\xc3\xb3", 'ocirc' => "\xc3\xb4", 'ocirc;' => "\xc3\xb4", 'oelig;' => "\xc5\x93", 'ograve' => "\xc3\xb2", 'ograve;' => "\xc3\xb2", 'oline;' => "\xe2\x80\xbe", 'omega;' => "\xcf\x89", 'omicron;' => "\xce\xbf", 'oplus;' => "\xe2\x8a\x95", 'or;' => "\xe2\x88\xa8", 'ordf' => "\xc2\xaa", 'ordf;' => "\xc2\xaa", 'ordm' => "\xc2\xba", 'ordm;' => "\xc2\xba", 'oslash' => "\xc3\xb8", 'oslash;' => "\xc3\xb8", 'otilde' => "\xc3\xb5", 'otilde;' => "\xc3\xb5", 'otimes;' => "\xe2\x8a\x97", 'ouml' => "\xc3\xb6", 'ouml;' => "\xc3\xb6", 'para' => "\xc2\xb6", 'para;' => "\xc2\xb6", 'part;' => "\xe2\x88\x82", 'permil;' => "\xe2\x80\xb0", 'perp;' => "\xe2\x8a\xa5", 'phi;' => "\xcf\x86", 'pi;' => "\xcf\x80", 'piv;' => "\xcf\x96", 'plusmn' => "\xc2\xb1", 'plusmn;' => "\xc2\xb1", 'pound' => "\xc2\xa3", 'pound;' => "\xc2\xa3", 'prime;' => "\xe2\x80\xb2", 'prod;' => "\xe2\x88\x8f", 'prop;' => "\xe2\x88\x9d", 'psi;' => "\xcf\x88", 'quot' => '"', 'quot;' => '"', 'rArr;' => "\xe2\x87\x92", 'radic;' => "\xe2\x88\x9a", 'rang;' => "\xe3\x80\x89", 'raquo' => "\xc2\xbb", 'raquo;' => "\xc2\xbb", 'rarr;' => "\xe2\x86\x92", 'rceil;' => "\xe2\x8c\x89", 'rdquo;' => "\xe2\x80\x9d", 'real;' => "\xe2\x84\x9c", 'reg' => "\xc2\xae", 'reg;' => "\xc2\xae", 'rfloor;' => "\xe2\x8c\x8b", 'rho;' => "\xcf\x81", 'rlm;' => "\xe2\x80\x8f", 'rsaquo;' => "\xe2\x80\xba", 'rsquo;' => "\xe2\x80\x99", 'sbquo;' => "\xe2\x80\x9a", 'scaron;' => "\xc5\xa1", 'sdot;' => "\xe2\x8b\x85", 'sect' => "\xc2\xa7", 'sect;' => "\xc2\xa7", 'shy' => "\xc2\xad", 'shy;' => "\xc2\xad", 'sigma;' => "\xcf\x83", 'sigmaf;' => "\xcf\x82", 'sim;' => "\xe2\x88\xbc", 'spades;' => "\xe2\x99\xa0", 'sub;' => "\xe2\x8a\x82", 'sube;' => "\xe2\x8a\x86", 'sum;' => "\xe2\x88\x91", 'sup1' => "\xc2\xb9", 'sup1;' => "\xc2\xb9", 'sup2' => "\xc2\xb2", 'sup2;' => "\xc2\xb2", 'sup3' => "\xc2\xb3", 'sup3;' => "\xc2\xb3", 'sup;' => "\xe2\x8a\x83", 'supe;' => "\xe2\x8a\x87", 'szlig' => "\xc3\x9f", 'szlig;' => "\xc3\x9f", 'tau;' => "\xcf\x84", 'there4;' => "\xe2\x88\xb4", 'theta;' => "\xce\xb8", 'thetasym;' => "\xcf\x91", 'thinsp;' => "\xe2\x80\x89", 'thorn' => "\xc3\xbe", 'thorn;' => "\xc3\xbe", 'tilde;' => "\xcb\x9c", 'times' => "\xc3\x97", 'times;' => "\xc3\x97", 'trade;' => "\xe2\x84\xa2", 'uArr;' => "\xe2\x87\x91", 'uacute' => "\xc3\xba", 'uacute;' => "\xc3\xba", 'uarr;' => "\xe2\x86\x91", 'ucirc' => "\xc3\xbb", 'ucirc;' => "\xc3\xbb", 'ugrave' => "\xc3\xb9", 'ugrave;' => "\xc3\xb9", 'uml' => "\xc2\xa8", 'uml;' => "\xc2\xa8", 'upsih;' => "\xcf\x92", 'upsilon;' => "\xcf\x85", 'uuml' => "\xc3\xbc", 'uuml;' => "\xc3\xbc", 'weierp;' => "\xe2\x84\x98", 'xi;' => "\xce\xbe", 'yacute' => "\xc3\xbd", 'yacute;' => "\xc3\xbd", 'yen' => "\xc2\xa5", 'yen;' => "\xc2\xa5", 'yuml' => "\xc3\xbf", 'yuml;' => "\xc3\xbf", 'zeta;' => "\xce\xb6", 'zwj;' => "\xe2\x80\x8d", 'zwnj;' => "\xe2\x80\x8c" }
- ENCODINGS =
%w[ ansi_x3.4-1968 iso-ir-6 ansi_x3.4-1986 iso_646.irv:1991 ascii iso646-us us-ascii us ibm367 cp367 csascii ks_c_5601-1987 korean iso-2022-kr csiso2022kr euc-kr iso-2022-jp csiso2022jp iso-2022-jp-2 iso-ir-58 chinese csiso58gb231280 iso_8859-1:1987 iso-ir-100 iso_8859-1 iso-8859-1 latin1 l1 ibm819 cp819 csisolatin1 iso_8859-2:1987 iso-ir-101 iso_8859-2 iso-8859-2 latin2 l2 csisolatin2 iso_8859-3:1988 iso-ir-109 iso_8859-3 iso-8859-3 latin3 l3 csisolatin3 iso_8859-4:1988 iso-ir-110 iso_8859-4 iso-8859-4 latin4 l4 csisolatin4 iso_8859-6:1987 iso-ir-127 iso_8859-6 iso-8859-6 ecma-114 asmo-708 arabic csisolatinarabic iso_8859-7:1987 iso-ir-126 iso_8859-7 iso-8859-7 elot_928 ecma-118 greek greek8 csisolatingreek iso_8859-8:1988 iso-ir-138 iso_8859-8 iso-8859-8 hebrew csisolatinhebrew iso_8859-5:1988 iso-ir-144 iso_8859-5 iso-8859-5 cyrillic csisolatincyrillic iso_8859-9:1989 iso-ir-148 iso_8859-9 iso-8859-9 latin5 l5 csisolatin5 iso-8859-10 iso-ir-157 l6 iso_8859-10:1992 csisolatin6 latin6 hp-roman8 roman8 r8 ibm037 cp037 csibm037 ibm424 cp424 csibm424 ibm437 cp437 437 cspc8codepage437 ibm500 cp500 csibm500 ibm775 cp775 cspc775baltic ibm850 cp850 850 cspc850multilingual ibm852 cp852 852 cspcp852 ibm855 cp855 855 csibm855 ibm857 cp857 857 csibm857 ibm860 cp860 860 csibm860 ibm861 cp861 861 cp-is csibm861 ibm862 cp862 862 cspc862latinhebrew ibm863 cp863 863 csibm863 ibm864 cp864 csibm864 ibm865 cp865 865 csibm865 ibm866 cp866 866 csibm866 ibm869 cp869 869 cp-gr csibm869 ibm1026 cp1026 csibm1026 koi8-r cskoi8r koi8-u big5-hkscs ptcp154 csptcp154 pt154 cp154 utf-7 utf-16be utf-16le utf-16 utf-8 iso-8859-13 iso-8859-14 iso-ir-199 iso_8859-14:1998 iso_8859-14 latin8 iso-celtic l8 iso-8859-15 iso_8859-15 iso-8859-16 iso-ir-226 iso_8859-16:2001 iso_8859-16 latin10 l10 gbk cp936 ms936 gb18030 shift_jis ms_kanji csshiftjis euc-jp gb2312 big5 csbig5 windows-1250 windows-1251 windows-1252 windows-1253 windows-1254 windows-1255 windows-1256 windows-1257 windows-1258 tis-620 hz-gb-2312 ]
- E =
{ "null-character" => _("Null character in input stream, replaced with U+FFFD."), "incorrectly-placed-solidus" => _("Solidus (/) incorrectly placed in tag."), "incorrect-cr-newline-entity" => _("Incorrect CR newline entity, replaced with LF."), "illegal-windows-1252-entity" => _("Entity used with illegal number (windows-1252 reference)."), "cant-convert-numeric-entity" => _("Numeric entity couldn't be converted to character " + "(codepoint U+%(charAsInt)08x)."), "illegal-codepoint-for-numeric-entity" => _("Numeric entity represents an illegal codepoint=> " + "U+%(charAsInt)08x."), "numeric-entity-without-semicolon" => _("Numeric entity didn't end with ';'."), "expected-numeric-entity-but-got-eof" => _("Numeric entity expected. Got end of file instead."), "expected-numeric-entity" => _("Numeric entity expected but none found."), "named-entity-without-semicolon" => _("Named entity didn't end with ';'."), "expected-named-entity" => _("Named entity expected. Got none."), "attributes-in-end-tag" => _("End tag contains unexpected attributes."), "expected-tag-name-but-got-right-bracket" => _("Expected tag name. Got '>' instead."), "expected-tag-name-but-got-question-mark" => _("Expected tag name. Got '?' instead. (HTML doesn't " + "support processing instructions.)"), "expected-tag-name" => _("Expected tag name. Got something else instead"), "expected-closing-tag-but-got-right-bracket" => _("Expected closing tag. Got '>' instead. Ignoring '</>'."), "expected-closing-tag-but-got-eof" => _("Expected closing tag. Unexpected end of file."), "expected-closing-tag-but-got-char" => _("Expected closing tag. Unexpected character '%(data)' found."), "eof-in-tag-name" => _("Unexpected end of file in the tag name."), "expected-attribute-name-but-got-eof" => _("Unexpected end of file. Expected attribute name instead."), "eof-in-attribute-name" => _("Unexpected end of file in attribute name."), "duplicate-attribute" => _("Dropped duplicate attribute on tag."), "expected-end-of-tag-name-but-got-eof" => _("Unexpected end of file. Expected = or end of tag."), "expected-attribute-value-but-got-eof" => _("Unexpected end of file. Expected attribute value."), "eof-in-attribute-value-double-quote" => _("Unexpected end of file in attribute value (\")."), "eof-in-attribute-value-single-quote" => _("Unexpected end of file in attribute value (')."), "eof-in-attribute-value-no-quotes" => _("Unexpected end of file in attribute value."), "expected-dashes-or-doctype" => _("Expected '--' or 'DOCTYPE'. Not found."), "incorrect-comment" => _("Incorrect comment."), "eof-in-comment" => _("Unexpected end of file in comment."), "eof-in-comment-end-dash" => _("Unexpected end of file in comment (-)"), "unexpected-dash-after-double-dash-in-comment" => _("Unexpected '-' after '--' found in comment."), "eof-in-comment-double-dash" => _("Unexpected end of file in comment (--)."), "unexpected-char-in-comment" => _("Unexpected character in comment found."), "need-space-after-doctype" => _("No space after literal string 'DOCTYPE'."), "expected-doctype-name-but-got-right-bracket" => _("Unexpected > character. Expected DOCTYPE name."), "expected-doctype-name-but-got-eof" => _("Unexpected end of file. Expected DOCTYPE name."), "eof-in-doctype-name" => _("Unexpected end of file in DOCTYPE name."), "eof-in-doctype" => _("Unexpected end of file in DOCTYPE."), "expected-space-or-right-bracket-in-doctype" => _("Expected space or '>'. Got '%(data)'"), "unexpected-end-of-doctype" => _("Unexpected end of DOCTYPE."), "unexpected-char-in-doctype" => _("Unexpected character in DOCTYPE."), "eof-in-bogus-doctype" => _("Unexpected end of file in bogus doctype."), "eof-in-innerhtml" => _("XXX innerHTML EOF"), "unexpected-doctype" => _("Unexpected DOCTYPE. Ignored."), "non-html-root" => _("html needs to be the first start tag."), "expected-doctype-but-got-eof" => _("Unexpected End of file. Expected DOCTYPE."), "unknown-doctype" => _("Erroneous DOCTYPE."), "expected-doctype-but-got-chars" => _("Unexpected non-space characters. Expected DOCTYPE."), "expected-doctype-but-got-start-tag" => _("Unexpected start tag (%(name)). Expected DOCTYPE."), "expected-doctype-but-got-end-tag" => _("Unexpected end tag (%(name)). Expected DOCTYPE."), "end-tag-after-implied-root" => _("Unexpected end tag (%(name)) after the (implied) root element."), "expected-named-closing-tag-but-got-eof" => _("Unexpected end of file. Expected end tag (%(name))."), "two-heads-are-not-better-than-one" => _("Unexpected start tag head in existing head. Ignored."), "unexpected-end-tag" => _("Unexpected end tag (%(name)). Ignored."), "unexpected-start-tag-out-of-my-head" => _("Unexpected start tag (%(name)) that can be in head. Moved."), "unexpected-start-tag" => _("Unexpected start tag (%(name))."), "missing-end-tag" => _("Missing end tag (%(name))."), "missing-end-tags" => _("Missing end tags (%(name))."), "unexpected-start-tag-implies-end-tag" => _("Unexpected start tag (%(startName)) " + "implies end tag (%(endName))."), "unexpected-start-tag-treated-as" => _("Unexpected start tag (%(originalName)). Treated as %(newName)."), "deprecated-tag" => _("Unexpected start tag %(name). Don't use it!"), "unexpected-start-tag-ignored" => _("Unexpected start tag %(name). Ignored."), "expected-one-end-tag-but-got-another" => _("Unexpected end tag (%(gotName)). " + "Missing end tag (%(expectedName))."), "end-tag-too-early" => _("End tag (%(name)) seen too early. Expected other end tag."), "end-tag-too-early-named" => _("Unexpected end tag (%(gotName)). Expected end tag (%(expectedName))."), "end-tag-too-early-ignored" => _("End tag (%(name)) seen too early. Ignored."), "adoption-agency-1.1" => _("End tag (%(name)) violates step 1, " + "paragraph 1 of the adoption agency algorithm."), "adoption-agency-1.2" => _("End tag (%(name)) violates step 1, " + "paragraph 2 of the adoption agency algorithm."), "adoption-agency-1.3" => _("End tag (%(name)) violates step 1, " + "paragraph 3 of the adoption agency algorithm."), "unexpected-end-tag-treated-as" => _("Unexpected end tag (%(originalName)). Treated as %(newName)."), "no-end-tag" => _("This element (%(name)) has no end tag."), "unexpected-implied-end-tag-in-table" => _("Unexpected implied end tag (%(name)) in the table phase."), "unexpected-implied-end-tag-in-table-body" => _("Unexpected implied end tag (%(name)) in the table body phase."), "unexpected-char-implies-table-voodoo" => _("Unexpected non-space characters in " + "table context caused voodoo mode."), "unexpected-start-tag-implies-table-voodoo" => _("Unexpected start tag (%(name)) in " + "table context caused voodoo mode."), "unexpected-end-tag-implies-table-voodoo" => _("Unexpected end tag (%(name)) in " + "table context caused voodoo mode."), "unexpected-cell-in-table-body" => _("Unexpected table cell start tag (%(name)) " + "in the table body phase."), "unexpected-cell-end-tag" => _("Got table cell end tag (%(name)) " + "while required end tags are missing."), "unexpected-end-tag-in-table-body" => _("Unexpected end tag (%(name)) in the table body phase. Ignored."), "unexpected-implied-end-tag-in-table-row" => _("Unexpected implied end tag (%(name)) in the table row phase."), "unexpected-end-tag-in-table-row" => _("Unexpected end tag (%(name)) in the table row phase. Ignored."), "unexpected-select-in-select" => _("Unexpected select start tag in the select phase " + "implies select start tag."), "unexpected-start-tag-in-select" => _("Unexpected start tag token (%(name) in the select phase. " + "Ignored."), "unexpected-end-tag-in-select" => _("Unexpected end tag (%(name)) in the select phase. Ignored."), "unexpected-char-after-body" => _("Unexpected non-space characters in the after body phase."), "unexpected-start-tag-after-body" => _("Unexpected start tag token (%(name))" + " in the after body phase."), "unexpected-end-tag-after-body" => _("Unexpected end tag token (%(name))" + " in the after body phase."), "unexpected-char-in-frameset" => _("Unepxected characters in the frameset phase. Characters ignored."), "unexpected-start-tag-in-frameset" => _("Unexpected start tag token (%(name))" + " in the frameset phase. Ignored."), "unexpected-frameset-in-frameset-innerhtml" => _("Unexpected end tag token (frameset) " + "in the frameset phase (innerHTML)."), "unexpected-end-tag-in-frameset" => _("Unexpected end tag token (%(name))" + " in the frameset phase. Ignored."), "unexpected-char-after-frameset" => _("Unexpected non-space characters in the " + "after frameset phase. Ignored."), "unexpected-start-tag-after-frameset" => _("Unexpected start tag (%(name))" + " in the after frameset phase. Ignored."), "unexpected-end-tag-after-frameset" => _("Unexpected end tag (%(name))" + " in the after frameset phase. Ignored."), "expected-eof-but-got-char" => _("Unexpected non-space characters. Expected end of file."), "expected-eof-but-got-start-tag" => _("Unexpected start tag (%(name))" + ". Expected end of file."), "expected-eof-but-got-end-tag" => _("Unexpected end tag (%(name))" + ". Expected end of file."), "unexpected-end-table-in-caption" => _("Unexpected end table tag in caption. Generates implied end caption.") }
- Marker =
The scope markers are inserted when entering buttons, object elements, marquees, table cells, and table captions, and are used to prevent formatting from “leaking” into tables, buttons, object elements, and marquees.
nil
Class Method Summary collapse
- ._(str) ⇒ Object
- .parse(stream, options = {}) ⇒ Object
- .parse_fragment(stream, options = {}) ⇒ Object
Class Method Details
._(str) ⇒ Object
5 |
# File 'lib/feed_tools/vendor/html5/lib/html5/constants.rb', line 5 def self._(str); str end |
.parse(stream, options = {}) ⇒ Object
6 7 8 |
# File 'lib/feed_tools/vendor/html5/lib/html5.rb', line 6 def self.parse(stream, ={}) HTMLParser.parse(stream, ) end |
.parse_fragment(stream, options = {}) ⇒ Object
10 11 12 |
# File 'lib/feed_tools/vendor/html5/lib/html5.rb', line 10 def self.parse_fragment(stream, ={}) HTMLParser.parse(stream, ) end |