AmendHub

Download:

jcs

/

detritus

/

amendments

/

57

html_tokenize: Import HTML tokenizer written to WHATWG spec docs


jcs made amendment 57 about 1 year ago
--- html_data.c Wed Dec 11 11:20:37 2024 +++ html_data.c Wed Dec 11 11:20:37 2024 @@ -0,0 +1,319 @@ +/* + * Copyright (c) 2024 joshua stein <jcs@jcs.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "html.h" + +#ifdef HTML_ENABLE_DEBUGGING +const char *html_mode_names[] = { + "NONE", + "INITIAL", + "BEFORE_HTML", + "BEFORE_HEAD", + "IN_HEAD", + "IN_HEAD_NOSCRIPT", + "AFTER_HEAD", + "IN_BODY", + "TEXT", + "IN_TABLE", + "IN_TABLE_TEXT", + "IN_CAPTION", + "IN_COLUMN_GROUP", + "IN_TABLE_BODY", + "IN_ROW", + "IN_CELL", + "IN_SELECT", + "IN_SELECT_IN_TABLE", + "IN_TEMPLATE", + "AFTER_BODY", + "IN_FRAMESET", + "AFTER_FRAMESET", + "AFTER_AFTER_BODY", + "AFTER_AFTER_FRAMESET" +}; + +const char *html_state_names[] = { + "NONE", + "DATA", + "RCDATA", + "RAWTEXT", + "SCRIPT_DATA", + "PLAINTEXT", + "TAG_OPEN", + "END_TAG_OPEN", + "TAG_NAME", + "RCDATA_LESS_THAN_SIGN", + "RCDATA_END_TAG_OPEN", + "RCDATA_END_TAG_NAME", + "RAWTEXT_LESS_THAN_SIGN", + "RAWTEXT_END_TAG_OPEN", + "RAWTEXT_END_TAG_NAME", + "SCRIPT_DATA_LESS_THAN_SIGN", + "SCRIPT_DATA_END_TAG_OPEN", + "SCRIPT_DATA_END_TAG_NAME", + "SCRIPT_DATA_ESCAPE_START", + "SCRIPT_DATA_ESCAPE_START_DASH", + "SCRIPT_DATA_ESCAPED", + "SCRIPT_DATA_ESCAPED_DASH", + "SCRIPT_DATA_ESCAPED_DASH_DASH", + "SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN", + 
"SCRIPT_DATA_ESCAPED_END_TAG_OPEN", + "SCRIPT_DATA_ESCAPED_END_TAG_NAME", + "SCRIPT_DATA_DOUBLE_ESCAPE_START", + "SCRIPT_DATA_DOUBLE_ESCAPED", + "SCRIPT_DATA_DOUBLE_ESCAPED_DASH", + "SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH", + "SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN", + "SCRIPT_DATA_DOUBLE_ESCAPE_END", + "BEFORE_ATTRIBUTE_NAME", + "ATTRIBUTE_NAME", + "AFTER_ATTRIBUTE_NAME", + "BEFORE_ATTRIBUTE_VALUE", + "ATTRIBUTE_VALUE_DOUBLE_QUOTED", + "ATTRIBUTE_VALUE_SINGLE_QUOTED", + "ATTRIBUTE_VALUE_UNQUOTED", + "AFTER_ATTRIBUTE_VALUE_QUOTED", + "SELF_CLOSING_START_TAG", + "BOGUS_COMMENT", + "MARKUP_DECLARATION_OPEN", + "COMMENT_START", + "COMMENT_START_DASH", + "COMMENT", + "COMMENT_LESS_THAN_SIGN", + "COMMENT_LESS_THAN_SIGN_BANG", + "COMMENT_LESS_THAN_SIGN_BANG_DASH", + "COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH", + "COMMENT_END_DASH", + "COMMENT_END", + "COMMENT_END_BANG", + "DOCTYPE", + "BEFORE_DOCTYPE_NAME", + "DOCTYPE_NAME", + "AFTER_DOCTYPE_NAME", + "AFTER_DOCTYPE_PUBLIC_KEYWORD", + "BEFORE_DOCTYPE_PUBLIC_IDENTIFIER", + "DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED", + "DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED", + "AFTER_DOCTYPE_PUBLIC_IDENTIFIER", + "BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS", + "AFTER_DOCTYPE_SYSTEM_KEYWORD", + "BEFORE_DOCTYPE_SYSTEM_IDENTIFIER", + "DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED", + "DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED", + "AFTER_DOCTYPE_SYSTEM_IDENTIFIER", + "BOGUS_DOCTYPE", + "CDATA_SECTION", + "CDATA_SECTION_BRACKET", + "CDATA_SECTION_END", + "CHARACTER_REFERENCE", + "NAMED_CHARACTER_REFERENCE", + "AMBIGUOUS_AMPERSAND", + "NUMERIC_CHARACTER_REFERENCE", + "HEXADECIMAL_CHARACTER_REFERENCE_START", + "DECIMAL_CHARACTER_REFERENCE_START", + "HEXADECIMAL_CHARACTER_REFERENCE", + "DECIMAL_CHARACTER_REFERENCE", + "NUMERIC_CHARACTER_REFERENCE_END" +}; + +const char *html_error_strings[] = { + "NONE", + "ABRUPT_CLOSING_OF_EMPTY_COMMENT", + "ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER", + "ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER", + 
"ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE", + "CDATA_IN_HTML_CONTENT", + "CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE", + "CONTROL_CHARACTER_IN_INPUT_STREAM", + "CONTROL_CHARACTER_REFERENCE", + "DUPLICATE_ATTRIBUTE", + "END_TAG_WITH_ATTRIBUTES", + "END_TAG_WITH_TRAILING_SOLIDUS", + "EOF_BEFORE_TAG_NAME", + "EOF_IN_CDATA", + "EOF_IN_COMMENT", + "EOF_IN_DOCTYPE", + "EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT", + "EOF_IN_TAG", + "INCORRECTLY_CLOSED_COMMENT", + "INCORRECTLY_OPENED_COMMENT", + "INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME", + "INVALID_FIRST_CHARACTER_OF_TAG_NAME", + "MISSING_ATTRIBUTE_VALUE", + "MISSING_DOCTYPE_NAME", + "MISSING_DOCTYPE_PUBLIC_IDENTIFIER", + "MISSING_DOCTYPE_SYSTEM_IDENTIFIER", + "MISSING_END_TAG_NAME", + "MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER", + "MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER", + "MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE", + "MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD", + "MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD", + "MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME", + "MISSING_WHITESPACE_BETWEEN_ATTRIBUTES", + "MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS", + "NESTED_COMMENT", + "NONCHARACTER_CHARACTER_REFERENCE", + "NONCHARACTER_IN_INPUT_STREAM", + "NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS", + "NULL_CHARACTER_REFERENCE", + "SURROGATE_CHARACTER_REFERENCE", + "SURROGATE_IN_INPUT_STREAM", + "UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER", + "UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME", + "UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE", + "UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME", + "UNEXPECTED_NULL_CHARACTER", + "UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME", + "UNEXPECTED_SOLIDUS_IN_TAG", + "UNKNOWN_NAMED_CHARACTER_REFERENCE" +}; + +const char *html_token_names[] = { + "INVALID", + + "DOCTYPE", + "START_TAG", + "END_TAG", + "COMMENT", + "CHARACTER", + "EOF" +}; +#endif + +/* this doesn't have to list all tags, just ones the docs reference */ +const char 
*html_tag_names[] = { + NULL, + + "a", /* 1 */ + "address", + "applet", + "area", + "article", + "aside", + "b", + "base", + "basefont", + "bgsound", + "big", + "blockquote", + "body", + "br", + "button", + "caption", + "center", + "cite", + "code", + "col", + "colgroup", + "dd", + "details", + "dfn", + "dialog", + "dir", + "div", + "dl", + "dt", + "em", + "embed", + "fieldset", + "figcaption", + "figure", + "font", + "footer", + "form", + "frame", + "frameset", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "head", + "header", + "hgroup", + "hr", + "html", + "i", + "iframe", + "image", + "img", + "input", + "ins", + "kbd", + "keygen", + "li", + "link", + "listing", + "main", + "marquee", + "math", + "menu", + "meta", + "nav", + "nobr", + "noembed", + "noframes", + "noscript", + "object", + "ol", + "optgroup", + "option", + "p", + "param", + "plaintext", + "pre", + "rb", + "rp", + "rt", + "rtc", + "ruby", + "s", + "samp", + "script", + "search", + "section", + "select", + "small", + "source", + "span", + "strike", + "strong", + "style", + "sub", + "sup", + "summary", + "svg", + "table", + "tbody", + "td", + "template", + "textarea", + "tfoot", + "th", + "thead", + "title", + "tr", + "track", + "tt", + "u", + "ul", + "var", + "wbr", + "xmp", + + NULL +}; --- html_entities.c Tue Dec 10 22:39:10 2024 +++ html_entities.c Tue Dec 10 22:39:10 2024 @@ -0,0 +1,2300 @@ +/* + * Copyright (c) 2024 joshua stein <jcs@jcs.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "html.h" + +/* https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references */ +const html_entity html_entities[] = { + /* super common ones to speed up searching */ + { "&nbsp", 0x000000a0, }, + { "&nbsp;", 0x000000a0, }, + { "&lt;", 0x0000003c, }, + { "&gt;", 0x0000003e, }, + { "&amp", 0x00000026, }, + { "&amp;", 0x00000026, }, + + /* list taken from https://www.w3.org/wiki/Common_HTML_entities_used_for_typography */ + { "&cent;", 0x000000a2, }, + { "&pound;", 0x000000a3, }, + { "&sect;", 0x000000a7, }, + { "&copy;", 0x000000a9, }, + { "&laquo;", 0x000000ab, }, + { "&raquo;", 0x000000bb, }, + { "&reg;", 0x000000ae, }, + { "&deg;", 0x000000b0, }, + { "&plusmn;", 0x000000b1, }, + { "&para;", 0x000000b6, }, + { "&middot;", 0x000000b7, }, + { "&frac12;", 0x000000bd, }, + { "&ndash;", 0x00002013, }, + { "&mdash;", 0x00002014, }, + { "&lsquo;", 0x00002018, }, + { "&rsquo;", 0x00002019, }, + { "&sbquo;", 0x0000201a, }, + { "&ldquo;", 0x0000201c, }, + { "&rdquo;", 0x0000201d, }, + { "&bdquo;", 0x0000201e, }, + { "&dagger;", 0x00002020, }, + { "&Dagger;", 0x00002021, }, + { "&bull;", 0x00002022, }, + { "&bullet;", 0x00002022, }, + { "&hellip;", 0x00002026, }, + { "&prime;", 0x00002032, }, + { "&Prime;", 0x00002033, }, + { "&euro;", 0x000020ac, }, + { "&trade;", 0x00002122, }, + { "&asymp;", 0x00002248, }, + { "&ne;", 0x00002260, }, + { "&le;", 0x00002264, }, + { "&ge;", 0x00002265, }, + +#if 0 + /* not much point in supporting all these, we can't draw them anyway */ + { "&AElig", 0x000000c6, }, + { "&AElig;", 0x000000c6, }, + { "&AMP", 0x00000026, }, + { "&AMP;", 0x00000026, }, + { "&Aacute", 
0x000000c1, }, + { "&Aacute;", 0x000000c1, }, + { "&Abreve;", 0x00000102, }, + { "&Acirc", 0x000000c2, }, + { "&Acirc;", 0x000000c2, }, + { "&Acy;", 0x00000410, }, + { "&Afr;", 0x0001d504, }, + { "&Agrave", 0x000000c0, }, + { "&Agrave;", 0x000000c0, }, + { "&Alpha;", 0x00000391, }, + { "&Amacr;", 0x00000100, }, + { "&And;", 0x00002a53, }, + { "&Aogon;", 0x00000104, }, + { "&Aopf;", 0x0001d538, }, + { "&ApplyFunction;", 0x00002061, }, + { "&Aring", 0x000000c5, }, + { "&Aring;", 0x000000c5, }, + { "&Ascr;", 0x0001d49c, }, + { "&Assign;", 0x00002254, }, + { "&Atilde", 0x000000c3, }, + { "&Atilde;", 0x000000c3, }, + { "&Auml", 0x000000c4, }, + { "&Auml;", 0x000000c4, }, + { "&Backslash;", 0x00002216, }, + { "&Barv;", 0x00002ae7, }, + { "&Barwed;", 0x00002306, }, + { "&Bcy;", 0x00000411, }, + { "&Because;", 0x00002235, }, + { "&Bernoullis;", 0x0000212c, }, + { "&Beta;", 0x00000392, }, + { "&Bfr;", 0x0001d505, }, + { "&Bopf;", 0x0001d539, }, + { "&Breve;", 0x000002d8, }, + { "&Bscr;", 0x0000212c, }, + { "&Bumpeq;", 0x0000224e, }, + { "&CHcy;", 0x00000427, }, + { "&COPY", 0x000000a9, }, + { "&COPY;", 0x000000a9, }, + { "&Cacute;", 0x00000106, }, + { "&Cap;", 0x000022d2, }, + { "&CapitalDifferentialD;", 0x00002145, }, + { "&Cayleys;", 0x0000212d, }, + { "&Ccaron;", 0x0000010c, }, + { "&Ccedil", 0x000000c7, }, + { "&Ccedil;", 0x000000c7, }, + { "&Ccirc;", 0x00000108, }, + { "&Cconint;", 0x00002230, }, + { "&Cdot;", 0x0000010a, }, + { "&Cedilla;", 0x000000b8, }, + { "&CenterDot;", 0x000000b7, }, + { "&Cfr;", 0x0000212d, }, + { "&Chi;", 0x000003a7, }, + { "&CircleDot;", 0x00002299, }, + { "&CircleMinus;", 0x00002296, }, + { "&CirclePlus;", 0x00002295, }, + { "&CircleTimes;", 0x00002297, }, + { "&ClockwiseContourIntegral;", 0x00002232, }, + { "&CloseCurlyDoubleQuote;", 0x0000201d, }, + { "&CloseCurlyQuote;", 0x00002019, }, + { "&Colon;", 0x00002237, }, + { "&Colone;", 0x00002a74, }, + { "&Congruent;", 0x00002261, }, + { "&Conint;", 0x0000222f, }, + { "&ContourIntegral;", 
0x0000222e, }, + { "&Copf;", 0x00002102, }, + { "&Coproduct;", 0x00002210, }, + { "&CounterClockwiseContourIntegral;", 0x00002233, }, + { "&Cross;", 0x00002a2f, }, + { "&Cscr;", 0x0001d49e, }, + { "&Cup;", 0x000022d3, }, + { "&CupCap;", 0x0000224d, }, + { "&DD;", 0x00002145, }, + { "&DDotrahd;", 0x00002911, }, + { "&DJcy;", 0x00000402, }, + { "&DScy;", 0x00000405, }, + { "&DZcy;", 0x0000040f, }, + { "&Dagger;", 0x00002021, }, + { "&Darr;", 0x000021a1, }, + { "&Dashv;", 0x00002ae4, }, + { "&Dcaron;", 0x0000010e, }, + { "&Dcy;", 0x00000414, }, + { "&Del;", 0x00002207, }, + { "&Delta;", 0x00000394, }, + { "&Dfr;", 0x0001d507, }, + { "&DiacriticalAcute;", 0x000000b4, }, + { "&DiacriticalDot;", 0x000002d9, }, + { "&DiacriticalDoubleAcute;", 0x000002dd, }, + { "&DiacriticalGrave;", 0x00000060, }, + { "&DiacriticalTilde;", 0x000002dc, }, + { "&Diamond;", 0x000022c4, }, + { "&DifferentialD;", 0x00002146, }, + { "&Dopf;", 0x0001d53b, }, + { "&Dot;", 0x000000a8, }, + { "&DotDot;", 0x000020dc, }, + { "&DotEqual;", 0x00002250, }, + { "&DoubleContourIntegral;", 0x0000222f, }, + { "&DoubleDot;", 0x000000a8, }, + { "&DoubleDownArrow;", 0x000021d3, }, + { "&DoubleLeftArrow;", 0x000021d0, }, + { "&DoubleLeftRightArrow;", 0x000021d4, }, + { "&DoubleLeftTee;", 0x00002ae4, }, + { "&DoubleLongLeftArrow;", 0x000027f8, }, + { "&DoubleLongLeftRightArrow;", 0x000027fa, }, + { "&DoubleLongRightArrow;", 0x000027f9, }, + { "&DoubleRightArrow;", 0x000021d2, }, + { "&DoubleRightTee;", 0x000022a8, }, + { "&DoubleUpArrow;", 0x000021d1, }, + { "&DoubleUpDownArrow;", 0x000021d5, }, + { "&DoubleVerticalBar;", 0x00002225, }, + { "&DownArrow;", 0x00002193, }, + { "&DownArrowBar;", 0x00002913, }, + { "&DownArrowUpArrow;", 0x000021f5, }, + { "&DownBreve;", 0x00000311, }, + { "&DownLeftRightVector;", 0x00002950, }, + { "&DownLeftTeeVector;", 0x0000295e, }, + { "&DownLeftVector;", 0x000021bd, }, + { "&DownLeftVectorBar;", 0x00002956, }, + { "&DownRightTeeVector;", 0x0000295f, }, + { "&DownRightVector;", 
0x000021c1, }, + { "&DownRightVectorBar;", 0x00002957, }, + { "&DownTee;", 0x000022a4, }, + { "&DownTeeArrow;", 0x000021a7, }, + { "&Downarrow;", 0x000021d3, }, + { "&Dscr;", 0x0001d49f, }, + { "&Dstrok;", 0x00000110, }, + { "&ENG;", 0x0000014a, }, + { "&ETH", 0x000000d0, }, + { "&ETH;", 0x000000d0, }, + { "&Eacute", 0x000000c9, }, + { "&Eacute;", 0x000000c9, }, + { "&Ecaron;", 0x0000011a, }, + { "&Ecirc", 0x000000ca, }, + { "&Ecirc;", 0x000000ca, }, + { "&Ecy;", 0x0000042d, }, + { "&Edot;", 0x00000116, }, + { "&Efr;", 0x0001d508, }, + { "&Egrave", 0x000000c8, }, + { "&Egrave;", 0x000000c8, }, + { "&Element;", 0x00002208, }, + { "&Emacr;", 0x00000112, }, + { "&EmptySmallSquare;", 0x000025fb, }, + { "&EmptyVerySmallSquare;", 0x000025ab, }, + { "&Eogon;", 0x00000118, }, + { "&Eopf;", 0x0001d53c, }, + { "&Epsilon;", 0x00000395, }, + { "&Equal;", 0x00002a75, }, + { "&EqualTilde;", 0x00002242, }, + { "&Equilibrium;", 0x000021cc, }, + { "&Escr;", 0x00002130, }, + { "&Esim;", 0x00002a73, }, + { "&Eta;", 0x00000397, }, + { "&Euml", 0x000000cb, }, + { "&Euml;", 0x000000cb, }, + { "&Exists;", 0x00002203, }, + { "&ExponentialE;", 0x00002147, }, + { "&Fcy;", 0x00000424, }, + { "&Ffr;", 0x0001d509, }, + { "&FilledSmallSquare;", 0x000025fc, }, + { "&FilledVerySmallSquare;", 0x000025aa, }, + { "&Fopf;", 0x0001d53d, }, + { "&ForAll;", 0x00002200, }, + { "&Fouriertrf;", 0x00002131, }, + { "&Fscr;", 0x00002131, }, + { "&GJcy;", 0x00000403, }, + { "&GT", 0x0000003e, }, + { "&GT;", 0x0000003e, }, + { "&Gamma;", 0x00000393, }, + { "&Gammad;", 0x000003dc, }, + { "&Gbreve;", 0x0000011e, }, + { "&Gcedil;", 0x00000122, }, + { "&Gcirc;", 0x0000011c, }, + { "&Gcy;", 0x00000413, }, + { "&Gdot;", 0x00000120, }, + { "&Gfr;", 0x0001d50a, }, + { "&Gg;", 0x000022d9, }, + { "&Gopf;", 0x0001d53e, }, + { "&GreaterEqual;", 0x00002265, }, + { "&GreaterEqualLess;", 0x000022db, }, + { "&GreaterFullEqual;", 0x00002267, }, + { "&GreaterGreater;", 0x00002aa2, }, + { "&GreaterLess;", 0x00002277, }, + { 
"&GreaterSlantEqual;", 0x00002a7e, }, + { "&GreaterTilde;", 0x00002273, }, + { "&Gscr;", 0x0001d4a2, }, + { "&Gt;", 0x0000226b, }, + { "&HARDcy;", 0x0000042a, }, + { "&Hacek;", 0x000002c7, }, + { "&Hat;", 0x0000005e, }, + { "&Hcirc;", 0x00000124, }, + { "&Hfr;", 0x0000210c, }, + { "&HilbertSpace;", 0x0000210b, }, + { "&Hopf;", 0x0000210d, }, + { "&HorizontalLine;", 0x00002500, }, + { "&Hscr;", 0x0000210b, }, + { "&Hstrok;", 0x00000126, }, + { "&HumpDownHump;", 0x0000224e, }, + { "&HumpEqual;", 0x0000224f, }, + { "&IEcy;", 0x00000415, }, + { "&IJlig;", 0x00000132, }, + { "&IOcy;", 0x00000401, }, + { "&Iacute", 0x000000cd, }, + { "&Iacute;", 0x000000cd, }, + { "&Icirc", 0x000000ce, }, + { "&Icirc;", 0x000000ce, }, + { "&Icy;", 0x00000418, }, + { "&Idot;", 0x00000130, }, + { "&Ifr;", 0x00002111, }, + { "&Igrave", 0x000000cc, }, + { "&Igrave;", 0x000000cc, }, + { "&Im;", 0x00002111, }, + { "&Imacr;", 0x0000012a, }, + { "&ImaginaryI;", 0x00002148, }, + { "&Implies;", 0x000021d2, }, + { "&Int;", 0x0000222c, }, + { "&Integral;", 0x0000222b, }, + { "&Intersection;", 0x000022c2, }, + { "&InvisibleComma;", 0x00002063, }, + { "&InvisibleTimes;", 0x00002062, }, + { "&Iogon;", 0x0000012e, }, + { "&Iopf;", 0x0001d540, }, + { "&Iota;", 0x00000399, }, + { "&Iscr;", 0x00002110, }, + { "&Itilde;", 0x00000128, }, + { "&Iukcy;", 0x00000406, }, + { "&Iuml", 0x000000cf, }, + { "&Iuml;", 0x000000cf, }, + { "&Jcirc;", 0x00000134, }, + { "&Jcy;", 0x00000419, }, + { "&Jfr;", 0x0001d50d, }, + { "&Jopf;", 0x0001d541, }, + { "&Jscr;", 0x0001d4a5, }, + { "&Jsercy;", 0x00000408, }, + { "&Jukcy;", 0x00000404, }, + { "&KHcy;", 0x00000425, }, + { "&KJcy;", 0x0000040c, }, + { "&Kappa;", 0x0000039a, }, + { "&Kcedil;", 0x00000136, }, + { "&Kcy;", 0x0000041a, }, + { "&Kfr;", 0x0001d50e, }, + { "&Kopf;", 0x0001d542, }, + { "&Kscr;", 0x0001d4a6, }, + { "&LJcy;", 0x00000409, }, + { "&LT", 0x0000003c, }, + { "&LT;", 0x0000003c, }, + { "&Lacute;", 0x00000139, }, + { "&Lambda;", 0x0000039b, }, + { "&Lang;", 
0x000027ea, }, + { "&Laplacetrf;", 0x00002112, }, + { "&Larr;", 0x0000219e, }, + { "&Lcaron;", 0x0000013d, }, + { "&Lcedil;", 0x0000013b, }, + { "&Lcy;", 0x0000041b, }, + { "&LeftAngleBracket;", 0x000027e8, }, + { "&LeftArrow;", 0x00002190, }, + { "&LeftArrowBar;", 0x000021e4, }, + { "&LeftArrowRightArrow;", 0x000021c6, }, + { "&LeftCeiling;", 0x00002308, }, + { "&LeftDoubleBracket;", 0x000027e6, }, + { "&LeftDownTeeVector;", 0x00002961, }, + { "&LeftDownVector;", 0x000021c3, }, + { "&LeftDownVectorBar;", 0x00002959, }, + { "&LeftFloor;", 0x0000230a, }, + { "&LeftRightArrow;", 0x00002194, }, + { "&LeftRightVector;", 0x0000294e, }, + { "&LeftTee;", 0x000022a3, }, + { "&LeftTeeArrow;", 0x000021a4, }, + { "&LeftTeeVector;", 0x0000295a, }, + { "&LeftTriangle;", 0x000022b2, }, + { "&LeftTriangleBar;", 0x000029cf, }, + { "&LeftTriangleEqual;", 0x000022b4, }, + { "&LeftUpDownVector;", 0x00002951, }, + { "&LeftUpTeeVector;", 0x00002960, }, + { "&LeftUpVector;", 0x000021bf, }, + { "&LeftUpVectorBar;", 0x00002958, }, + { "&LeftVector;", 0x000021bc, }, + { "&LeftVectorBar;", 0x00002952, }, + { "&Leftarrow;", 0x000021d0, }, + { "&Leftrightarrow;", 0x000021d4, }, + { "&LessEqualGreater;", 0x000022da, }, + { "&LessFullEqual;", 0x00002266, }, + { "&LessGreater;", 0x00002276, }, + { "&LessLess;", 0x00002aa1, }, + { "&LessSlantEqual;", 0x00002a7d, }, + { "&LessTilde;", 0x00002272, }, + { "&Lfr;", 0x0001d50f, }, + { "&Ll;", 0x000022d8, }, + { "&Lleftarrow;", 0x000021da, }, + { "&Lmidot;", 0x0000013f, }, + { "&LongLeftArrow;", 0x000027f5, }, + { "&LongLeftRightArrow;", 0x000027f7, }, + { "&LongRightArrow;", 0x000027f6, }, + { "&Longleftarrow;", 0x000027f8, }, + { "&Longleftrightarrow;", 0x000027fa, }, + { "&Longrightarrow;", 0x000027f9, }, + { "&Lopf;", 0x0001d543, }, + { "&LowerLeftArrow;", 0x00002199, }, + { "&LowerRightArrow;", 0x00002198, }, + { "&Lscr;", 0x00002112, }, + { "&Lsh;", 0x000021b0, }, + { "&Lstrok;", 0x00000141, }, + { "&Lt;", 0x0000226a, }, + { "&Map;", 0x00002905, 
}, + { "&Mcy;", 0x0000041c, }, + { "&MediumSpace;", 0x0000205f, }, + { "&Mellintrf;", 0x00002133, }, + { "&Mfr;", 0x0001d510, }, + { "&MinusPlus;", 0x00002213, }, + { "&Mopf;", 0x0001d544, }, + { "&Mscr;", 0x00002133, }, + { "&Mu;", 0x0000039c, }, + { "&NJcy;", 0x0000040a, }, + { "&Nacute;", 0x00000143, }, + { "&Ncaron;", 0x00000147, }, + { "&Ncedil;", 0x00000145, }, + { "&Ncy;", 0x0000041d, }, + { "&NegativeMediumSpace;", 0x0000200b, }, + { "&NegativeThickSpace;", 0x0000200b, }, + { "&NegativeThinSpace;", 0x0000200b, }, + { "&NegativeVeryThinSpace;", 0x0000200b, }, + { "&NestedGreaterGreater;", 0x0000226b, }, + { "&NestedLessLess;", 0x0000226a, }, + { "&NewLine;", 0x0000000a, }, + { "&Nfr;", 0x0001d511, }, + { "&NoBreak;", 0x00002060, }, + { "&NonBreakingSpace;", 0x000000a0, }, + { "&Nopf;", 0x00002115, }, + { "&Not;", 0x00002aec, }, + { "&NotCongruent;", 0x00002262, }, + { "&NotCupCap;", 0x0000226d, }, + { "&NotDoubleVerticalBar;", 0x00002226, }, + { "&NotElement;", 0x00002209, }, + { "&NotEqual;", 0x00002260, }, +// { "&NotEqualTilde;", 0x00002242, 0x00000338, }, + { "&NotExists;", 0x00002204, }, + { "&NotGreater;", 0x0000226f, }, + { "&NotGreaterEqual;", 0x00002271, }, +// { "&NotGreaterFullEqual;", 8807, 0x00000338, }, +// { "&NotGreaterGreater;", 8811, 0x00000338, }, + { "&NotGreaterLess;", 0x00002279, }, +// { "&NotGreaterSlantEqual;", 10878, 0x00000338, }, + { "&NotGreaterTilde;", 0x00002275, }, +// { "&NotHumpDownHump;", 8782, 0x00000338, }, +// { "&NotHumpEqual;", 8783, 0x00000338, }, + { "&NotLeftTriangle;", 0x000022ea, }, +// { "&NotLeftTriangleBar;", 10703, 0x00000338, }, + { "&NotLeftTriangleEqual;", 0x000022ec, }, + { "&NotLess;", 0x0000226e, }, + { "&NotLessEqual;", 0x00002270, }, + { "&NotLessGreater;", 0x00002278, }, +// { "&NotLessLess;", 8810, 0x00000338, }, +// { "&NotLessSlantEqual;", 10877, 0x00000338, }, + { "&NotLessTilde;", 0x00002274, }, +// { "&NotNestedGreaterGreater;",10914, 0x00000338, }, +// { "&NotNestedLessLess;", 10913, 
0x00000338, }, + { "&NotPrecedes;", 0x00002280, }, +// { "&NotPrecedesEqual;", 10927, 0x00000338, }, + { "&NotPrecedesSlantEqual;", 0x000022e0, }, + { "&NotReverseElement;", 0x0000220c, }, + { "&NotRightTriangle;", 0x000022eb, }, +// { "&NotRightTriangleBar;", 10704, 0x00000338, }, + { "&NotRightTriangleEqual;", 0x000022ed, }, +// { "&NotSquareSubset;", 8847, 0x00000338, }, + { "&NotSquareSubsetEqual;", 0x000022e2, }, +// { "&NotSquareSuperset;", 8848, 0x00000338, }, + { "&NotSquareSupersetEqual;", 0x000022e3, }, +// { "&NotSubset;", 8834, 0x000020d2, }, + { "&NotSubsetEqual;", 0x00002288, }, + { "&NotSucceeds;", 0x00002281, }, +// { "&NotSucceedsEqual;", 10928, 0x00000338, }, + { "&NotSucceedsSlantEqual;", 0x000022e1, }, +// { "&NotSucceedsTilde;", 8831, 0x00000338, }, +// { "&NotSuperset;", 8835, 0x000020d2, }, + { "&NotSupersetEqual;", 0x00002289, }, + { "&NotTilde;", 0x00002241, }, + { "&NotTildeEqual;", 0x00002244, }, + { "&NotTildeFullEqual;", 0x00002247, }, + { "&NotTildeTilde;", 0x00002249, }, + { "&NotVerticalBar;", 0x00002224, }, + { "&Nscr;", 0x0001d4a9, }, + { "&Ntilde", 0x000000d1, }, + { "&Ntilde;", 0x000000d1, }, + { "&Nu;", 0x0000039d, }, + { "&OElig;", 0x00000152, }, + { "&Oacute", 0x000000d3, }, + { "&Oacute;", 0x000000d3, }, + { "&Ocirc", 0x000000d4, }, + { "&Ocirc;", 0x000000d4, }, + { "&Ocy;", 0x0000041e, }, + { "&Odblac;", 0x00000150, }, + { "&Ofr;", 0x0001d512, }, + { "&Ograve", 0x000000d2, }, + { "&Ograve;", 0x000000d2, }, + { "&Omacr;", 0x0000014c, }, + { "&Omega;", 0x000003a9, }, + { "&Omicron;", 0x0000039f, }, + { "&Oopf;", 0x0001d546, }, + { "&OpenCurlyDoubleQuote;", 0x0000201c, }, + { "&OpenCurlyQuote;", 0x00002018, }, + { "&Or;", 0x00002a54, }, + { "&Oscr;", 0x0001d4aa, }, + { "&Oslash", 0x000000d8, }, + { "&Oslash;", 0x000000d8, }, + { "&Otilde", 0x000000d5, }, + { "&Otilde;", 0x000000d5, }, + { "&Otimes;", 0x00002a37, }, + { "&Ouml", 0x000000d6, }, + { "&Ouml;", 0x000000d6, }, + { "&OverBar;", 0x0000203e, }, + { "&OverBrace;", 
0x000023de, }, + { "&OverBracket;", 0x000023b4, }, + { "&OverParenthesis;", 0x000023dc, }, + { "&PartialD;", 0x00002202, }, + { "&Pcy;", 0x0000041f, }, + { "&Pfr;", 0x0001d513, }, + { "&Phi;", 0x000003a6, }, + { "&Pi;", 0x000003a0, }, + { "&PlusMinus;", 0x000000b1, }, + { "&Poincareplane;", 0x0000210c, }, + { "&Popf;", 0x00002119, }, + { "&Pr;", 0x00002abb, }, + { "&Precedes;", 0x0000227a, }, + { "&PrecedesEqual;", 0x00002aaf, }, + { "&PrecedesSlantEqual;", 0x0000227c, }, + { "&PrecedesTilde;", 0x0000227e, }, + { "&Prime;", 0x00002033, }, + { "&Product;", 0x0000220f, }, + { "&Proportion;", 0x00002237, }, + { "&Proportional;", 0x0000221d, }, + { "&Pscr;", 0x0001d4ab, }, + { "&Psi;", 0x000003a8, }, + { "&QUOT", 0x00000022, }, + { "&QUOT;", 0x00000022, }, + { "&Qfr;", 0x0001d514, }, + { "&Qopf;", 0x0000211a, }, + { "&Qscr;", 0x0001d4ac, }, + { "&RBarr;", 0x00002910, }, + { "&REG", 0x000000ae, }, + { "&REG;", 0x000000ae, }, + { "&Racute;", 0x00000154, }, + { "&Rang;", 0x000027eb, }, + { "&Rarr;", 0x000021a0, }, + { "&Rarrtl;", 0x00002916, }, + { "&Rcaron;", 0x00000158, }, + { "&Rcedil;", 0x00000156, }, + { "&Rcy;", 0x00000420, }, + { "&Re;", 0x0000211c, }, + { "&ReverseElement;", 0x0000220b, }, + { "&ReverseEquilibrium;", 0x000021cb, }, + { "&ReverseUpEquilibrium;", 0x0000296f, }, + { "&Rfr;", 0x0000211c, }, + { "&Rho;", 0x000003a1, }, + { "&RightAngleBracket;", 0x000027e9, }, + { "&RightArrow;", 0x00002192, }, + { "&RightArrowBar;", 0x000021e5, }, + { "&RightArrowLeftArrow;", 0x000021c4, }, + { "&RightCeiling;", 0x00002309, }, + { "&RightDoubleBracket;", 0x000027e7, }, + { "&RightDownTeeVector;", 0x0000295d, }, + { "&RightDownVector;", 0x000021c2, }, + { "&RightDownVectorBar;", 0x00002955, }, + { "&RightFloor;", 0x0000230b, }, + { "&RightTee;", 0x000022a2, }, + { "&RightTeeArrow;", 0x000021a6, }, + { "&RightTeeVector;", 0x0000295b, }, + { "&RightTriangle;", 0x000022b3, }, + { "&RightTriangleBar;", 0x000029d0, }, + { "&RightTriangleEqual;", 0x000022b5, }, + { 
"&RightUpDownVector;", 0x0000294f, }, + { "&RightUpTeeVector;", 0x0000295c, }, + { "&RightUpVector;", 0x000021be, }, + { "&RightUpVectorBar;", 0x00002954, }, + { "&RightVector;", 0x000021c0, }, + { "&RightVectorBar;", 0x00002953, }, + { "&Rightarrow;", 0x000021d2, }, + { "&Ropf;", 0x0000211d, }, + { "&RoundImplies;", 0x00002970, }, + { "&Rrightarrow;", 0x000021db, }, + { "&Rscr;", 0x0000211b, }, + { "&Rsh;", 0x000021b1, }, + { "&RuleDelayed;", 0x000029f4, }, + { "&SHCHcy;", 0x00000429, }, + { "&SHcy;", 0x00000428, }, + { "&SOFTcy;", 0x0000042c, }, + { "&Sacute;", 0x0000015a, }, + { "&Sc;", 0x00002abc, }, + { "&Scaron;", 0x00000160, }, + { "&Scedil;", 0x0000015e, }, + { "&Scirc;", 0x0000015c, }, + { "&Scy;", 0x00000421, }, + { "&Sfr;", 0x0001d516, }, + { "&ShortDownArrow;", 0x00002193, }, + { "&ShortLeftArrow;", 0x00002190, }, + { "&ShortRightArrow;", 0x00002192, }, + { "&ShortUpArrow;", 0x00002191, }, + { "&Sigma;", 0x000003a3, }, + { "&SmallCircle;", 0x00002218, }, + { "&Sopf;", 0x0001d54a, }, + { "&Sqrt;", 0x0000221a, }, + { "&Square;", 0x000025a1, }, + { "&SquareIntersection;", 0x00002293, }, + { "&SquareSubset;", 0x0000228f, }, + { "&SquareSubsetEqual;", 0x00002291, }, + { "&SquareSuperset;", 0x00002290, }, + { "&SquareSupersetEqual;", 0x00002292, }, + { "&SquareUnion;", 0x00002294, }, + { "&Sscr;", 0x0001d4ae, }, + { "&Star;", 0x000022c6, }, + { "&Sub;", 0x000022d0, }, + { "&Subset;", 0x000022d0, }, + { "&SubsetEqual;", 0x00002286, }, + { "&Succeeds;", 0x0000227b, }, + { "&SucceedsEqual;", 0x00002ab0, }, + { "&SucceedsSlantEqual;", 0x0000227d, }, + { "&SucceedsTilde;", 0x0000227f, }, + { "&SuchThat;", 0x0000220b, }, + { "&Sum;", 0x00002211, }, + { "&Sup;", 0x000022d1, }, + { "&Superset;", 0x00002283, }, + { "&SupersetEqual;", 0x00002287, }, + { "&Supset;", 0x000022d1, }, + { "&THORN", 0x000000de, }, + { "&THORN;", 0x000000de, }, + { "&TRADE;", 0x00002122, }, + { "&TSHcy;", 0x0000040b, }, + { "&TScy;", 0x00000426, }, + { "&Tab;", 0x00000009, }, + { "&Tau;", 
0x000003a4, }, + { "&Tcaron;", 0x00000164, }, + { "&Tcedil;", 0x00000162, }, + { "&Tcy;", 0x00000422, }, + { "&Tfr;", 0x0001d517, }, + { "&Therefore;", 0x00002234, }, + { "&Theta;", 0x00000398, }, +// { "&ThickSpace;", 8287, 0x0000200a, }, + { "&ThinSpace;", 0x00002009, }, + { "&Tilde;", 0x0000223c, }, + { "&TildeEqual;", 0x00002243, }, + { "&TildeFullEqual;", 0x00002245, }, + { "&TildeTilde;", 0x00002248, }, + { "&Topf;", 0x0001d54b, }, + { "&TripleDot;", 0x000020db, }, + { "&Tscr;", 0x0001d4af, }, + { "&Tstrok;", 0x00000166, }, + { "&Uacute", 0x000000da, }, + { "&Uacute;", 0x000000da, }, + { "&Uarr;", 0x0000219f, }, + { "&Uarrocir;", 0x00002949, }, + { "&Ubrcy;", 0x0000040e, }, + { "&Ubreve;", 0x0000016c, }, + { "&Ucirc", 0x000000db, }, + { "&Ucirc;", 0x000000db, }, + { "&Ucy;", 0x00000423, }, + { "&Udblac;", 0x00000170, }, + { "&Ufr;", 0x0001d518, }, + { "&Ugrave", 0x000000d9, }, + { "&Ugrave;", 0x000000d9, }, + { "&Umacr;", 0x0000016a, }, + { "&UnderBar;", 0x0000005f, }, + { "&UnderBrace;", 0x000023df, }, + { "&UnderBracket;", 0x000023b5, }, + { "&UnderParenthesis;", 0x000023dd, }, + { "&Union;", 0x000022c3, }, + { "&UnionPlus;", 0x0000228e, }, + { "&Uogon;", 0x00000172, }, + { "&Uopf;", 0x0001d54c, }, + { "&UpArrow;", 0x00002191, }, + { "&UpArrowBar;", 0x00002912, }, + { "&UpArrowDownArrow;", 0x000021c5, }, + { "&UpDownArrow;", 0x00002195, }, + { "&UpEquilibrium;", 0x0000296e, }, + { "&UpTee;", 0x000022a5, }, + { "&UpTeeArrow;", 0x000021a5, }, + { "&Uparrow;", 0x000021d1, }, + { "&Updownarrow;", 0x000021d5, }, + { "&UpperLeftArrow;", 0x00002196, }, + { "&UpperRightArrow;", 0x00002197, }, + { "&Upsi;", 0x000003d2, }, + { "&Upsilon;", 0x000003a5, }, + { "&Uring;", 0x0000016e, }, + { "&Uscr;", 0x0001d4b0, }, + { "&Utilde;", 0x00000168, }, + { "&Uuml", 0x000000dc, }, + { "&Uuml;", 0x000000dc, }, + { "&VDash;", 0x000022ab, }, + { "&Vbar;", 0x00002aeb, }, + { "&Vcy;", 0x00000412, }, + { "&Vdash;", 0x000022a9, }, + { "&Vdashl;", 0x00002ae6, }, + { "&Vee;", 
0x000022c1, }, + { "&Verbar;", 0x00002016, }, + { "&Vert;", 0x00002016, }, + { "&VerticalBar;", 0x00002223, }, + { "&VerticalLine;", 0x0000007c, }, + { "&VerticalSeparator;", 0x00002758, }, + { "&VerticalTilde;", 0x00002240, }, + { "&VeryThinSpace;", 0x0000200a, }, + { "&Vfr;", 0x0001d519, }, + { "&Vopf;", 0x0001d54d, }, + { "&Vscr;", 0x0001d4b1, }, + { "&Vvdash;", 0x000022aa, }, + { "&Wcirc;", 0x00000174, }, + { "&Wedge;", 0x000022c0, }, + { "&Wfr;", 0x0001d51a, }, + { "&Wopf;", 0x0001d54e, }, + { "&Wscr;", 0x0001d4b2, }, + { "&Xfr;", 0x0001d51b, }, + { "&Xi;", 0x0000039e, }, + { "&Xopf;", 0x0001d54f, }, + { "&Xscr;", 0x0001d4b3, }, + { "&YAcy;", 0x0000042f, }, + { "&YIcy;", 0x00000407, }, + { "&YUcy;", 0x0000042e, }, + { "&Yacute", 0x000000dd, }, + { "&Yacute;", 0x000000dd, }, + { "&Ycirc;", 0x00000176, }, + { "&Ycy;", 0x0000042b, }, + { "&Yfr;", 0x0001d51c, }, + { "&Yopf;", 0x0001d550, }, + { "&Yscr;", 0x0001d4b4, }, + { "&Yuml;", 0x00000178, }, + { "&ZHcy;", 0x00000416, }, + { "&Zacute;", 0x00000179, }, + { "&Zcaron;", 0x0000017d, }, + { "&Zcy;", 0x00000417, }, + { "&Zdot;", 0x0000017b, }, + { "&ZeroWidthSpace;", 0x0000200b, }, + { "&Zeta;", 0x00000396, }, + { "&Zfr;", 0x00002128, }, + { "&Zopf;", 0x00002124, }, + { "&Zscr;", 0x0001d4b5, }, + { "&aacute", 0x000000e1, }, + { "&aacute;", 0x000000e1, }, + { "&abreve;", 0x00000103, }, + { "&ac;", 0x0000223e, }, +// { "&acE;", 8766, 0x00000333, }, + { "&acd;", 0x0000223f, }, + { "&acirc", 0x000000e2, }, + { "&acirc;", 0x000000e2, }, + { "&acute", 0x000000b4, }, + { "&acute;", 0x000000b4, }, + { "&acy;", 0x00000430, }, + { "&aelig", 0x000000e6, }, + { "&aelig;", 0x000000e6, }, + { "&af;", 0x00002061, }, + { "&afr;", 0x0001d51e, }, + { "&agrave", 0x000000e0, }, + { "&agrave;", 0x000000e0, }, + { "&alefsym;", 0x00002135, }, + { "&aleph;", 0x00002135, }, + { "&alpha;", 0x000003b1, }, + { "&amacr;", 0x00000101, }, + { "&amalg;", 0x00002a3f, }, + { "&amp", 0x00000026, }, + { "&amp;", 0x00000026, }, + { "&and;", 
0x00002227, }, + { "&andand;", 0x00002a55, }, + { "&andd;", 0x00002a5c, }, + { "&andslope;", 0x00002a58, }, + { "&andv;", 0x00002a5a, }, + { "&ang;", 0x00002220, }, + { "&ange;", 0x000029a4, }, + { "&angle;", 0x00002220, }, + { "&angmsd;", 0x00002221, }, + { "&angmsdaa;", 0x000029a8, }, + { "&angmsdab;", 0x000029a9, }, + { "&angmsdac;", 0x000029aa, }, + { "&angmsdad;", 0x000029ab, }, + { "&angmsdae;", 0x000029ac, }, + { "&angmsdaf;", 0x000029ad, }, + { "&angmsdag;", 0x000029ae, }, + { "&angmsdah;", 0x000029af, }, + { "&angrt;", 0x0000221f, }, + { "&angrtvb;", 0x000022be, }, + { "&angrtvbd;", 0x0000299d, }, + { "&angsph;", 0x00002222, }, + { "&angst;", 0x000000c5, }, + { "&angzarr;", 0x0000237c, }, + { "&aogon;", 0x00000105, }, + { "&aopf;", 0x0001d552, }, + { "&ap;", 0x00002248, }, + { "&apE;", 0x00002a70, }, + { "&apacir;", 0x00002a6f, }, + { "&ape;", 0x0000224a, }, + { "&apid;", 0x0000224b, }, + { "&apos;", 0x00000027, }, + { "&approx;", 0x00002248, }, + { "&approxeq;", 0x0000224a, }, + { "&aring", 0x000000e5, }, + { "&aring;", 0x000000e5, }, + { "&ascr;", 0x0001d4b6, }, + { "&ast;", 0x0000002a, }, + { "&asymp;", 0x00002248, }, + { "&asympeq;", 0x0000224d, }, + { "&atilde", 0x000000e3, }, + { "&atilde;", 0x000000e3, }, + { "&auml", 0x000000e4, }, + { "&auml;", 0x000000e4, }, + { "&awconint;", 0x00002233, }, + { "&awint;", 0x00002a11, }, + { "&bNot;", 0x00002aed, }, + { "&backcong;", 0x0000224c, }, + { "&backepsilon;", 0x000003f6, }, + { "&backprime;", 0x00002035, }, + { "&backsim;", 0x0000223d, }, + { "&backsimeq;", 0x000022cd, }, + { "&barvee;", 0x000022bd, }, + { "&barwed;", 0x00002305, }, + { "&barwedge;", 0x00002305, }, + { "&bbrk;", 0x000023b5, }, + { "&bbrktbrk;", 0x000023b6, }, + { "&bcong;", 0x0000224c, }, + { "&bcy;", 0x00000431, }, + { "&bdquo;", 0x0000201e, }, + { "&becaus;", 0x00002235, }, + { "&because;", 0x00002235, }, + { "&bemptyv;", 0x000029b0, }, + { "&bepsi;", 0x000003f6, }, + { "&bernou;", 0x0000212c, }, + { "&beta;", 0x000003b2, }, + { 
"&beth;", 0x00002136, }, + { "&between;", 0x0000226c, }, + { "&bfr;", 0x0001d51f, }, + { "&bigcap;", 0x000022c2, }, + { "&bigcirc;", 0x000025ef, }, + { "&bigcup;", 0x000022c3, }, + { "&bigodot;", 0x00002a00, }, + { "&bigoplus;", 0x00002a01, }, + { "&bigotimes;", 0x00002a02, }, + { "&bigsqcup;", 0x00002a06, }, + { "&bigstar;", 0x00002605, }, + { "&bigtriangledown;", 0x000025bd, }, + { "&bigtriangleup;", 0x000025b3, }, + { "&biguplus;", 0x00002a04, }, + { "&bigvee;", 0x000022c1, }, + { "&bigwedge;", 0x000022c0, }, + { "&bkarow;", 0x0000290d, }, + { "&blacklozenge;", 0x000029eb, }, + { "&blacksquare;", 0x000025aa, }, + { "&blacktriangle;", 0x000025b4, }, + { "&blacktriangledown;", 0x000025be, }, + { "&blacktriangleleft;", 0x000025c2, }, + { "&blacktriangleright;", 0x000025b8, }, + { "&blank;", 0x00002423, }, + { "&blk12;", 0x00002592, }, + { "&blk14;", 0x00002591, }, + { "&blk34;", 0x00002593, }, + { "&block;", 0x00002588, }, +// { "&bne;", 61, 0x000020e5, }, +// { "&bnequiv;", 8801, 0x000020e5, }, + { "&bnot;", 0x00002310, }, + { "&bopf;", 0x0001d553, }, + { "&bot;", 0x000022a5, }, + { "&bottom;", 0x000022a5, }, + { "&bowtie;", 0x000022c8, }, + { "&boxDL;", 0x00002557, }, + { "&boxDR;", 0x00002554, }, + { "&boxDl;", 0x00002556, }, + { "&boxDr;", 0x00002553, }, + { "&boxH;", 0x00002550, }, + { "&boxHD;", 0x00002566, }, + { "&boxHU;", 0x00002569, }, + { "&boxHd;", 0x00002564, }, + { "&boxHu;", 0x00002567, }, + { "&boxUL;", 0x0000255d, }, + { "&boxUR;", 0x0000255a, }, + { "&boxUl;", 0x0000255c, }, + { "&boxUr;", 0x00002559, }, + { "&boxV;", 0x00002551, }, + { "&boxVH;", 0x0000256c, }, + { "&boxVL;", 0x00002563, }, + { "&boxVR;", 0x00002560, }, + { "&boxVh;", 0x0000256b, }, + { "&boxVl;", 0x00002562, }, + { "&boxVr;", 0x0000255f, }, + { "&boxbox;", 0x000029c9, }, + { "&boxdL;", 0x00002555, }, + { "&boxdR;", 0x00002552, }, + { "&boxdl;", 0x00002510, }, + { "&boxdr;", 0x0000250c, }, + { "&boxh;", 0x00002500, }, + { "&boxhD;", 0x00002565, }, + { "&boxhU;", 0x00002568, }, + 
{ "&boxhd;", 0x0000252c, }, + { "&boxhu;", 0x00002534, }, + { "&boxminus;", 0x0000229f, }, + { "&boxplus;", 0x0000229e, }, + { "&boxtimes;", 0x000022a0, }, + { "&boxuL;", 0x0000255b, }, + { "&boxuR;", 0x00002558, }, + { "&boxul;", 0x00002518, }, + { "&boxur;", 0x00002514, }, + { "&boxv;", 0x00002502, }, + { "&boxvH;", 0x0000256a, }, + { "&boxvL;", 0x00002561, }, + { "&boxvR;", 0x0000255e, }, + { "&boxvh;", 0x0000253c, }, + { "&boxvl;", 0x00002524, }, + { "&boxvr;", 0x0000251c, }, + { "&bprime;", 0x00002035, }, + { "&breve;", 0x000002d8, }, + { "&brvbar", 0x000000a6, }, + { "&brvbar;", 0x000000a6, }, + { "&bscr;", 0x0001d4b7, }, + { "&bsemi;", 0x0000204f, }, + { "&bsim;", 0x0000223d, }, + { "&bsime;", 0x000022cd, }, + { "&bsol;", 0x0000005c, }, + { "&bsolb;", 0x000029c5, }, + { "&bsolhsub;", 0x000027c8, }, + { "&bull;", 0x00002022, }, + { "&bullet;", 0x00002022, }, + { "&bump;", 0x0000224e, }, + { "&bumpE;", 0x00002aae, }, + { "&bumpe;", 0x0000224f, }, + { "&bumpeq;", 0x0000224f, }, + { "&cacute;", 0x00000107, }, + { "&cap;", 0x00002229, }, + { "&capand;", 0x00002a44, }, + { "&capbrcup;", 0x00002a49, }, + { "&capcap;", 0x00002a4b, }, + { "&capcup;", 0x00002a47, }, + { "&capdot;", 0x00002a40, }, +// { "&caps;", 8745, 0x0000fe00, }, + { "&caret;", 0x00002041, }, + { "&caron;", 0x000002c7, }, + { "&ccaps;", 0x00002a4d, }, + { "&ccaron;", 0x0000010d, }, + { "&ccedil", 0x000000e7, }, + { "&ccedil;", 0x000000e7, }, + { "&ccirc;", 0x00000109, }, + { "&ccups;", 0x00002a4c, }, + { "&ccupssm;", 0x00002a50, }, + { "&cdot;", 0x0000010b, }, + { "&cedil", 0x000000b8, }, + { "&cedil;", 0x000000b8, }, + { "&cemptyv;", 0x000029b2, }, + { "&cent", 0x000000a2, }, + { "&cent;", 0x000000a2, }, + { "&centerdot;", 0x000000b7, }, + { "&cfr;", 0x0001d520, }, + { "&chcy;", 0x00000447, }, + { "&check;", 0x00002713, }, + { "&checkmark;", 0x00002713, }, + { "&chi;", 0x000003c7, }, + { "&cir;", 0x000025cb, }, + { "&cirE;", 0x000029c3, }, + { "&circ;", 0x000002c6, }, + { "&circeq;", 0x00002257, 
}, + { "&circlearrowleft;", 0x000021ba, }, + { "&circlearrowright;", 0x000021bb, }, + { "&circledR;", 0x000000ae, }, + { "&circledS;", 0x000024c8, }, + { "&circledast;", 0x0000229b, }, + { "&circledcirc;", 0x0000229a, }, + { "&circleddash;", 0x0000229d, }, + { "&cire;", 0x00002257, }, + { "&cirfnint;", 0x00002a10, }, + { "&cirmid;", 0x00002aef, }, + { "&cirscir;", 0x000029c2, }, + { "&clubs;", 0x00002663, }, + { "&clubsuit;", 0x00002663, }, + { "&colon;", 0x0000003a, }, + { "&colone;", 0x00002254, }, + { "&coloneq;", 0x00002254, }, + { "&comma;", 0x0000002c, }, + { "&commat;", 0x00000040, }, + { "&comp;", 0x00002201, }, + { "&compfn;", 0x00002218, }, + { "&complement;", 0x00002201, }, + { "&complexes;", 0x00002102, }, + { "&cong;", 0x00002245, }, + { "&congdot;", 0x00002a6d, }, + { "&conint;", 0x0000222e, }, + { "&copf;", 0x0001d554, }, + { "&coprod;", 0x00002210, }, + { "&copy", 0x000000a9, }, + { "&copy;", 0x000000a9, }, + { "&copysr;", 0x00002117, }, + { "&crarr;", 0x000021b5, }, + { "&cross;", 0x00002717, }, + { "&cscr;", 0x0001d4b8, }, + { "&csub;", 0x00002acf, }, + { "&csube;", 0x00002ad1, }, + { "&csup;", 0x00002ad0, }, + { "&csupe;", 0x00002ad2, }, + { "&ctdot;", 0x000022ef, }, + { "&cudarrl;", 0x00002938, }, + { "&cudarrr;", 0x00002935, }, + { "&cuepr;", 0x000022de, }, + { "&cuesc;", 0x000022df, }, + { "&cularr;", 0x000021b6, }, + { "&cularrp;", 0x0000293d, }, + { "&cup;", 0x0000222a, }, + { "&cupbrcap;", 0x00002a48, }, + { "&cupcap;", 0x00002a46, }, + { "&cupcup;", 0x00002a4a, }, + { "&cupdot;", 0x0000228d, }, + { "&cupor;", 0x00002a45, }, +// { "&cups;", 8746, 0x0000fe00, }, + { "&curarr;", 0x000021b7, }, + { "&curarrm;", 0x0000293c, }, + { "&curlyeqprec;", 0x000022de, }, + { "&curlyeqsucc;", 0x000022df, }, + { "&curlyvee;", 0x000022ce, }, + { "&curlywedge;", 0x000022cf, }, + { "&curren", 0x000000a4, }, + { "&curren;", 0x000000a4, }, + { "&curvearrowleft;", 0x000021b6, }, + { "&curvearrowright;", 0x000021b7, }, + { "&cuvee;", 0x000022ce, }, + { 
"&cuwed;", 0x000022cf, }, + { "&cwconint;", 0x00002232, }, + { "&cwint;", 0x00002231, }, + { "&cylcty;", 0x0000232d, }, + { "&dArr;", 0x000021d3, }, + { "&dHar;", 0x00002965, }, + { "&dagger;", 0x00002020, }, + { "&daleth;", 0x00002138, }, + { "&darr;", 0x00002193, }, + { "&dash;", 0x00002010, }, + { "&dashv;", 0x000022a3, }, + { "&dbkarow;", 0x0000290f, }, + { "&dblac;", 0x000002dd, }, + { "&dcaron;", 0x0000010f, }, + { "&dcy;", 0x00000434, }, + { "&dd;", 0x00002146, }, + { "&ddagger;", 0x00002021, }, + { "&ddarr;", 0x000021ca, }, + { "&ddotseq;", 0x00002a77, }, + { "&deg", 0x000000b0, }, + { "&deg;", 0x000000b0, }, + { "&delta;", 0x000003b4, }, + { "&demptyv;", 0x000029b1, }, + { "&dfisht;", 0x0000297f, }, + { "&dfr;", 0x0001d521, }, + { "&dharl;", 0x000021c3, }, + { "&dharr;", 0x000021c2, }, + { "&diam;", 0x000022c4, }, + { "&diamond;", 0x000022c4, }, + { "&diamondsuit;", 0x00002666, }, + { "&diams;", 0x00002666, }, + { "&die;", 0x000000a8, }, + { "&digamma;", 0x000003dd, }, + { "&disin;", 0x000022f2, }, + { "&div;", 0x000000f7, }, + { "&divide", 0x000000f7, }, + { "&divide;", 0x000000f7, }, + { "&divideontimes;", 0x000022c7, }, + { "&divonx;", 0x000022c7, }, + { "&djcy;", 0x00000452, }, + { "&dlcorn;", 0x0000231e, }, + { "&dlcrop;", 0x0000230d, }, + { "&dollar;", 0x00000024, }, + { "&dopf;", 0x0001d555, }, + { "&dot;", 0x000002d9, }, + { "&doteq;", 0x00002250, }, + { "&doteqdot;", 0x00002251, }, + { "&dotminus;", 0x00002238, }, + { "&dotplus;", 0x00002214, }, + { "&dotsquare;", 0x000022a1, }, + { "&doublebarwedge;", 0x00002306, }, + { "&downarrow;", 0x00002193, }, + { "&downdownarrows;", 0x000021ca, }, + { "&downharpoonleft;", 0x000021c3, }, + { "&downharpoonright;", 0x000021c2, }, + { "&drbkarow;", 0x00002910, }, + { "&drcorn;", 0x0000231f, }, + { "&drcrop;", 0x0000230c, }, + { "&dscr;", 0x0001d4b9, }, + { "&dscy;", 0x00000455, }, + { "&dsol;", 0x000029f6, }, + { "&dstrok;", 0x00000111, }, + { "&dtdot;", 0x000022f1, }, + { "&dtri;", 0x000025bf, }, + { 
"&dtrif;", 0x000025be, }, + { "&duarr;", 0x000021f5, }, + { "&duhar;", 0x0000296f, }, + { "&dwangle;", 0x000029a6, }, + { "&dzcy;", 0x0000045f, }, + { "&dzigrarr;", 0x000027ff, }, + { "&eDDot;", 0x00002a77, }, + { "&eDot;", 0x00002251, }, + { "&eacute", 0x000000e9, }, + { "&eacute;", 0x000000e9, }, + { "&easter;", 0x00002a6e, }, + { "&ecaron;", 0x0000011b, }, + { "&ecir;", 0x00002256, }, + { "&ecirc", 0x000000ea, }, + { "&ecirc;", 0x000000ea, }, + { "&ecolon;", 0x00002255, }, + { "&ecy;", 0x0000044d, }, + { "&edot;", 0x00000117, }, + { "&ee;", 0x00002147, }, + { "&efDot;", 0x00002252, }, + { "&efr;", 0x0001d522, }, + { "&eg;", 0x00002a9a, }, + { "&egrave", 0x000000e8, }, + { "&egrave;", 0x000000e8, }, + { "&egs;", 0x00002a96, }, + { "&egsdot;", 0x00002a98, }, + { "&el;", 0x00002a99, }, + { "&elinters;", 0x000023e7, }, + { "&ell;", 0x00002113, }, + { "&els;", 0x00002a95, }, + { "&elsdot;", 0x00002a97, }, + { "&emacr;", 0x00000113, }, + { "&empty;", 0x00002205, }, + { "&emptyset;", 0x00002205, }, + { "&emptyv;", 0x00002205, }, + { "&emsp13;", 0x00002004, }, + { "&emsp14;", 0x00002005, }, + { "&emsp;", 0x00002003, }, + { "&eng;", 0x0000014b, }, + { "&ensp;", 0x00002002, }, + { "&eogon;", 0x00000119, }, + { "&eopf;", 0x0001d556, }, + { "&epar;", 0x000022d5, }, + { "&eparsl;", 0x000029e3, }, + { "&eplus;", 0x00002a71, }, + { "&epsi;", 0x000003b5, }, + { "&epsilon;", 0x000003b5, }, + { "&epsiv;", 0x000003f5, }, + { "&eqcirc;", 0x00002256, }, + { "&eqcolon;", 0x00002255, }, + { "&eqsim;", 0x00002242, }, + { "&eqslantgtr;", 0x00002a96, }, + { "&eqslantless;", 0x00002a95, }, + { "&equals;", 0x0000003d, }, + { "&equest;", 0x0000225f, }, + { "&equiv;", 0x00002261, }, + { "&equivDD;", 0x00002a78, }, + { "&eqvparsl;", 0x000029e5, }, + { "&erDot;", 0x00002253, }, + { "&erarr;", 0x00002971, }, + { "&escr;", 0x0000212f, }, + { "&esdot;", 0x00002250, }, + { "&esim;", 0x00002242, }, + { "&eta;", 0x000003b7, }, + { "&eth", 0x000000f0, }, + { "&eth;", 0x000000f0, }, + { "&euml", 
0x000000eb, }, + { "&euml;", 0x000000eb, }, + { "&euro;", 0x000020ac, }, + { "&excl;", 0x00000021, }, + { "&exist;", 0x00002203, }, + { "&expectation;", 0x00002130, }, + { "&exponentiale;", 0x00002147, }, + { "&fallingdotseq;", 0x00002252, }, + { "&fcy;", 0x00000444, }, + { "&female;", 0x00002640, }, + { "&ffilig;", 0x0000fb03, }, + { "&fflig;", 0x0000fb00, }, + { "&ffllig;", 0x0000fb04, }, + { "&ffr;", 0x0001d523, }, + { "&filig;", 0x0000fb01, }, +// { "&fjlig;", 102, 0x0000006a, }, + { "&flat;", 0x0000266d, }, + { "&fllig;", 0x0000fb02, }, + { "&fltns;", 0x000025b1, }, + { "&fnof;", 0x00000192, }, + { "&fopf;", 0x0001d557, }, + { "&forall;", 0x00002200, }, + { "&fork;", 0x000022d4, }, + { "&forkv;", 0x00002ad9, }, + { "&fpartint;", 0x00002a0d, }, + { "&frac12", 0x000000bd, }, + { "&frac12;", 0x000000bd, }, + { "&frac13;", 0x00002153, }, + { "&frac14", 0x000000bc, }, + { "&frac14;", 0x000000bc, }, + { "&frac15;", 0x00002155, }, + { "&frac16;", 0x00002159, }, + { "&frac18;", 0x0000215b, }, + { "&frac23;", 0x00002154, }, + { "&frac25;", 0x00002156, }, + { "&frac34", 0x000000be, }, + { "&frac34;", 0x000000be, }, + { "&frac35;", 0x00002157, }, + { "&frac38;", 0x0000215c, }, + { "&frac45;", 0x00002158, }, + { "&frac56;", 0x0000215a, }, + { "&frac58;", 0x0000215d, }, + { "&frac78;", 0x0000215e, }, + { "&frasl;", 0x00002044, }, + { "&frown;", 0x00002322, }, + { "&fscr;", 0x0001d4bb, }, + { "&gE;", 0x00002267, }, + { "&gEl;", 0x00002a8c, }, + { "&gacute;", 0x000001f5, }, + { "&gamma;", 0x000003b3, }, + { "&gammad;", 0x000003dd, }, + { "&gap;", 0x00002a86, }, + { "&gbreve;", 0x0000011f, }, + { "&gcirc;", 0x0000011d, }, + { "&gcy;", 0x00000433, }, + { "&gdot;", 0x00000121, }, + { "&ge;", 0x00002265, }, + { "&gel;", 0x000022db, }, + { "&geq;", 0x00002265, }, + { "&geqq;", 0x00002267, }, + { "&geqslant;", 0x00002a7e, }, + { "&ges;", 0x00002a7e, }, + { "&gescc;", 0x00002aa9, }, + { "&gesdot;", 0x00002a80, }, + { "&gesdoto;", 0x00002a82, }, + { "&gesdotol;", 0x00002a84, }, +// 
{ "&gesl;", 8923, 0x0000fe00, }, + { "&gesles;", 0x00002a94, }, + { "&gfr;", 0x0001d524, }, + { "&gg;", 0x0000226b, }, + { "&ggg;", 0x000022d9, }, + { "&gimel;", 0x00002137, }, + { "&gjcy;", 0x00000453, }, + { "&gl;", 0x00002277, }, + { "&glE;", 0x00002a92, }, + { "&gla;", 0x00002aa5, }, + { "&glj;", 0x00002aa4, }, + { "&gnE;", 0x00002269, }, + { "&gnap;", 0x00002a8a, }, + { "&gnapprox;", 0x00002a8a, }, + { "&gne;", 0x00002a88, }, + { "&gneq;", 0x00002a88, }, + { "&gneqq;", 0x00002269, }, + { "&gnsim;", 0x000022e7, }, + { "&gopf;", 0x0001d558, }, + { "&grave;", 0x00000060, }, + { "&gscr;", 0x0000210a, }, + { "&gsim;", 0x00002273, }, + { "&gsime;", 0x00002a8e, }, + { "&gsiml;", 0x00002a90, }, + { "&gt", 0x0000003e, }, + { "&gt;", 0x0000003e, }, + { "&gtcc;", 0x00002aa7, }, + { "&gtcir;", 0x00002a7a, }, + { "&gtdot;", 0x000022d7, }, + { "&gtlPar;", 0x00002995, }, + { "&gtquest;", 0x00002a7c, }, + { "&gtrapprox;", 0x00002a86, }, + { "&gtrarr;", 0x00002978, }, + { "&gtrdot;", 0x000022d7, }, + { "&gtreqless;", 0x000022db, }, + { "&gtreqqless;", 0x00002a8c, }, + { "&gtrless;", 0x00002277, }, + { "&gtrsim;", 0x00002273, }, +// { "&gvertneqq;", 8809, 0x0000fe00, }, +// { "&gvnE;", 8809, 0x0000fe00, }, + { "&hArr;", 0x000021d4, }, + { "&hairsp;", 0x0000200a, }, + { "&half;", 0x000000bd, }, + { "&hamilt;", 0x0000210b, }, + { "&hardcy;", 0x0000044a, }, + { "&harr;", 0x00002194, }, + { "&harrcir;", 0x00002948, }, + { "&harrw;", 0x000021ad, }, + { "&hbar;", 0x0000210f, }, + { "&hcirc;", 0x00000125, }, + { "&hearts;", 0x00002665, }, + { "&heartsuit;", 0x00002665, }, + { "&hellip;", 0x00002026, }, + { "&hercon;", 0x000022b9, }, + { "&hfr;", 0x0001d525, }, + { "&hksearow;", 0x00002925, }, + { "&hkswarow;", 0x00002926, }, + { "&hoarr;", 0x000021ff, }, + { "&homtht;", 0x0000223b, }, + { "&hookleftarrow;", 0x000021a9, }, + { "&hookrightarrow;", 0x000021aa, }, + { "&hopf;", 0x0001d559, }, + { "&horbar;", 0x00002015, }, + { "&hscr;", 0x0001d4bd, }, + { "&hslash;", 0x0000210f, }, + { 
"&hstrok;", 0x00000127, }, + { "&hybull;", 0x00002043, }, + { "&hyphen;", 0x00002010, }, + { "&iacute", 0x000000ed, }, + { "&iacute;", 0x000000ed, }, + { "&ic;", 0x00002063, }, + { "&icirc", 0x000000ee, }, + { "&icirc;", 0x000000ee, }, + { "&icy;", 0x00000438, }, + { "&iecy;", 0x00000435, }, + { "&iexcl", 0x000000a1, }, + { "&iexcl;", 0x000000a1, }, + { "&iff;", 0x000021d4, }, + { "&ifr;", 0x0001d526, }, + { "&igrave", 0x000000ec, }, + { "&igrave;", 0x000000ec, }, + { "&ii;", 0x00002148, }, + { "&iiiint;", 0x00002a0c, }, + { "&iiint;", 0x0000222d, }, + { "&iinfin;", 0x000029dc, }, + { "&iiota;", 0x00002129, }, + { "&ijlig;", 0x00000133, }, + { "&imacr;", 0x0000012b, }, + { "&image;", 0x00002111, }, + { "&imagline;", 0x00002110, }, + { "&imagpart;", 0x00002111, }, + { "&imath;", 0x00000131, }, + { "&imof;", 0x000022b7, }, + { "&imped;", 0x000001b5, }, + { "&in;", 0x00002208, }, + { "&incare;", 0x00002105, }, + { "&infin;", 0x0000221e, }, + { "&infintie;", 0x000029dd, }, + { "&inodot;", 0x00000131, }, + { "&int;", 0x0000222b, }, + { "&intcal;", 0x000022ba, }, + { "&integers;", 0x00002124, }, + { "&intercal;", 0x000022ba, }, + { "&intlarhk;", 0x00002a17, }, + { "&intprod;", 0x00002a3c, }, + { "&iocy;", 0x00000451, }, + { "&iogon;", 0x0000012f, }, + { "&iopf;", 0x0001d55a, }, + { "&iota;", 0x000003b9, }, + { "&iprod;", 0x00002a3c, }, + { "&iquest", 0x000000bf, }, + { "&iquest;", 0x000000bf, }, + { "&iscr;", 0x0001d4be, }, + { "&isin;", 0x00002208, }, + { "&isinE;", 0x000022f9, }, + { "&isindot;", 0x000022f5, }, + { "&isins;", 0x000022f4, }, + { "&isinsv;", 0x000022f3, }, + { "&isinv;", 0x00002208, }, + { "&it;", 0x00002062, }, + { "&itilde;", 0x00000129, }, + { "&iukcy;", 0x00000456, }, + { "&iuml", 0x000000ef, }, + { "&iuml;", 0x000000ef, }, + { "&jcirc;", 0x00000135, }, + { "&jcy;", 0x00000439, }, + { "&jfr;", 0x0001d527, }, + { "&jmath;", 0x00000237, }, + { "&jopf;", 0x0001d55b, }, + { "&jscr;", 0x0001d4bf, }, + { "&jsercy;", 0x00000458, }, + { "&jukcy;", 
0x00000454, }, + { "&kappa;", 0x000003ba, }, + { "&kappav;", 0x000003f0, }, + { "&kcedil;", 0x00000137, }, + { "&kcy;", 0x0000043a, }, + { "&kfr;", 0x0001d528, }, + { "&kgreen;", 0x00000138, }, + { "&khcy;", 0x00000445, }, + { "&kjcy;", 0x0000045c, }, + { "&kopf;", 0x0001d55c, }, + { "&kscr;", 0x0001d4c0, }, + { "&lAarr;", 0x000021da, }, + { "&lArr;", 0x000021d0, }, + { "&lAtail;", 0x0000291b, }, + { "&lBarr;", 0x0000290e, }, + { "&lE;", 0x00002266, }, + { "&lEg;", 0x00002a8b, }, + { "&lHar;", 0x00002962, }, + { "&lacute;", 0x0000013a, }, + { "&laemptyv;", 0x000029b4, }, + { "&lagran;", 0x00002112, }, + { "&lambda;", 0x000003bb, }, + { "&lang;", 0x000027e8, }, + { "&langd;", 0x00002991, }, + { "&langle;", 0x000027e8, }, + { "&lap;", 0x00002a85, }, + { "&laquo", 0x000000ab, }, + { "&laquo;", 0x000000ab, }, + { "&larr;", 0x00002190, }, + { "&larrb;", 0x000021e4, }, + { "&larrbfs;", 0x0000291f, }, + { "&larrfs;", 0x0000291d, }, + { "&larrhk;", 0x000021a9, }, + { "&larrlp;", 0x000021ab, }, + { "&larrpl;", 0x00002939, }, + { "&larrsim;", 0x00002973, }, + { "&larrtl;", 0x000021a2, }, + { "&lat;", 0x00002aab, }, + { "&latail;", 0x00002919, }, + { "&late;", 0x00002aad, }, +// { "&lates;", 10925, 0x0000fe00, }, + { "&lbarr;", 0x0000290c, }, + { "&lbbrk;", 0x00002772, }, + { "&lbrace;", 0x0000007b, }, + { "&lbrack;", 0x0000005b, }, + { "&lbrke;", 0x0000298b, }, + { "&lbrksld;", 0x0000298f, }, + { "&lbrkslu;", 0x0000298d, }, + { "&lcaron;", 0x0000013e, }, + { "&lcedil;", 0x0000013c, }, + { "&lceil;", 0x00002308, }, + { "&lcub;", 0x0000007b, }, + { "&lcy;", 0x0000043b, }, + { "&ldca;", 0x00002936, }, + { "&ldquo;", 0x0000201c, }, + { "&ldquor;", 0x0000201e, }, + { "&ldrdhar;", 0x00002967, }, + { "&ldrushar;", 0x0000294b, }, + { "&ldsh;", 0x000021b2, }, + { "&le;", 0x00002264, }, + { "&leftarrow;", 0x00002190, }, + { "&leftarrowtail;", 0x000021a2, }, + { "&leftharpoondown;", 0x000021bd, }, + { "&leftharpoonup;", 0x000021bc, }, + { "&leftleftarrows;", 0x000021c7, }, + { 
"&leftrightarrow;", 0x00002194, }, + { "&leftrightarrows;", 0x000021c6, }, + { "&leftrightharpoons;", 0x000021cb, }, + { "&leftrightsquigarrow;", 0x000021ad, }, + { "&leftthreetimes;", 0x000022cb, }, + { "&leg;", 0x000022da, }, + { "&leq;", 0x00002264, }, + { "&leqq;", 0x00002266, }, + { "&leqslant;", 0x00002a7d, }, + { "&les;", 0x00002a7d, }, + { "&lescc;", 0x00002aa8, }, + { "&lesdot;", 0x00002a7f, }, + { "&lesdoto;", 0x00002a81, }, + { "&lesdotor;", 0x00002a83, }, +// { "&lesg;", 8922, 0x0000fe00, }, + { "&lesges;", 0x00002a93, }, + { "&lessapprox;", 0x00002a85, }, + { "&lessdot;", 0x000022d6, }, + { "&lesseqgtr;", 0x000022da, }, + { "&lesseqqgtr;", 0x00002a8b, }, + { "&lessgtr;", 0x00002276, }, + { "&lesssim;", 0x00002272, }, + { "&lfisht;", 0x0000297c, }, + { "&lfloor;", 0x0000230a, }, + { "&lfr;", 0x0001d529, }, + { "&lg;", 0x00002276, }, + { "&lgE;", 0x00002a91, }, + { "&lhard;", 0x000021bd, }, + { "&lharu;", 0x000021bc, }, + { "&lharul;", 0x0000296a, }, + { "&lhblk;", 0x00002584, }, + { "&ljcy;", 0x00000459, }, + { "&ll;", 0x0000226a, }, + { "&llarr;", 0x000021c7, }, + { "&llcorner;", 0x0000231e, }, + { "&llhard;", 0x0000296b, }, + { "&lltri;", 0x000025fa, }, + { "&lmidot;", 0x00000140, }, + { "&lmoust;", 0x000023b0, }, + { "&lmoustache;", 0x000023b0, }, + { "&lnE;", 0x00002268, }, + { "&lnap;", 0x00002a89, }, + { "&lnapprox;", 0x00002a89, }, + { "&lne;", 0x00002a87, }, + { "&lneq;", 0x00002a87, }, + { "&lneqq;", 0x00002268, }, + { "&lnsim;", 0x000022e6, }, + { "&loang;", 0x000027ec, }, + { "&loarr;", 0x000021fd, }, + { "&lobrk;", 0x000027e6, }, + { "&longleftarrow;", 0x000027f5, }, + { "&longleftrightarrow;", 0x000027f7, }, + { "&longmapsto;", 0x000027fc, }, + { "&longrightarrow;", 0x000027f6, }, + { "&looparrowleft;", 0x000021ab, }, + { "&looparrowright;", 0x000021ac, }, + { "&lopar;", 0x00002985, }, + { "&lopf;", 0x0001d55d, }, + { "&loplus;", 0x00002a2d, }, + { "&lotimes;", 0x00002a34, }, + { "&lowast;", 0x00002217, }, + { "&lowbar;", 0x0000005f, }, + { 
"&loz;", 0x000025ca, }, + { "&lozenge;", 0x000025ca, }, + { "&lozf;", 0x000029eb, }, + { "&lpar;", 0x00000028, }, + { "&lparlt;", 0x00002993, }, + { "&lrarr;", 0x000021c6, }, + { "&lrcorner;", 0x0000231f, }, + { "&lrhar;", 0x000021cb, }, + { "&lrhard;", 0x0000296d, }, + { "&lrm;", 0x0000200e, }, + { "&lrtri;", 0x000022bf, }, + { "&lsaquo;", 0x00002039, }, + { "&lscr;", 0x0001d4c1, }, + { "&lsh;", 0x000021b0, }, + { "&lsim;", 0x00002272, }, + { "&lsime;", 0x00002a8d, }, + { "&lsimg;", 0x00002a8f, }, + { "&lsqb;", 0x0000005b, }, + { "&lsquo;", 0x00002018, }, + { "&lsquor;", 0x0000201a, }, + { "&lstrok;", 0x00000142, }, + { "&lt", 0x0000003c, }, + { "&lt;", 0x0000003c, }, + { "&ltcc;", 0x00002aa6, }, + { "&ltcir;", 0x00002a79, }, + { "&ltdot;", 0x000022d6, }, + { "&lthree;", 0x000022cb, }, + { "&ltimes;", 0x000022c9, }, + { "&ltlarr;", 0x00002976, }, + { "&ltquest;", 0x00002a7b, }, + { "&ltrPar;", 0x00002996, }, + { "&ltri;", 0x000025c3, }, + { "&ltrie;", 0x000022b4, }, + { "&ltrif;", 0x000025c2, }, + { "&lurdshar;", 0x0000294a, }, + { "&luruhar;", 0x00002966, }, +// { "&lvertneqq;", 8808, 0x0000fe00, }, +// { "&lvnE;", 8808, 0x0000fe00, }, + { "&mDDot;", 0x0000223a, }, + { "&macr", 0x000000af, }, + { "&macr;", 0x000000af, }, + { "&male;", 0x00002642, }, + { "&malt;", 0x00002720, }, + { "&maltese;", 0x00002720, }, + { "&map;", 0x000021a6, }, + { "&mapsto;", 0x000021a6, }, + { "&mapstodown;", 0x000021a7, }, + { "&mapstoleft;", 0x000021a4, }, + { "&mapstoup;", 0x000021a5, }, + { "&marker;", 0x000025ae, }, + { "&mcomma;", 0x00002a29, }, + { "&mcy;", 0x0000043c, }, + { "&mdash;", 0x00002014, }, + { "&measuredangle;", 0x00002221, }, + { "&mfr;", 0x0001d52a, }, + { "&mho;", 0x00002127, }, + { "&micro", 0x000000b5, }, + { "&micro;", 0x000000b5, }, + { "&mid;", 0x00002223, }, + { "&midast;", 0x0000002a, }, + { "&midcir;", 0x00002af0, }, + { "&middot", 0x000000b7, }, + { "&middot;", 0x000000b7, }, + { "&minus;", 0x00002212, }, + { "&minusb;", 0x0000229f, }, + { "&minusd;", 
0x00002238, }, + { "&minusdu;", 0x00002a2a, }, + { "&mlcp;", 0x00002adb, }, + { "&mldr;", 0x00002026, }, + { "&mnplus;", 0x00002213, }, + { "&models;", 0x000022a7, }, + { "&mopf;", 0x0001d55e, }, + { "&mp;", 0x00002213, }, + { "&mscr;", 0x0001d4c2, }, + { "&mstpos;", 0x0000223e, }, + { "&mu;", 0x000003bc, }, + { "&multimap;", 0x000022b8, }, + { "&mumap;", 0x000022b8, }, +// { "&nGg;", 8921, 0x00000338, }, +// { "&nGt;", 8811, 0x000020d2, }, +// { "&nGtv;", 8811, 0x00000338, }, + { "&nLeftarrow;", 0x000021cd, }, + { "&nLeftrightarrow;", 0x000021ce, }, +// { "&nLl;", 8920, 0x00000338, }, +// { "&nLt;", 8810, 0x000020d2, }, +// { "&nLtv;", 8810, 0x00000338, }, + { "&nRightarrow;", 0x000021cf, }, + { "&nVDash;", 0x000022af, }, + { "&nVdash;", 0x000022ae, }, + { "&nabla;", 0x00002207, }, + { "&nacute;", 0x00000144, }, +// { "&nang;", 8736, 0x000020d2, }, + { "&nap;", 0x00002249, }, +// { "&napE;", 10864, 0x00000338, }, +// { "&napid;", 8779, 0x00000338, }, + { "&napos;", 0x00000149, }, + { "&napprox;", 0x00002249, }, + { "&natur;", 0x0000266e, }, + { "&natural;", 0x0000266e, }, + { "&naturals;", 0x00002115, }, + { "&nbsp", 0x000000a0, }, + { "&nbsp;", 0x000000a0, }, +// { "&nbump;", 8782, 0x00000338, }, +// { "&nbumpe;", 8783, 0x00000338, }, + { "&ncap;", 0x00002a43, }, + { "&ncaron;", 0x00000148, }, + { "&ncedil;", 0x00000146, }, + { "&ncong;", 0x00002247, }, +// { "&ncongdot;", 10861, 0x00000338, }, + { "&ncup;", 0x00002a42, }, + { "&ncy;", 0x0000043d, }, + { "&ndash;", 0x00002013, }, + { "&ne;", 0x00002260, }, + { "&neArr;", 0x000021d7, }, + { "&nearhk;", 0x00002924, }, + { "&nearr;", 0x00002197, }, + { "&nearrow;", 0x00002197, }, +// { "&nedot;", 8784, 0x00000338, }, + { "&nequiv;", 0x00002262, }, + { "&nesear;", 0x00002928, }, +// { "&nesim;", 8770, 0x00000338, }, + { "&nexist;", 0x00002204, }, + { "&nexists;", 0x00002204, }, + { "&nfr;", 0x0001d52b, }, +// { "&ngE;", 8807, 0x00000338, }, + { "&nge;", 0x00002271, }, + { "&ngeq;", 0x00002271, }, +// { "&ngeqq;", 
8807, 0x00000338, }, +// { "&ngeqslant;", 10878, 0x00000338, }, +// { "&nges;", 10878, 0x00000338, }, + { "&ngsim;", 0x00002275, }, + { "&ngt;", 0x0000226f, }, + { "&ngtr;", 0x0000226f, }, + { "&nhArr;", 0x000021ce, }, + { "&nharr;", 0x000021ae, }, + { "&nhpar;", 0x00002af2, }, + { "&ni;", 0x0000220b, }, + { "&nis;", 0x000022fc, }, + { "&nisd;", 0x000022fa, }, + { "&niv;", 0x0000220b, }, + { "&njcy;", 0x0000045a, }, + { "&nlArr;", 0x000021cd, }, +// { "&nlE;", 8806, 0x00000338, }, + { "&nlarr;", 0x0000219a, }, + { "&nldr;", 0x00002025, }, + { "&nle;", 0x00002270, }, + { "&nleftarrow;", 0x0000219a, }, + { "&nleftrightarrow;", 0x000021ae, }, + { "&nleq;", 0x00002270, }, +// { "&nleqq;", 8806, 0x00000338, }, +// { "&nleqslant;", 10877, 0x00000338, }, +// { "&nles;", 10877, 0x00000338, }, + { "&nless;", 0x0000226e, }, + { "&nlsim;", 0x00002274, }, + { "&nlt;", 0x0000226e, }, + { "&nltri;", 0x000022ea, }, + { "&nltrie;", 0x000022ec, }, + { "&nmid;", 0x00002224, }, + { "&nopf;", 0x0001d55f, }, + { "&not", 0x000000ac, }, + { "&not;", 0x000000ac, }, + { "&notin;", 0x00002209, }, +// { "&notinE;", 8953, 0x00000338, }, +// { "&notindot;", 8949, 0x00000338, }, + { "&notinva;", 0x00002209, }, + { "&notinvb;", 0x000022f7, }, + { "&notinvc;", 0x000022f6, }, + { "&notni;", 0x0000220c, }, + { "&notniva;", 0x0000220c, }, + { "&notnivb;", 0x000022fe, }, + { "&notnivc;", 0x000022fd, }, + { "&npar;", 0x00002226, }, + { "&nparallel;", 0x00002226, }, +// { "&nparsl;", 11005, 0x000020e5, }, +// { "&npart;", 8706, 0x00000338, }, + { "&npolint;", 0x00002a14, }, + { "&npr;", 0x00002280, }, + { "&nprcue;", 0x000022e0, }, +// { "&npre;", 10927, 0x00000338, }, + { "&nprec;", 0x00002280, }, +// { "&npreceq;", 10927, 0x00000338, }, + { "&nrArr;", 0x000021cf, }, + { "&nrarr;", 0x0000219b, }, +// { "&nrarrc;", 10547, 0x00000338, }, +// { "&nrarrw;", 8605, 0x00000338, }, + { "&nrightarrow;", 0x0000219b, }, + { "&nrtri;", 0x000022eb, }, + { "&nrtrie;", 0x000022ed, }, + { "&nsc;", 0x00002281, }, + { 
"&nsccue;", 0x000022e1, }, +// { "&nsce;", 10928, 0x00000338, }, + { "&nscr;", 0x0001d4c3, }, + { "&nshortmid;", 0x00002224, }, + { "&nshortparallel;", 0x00002226, }, + { "&nsim;", 0x00002241, }, + { "&nsime;", 0x00002244, }, + { "&nsimeq;", 0x00002244, }, + { "&nsmid;", 0x00002224, }, + { "&nspar;", 0x00002226, }, + { "&nsqsube;", 0x000022e2, }, + { "&nsqsupe;", 0x000022e3, }, + { "&nsub;", 0x00002284, }, +// { "&nsubE;", 10949, 0x00000338, }, + { "&nsube;", 0x00002288, }, +// { "&nsubset;", 8834, 0x000020d2, }, + { "&nsubseteq;", 0x00002288, }, +// { "&nsubseteqq;", 10949, 0x00000338, }, + { "&nsucc;", 0x00002281, }, +// { "&nsucceq;", 10928, 0x00000338, }, + { "&nsup;", 0x00002285, }, +// { "&nsupE;", 10950, 0x00000338, }, + { "&nsupe;", 0x00002289, }, +// { "&nsupset;", 8835, 0x000020d2, }, + { "&nsupseteq;", 0x00002289, }, +// { "&nsupseteqq;", 10950, 0x00000338, }, + { "&ntgl;", 0x00002279, }, + { "&ntilde", 0x000000f1, }, + { "&ntilde;", 0x000000f1, }, + { "&ntlg;", 0x00002278, }, + { "&ntriangleleft;", 0x000022ea, }, + { "&ntrianglelefteq;", 0x000022ec, }, + { "&ntriangleright;", 0x000022eb, }, + { "&ntrianglerighteq;", 0x000022ed, }, + { "&nu;", 0x000003bd, }, + { "&num;", 0x00000023, }, + { "&numero;", 0x00002116, }, + { "&numsp;", 0x00002007, }, + { "&nvDash;", 0x000022ad, }, + { "&nvHarr;", 0x00002904, }, +// { "&nvap;", 8781, 0x000020d2, }, + { "&nvdash;", 0x000022ac, }, +// { "&nvge;", 8805, 0x000020d2, }, +// { "&nvgt;", 62, 0x000020d2, }, + { "&nvinfin;", 0x000029de, }, + { "&nvlArr;", 0x00002902, }, +// { "&nvle;", 8804, 0x000020d2, }, +// { "&nvlt;", 60, 0x000020d2, }, +// { "&nvltrie;", 8884, 0x000020d2, }, + { "&nvrArr;", 0x00002903, }, +// { "&nvrtrie;", 8885, 0x000020d2, }, +// { "&nvsim;", 8764, 0x000020d2, }, + { "&nwArr;", 0x000021d6, }, + { "&nwarhk;", 0x00002923, }, + { "&nwarr;", 0x00002196, }, + { "&nwarrow;", 0x00002196, }, + { "&nwnear;", 0x00002927, }, + { "&oS;", 0x000024c8, }, + { "&oacute", 0x000000f3, }, + { "&oacute;", 
0x000000f3, }, + { "&oast;", 0x0000229b, }, + { "&ocir;", 0x0000229a, }, + { "&ocirc", 0x000000f4, }, + { "&ocirc;", 0x000000f4, }, + { "&ocy;", 0x0000043e, }, + { "&odash;", 0x0000229d, }, + { "&odblac;", 0x00000151, }, + { "&odiv;", 0x00002a38, }, + { "&odot;", 0x00002299, }, + { "&odsold;", 0x000029bc, }, + { "&oelig;", 0x00000153, }, + { "&ofcir;", 0x000029bf, }, + { "&ofr;", 0x0001d52c, }, + { "&ogon;", 0x000002db, }, + { "&ograve", 0x000000f2, }, + { "&ograve;", 0x000000f2, }, + { "&ogt;", 0x000029c1, }, + { "&ohbar;", 0x000029b5, }, + { "&ohm;", 0x000003a9, }, + { "&oint;", 0x0000222e, }, + { "&olarr;", 0x000021ba, }, + { "&olcir;", 0x000029be, }, + { "&olcross;", 0x000029bb, }, + { "&oline;", 0x0000203e, }, + { "&olt;", 0x000029c0, }, + { "&omacr;", 0x0000014d, }, + { "&omega;", 0x000003c9, }, + { "&omicron;", 0x000003bf, }, + { "&omid;", 0x000029b6, }, + { "&ominus;", 0x00002296, }, + { "&oopf;", 0x0001d560, }, + { "&opar;", 0x000029b7, }, + { "&operp;", 0x000029b9, }, + { "&oplus;", 0x00002295, }, + { "&or;", 0x00002228, }, + { "&orarr;", 0x000021bb, }, + { "&ord;", 0x00002a5d, }, + { "&order;", 0x00002134, }, + { "&orderof;", 0x00002134, }, + { "&ordf", 0x000000aa, }, + { "&ordf;", 0x000000aa, }, + { "&ordm", 0x000000ba, }, + { "&ordm;", 0x000000ba, }, + { "&origof;", 0x000022b6, }, + { "&oror;", 0x00002a56, }, + { "&orslope;", 0x00002a57, }, + { "&orv;", 0x00002a5b, }, + { "&oscr;", 0x00002134, }, + { "&oslash", 0x000000f8, }, + { "&oslash;", 0x000000f8, }, + { "&osol;", 0x00002298, }, + { "&otilde", 0x000000f5, }, + { "&otilde;", 0x000000f5, }, + { "&otimes;", 0x00002297, }, + { "&otimesas;", 0x00002a36, }, + { "&ouml", 0x000000f6, }, + { "&ouml;", 0x000000f6, }, + { "&ovbar;", 0x0000233d, }, + { "&par;", 0x00002225, }, + { "&para", 0x000000b6, }, + { "&para;", 0x000000b6, }, + { "&parallel;", 0x00002225, }, + { "&parsim;", 0x00002af3, }, + { "&parsl;", 0x00002afd, }, + { "&part;", 0x00002202, }, + { "&pcy;", 0x0000043f, }, + { "&percnt;", 0x00000025, 
}, + { "&period;", 0x0000002e, }, + { "&permil;", 0x00002030, }, + { "&perp;", 0x000022a5, }, + { "&pertenk;", 0x00002031, }, + { "&pfr;", 0x0001d52d, }, + { "&phi;", 0x000003c6, }, + { "&phiv;", 0x000003d5, }, + { "&phmmat;", 0x00002133, }, + { "&phone;", 0x0000260e, }, + { "&pi;", 0x000003c0, }, + { "&pitchfork;", 0x000022d4, }, + { "&piv;", 0x000003d6, }, + { "&planck;", 0x0000210f, }, + { "&planckh;", 0x0000210e, }, + { "&plankv;", 0x0000210f, }, + { "&plus;", 0x0000002b, }, + { "&plusacir;", 0x00002a23, }, + { "&plusb;", 0x0000229e, }, + { "&pluscir;", 0x00002a22, }, + { "&plusdo;", 0x00002214, }, + { "&plusdu;", 0x00002a25, }, + { "&pluse;", 0x00002a72, }, + { "&plusmn", 0x000000b1, }, + { "&plusmn;", 0x000000b1, }, + { "&plussim;", 0x00002a26, }, + { "&plustwo;", 0x00002a27, }, + { "&pm;", 0x000000b1, }, + { "&pointint;", 0x00002a15, }, + { "&popf;", 0x0001d561, }, + { "&pound", 0x000000a3, }, + { "&pound;", 0x000000a3, }, + { "&pr;", 0x0000227a, }, + { "&prE;", 0x00002ab3, }, + { "&prap;", 0x00002ab7, }, + { "&prcue;", 0x0000227c, }, + { "&pre;", 0x00002aaf, }, + { "&prec;", 0x0000227a, }, + { "&precapprox;", 0x00002ab7, }, + { "&preccurlyeq;", 0x0000227c, }, + { "&preceq;", 0x00002aaf, }, + { "&precnapprox;", 0x00002ab9, }, + { "&precneqq;", 0x00002ab5, }, + { "&precnsim;", 0x000022e8, }, + { "&precsim;", 0x0000227e, }, + { "&prime;", 0x00002032, }, + { "&primes;", 0x00002119, }, + { "&prnE;", 0x00002ab5, }, + { "&prnap;", 0x00002ab9, }, + { "&prnsim;", 0x000022e8, }, + { "&prod;", 0x0000220f, }, + { "&profalar;", 0x0000232e, }, + { "&profline;", 0x00002312, }, + { "&profsurf;", 0x00002313, }, + { "&prop;", 0x0000221d, }, + { "&propto;", 0x0000221d, }, + { "&prsim;", 0x0000227e, }, + { "&prurel;", 0x000022b0, }, + { "&pscr;", 0x0001d4c5, }, + { "&psi;", 0x000003c8, }, + { "&puncsp;", 0x00002008, }, + { "&qfr;", 0x0001d52e, }, + { "&qint;", 0x00002a0c, }, + { "&qopf;", 0x0001d562, }, + { "&qprime;", 0x00002057, }, + { "&qscr;", 0x0001d4c6, }, + { 
"&quaternions;", 0x0000210d, }, + { "&quatint;", 0x00002a16, }, + { "&quest;", 0x0000003f, }, + { "&questeq;", 0x0000225f, }, + { "&quot", 0x00000022, }, + { "&quot;", 0x00000022, }, + { "&rAarr;", 0x000021db, }, + { "&rArr;", 0x000021d2, }, + { "&rAtail;", 0x0000291c, }, + { "&rBarr;", 0x0000290f, }, + { "&rHar;", 0x00002964, }, +// { "&race;", 8765, 0x00000331, }, + { "&racute;", 0x00000155, }, + { "&radic;", 0x0000221a, }, + { "&raemptyv;", 0x000029b3, }, + { "&rang;", 0x000027e9, }, + { "&rangd;", 0x00002992, }, + { "&range;", 0x000029a5, }, + { "&rangle;", 0x000027e9, }, + { "&raquo", 0x000000bb, }, + { "&raquo;", 0x000000bb, }, + { "&rarr;", 0x00002192, }, + { "&rarrap;", 0x00002975, }, + { "&rarrb;", 0x000021e5, }, + { "&rarrbfs;", 0x00002920, }, + { "&rarrc;", 0x00002933, }, + { "&rarrfs;", 0x0000291e, }, + { "&rarrhk;", 0x000021aa, }, + { "&rarrlp;", 0x000021ac, }, + { "&rarrpl;", 0x00002945, }, + { "&rarrsim;", 0x00002974, }, + { "&rarrtl;", 0x000021a3, }, + { "&rarrw;", 0x0000219d, }, + { "&ratail;", 0x0000291a, }, + { "&ratio;", 0x00002236, }, + { "&rationals;", 0x0000211a, }, + { "&rbarr;", 0x0000290d, }, + { "&rbbrk;", 0x00002773, }, + { "&rbrace;", 0x0000007d, }, + { "&rbrack;", 0x0000005d, }, + { "&rbrke;", 0x0000298c, }, + { "&rbrksld;", 0x0000298e, }, + { "&rbrkslu;", 0x00002990, }, + { "&rcaron;", 0x00000159, }, + { "&rcedil;", 0x00000157, }, + { "&rceil;", 0x00002309, }, + { "&rcub;", 0x0000007d, }, + { "&rcy;", 0x00000440, }, + { "&rdca;", 0x00002937, }, + { "&rdldhar;", 0x00002969, }, + { "&rdquo;", 0x0000201d, }, + { "&rdquor;", 0x0000201d, }, + { "&rdsh;", 0x000021b3, }, + { "&real;", 0x0000211c, }, + { "&realine;", 0x0000211b, }, + { "&realpart;", 0x0000211c, }, + { "&reals;", 0x0000211d, }, + { "&rect;", 0x000025ad, }, + { "&reg", 0x000000ae, }, + { "&reg;", 0x000000ae, }, + { "&rfisht;", 0x0000297d, }, + { "&rfloor;", 0x0000230b, }, + { "&rfr;", 0x0001d52f, }, + { "&rhard;", 0x000021c1, }, + { "&rharu;", 0x000021c0, }, + { "&rharul;", 
0x0000296c, }, + { "&rho;", 0x000003c1, }, + { "&rhov;", 0x000003f1, }, + { "&rightarrow;", 0x00002192, }, + { "&rightarrowtail;", 0x000021a3, }, + { "&rightharpoondown;", 0x000021c1, }, + { "&rightharpoonup;", 0x000021c0, }, + { "&rightleftarrows;", 0x000021c4, }, + { "&rightleftharpoons;", 0x000021cc, }, + { "&rightrightarrows;", 0x000021c9, }, + { "&rightsquigarrow;", 0x0000219d, }, + { "&rightthreetimes;", 0x000022cc, }, + { "&ring;", 0x000002da, }, + { "&risingdotseq;", 0x00002253, }, + { "&rlarr;", 0x000021c4, }, + { "&rlhar;", 0x000021cc, }, + { "&rlm;", 0x0000200f, }, + { "&rmoust;", 0x000023b1, }, + { "&rmoustache;", 0x000023b1, }, + { "&rnmid;", 0x00002aee, }, + { "&roang;", 0x000027ed, }, + { "&roarr;", 0x000021fe, }, + { "&robrk;", 0x000027e7, }, + { "&ropar;", 0x00002986, }, + { "&ropf;", 0x0001d563, }, + { "&roplus;", 0x00002a2e, }, + { "&rotimes;", 0x00002a35, }, + { "&rpar;", 0x00000029, }, + { "&rpargt;", 0x00002994, }, + { "&rppolint;", 0x00002a12, }, + { "&rrarr;", 0x000021c9, }, + { "&rsaquo;", 0x0000203a, }, + { "&rscr;", 0x0001d4c7, }, + { "&rsh;", 0x000021b1, }, + { "&rsqb;", 0x0000005d, }, + { "&rsquo;", 0x00002019, }, + { "&rsquor;", 0x00002019, }, + { "&rthree;", 0x000022cc, }, + { "&rtimes;", 0x000022ca, }, + { "&rtri;", 0x000025b9, }, + { "&rtrie;", 0x000022b5, }, + { "&rtrif;", 0x000025b8, }, + { "&rtriltri;", 0x000029ce, }, + { "&ruluhar;", 0x00002968, }, + { "&rx;", 0x0000211e, }, + { "&sacute;", 0x0000015b, }, + { "&sbquo;", 0x0000201a, }, + { "&sc;", 0x0000227b, }, + { "&scE;", 0x00002ab4, }, + { "&scap;", 0x00002ab8, }, + { "&scaron;", 0x00000161, }, + { "&sccue;", 0x0000227d, }, + { "&sce;", 0x00002ab0, }, + { "&scedil;", 0x0000015f, }, + { "&scirc;", 0x0000015d, }, + { "&scnE;", 0x00002ab6, }, + { "&scnap;", 0x00002aba, }, + { "&scnsim;", 0x000022e9, }, + { "&scpolint;", 0x00002a13, }, + { "&scsim;", 0x0000227f, }, + { "&scy;", 0x00000441, }, + { "&sdot;", 0x000022c5, }, + { "&sdotb;", 0x000022a1, }, + { "&sdote;", 0x00002a66, }, 
+ { "&seArr;", 0x000021d8, }, + { "&searhk;", 0x00002925, }, + { "&searr;", 0x00002198, }, + { "&searrow;", 0x00002198, }, + { "&sect", 0x000000a7, }, + { "&sect;", 0x000000a7, }, + { "&semi;", 0x0000003b, }, + { "&seswar;", 0x00002929, }, + { "&setminus;", 0x00002216, }, + { "&setmn;", 0x00002216, }, + { "&sext;", 0x00002736, }, + { "&sfr;", 0x0001d530, }, + { "&sfrown;", 0x00002322, }, + { "&sharp;", 0x0000266f, }, + { "&shchcy;", 0x00000449, }, + { "&shcy;", 0x00000448, }, + { "&shortmid;", 0x00002223, }, + { "&shortparallel;", 0x00002225, }, + { "&shy", 0x000000ad, }, + { "&shy;", 0x000000ad, }, + { "&sigma;", 0x000003c3, }, + { "&sigmaf;", 0x000003c2, }, + { "&sigmav;", 0x000003c2, }, + { "&sim;", 0x0000223c, }, + { "&simdot;", 0x00002a6a, }, + { "&sime;", 0x00002243, }, + { "&simeq;", 0x00002243, }, + { "&simg;", 0x00002a9e, }, + { "&simgE;", 0x00002aa0, }, + { "&siml;", 0x00002a9d, }, + { "&simlE;", 0x00002a9f, }, + { "&simne;", 0x00002246, }, + { "&simplus;", 0x00002a24, }, + { "&simrarr;", 0x00002972, }, + { "&slarr;", 0x00002190, }, + { "&smallsetminus;", 0x00002216, }, + { "&smashp;", 0x00002a33, }, + { "&smeparsl;", 0x000029e4, }, + { "&smid;", 0x00002223, }, + { "&smile;", 0x00002323, }, + { "&smt;", 0x00002aaa, }, + { "&smte;", 0x00002aac, }, +// { "&smtes;", 10924, 0x0000fe00, }, + { "&softcy;", 0x0000044c, }, + { "&sol;", 0x0000002f, }, + { "&solb;", 0x000029c4, }, + { "&solbar;", 0x0000233f, }, + { "&sopf;", 0x0001d564, }, + { "&spades;", 0x00002660, }, + { "&spadesuit;", 0x00002660, }, + { "&spar;", 0x00002225, }, + { "&sqcap;", 0x00002293, }, +// { "&sqcaps;", 8851, 0x0000fe00, }, + { "&sqcup;", 0x00002294, }, +// { "&sqcups;", 8852, 0x0000fe00, }, + { "&sqsub;", 0x0000228f, }, + { "&sqsube;", 0x00002291, }, + { "&sqsubset;", 0x0000228f, }, + { "&sqsubseteq;", 0x00002291, }, + { "&sqsup;", 0x00002290, }, + { "&sqsupe;", 0x00002292, }, + { "&sqsupset;", 0x00002290, }, + { "&sqsupseteq;", 0x00002292, }, + { "&squ;", 0x000025a1, }, + { "&square;", 
0x000025a1, }, + { "&squarf;", 0x000025aa, }, + { "&squf;", 0x000025aa, }, + { "&srarr;", 0x00002192, }, + { "&sscr;", 0x0001d4c8, }, + { "&ssetmn;", 0x00002216, }, + { "&ssmile;", 0x00002323, }, + { "&sstarf;", 0x000022c6, }, + { "&star;", 0x00002606, }, + { "&starf;", 0x00002605, }, + { "&straightepsilon;", 0x000003f5, }, + { "&straightphi;", 0x000003d5, }, + { "&strns;", 0x000000af, }, + { "&sub;", 0x00002282, }, + { "&subE;", 0x00002ac5, }, + { "&subdot;", 0x00002abd, }, + { "&sube;", 0x00002286, }, + { "&subedot;", 0x00002ac3, }, + { "&submult;", 0x00002ac1, }, + { "&subnE;", 0x00002acb, }, + { "&subne;", 0x0000228a, }, + { "&subplus;", 0x00002abf, }, + { "&subrarr;", 0x00002979, }, + { "&subset;", 0x00002282, }, + { "&subseteq;", 0x00002286, }, + { "&subseteqq;", 0x00002ac5, }, + { "&subsetneq;", 0x0000228a, }, + { "&subsetneqq;", 0x00002acb, }, + { "&subsim;", 0x00002ac7, }, + { "&subsub;", 0x00002ad5, }, + { "&subsup;", 0x00002ad3, }, + { "&succ;", 0x0000227b, }, + { "&succapprox;", 0x00002ab8, }, + { "&succcurlyeq;", 0x0000227d, }, + { "&succeq;", 0x00002ab0, }, + { "&succnapprox;", 0x00002aba, }, + { "&succneqq;", 0x00002ab6, }, + { "&succnsim;", 0x000022e9, }, + { "&succsim;", 0x0000227f, }, + { "&sum;", 0x00002211, }, + { "&sung;", 0x0000266a, }, + { "&sup1", 0x000000b9, }, + { "&sup1;", 0x000000b9, }, + { "&sup2", 0x000000b2, }, + { "&sup2;", 0x000000b2, }, + { "&sup3", 0x000000b3, }, + { "&sup3;", 0x000000b3, }, + { "&sup;", 0x00002283, }, + { "&supE;", 0x00002ac6, }, + { "&supdot;", 0x00002abe, }, + { "&supdsub;", 0x00002ad8, }, + { "&supe;", 0x00002287, }, + { "&supedot;", 0x00002ac4, }, + { "&suphsol;", 0x000027c9, }, + { "&suphsub;", 0x00002ad7, }, + { "&suplarr;", 0x0000297b, }, + { "&supmult;", 0x00002ac2, }, + { "&supnE;", 0x00002acc, }, + { "&supne;", 0x0000228b, }, + { "&supplus;", 0x00002ac0, }, + { "&supset;", 0x00002283, }, + { "&supseteq;", 0x00002287, }, + { "&supseteqq;", 0x00002ac6, }, + { "&supsetneq;", 0x0000228b, }, + { 
"&supsetneqq;", 0x00002acc, }, + { "&supsim;", 0x00002ac8, }, + { "&supsub;", 0x00002ad4, }, + { "&supsup;", 0x00002ad6, }, + { "&swArr;", 0x000021d9, }, + { "&swarhk;", 0x00002926, }, + { "&swarr;", 0x00002199, }, + { "&swarrow;", 0x00002199, }, + { "&swnwar;", 0x0000292a, }, + { "&szlig", 0x000000df, }, + { "&szlig;", 0x000000df, }, + { "&target;", 0x00002316, }, + { "&tau;", 0x000003c4, }, + { "&tbrk;", 0x000023b4, }, + { "&tcaron;", 0x00000165, }, + { "&tcedil;", 0x00000163, }, + { "&tcy;", 0x00000442, }, + { "&tdot;", 0x000020db, }, + { "&telrec;", 0x00002315, }, + { "&tfr;", 0x0001d531, }, + { "&there4;", 0x00002234, }, + { "&therefore;", 0x00002234, }, + { "&theta;", 0x000003b8, }, + { "&thetasym;", 0x000003d1, }, + { "&thetav;", 0x000003d1, }, + { "&thickapprox;", 0x00002248, }, + { "&thicksim;", 0x0000223c, }, + { "&thinsp;", 0x00002009, }, + { "&thkap;", 0x00002248, }, + { "&thksim;", 0x0000223c, }, + { "&thorn", 0x000000fe, }, + { "&thorn;", 0x000000fe, }, + { "&tilde;", 0x000002dc, }, + { "&times", 0x000000d7, }, + { "&times;", 0x000000d7, }, + { "&timesb;", 0x000022a0, }, + { "&timesbar;", 0x00002a31, }, + { "&timesd;", 0x00002a30, }, + { "&tint;", 0x0000222d, }, + { "&toea;", 0x00002928, }, + { "&top;", 0x000022a4, }, + { "&topbot;", 0x00002336, }, + { "&topcir;", 0x00002af1, }, + { "&topf;", 0x0001d565, }, + { "&topfork;", 0x00002ada, }, + { "&tosa;", 0x00002929, }, + { "&tprime;", 0x00002034, }, + { "&trade;", 0x00002122, }, + { "&triangle;", 0x000025b5, }, + { "&triangledown;", 0x000025bf, }, + { "&triangleleft;", 0x000025c3, }, + { "&trianglelefteq;", 0x000022b4, }, + { "&triangleq;", 0x0000225c, }, + { "&triangleright;", 0x000025b9, }, + { "&trianglerighteq;", 0x000022b5, }, + { "&tridot;", 0x000025ec, }, + { "&trie;", 0x0000225c, }, + { "&triminus;", 0x00002a3a, }, + { "&triplus;", 0x00002a39, }, + { "&trisb;", 0x000029cd, }, + { "&tritime;", 0x00002a3b, }, + { "&trpezium;", 0x000023e2, }, + { "&tscr;", 0x0001d4c9, }, + { "&tscy;", 0x00000446, 
}, + { "&tshcy;", 0x0000045b, }, + { "&tstrok;", 0x00000167, }, + { "&twixt;", 0x0000226c, }, + { "&twoheadleftarrow;", 0x0000219e, }, + { "&twoheadrightarrow;", 0x000021a0, }, + { "&uArr;", 0x000021d1, }, + { "&uHar;", 0x00002963, }, + { "&uacute", 0x000000fa, }, + { "&uacute;", 0x000000fa, }, + { "&uarr;", 0x00002191, }, + { "&ubrcy;", 0x0000045e, }, + { "&ubreve;", 0x0000016d, }, + { "&ucirc", 0x000000fb, }, + { "&ucirc;", 0x000000fb, }, + { "&ucy;", 0x00000443, }, + { "&udarr;", 0x000021c5, }, + { "&udblac;", 0x00000171, }, + { "&udhar;", 0x0000296e, }, + { "&ufisht;", 0x0000297e, }, + { "&ufr;", 0x0001d532, }, + { "&ugrave", 0x000000f9, }, + { "&ugrave;", 0x000000f9, }, + { "&uharl;", 0x000021bf, }, + { "&uharr;", 0x000021be, }, + { "&uhblk;", 0x00002580, }, + { "&ulcorn;", 0x0000231c, }, + { "&ulcorner;", 0x0000231c, }, + { "&ulcrop;", 0x0000230f, }, + { "&ultri;", 0x000025f8, }, + { "&umacr;", 0x0000016b, }, + { "&uml", 0x000000a8, }, + { "&uml;", 0x000000a8, }, + { "&uogon;", 0x00000173, }, + { "&uopf;", 0x0001d566, }, + { "&uparrow;", 0x00002191, }, + { "&updownarrow;", 0x00002195, }, + { "&upharpoonleft;", 0x000021bf, }, + { "&upharpoonright;", 0x000021be, }, + { "&uplus;", 0x0000228e, }, + { "&upsi;", 0x000003c5, }, + { "&upsih;", 0x000003d2, }, + { "&upsilon;", 0x000003c5, }, + { "&upuparrows;", 0x000021c8, }, + { "&urcorn;", 0x0000231d, }, + { "&urcorner;", 0x0000231d, }, + { "&urcrop;", 0x0000230e, }, + { "&uring;", 0x0000016f, }, + { "&urtri;", 0x000025f9, }, + { "&uscr;", 0x0001d4ca, }, + { "&utdot;", 0x000022f0, }, + { "&utilde;", 0x00000169, }, + { "&utri;", 0x000025b5, }, + { "&utrif;", 0x000025b4, }, + { "&uuarr;", 0x000021c8, }, + { "&uuml", 0x000000fc, }, + { "&uuml;", 0x000000fc, }, + { "&uwangle;", 0x000029a7, }, + { "&vArr;", 0x000021d5, }, + { "&vBar;", 0x00002ae8, }, + { "&vBarv;", 0x00002ae9, }, + { "&vDash;", 0x000022a8, }, + { "&vangrt;", 0x0000299c, }, + { "&varepsilon;", 0x000003f5, }, + { "&varkappa;", 0x000003f0, }, + { 
"&varnothing;", 0x00002205, }, + { "&varphi;", 0x000003d5, }, + { "&varpi;", 0x000003d6, }, + { "&varpropto;", 0x0000221d, }, + { "&varr;", 0x00002195, }, + { "&varrho;", 0x000003f1, }, + { "&varsigma;", 0x000003c2, }, +// { "&varsubsetneq;", 8842, 0x0000fe00, }, +// { "&varsubsetneqq;", 10955, 0x0000fe00, }, +// { "&varsupsetneq;", 8843, 0x0000fe00, }, +// { "&varsupsetneqq;", 10956, 0x0000fe00, }, + { "&vartheta;", 0x000003d1, }, + { "&vartriangleleft;", 0x000022b2, }, + { "&vartriangleright;", 0x000022b3, }, + { "&vcy;", 0x00000432, }, + { "&vdash;", 0x000022a2, }, + { "&vee;", 0x00002228, }, + { "&veebar;", 0x000022bb, }, + { "&veeeq;", 0x0000225a, }, + { "&vellip;", 0x000022ee, }, + { "&verbar;", 0x0000007c, }, + { "&vert;", 0x0000007c, }, + { "&vfr;", 0x0001d533, }, + { "&vltri;", 0x000022b2, }, +// { "&vnsub;", 8834, 0x000020d2, }, +// { "&vnsup;", 8835, 0x000020d2, }, + { "&vopf;", 0x0001d567, }, + { "&vprop;", 0x0000221d, }, + { "&vrtri;", 0x000022b3, }, + { "&vscr;", 0x0001d4cb, }, +// { "&vsubnE;", 10955, 0x0000fe00, }, +// { "&vsubne;", 8842, 0x0000fe00, }, +// { "&vsupnE;", 10956, 0x0000fe00, }, +// { "&vsupne;", 8843, 0x0000fe00, }, + { "&vzigzag;", 0x0000299a, }, + { "&wcirc;", 0x00000175, }, + { "&wedbar;", 0x00002a5f, }, + { "&wedge;", 0x00002227, }, + { "&wedgeq;", 0x00002259, }, + { "&weierp;", 0x00002118, }, + { "&wfr;", 0x0001d534, }, + { "&wopf;", 0x0001d568, }, + { "&wp;", 0x00002118, }, + { "&wr;", 0x00002240, }, + { "&wreath;", 0x00002240, }, + { "&wscr;", 0x0001d4cc, }, + { "&xcap;", 0x000022c2, }, + { "&xcirc;", 0x000025ef, }, + { "&xcup;", 0x000022c3, }, + { "&xdtri;", 0x000025bd, }, + { "&xfr;", 0x0001d535, }, + { "&xhArr;", 0x000027fa, }, + { "&xharr;", 0x000027f7, }, + { "&xi;", 0x000003be, }, + { "&xlArr;", 0x000027f8, }, + { "&xlarr;", 0x000027f5, }, + { "&xmap;", 0x000027fc, }, + { "&xnis;", 0x000022fb, }, + { "&xodot;", 0x00002a00, }, + { "&xopf;", 0x0001d569, }, + { "&xoplus;", 0x00002a01, }, + { "&xotime;", 0x00002a02, }, + { 
"&xrArr;", 0x000027f9, }, + { "&xrarr;", 0x000027f6, }, + { "&xscr;", 0x0001d4cd, }, + { "&xsqcup;", 0x00002a06, }, + { "&xuplus;", 0x00002a04, }, + { "&xutri;", 0x000025b3, }, + { "&xvee;", 0x000022c1, }, + { "&xwedge;", 0x000022c0, }, + { "&yacute", 0x000000fd, }, + { "&yacute;", 0x000000fd, }, + { "&yacy;", 0x0000044f, }, + { "&ycirc;", 0x00000177, }, + { "&ycy;", 0x0000044b, }, + { "&yen", 0x000000a5, }, + { "&yen;", 0x000000a5, }, + { "&yfr;", 0x0001d536, }, + { "&yicy;", 0x00000457, }, + { "&yopf;", 0x0001d56a, }, + { "&yscr;", 0x0001d4ce, }, + { "&yucy;", 0x0000044e, }, + { "&yuml", 0x000000ff, }, + { "&yuml;", 0x000000ff, }, + { "&zacute;", 0x0000017a, }, + { "&zcaron;", 0x0000017e, }, + { "&zcy;", 0x00000437, }, + { "&zdot;", 0x0000017c, }, + { "&zeetrf;", 0x00002128, }, + { "&zeta;", 0x000003b6, }, + { "&zfr;", 0x0001d537, }, + { "&zhcy;", 0x00000436, }, + { "&zigrarr;", 0x000021dd, }, + { "&zopf;", 0x0001d56b, }, + { "&zscr;", 0x0001d4cf, }, + { "&zwj;", 0x0000200d, }, + { "&zwnj;", 0x0000200c, }, +#endif + + { NULL, 0 }, +}; --- html_tokenize.c Wed Dec 11 11:21:18 2024 +++ html_tokenize.c Wed Dec 11 11:21:18 2024 @@ -0,0 +1,2201 @@ +/* + * Copyright (c) 2024 joshua stein <jcs@jcs.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +/* + * Tokenization + * https://html.spec.whatwg.org/multipage/parsing.html#tokenization + * + * Handles characters output from html_parse() and turns them into tokens, + * which are emitted to the tree builder. + */ + +#include "html.h" + +void html_tokenize(struct html_page *html, short cc); +bool html_appropriate_end_tag_token(struct html_page *html, html_token *token); +html_tag_type html_find_tag_type(char *tag_name); +void html_lookahead_consume(struct html_page *html, short count); + +void +html_lookahead_consume(struct html_page *html, short count) +{ + short n, j; + + for (n = 0; n < count && html->lookahead_len; n++) { + HTML_DEBUG((": consuming '%c' from lookahead", html->lookahead[0])); + for (j = 0; j < HTML_LOOKAHEAD_SIZE - 1; j++) + html->lookahead[j] = html->lookahead[j + 1]; + html->lookahead_len--; + } +} + +void +html_tokenize(struct html_page *html, short cc) +{ + html_state was_state; + struct html_attr *attr; + const html_entity *found_entity; + short tcc, n, j, i; + + was_state = html->state; + + if (html->lookahead_len < HTML_LOOKAHEAD_SIZE && cc != EOF) { + /* fill lookahead */ + html->lookahead[html->lookahead_len++] = cc; + return; + } + + if (html->lookahead_len) { + /* take a character from the head of lookahead and shift down */ + tcc = html->lookahead[0]; + for (n = 0; n < HTML_LOOKAHEAD_SIZE - 1; n++) + html->lookahead[n] = html->lookahead[n + 1]; + if (cc == EOF) { + if (html->lookahead_len) + html->lookahead_len--; + } else + html->lookahead[HTML_LOOKAHEAD_SIZE - 1] = cc; + cc = tcc; + } + +#ifdef HTML_ENABLE_DEBUGGING + HTML_DEBUG(("pos % 4ld:", html->input_pos++)); + + if (cc == '\n') + HTML_DEBUG((" \\n")); + else if (cc == '\r') + HTML_DEBUG((" \\r")); + else if (cc == '\t') + HTML_DEBUG((" \\t")); + else if (cc == '\f') + HTML_DEBUG((" \\f")); + else if (cc == '\0') + HTML_DEBUG((" \\0")); + else if (cc == ' ') + HTML_DEBUG((" ")); + else if (cc == EOF) + HTML_DEBUG(("EOF")); + else + HTML_DEBUG((" %c", cc)); + + 
HTML_DEBUG((": state %s", html_state_names[html->state])); +#endif + + was_state = html->state; + +reconsume: + if (html->state != was_state) { + HTML_DEBUG((": reconsume as %s", html_state_names[html->state])); + was_state = html->state; + } + + switch (html->state) { + case HTML_STATE_DATA: + switch (cc) { + case '&': + html->return_state = html->state; + html->tmp_len = 0; + html->state = HTML_STATE_CHARACTER_REFERENCE; + break; + case '<': + html->state = HTML_STATE_TAG_OPEN; + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + html_emit_char_token(html, cc); + break; + case EOF: + html_emit_eof_token(html); + break; + default: + html_emit_char_token(html, cc); + break; + } + break; + case HTML_STATE_RCDATA: + switch (cc) { + case '&': + html->return_state = html->state; + html->tmp_len = 0; + html->state = HTML_STATE_CHARACTER_REFERENCE; + break; + case '<': + html->state = HTML_STATE_RCDATA_LESS_THAN_SIGN; + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + html_emit_char_token(html, HTML_REPLACEMENT_CHARACTER); + break; + case EOF: + html_emit_eof_token(html); + break; + default: + html_emit_char_token(html, cc); + break; + } + break; + case HTML_STATE_RAWTEXT: + switch (cc) { + case '<': + html->state = HTML_STATE_RAWTEXT_LESS_THAN_SIGN; + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + html_emit_char_token(html, HTML_REPLACEMENT_CHARACTER); + break; + case EOF: + html_emit_eof_token(html); + break; + default: + html_emit_char_token(html, cc); + break; + } + break; + case HTML_STATE_SCRIPT_DATA: + switch (cc) { + case '<': + html->state = HTML_STATE_SCRIPT_DATA_LESS_THAN_SIGN; + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + if (!html->ignore_script_data) + html_emit_char_token(html, HTML_REPLACEMENT_CHARACTER); + break; + case EOF: + html_emit_eof_token(html); + break; + default: + if (!html->ignore_script_data) + html_emit_char_token(html, cc); + 
break; + } + break; + case HTML_STATE_PLAINTEXT: + switch (cc) { + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + html_emit_char_token(html, HTML_REPLACEMENT_CHARACTER); + break; + case EOF: + html_emit_eof_token(html); + break; + default: + html_emit_char_token(html, cc); + break; + } + break; + case HTML_STATE_TAG_OPEN: + switch (cc) { + case '!': + html->state = HTML_STATE_MARKUP_DECLARATION_OPEN; + html->tmp_len = 0; + break; + case '/': + html->state = HTML_STATE_END_TAG_OPEN; + break; + case '?': + html->error = + HTML_ERROR_UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME; + html_emit_comment(html, &html->new_token.comment); + html->state = HTML_STATE_BOGUS_COMMENT; + goto reconsume; + case EOF: + html->error = HTML_ERROR_EOF_BEFORE_TAG_NAME; + html_emit_char_token(html, '<'); + html_emit_eof_token(html); + break; + default: + if (IS_ALPHA(cc)) { + html_prep_new_token(html, HTML_TOKEN_START_TAG); + html->state = HTML_STATE_TAG_NAME; + goto reconsume; + } + html->error = HTML_ERROR_INVALID_FIRST_CHARACTER_OF_TAG_NAME; + html_emit_char_token(html, '<'); + html->state = HTML_STATE_DATA; + goto reconsume; + } + break; + case HTML_STATE_END_TAG_OPEN: + switch (cc) { + case '>': + html->error = HTML_ERROR_MISSING_END_TAG_NAME; + html->state = HTML_STATE_DATA; + break; + case EOF: + html->error = HTML_ERROR_EOF_BEFORE_TAG_NAME; + html_emit_char_token(html, '<'); + html_emit_char_token(html, '/'); + html_emit_eof_token(html); + break; + default: + if (IS_ALPHA(cc)) { + html_prep_new_token(html, HTML_TOKEN_END_TAG); + html->state = HTML_STATE_TAG_NAME; + goto reconsume; + } + html->error = HTML_ERROR_INVALID_FIRST_CHARACTER_OF_TAG_NAME; + html_prep_new_token(html, HTML_TOKEN_COMMENT); + html->state = HTML_STATE_BOGUS_COMMENT; + goto reconsume; + } + break; + case HTML_STATE_TAG_NAME: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + html->new_token.tag.type = + html_find_tag_type(html->new_token.tag.name); + html->state = 
HTML_STATE_BEFORE_ATTRIBUTE_NAME; + break; + case '/': + html->new_token.tag.type = + html_find_tag_type(html->new_token.tag.name); + html->state = HTML_STATE_SELF_CLOSING_START_TAG; + break; + case '>': + html->state = HTML_STATE_DATA; + html->new_token.tag.type = + html_find_tag_type(html->new_token.tag.name); + html_emit_token(html, &html->new_token); + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + STR_APPEND(html->new_token.tag.name, + html->new_token.tag.name_len, HTML_REPLACEMENT_CHARACTER); + html->new_token.tag.type = 0; + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_TAG; + html_emit_eof_token(html); + break; + default: + if (IS_UPPER_ALPHA(cc)) + cc += 0x20; + STR_APPEND(html->new_token.tag.name, + html->new_token.tag.name_len, cc); + break; + } + break; + case HTML_STATE_RCDATA_LESS_THAN_SIGN: + switch (cc) { + case '/': + html->state = HTML_STATE_RCDATA_END_TAG_OPEN; + html->tmp_len = 0; + break; + default: + html->state = HTML_STATE_RCDATA; + if (!html->ignore_comment_data) + html_emit_char_token(html, '<'); + goto reconsume; + } + break; + case HTML_STATE_RCDATA_END_TAG_OPEN: + if (IS_ALPHA(cc)) { + html_prep_new_token(html, HTML_TOKEN_END_TAG); + html->state = HTML_STATE_RCDATA_END_TAG_NAME; + goto reconsume; + } + if (!html->ignore_comment_data) { + html_emit_char_token(html, '<'); + html_emit_char_token(html, '/'); + } + html->state = HTML_STATE_RCDATA; + goto reconsume; + case HTML_STATE_RCDATA_END_TAG_NAME: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + if (!html_appropriate_end_tag_token(html, &html->new_token)) + goto HTML_STATE_RCDATA_END_TAG_NAME_anything_else; + html->state = HTML_STATE_BEFORE_ATTRIBUTE_NAME; + break; + case '/': + if (!html_appropriate_end_tag_token(html, &html->new_token)) + goto HTML_STATE_RCDATA_END_TAG_NAME_anything_else; + html->state = HTML_STATE_SELF_CLOSING_START_TAG; + break; + case '>': + if (!html_appropriate_end_tag_token(html, &html->new_token)) + goto 
HTML_STATE_RCDATA_END_TAG_NAME_anything_else; + html->state = HTML_STATE_DATA; + break; + default: + if (IS_UPPER_ALPHA(cc)) + cc += 0x20; + if (IS_LOWER_ALPHA(cc)) { + STR_APPEND(html->new_token.tag.name, + html->new_token.tag.name_len, cc); + STR_APPEND(html->tmp, html->tmp_len, cc); + break; + } + /* FALLTHROUGH */ + HTML_STATE_RCDATA_END_TAG_NAME_anything_else: + if (!html->ignore_comment_data) { + html_emit_char_token(html, '<'); + html_emit_char_token(html, '/'); + for (n = 0; n < html->tmp_len; n++) + html_emit_char_token(html, html->tmp[n]); + } + html->state = HTML_STATE_RCDATA; + goto reconsume; + } + break; + case HTML_STATE_RAWTEXT_LESS_THAN_SIGN: + switch (cc) { + case '/': + html->tmp_len = 0; + html->state = HTML_STATE_RAWTEXT_END_TAG_OPEN; + break; + default: + if (!html->ignore_comment_data) { + html_emit_char_token(html, '<'); + } + html->state = HTML_STATE_RAWTEXT; + goto reconsume; + } + break; + case HTML_STATE_RAWTEXT_END_TAG_OPEN: + if (IS_ALPHA(cc)) { + html_prep_new_token(html, HTML_TOKEN_END_TAG); + html->state = HTML_STATE_RAWTEXT_END_TAG_NAME; + goto reconsume; + } + if (!html->ignore_comment_data) { + html_emit_char_token(html, '<'); + html_emit_char_token(html, '/'); + } + html->state = HTML_STATE_RAWTEXT; + goto reconsume; + case HTML_STATE_RAWTEXT_END_TAG_NAME: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + if (!html_appropriate_end_tag_token(html, &html->new_token)) + goto HTML_STATE_RAWTEXT_END_TAG_NAME_anything_else; + html->state = HTML_STATE_BEFORE_ATTRIBUTE_NAME; + break; + case '/': + if (!html_appropriate_end_tag_token(html, &html->new_token)) + goto HTML_STATE_RAWTEXT_END_TAG_NAME_anything_else; + html->state = HTML_STATE_SELF_CLOSING_START_TAG; + break; + case '>': + if (!html_appropriate_end_tag_token(html, &html->new_token)) + goto HTML_STATE_RAWTEXT_END_TAG_NAME_anything_else; + html->state = HTML_STATE_DATA; + break; + default: + if (IS_UPPER_ALPHA(cc)) + cc += 0x20; + if (IS_LOWER_ALPHA(cc)) { + 
STR_APPEND(html->new_token.tag.name, + html->new_token.tag.name_len, cc); + STR_APPEND(html->tmp, html->tmp_len, cc); + break; + } + /* FALLTHROUGH */ + HTML_STATE_RAWTEXT_END_TAG_NAME_anything_else: + if (!html->ignore_comment_data) { + html_emit_char_token(html, '<'); + html_emit_char_token(html, '/'); + for (n = 0; n < html->tmp_len; n++) + html_emit_char_token(html, html->tmp[n]); + } + html->state = HTML_STATE_RAWTEXT; + goto reconsume; + } + break; + case HTML_STATE_SCRIPT_DATA_LESS_THAN_SIGN: + switch (cc) { + case '/': + html->tmp_len = 0; + html->state = HTML_STATE_SCRIPT_DATA_END_TAG_OPEN; + break; + case '!': + html->state = HTML_STATE_SCRIPT_DATA_ESCAPE_START; + if (!html->ignore_comment_data) { + html_emit_char_token(html, '<'); + html_emit_char_token(html, '!'); + } + break; + default: + if (!html->ignore_comment_data) { + html_emit_char_token(html, '<'); + } + html->state = HTML_STATE_SCRIPT_DATA; + goto reconsume; + } + break; + case HTML_STATE_SCRIPT_DATA_END_TAG_OPEN: + if (IS_ALPHA(cc)) { + html_prep_new_token(html, HTML_TOKEN_END_TAG); + html->state = HTML_STATE_SCRIPT_DATA_END_TAG_NAME; + goto reconsume; + } + if (!html->ignore_script_data) { + html_emit_char_token(html, '<'); + html_emit_char_token(html, '/'); + } + html->state = HTML_STATE_SCRIPT_DATA; + goto reconsume; + case HTML_STATE_SCRIPT_DATA_END_TAG_NAME: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + if (!html_appropriate_end_tag_token(html, &html->new_token)) + goto HTML_STATE_SCRIPT_DATA_END_TAG_NAME_anything_else; + html->state = HTML_STATE_BEFORE_ATTRIBUTE_NAME; + break; + case '/': + if (!html_appropriate_end_tag_token(html, &html->new_token)) + goto HTML_STATE_SCRIPT_DATA_END_TAG_NAME_anything_else; + html->state = HTML_STATE_SELF_CLOSING_START_TAG; + break; + case '>': + if (!html_appropriate_end_tag_token(html, &html->new_token)) + goto HTML_STATE_SCRIPT_DATA_END_TAG_NAME_anything_else; + html->state = HTML_STATE_DATA; + break; + default: + if 
(IS_UPPER_ALPHA(cc)) + cc += 0x20; + if (IS_LOWER_ALPHA(cc)) { + STR_APPEND(html->new_token.tag.name, + html->new_token.tag.name_len, cc); + STR_APPEND(html->tmp, html->tmp_len, cc); + break; + } + /* FALLTHROUGH */ + HTML_STATE_SCRIPT_DATA_END_TAG_NAME_anything_else: + if (!html->ignore_script_data) { + html_emit_char_token(html, '<'); + html_emit_char_token(html, '/'); + for (n = 0; n < html->tmp_len; n++) + html_emit_char_token(html, html->tmp[n]); + } + html->state = HTML_STATE_SCRIPT_DATA; + goto reconsume; + } + break; + case HTML_STATE_SCRIPT_DATA_ESCAPE_START: + switch (cc) { + case '-': + html->state = HTML_STATE_SCRIPT_DATA_ESCAPE_START_DASH; + if (!html->ignore_script_data) { + html_emit_char_token(html, '-'); + } + break; + default: + html->state = HTML_STATE_SCRIPT_DATA; + goto reconsume; + } + break; + case HTML_STATE_SCRIPT_DATA_ESCAPE_START_DASH: + switch (cc) { + case '-': + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED_DASH_DASH; + if (!html->ignore_script_data) { + html_emit_char_token(html, '-'); + } + break; + default: + html->state = HTML_STATE_SCRIPT_DATA; + goto reconsume; + } + break; + case HTML_STATE_SCRIPT_DATA_ESCAPED: + switch (cc) { + case '-': + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED_DASH; + if (!html->ignore_script_data) { + html_emit_char_token(html, '-'); + } + break; + case '<': + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + if (!html->ignore_script_data) { + html_emit_char_token(html, HTML_REPLACEMENT_CHARACTER); + } + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT; + html_emit_eof_token(html); + break; + default: + if (!html->ignore_script_data) { + html_emit_char_token(html, cc); + } + break; + } + break; + case HTML_STATE_SCRIPT_DATA_ESCAPED_DASH: + switch (cc) { + case '-': + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED_DASH_DASH; + if (!html->ignore_script_data) { + 
html_emit_char_token(html, '-'); + } + break; + case '<': + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED; + if (!html->ignore_script_data) { + html_emit_char_token(html, HTML_REPLACEMENT_CHARACTER); + } + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT; + html_emit_eof_token(html); + break; + default: + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED; + if (!html->ignore_script_data) { + html_emit_char_token(html, cc); + } + break; + } + break; + case HTML_STATE_SCRIPT_DATA_ESCAPED_DASH_DASH: + switch (cc) { + case '-': + if (!html->ignore_script_data) { + html_emit_char_token(html, '-'); + } + break; + case '<': + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN; + break; + case '>': + html->state = HTML_STATE_SCRIPT_DATA; + if (!html->ignore_script_data) { + html_emit_char_token(html, '>'); + } + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED; + if (!html->ignore_script_data) { + html_emit_char_token(html, HTML_REPLACEMENT_CHARACTER); + } + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT; + html_emit_eof_token(html); + break; + default: + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED; + if (!html->ignore_script_data) { + html_emit_char_token(html, cc); + } + break; + } + break; + case HTML_STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: + switch (cc) { + case '/': + html->tmp_len = 0; + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN; + break; + default: + if (IS_ALPHA(cc)) { + html->tmp_len = 0; + if (!html->ignore_script_data) { + html_emit_char_token(html, '<'); + } + html->state = HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START; + goto reconsume; + } + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED; + if (!html->ignore_script_data) { + html_emit_char_token(html, '<'); + } + 
goto reconsume; + } + break; + case HTML_STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN: + if (IS_ALPHA(cc)) { + html_prep_new_token(html, HTML_TOKEN_END_TAG); + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME; + goto reconsume; + } + if (!html->ignore_script_data) { + html_emit_char_token(html, '<'); + html_emit_char_token(html, '/'); + } + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED; + goto reconsume; + case HTML_STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + if (!html_appropriate_end_tag_token(html, &html->new_token)) + goto HTML_STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME_anything_else; + html->state = HTML_STATE_BEFORE_ATTRIBUTE_NAME; + break; + case '/': + if (!html_appropriate_end_tag_token(html, &html->new_token)) + goto HTML_STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME_anything_else; + html->state = HTML_STATE_SELF_CLOSING_START_TAG; + break; + case '>': + if (!html_appropriate_end_tag_token(html, &html->new_token)) + goto HTML_STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME_anything_else; + html->state = HTML_STATE_DATA; + break; + default: + if (IS_UPPER_ALPHA(cc)) + cc += 0x20; + if (IS_LOWER_ALPHA(cc)) { + STR_APPEND(html->new_token.tag.name, + html->new_token.tag.name_len, cc); + STR_APPEND(html->tmp, html->tmp_len, cc); + break; + } + /* FALLTHROUGH */ + HTML_STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME_anything_else: + if (!html->ignore_script_data) { + html_emit_char_token(html, '<'); + html_emit_char_token(html, '/'); + for (n = 0; n < html->tmp_len; n++) + html_emit_char_token(html, html->tmp[n]); + } + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED; + goto reconsume; + } + break; + case HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + case '/': + case '>': + if (html->tmp_len == 6 && + memcmp(html->tmp, "script", 6) == 0) { + html->state = HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED; + } else { + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED; + 
html_emit_char_token(html, cc); + } + break; + default: + if (IS_UPPER_ALPHA(cc)) + cc += 0x20; + if (IS_LOWER_ALPHA(cc)) { + STR_APPEND(html->tmp, html->tmp_len, cc); + break; + } + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED; + goto reconsume; + } + break; + case HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED: + switch (cc) { + case '-': + html->state = HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH; + html_emit_char_token(html, '-'); + break; + case '<': + html->state = HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN; + html_emit_char_token(html, '<'); + break; + case '\0': + html->error = HTML_ERROR_EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT; + html_emit_eof_token(html); + break; + default: + html_emit_char_token(html, cc); + break; + } + break; + case HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH: + switch (cc) { + case '-': + html->state = HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH; + html_emit_char_token(html, '-'); + break; + case '<': + html->state = HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN; + html_emit_char_token(html, '<'); + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + html->state = HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED; + html_emit_char_token(html, HTML_REPLACEMENT_CHARACTER); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT; + html_emit_eof_token(html); + break; + default: + html->state = HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED; + html_emit_char_token(html, cc); + break; + } + break; + case HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: + switch (cc) { + case '-': + html_emit_char_token(html, '-'); + break; + case '<': + html->state = HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN; + html_emit_char_token(html, '<'); + break; + case '>': + html->state = HTML_STATE_SCRIPT_DATA; + html_emit_char_token(html, '<'); + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + html->state = HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED; + html_emit_char_token(html, 
HTML_REPLACEMENT_CHARACTER); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT; + html_emit_eof_token(html); + break; + default: + html->state = HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED; + html_emit_char_token(html, cc); + break; + } + break; + case HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: + switch (cc) { + case '/': + html->tmp_len = 0; + html->state = HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END; + html_emit_char_token(html, '/'); + break; + default: + html->state = HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED; + goto reconsume; + } + break; + case HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + case '/': + case '>': + if (html->tmp_len == 6 && + memcmp(html->tmp, "script", 6) == 0) { + html->state = HTML_STATE_SCRIPT_DATA_ESCAPED; + } else { + html->state = HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED; + html_emit_char_token(html, cc); + } + break; + default: + if (IS_UPPER_ALPHA(cc)) + cc += 0x20; + if (IS_LOWER_ALPHA(cc)) { + STR_APPEND(html->tmp, html->tmp_len, cc); + html_emit_char_token(html, cc); + break; + } + html->state = HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED; + goto reconsume; + } + break; + case HTML_STATE_BEFORE_ATTRIBUTE_NAME: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + /* ignore */ + break; + case '/': + case '>': + case EOF: + html->state = HTML_STATE_AFTER_ATTRIBUTE_NAME; + goto reconsume; + case '=': + html->error = + HTML_ERROR_UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME; + attr = html_prep_new_attribute(html, &html->new_token.tag); + STR_APPEND(attr->name, attr->name_len, cc); + html->state = HTML_STATE_ATTRIBUTE_NAME; + break; + default: + html_prep_new_attribute(html, &html->new_token.tag); + html->state = HTML_STATE_ATTRIBUTE_NAME; + goto reconsume; + } + break; + case HTML_STATE_ATTRIBUTE_NAME: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + case '/': + case '>': + case EOF: + html->state = 
HTML_STATE_AFTER_ATTRIBUTE_NAME; + goto reconsume; + case '=': + html->state = HTML_STATE_BEFORE_ATTRIBUTE_VALUE; + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + attr = &NEW_TOKEN_LAST_ATTR; + STR_APPEND(attr->name, attr->name_len, + HTML_REPLACEMENT_CHARACTER); + break; + case '"': + case '\'': + case '<': + html->error = HTML_ERROR_UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME; + goto HTML_STATE_ATTRIBUTE_NAME_anything_else; + default: + HTML_STATE_ATTRIBUTE_NAME_anything_else: + if (IS_UPPER_ALPHA(cc)) + cc += 0x20; + attr = &NEW_TOKEN_LAST_ATTR; + STR_APPEND(attr->name, attr->name_len, cc); + /* TODO: check for duplicate attr names, discard this if match */ + break; + } + break; + case HTML_STATE_AFTER_ATTRIBUTE_NAME: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + /* ignore */ + break; + case '/': + html->state = HTML_STATE_SELF_CLOSING_START_TAG; + break; + case '=': + html->state = HTML_STATE_BEFORE_ATTRIBUTE_VALUE; + break; + case '>': + html_emit_token(html, &html->new_token); + html->state = HTML_STATE_DATA; + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_TAG; + html_emit_eof_token(html); + break; + default: + html_prep_new_attribute(html, &html->new_token.tag); + html->state = HTML_STATE_ATTRIBUTE_NAME; + goto reconsume; + } + break; + case HTML_STATE_BEFORE_ATTRIBUTE_VALUE: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + /* ignore */ + break; + case '"': + html->state = HTML_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED; + break; + case '\'': + html->state = HTML_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED; + break; + case '>': + html->error = HTML_ERROR_MISSING_ATTRIBUTE_VALUE; + html->state = HTML_STATE_DATA; + html_emit_token(html, &html->new_token); + break; + default: + html->state = HTML_STATE_ATTRIBUTE_VALUE_UNQUOTED; + goto reconsume; + } + break; + case HTML_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED: + switch (cc) { + case '"': + html->state = HTML_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED; + break; + 
case '&': + html->return_state = HTML_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED; + html->tmp_len = 0; + html->state = HTML_STATE_CHARACTER_REFERENCE; + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + attr = &NEW_TOKEN_LAST_ATTR; + STR_APPEND(attr->val, attr->val_len, HTML_REPLACEMENT_CHARACTER); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_TAG; + html_emit_eof_token(html); + break; + default: + attr = &NEW_TOKEN_LAST_ATTR; + STR_APPEND(attr->val, attr->val_len, cc); + break; + } + break; + case HTML_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED: + switch (cc) { + case '\'': + html->state = HTML_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED; + break; + case '&': + html->return_state = HTML_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED; + html->tmp_len = 0; + html->state = HTML_STATE_CHARACTER_REFERENCE; + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + attr = &NEW_TOKEN_LAST_ATTR; + STR_APPEND(attr->val, attr->val_len, HTML_REPLACEMENT_CHARACTER); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_TAG; + html_emit_eof_token(html); + break; + default: + attr = &NEW_TOKEN_LAST_ATTR; + STR_APPEND(attr->val, attr->val_len, cc); + break; + } + break; + case HTML_STATE_ATTRIBUTE_VALUE_UNQUOTED: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + html->state = HTML_STATE_BEFORE_ATTRIBUTE_NAME; + break; + case '&': + html->return_state = HTML_STATE_ATTRIBUTE_VALUE_UNQUOTED; + html->tmp_len = 0; + html->state = HTML_STATE_CHARACTER_REFERENCE; + break; + case '>': + html->state = HTML_STATE_DATA; + html_emit_token(html, &html->new_token); + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + attr = &NEW_TOKEN_LAST_ATTR; + STR_APPEND(attr->val, attr->val_len, HTML_REPLACEMENT_CHARACTER); + break; + case '"': + case '\'': + case '<': + case '=': + case '`': + html->error = + HTML_ERROR_UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE; + goto HTML_STATE_ATTRIBUTE_VALUE_UNQUOTED_anything_else; + case EOF: + 
html->error = HTML_ERROR_EOF_IN_TAG; + html_emit_eof_token(html); + break; + default: + HTML_STATE_ATTRIBUTE_VALUE_UNQUOTED_anything_else: + attr = &NEW_TOKEN_LAST_ATTR; + STR_APPEND(attr->val, attr->val_len, cc); + break; + } + break; + case HTML_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + html->state = HTML_STATE_BEFORE_ATTRIBUTE_NAME; + break; + case '/': + html->state = HTML_STATE_SELF_CLOSING_START_TAG; + break; + case '>': + html->state = HTML_STATE_DATA; + html_emit_token(html, &html->new_token); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_TAG; + html_emit_eof_token(html); + break; + default: + html->error = HTML_ERROR_MISSING_WHITESPACE_BETWEEN_ATTRIBUTES; + html->state = HTML_STATE_BEFORE_ATTRIBUTE_NAME; + goto reconsume; + } + break; + case HTML_STATE_SELF_CLOSING_START_TAG: + switch (cc) { + case '>': + html->new_token.tag.self_closing = true; + html->state = HTML_STATE_DATA; + html_emit_token(html, &html->new_token); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_TAG; + html_emit_eof_token(html); + break; + default: + html->error = HTML_ERROR_UNEXPECTED_SOLIDUS_IN_TAG; + html->state = HTML_STATE_BEFORE_ATTRIBUTE_NAME; + goto reconsume; + } + break; + case HTML_STATE_BOGUS_COMMENT: + switch (cc) { + case '>': + html->state = HTML_STATE_DATA; + html_emit_comment(html, &html->new_token.comment); + break; + case EOF: + html_emit_comment(html, &html->new_token.comment); + html_emit_eof_token(html); + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + if (!html->ignore_comment_data) { + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, HTML_REPLACEMENT_CHARACTER); + } + break; + default: + if (!html->ignore_comment_data) { + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, cc); + } + break; + } + break; + case HTML_STATE_MARKUP_DECLARATION_OPEN: + /* "If the next few characters are" */ + /* 
https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state */ + + if (cc == '-' && html->lookahead[0] == '-') { + html_lookahead_consume(html, 1); + html_prep_new_token(html, HTML_TOKEN_COMMENT); + html->state = HTML_STATE_COMMENT_START; + break; + } else if ((cc == 'd' || cc == 'D') && + strncasecmp(html->lookahead, "octype", 6) == 0) { + html_lookahead_consume(html, 6); + html->state = HTML_STATE_DOCTYPE; + html_prep_new_token(html, HTML_TOKEN_DOCTYPE); + break; + } else if (cc == '[' && memcmp(html->lookahead, "CDATA[", 6) == 0) { + html_lookahead_consume(html, 6); + if (html->current_node->ns != HTML_NAMESPACE_HTML) + html->state = HTML_STATE_CDATA_SECTION; + else + html->error = HTML_ERROR_CDATA_IN_HTML_CONTENT; + + html_prep_new_token(html, HTML_TOKEN_COMMENT); + if (!html->ignore_comment_data) + html->new_token.comment.len = + strlcpy(html->new_token.comment.data, + "[CDATA[", sizeof(html->new_token.comment.data)); + html->state = HTML_STATE_BOGUS_COMMENT; + break; + } else { + html->error = HTML_ERROR_INCORRECTLY_OPENED_COMMENT; + html_prep_new_token(html, HTML_TOKEN_COMMENT); + html->state = HTML_STATE_BOGUS_COMMENT; + goto reconsume; + } + break; + case HTML_STATE_COMMENT_START: + switch (cc) { + case '-': + html->state = HTML_STATE_COMMENT_START_DASH; + break; + case '>': + html->error = HTML_ERROR_ABRUPT_CLOSING_OF_EMPTY_COMMENT; + html->state = HTML_STATE_DATA; + html_emit_comment(html, &html->new_token.comment); + break; + default: + html->state = HTML_STATE_COMMENT; + goto reconsume; + } + break; + case HTML_STATE_COMMENT_START_DASH: + switch (cc) { + case '-': + html->state = HTML_STATE_COMMENT_END_DASH; + break; + case '>': + html->error = HTML_ERROR_ABRUPT_CLOSING_OF_EMPTY_COMMENT; + html->state = HTML_STATE_DATA; + html->new_token.type = HTML_TOKEN_COMMENT; + html_emit_token(html, &html->new_token); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_COMMENT; + html->new_token.type = HTML_TOKEN_COMMENT; + 
html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + if (!html->ignore_comment_data) { + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, '-'); + } + html->state = HTML_STATE_COMMENT; + goto reconsume; + } + break; + case HTML_STATE_COMMENT: + switch (cc) { + case '<': + if (!html->ignore_comment_data) { + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, cc); + } + html->state = HTML_STATE_COMMENT_LESS_THAN_SIGN; + break; + case '-': + html->state = HTML_STATE_COMMENT_END_DASH; + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + if (!html->ignore_comment_data) { + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, HTML_REPLACEMENT_CHARACTER); + } + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_COMMENT; + html->new_token.type = HTML_TOKEN_COMMENT; + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + if (!html->ignore_comment_data) { + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, cc); + } + break; + } + break; + case HTML_STATE_COMMENT_LESS_THAN_SIGN: + switch (cc) { + case '!': + if (!html->ignore_comment_data) { + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, cc); + } + html->state = HTML_STATE_COMMENT_LESS_THAN_SIGN_BANG; + break; + case '<': + if (!html->ignore_comment_data) { + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, cc); + } + break; + default: + html->state = HTML_STATE_COMMENT; + goto reconsume; + } + break; + case HTML_STATE_COMMENT_LESS_THAN_SIGN_BANG: + switch (cc) { + case '-': + html->state = HTML_STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH; + break; + default: + html->state = HTML_STATE_COMMENT; + goto reconsume; + } + break; + case HTML_STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH: + switch (cc) { + case '-': + html->state = HTML_STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH; + break; + default: + 
html->state = HTML_STATE_COMMENT_END_DASH; + goto reconsume; + } + break; + case HTML_STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH: + switch (cc) { + case '>': + case EOF: + html->state = HTML_STATE_COMMENT_END; + goto reconsume; + default: + html->error = HTML_ERROR_NESTED_COMMENT; + html->state = HTML_STATE_COMMENT_END; + goto reconsume; + } + break; + case HTML_STATE_COMMENT_END_DASH: + switch (cc) { + case '-': + html->state = HTML_STATE_COMMENT_END; + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_COMMENT; + html->new_token.type = HTML_TOKEN_COMMENT; + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + if (!html->ignore_comment_data) { + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, '-'); + } + html->state = HTML_STATE_COMMENT; + goto reconsume; + } + break; + case HTML_STATE_COMMENT_END: + switch (cc) { + case '>': + html->state = HTML_STATE_DATA; + html->new_token.type = HTML_TOKEN_COMMENT; + html_emit_token(html, &html->new_token); + break; + case '!': + html->state = HTML_STATE_COMMENT_END; + break; + case '-': + if (!html->ignore_comment_data) { + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, '-'); + } + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_COMMENT; + html->new_token.type = HTML_TOKEN_COMMENT; + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + if (!html->ignore_comment_data) { + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, '-'); + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, '-'); + } + html->state = HTML_STATE_COMMENT; + goto reconsume; + } + break; + case HTML_STATE_COMMENT_END_BANG: + switch (cc) { + case '-': + if (!html->ignore_comment_data) { + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, '-'); + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, '-'); + STR_APPEND(html->new_token.comment.data, 
+ html->new_token.comment.len, '!'); + } + html->state = HTML_STATE_COMMENT_END_DASH; + break; + case '>': + html->error = HTML_ERROR_INCORRECTLY_CLOSED_COMMENT; + html->state = HTML_STATE_DATA; + html->new_token.type = HTML_TOKEN_COMMENT; + html_emit_token(html, &html->new_token); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_COMMENT; + html->new_token.type = HTML_TOKEN_COMMENT; + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + if (!html->ignore_comment_data) { + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, '-'); + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, '-'); + STR_APPEND(html->new_token.comment.data, + html->new_token.comment.len, '!'); + } + html->state = HTML_STATE_COMMENT; + goto reconsume; + } + break; + case HTML_STATE_DOCTYPE: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + html->state = HTML_STATE_BEFORE_DOCTYPE_NAME; + break; + case '>': + html->state = HTML_STATE_BEFORE_DOCTYPE_NAME; + goto reconsume; + case EOF: + html->error = HTML_ERROR_EOF_IN_DOCTYPE; + html->new_token.doctype.force_quirks = true; + html_prep_new_token(html, HTML_TOKEN_DOCTYPE); + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + html->error = HTML_ERROR_MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME; + html->state = HTML_STATE_BEFORE_DOCTYPE_NAME; + goto reconsume; + } + break; + case HTML_STATE_BEFORE_DOCTYPE_NAME: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + /* ignore */ + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + html_prep_new_token(html, HTML_TOKEN_DOCTYPE); + STR_APPEND(html->new_token.tag.name, + html->new_token.tag.name_len, '!'); + html->state = HTML_STATE_DOCTYPE_NAME; + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_DOCTYPE; + html->new_token.doctype.force_quirks = true; + html_prep_new_token(html, HTML_TOKEN_DOCTYPE); + html_emit_token(html, 
&html->new_token); + html_emit_eof_token(html); + break; + case '>': + html->error = HTML_ERROR_MISSING_DOCTYPE_NAME; + html->new_token.doctype.force_quirks = true; + html_emit_token(html, &html->new_token); + html->state = HTML_STATE_DATA; + break; + default: + if (IS_UPPER_ALPHA(cc)) + cc += 0x20; + html_prep_new_token(html, HTML_TOKEN_DOCTYPE); + html->state = HTML_STATE_DOCTYPE_NAME; + STR_APPEND(html->new_token.doctype.name, + html->new_token.doctype.name_len, cc); + break; + } + break; + case HTML_STATE_DOCTYPE_NAME: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + html->state = HTML_STATE_AFTER_DOCTYPE_NAME; + html->tmp_len = 0; + break; + case '>': + html_emit_token(html, &html->new_token); + html->state = HTML_STATE_DATA; + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + STR_APPEND(html->new_token.doctype.name, + html->new_token.doctype.name_len, HTML_REPLACEMENT_CHARACTER); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_DOCTYPE; + html->new_token.doctype.force_quirks = true; + html_prep_new_token(html, HTML_TOKEN_DOCTYPE); + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + if (IS_UPPER_ALPHA(cc)) + cc += 0x20; + STR_APPEND(html->new_token.doctype.name, + html->new_token.doctype.name_len, cc); + break; + } + break; + case HTML_STATE_AFTER_DOCTYPE_NAME: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + /* ignore */ + break; + case '>': + html->state = HTML_STATE_DATA; + html_emit_token(html, &html->new_token); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_DOCTYPE; + html->new_token.doctype.force_quirks = true; + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + if ((cc == 'p' || cc == 'P') && + strncasecmp(html->lookahead, "ublic", 5) == 0) { + html->state = HTML_STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD; + html->lookahead_len = 0; + } else if ((cc == 's' || cc == 'S') && + 
strncasecmp(html->lookahead, "ystem", 5) == 0) { + html->state = HTML_STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD; + html->lookahead_len = 0; + } else { + html->error = + HTML_ERROR_INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME; + html->new_token.doctype.force_quirks = true; + html->state = HTML_STATE_BOGUS_DOCTYPE; + goto reconsume; + } + break; + } + break; + case HTML_STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + html->state = HTML_STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER; + break; + case '"': + html->error = + HTML_ERROR_MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD; + memset(html->new_token.doctype.public_identifier, 0, + sizeof(html->new_token.doctype.public_identifier)); + html->new_token.doctype.public_identifier_len = 0; + html->state = HTML_STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED; + break; + case '\'': + html->error = + HTML_ERROR_MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD; + memset(html->new_token.doctype.public_identifier, 0, + sizeof(html->new_token.doctype.public_identifier)); + html->new_token.doctype.public_identifier_len = 0; + html->state = HTML_STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; + break; + case '>': + html->error = HTML_ERROR_MISSING_DOCTYPE_PUBLIC_IDENTIFIER; + html->new_token.doctype.force_quirks = true; + html->state = HTML_STATE_DATA; + html_emit_token(html, &html->new_token); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_DOCTYPE; + html->new_token.doctype.force_quirks = true; + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + html->error = + HTML_ERROR_MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER; + html->new_token.doctype.force_quirks = true; + html->state = HTML_STATE_BOGUS_DOCTYPE; + goto reconsume; + } + break; + case HTML_STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + /* ignore */ + break; + case '"': + 
memset(html->new_token.doctype.public_identifier, 0, + sizeof(html->new_token.doctype.public_identifier)); + html->new_token.doctype.public_identifier_len = 0; + html->state = HTML_STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED; + break; + case '\'': + memset(html->new_token.doctype.public_identifier, 0, + sizeof(html->new_token.doctype.public_identifier)); + html->new_token.doctype.public_identifier_len = 0; + html->state = HTML_STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; + break; + case '>': + html->error = HTML_ERROR_MISSING_DOCTYPE_PUBLIC_IDENTIFIER; + html->new_token.doctype.force_quirks = true; + html->state = HTML_STATE_DATA; + html_emit_token(html, &html->new_token); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_DOCTYPE; + html->new_token.doctype.force_quirks = true; + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + html->error = + HTML_ERROR_MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER; + html->new_token.doctype.force_quirks = true; + html->state = HTML_STATE_BOGUS_DOCTYPE; + goto reconsume; + } + break; + case HTML_STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: + switch (cc) { + case '"': + html->state = HTML_STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER; + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + STR_APPEND(html->new_token.doctype.public_identifier, + html->new_token.doctype.public_identifier_len, + HTML_REPLACEMENT_CHARACTER); + break; + case '>': + html->error = HTML_ERROR_ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER; + html->new_token.doctype.force_quirks = true; + html->state = HTML_STATE_DATA; + html_emit_token(html, &html->new_token); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_DOCTYPE; + html->new_token.doctype.force_quirks = true; + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + STR_APPEND(html->new_token.doctype.public_identifier, + html->new_token.doctype.public_identifier_len, cc); + break; + } + break; + case 
HTML_STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: + switch (cc) { + case '\'': + html->state = HTML_STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER; + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + STR_APPEND(html->new_token.doctype.public_identifier, + html->new_token.doctype.public_identifier_len, + HTML_REPLACEMENT_CHARACTER); + break; + case '>': + html->error = HTML_ERROR_ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER; + html->new_token.doctype.force_quirks = true; + html->state = HTML_STATE_DATA; + html_emit_token(html, &html->new_token); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_DOCTYPE; + html->new_token.doctype.force_quirks = true; + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + STR_APPEND(html->new_token.doctype.public_identifier, + html->new_token.doctype.public_identifier_len, cc); + break; + } + break; + case HTML_STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + html->state = + HTML_STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS; + break; + case '>': + html->state = HTML_STATE_DATA; + html_emit_token(html, &html->new_token); + break; + case '"': + html->error = + HTML_ERROR_MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS; + memset(html->new_token.doctype.system_identifier, 0, + sizeof(html->new_token.doctype.system_identifier)); + html->new_token.doctype.system_identifier_len = 0; + html->state = HTML_STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; + break; + case '\'': + html->error = + HTML_ERROR_MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS; + memset(html->new_token.doctype.system_identifier, 0, + sizeof(html->new_token.doctype.system_identifier)); + html->new_token.doctype.system_identifier_len = 0; + html->state = HTML_STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_DOCTYPE; + html->new_token.doctype.force_quirks = true; + 
html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + html->error = + HTML_ERROR_MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER; + html->new_token.doctype.force_quirks = true; + html->state = HTML_STATE_BOGUS_DOCTYPE; + goto reconsume; + } + break; + case HTML_STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + /* ignore */ + break; + case '>': + html->state = HTML_STATE_DATA; + html_emit_token(html, &html->new_token); + break; + case '"': + memset(html->new_token.doctype.system_identifier, 0, + sizeof(html->new_token.doctype.system_identifier)); + html->new_token.doctype.system_identifier_len = 0; + html->state = HTML_STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; + break; + case '\'': + memset(html->new_token.doctype.system_identifier, 0, + sizeof(html->new_token.doctype.system_identifier)); + html->new_token.doctype.system_identifier_len = 0; + html->state = HTML_STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_DOCTYPE; + html->new_token.doctype.force_quirks = true; + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + html->error = + HTML_ERROR_MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER; + html->new_token.doctype.force_quirks = true; + html->state = HTML_STATE_BOGUS_DOCTYPE; + goto reconsume; + } + break; + case HTML_STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + html->state = HTML_STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER; + break; + case '"': + html->error = + HTML_ERROR_MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD; + memset(html->new_token.doctype.system_identifier, 0, + sizeof(html->new_token.doctype.system_identifier)); + html->new_token.doctype.system_identifier_len = 0; + html->state = HTML_STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; + break; + case '\'': + html->error = + 
HTML_ERROR_MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD; + memset(html->new_token.doctype.system_identifier, 0, + sizeof(html->new_token.doctype.system_identifier)); + html->new_token.doctype.system_identifier_len = 0; + html->state = HTML_STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; + break; + case '>': + html->error = HTML_ERROR_MISSING_DOCTYPE_SYSTEM_IDENTIFIER; + html->new_token.doctype.force_quirks = true; + html_emit_token(html, &html->new_token); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_DOCTYPE; + html->new_token.doctype.force_quirks = true; + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + html->error = + HTML_ERROR_MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER; + html->new_token.doctype.force_quirks = true; + html->state = HTML_STATE_BOGUS_DOCTYPE; + goto reconsume; + } + break; + case HTML_STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + /* ignore */ + break; + case '"': + memset(html->new_token.doctype.system_identifier, 0, + sizeof(html->new_token.doctype.system_identifier)); + html->new_token.doctype.system_identifier_len = 0; + html->state = HTML_STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED; + break; + case '\'': + memset(html->new_token.doctype.system_identifier, 0, + sizeof(html->new_token.doctype.system_identifier)); + html->new_token.doctype.system_identifier_len = 0; + html->state = HTML_STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED; + break; + case '>': + html->error = HTML_ERROR_MISSING_DOCTYPE_SYSTEM_IDENTIFIER; + html->new_token.doctype.force_quirks = true; + html_emit_token(html, &html->new_token); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_DOCTYPE; + html->new_token.doctype.force_quirks = true; + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + html->error = + HTML_ERROR_MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER; + html->new_token.doctype.force_quirks = true; 
+ html->state = HTML_STATE_BOGUS_DOCTYPE; + goto reconsume; + } + break; + case HTML_STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: + switch (cc) { + case '"': + html->state = HTML_STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER; + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + STR_APPEND(html->new_token.doctype.system_identifier, + html->new_token.doctype.system_identifier_len, + HTML_REPLACEMENT_CHARACTER); + break; + case '>': + html->error = HTML_ERROR_ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER; + html->new_token.doctype.force_quirks = true; + html_emit_token(html, &html->new_token); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_DOCTYPE; + html->new_token.doctype.force_quirks = true; + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + STR_APPEND(html->new_token.doctype.system_identifier, + html->new_token.doctype.system_identifier_len, cc); + break; + } + break; + case HTML_STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: + switch (cc) { + case '\'': + html->state = HTML_STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER; + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + STR_APPEND(html->new_token.doctype.system_identifier, + html->new_token.doctype.system_identifier_len, + HTML_REPLACEMENT_CHARACTER); + break; + case '>': + html->error = HTML_ERROR_ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER; + html->new_token.doctype.force_quirks = true; + html_emit_token(html, &html->new_token); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_DOCTYPE; + html->new_token.doctype.force_quirks = true; + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + STR_APPEND(html->new_token.doctype.system_identifier, + html->new_token.doctype.system_identifier_len, cc); + break; + } + break; + case HTML_STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER: + switch (cc) { + case '\t': + case '\n': + case '\f': + case ' ': + /* ignore */ + break; + case '>': + html->state = HTML_STATE_DATA; + 
html_emit_token(html, &html->new_token); + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_DOCTYPE; + html->new_token.doctype.force_quirks = true; + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + html->error = + HTML_ERROR_UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER; + html->state = HTML_STATE_BOGUS_DOCTYPE; + goto reconsume; + } + break; + case HTML_STATE_BOGUS_DOCTYPE: + switch (cc) { + case '>': + html->state = HTML_STATE_DATA; + html_emit_token(html, &html->new_token); + break; + case '\0': + html->error = HTML_ERROR_UNEXPECTED_NULL_CHARACTER; + /* ignore */ + break; + case EOF: + html_emit_token(html, &html->new_token); + html_emit_eof_token(html); + break; + default: + /* ignore */ + break; + } + break; + case HTML_STATE_CDATA_SECTION: + switch (cc) { + case ']': + html->state = HTML_STATE_CDATA_SECTION_BRACKET; + break; + case EOF: + html->error = HTML_ERROR_EOF_IN_CDATA; + html_emit_eof_token(html); + break; + default: + if (!html->ignore_comment_data) + html_emit_char_token(html, cc); + break; + } + break; + case HTML_STATE_CDATA_SECTION_BRACKET: + switch (cc) { + case ']': + html->state = HTML_STATE_CDATA_SECTION_END; + break; + default: + if (!html->ignore_comment_data) + html_emit_char_token(html, ']'); + html->state = HTML_STATE_CDATA_SECTION; + goto reconsume; + } + break; + case HTML_STATE_CDATA_SECTION_END: + switch (cc) { + case ']': + if (!html->ignore_comment_data) + html_emit_char_token(html, ']'); + break; + case '>': + html->state = HTML_STATE_DATA; + break; + default: + if (!html->ignore_comment_data) { + html_emit_char_token(html, ']'); + html_emit_char_token(html, ']'); + } + html->state = HTML_STATE_CDATA_SECTION; + goto reconsume; + } + break; + case HTML_STATE_CHARACTER_REFERENCE: + STR_APPEND(html->tmp, html->tmp_len, '&'); + + if (cc == '#') { + STR_APPEND(html->tmp, html->tmp_len, cc); + html->state = HTML_STATE_NUMERIC_CHARACTER_REFERENCE; + break; + } + if 
(IS_ALPHANUMERIC(cc)) { + html->state = HTML_STATE_NAMED_CHARACTER_REFERENCE; + goto reconsume; + } + + /* flush consumed */ + if (CONSUMED_AS_PART_OF_AN_ATTRIBUTE) { + /* consumed as part of an attribute */ + for (n = 0; n < html->tmp_len; n++) { + attr = &NEW_TOKEN_LAST_ATTR; + STR_APPEND(attr->val, attr->val_len, html->tmp[n]); + } + } else { + /* TODO: check return state for comment ones if ignoring */ + + for (n = 0; n < html->tmp_len; n++) + html_emit_char_token(html, html->tmp[n]); + } + html->tmp_len = 0; + html->state = html->return_state; + goto reconsume; + case HTML_STATE_NAMED_CHARACTER_REFERENCE: + found_entity = NULL; + + STR_APPEND(html->tmp, html->tmp_len, cc); + + for (n = 0; n < html->lookahead_len; n++) { + STR_APPEND(html->tmp, html->tmp_len, html->lookahead[n]); + if (html->lookahead[n] == ';') + break; + } + + HTML_DEBUG((": trying to match '%s'", html->tmp)); + + found_entity = NULL; + for (j = 0; html_entities[j].entity != NULL; j++) { + for (i = 0; ; i++) { + if (html_entities[j].entity[i] == '\0') { + /* + * If we have an ; in our buffer, match the longer + * version of this entity instead (&amp; instead of + * &amp) + */ + if (html_entities[j].entity[i - 1] != ';' && + html->tmp[i] == ';') + goto next_entity; + found_entity = &html_entities[j]; + HTML_DEBUG((": matched lookahead to entity '%s'", + found_entity->entity)); + html_lookahead_consume(html, i - 2); + break; + } + if (i >= html->tmp_len || + html_entities[j].entity[i] != html->tmp[i]) + goto next_entity; + } +next_entity: + continue; + } + + if (found_entity != NULL) { + if (CONSUMED_AS_PART_OF_AN_ATTRIBUTE && + html->tmp[html->tmp_len - 1] != ';' && + (html->lookahead[0] == '=' || + IS_ALPHANUMERIC(html->lookahead[0]))) { + /* + * "for historical reasons, flush code points consumed as a + * character reference and switch to the return state." 
+ */ + HTML_DEBUG((": doing historical flush thing")); + attr = &NEW_TOKEN_LAST_ATTR; + for (n = 0; n < html->tmp_len; n++) { + STR_APPEND(attr->val, attr->val_len, html->tmp[n]); + } + html->tmp_len = 0; + html->state = html->return_state; + break; + } + + /* otherwise... */ + if (html->tmp[html->tmp_len - 1] != ';') + html->error = + HTML_ERROR_MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE; + + html->tmp_len = 0; + + if ((j = (found_entity->codepoint >> 24) & 0xff)) + html->tmp[html->tmp_len++] = j; + if ((j = (found_entity->codepoint >> 16) & 0xff)) + html->tmp[html->tmp_len++] = j; + if ((j = (found_entity->codepoint >> 8) & 0xff)) + html->tmp[html->tmp_len++] = j; + if ((j = found_entity->codepoint & 0xff)) + html->tmp[html->tmp_len++] = j; + + /* fall through */ + } else { + HTML_DEBUG((": no entity found for '%s'", html->tmp)); + + /* pretend we didn't copy anything into tmp after & and cc */ + html->tmp_len = 2; + html->tmp[html->tmp_len] = '\0'; + } + + if (CONSUMED_AS_PART_OF_AN_ATTRIBUTE) { + attr = &NEW_TOKEN_LAST_ATTR; + for (n = 0; n < html->tmp_len; n++) { + STR_APPEND(attr->val, attr->val_len, html->tmp[n]); + } + HTML_DEBUG((": attribute %s=\"%s\"", attr->name, attr->val)); + } else { + for (j = 0; j < html->tmp_len; j++) + html_emit_char_token(html, html->tmp[j]); + } + + html->tmp_len = 0; + if (found_entity == NULL) + html->state = HTML_STATE_AMBIGUOUS_AMPERSAND; + else + html->state = html->return_state; + break; + case HTML_STATE_AMBIGUOUS_AMPERSAND: + if (IS_ALPHANUMERIC(cc)) { + if (CONSUMED_AS_PART_OF_AN_ATTRIBUTE) { + attr = &NEW_TOKEN_LAST_ATTR; + STR_APPEND(attr->val, attr->val_len, cc); + } else { + html_emit_char_token(html, cc); + } + break; + } + if (cc == ';') { + html->error = HTML_ERROR_UNKNOWN_NAMED_CHARACTER_REFERENCE; + html->state = html->return_state; + goto reconsume; + } + html->state = html->return_state; + goto reconsume; + case HTML_STATE_NUMERIC_CHARACTER_REFERENCE: + switch (cc) { + case 'x': + case 'X': + 
STR_APPEND(html->tmp, html->tmp_len, cc); + html->state = HTML_STATE_HEXADECIMAL_CHARACTER_REFERENCE_START; + break; + default: + html->state = HTML_STATE_DECIMAL_CHARACTER_REFERENCE_START; + goto reconsume; + } + break; + case HTML_STATE_HEXADECIMAL_CHARACTER_REFERENCE_START: + if (IS_HEX_DIGIT(cc)) { + html->state = HTML_STATE_HEXADECIMAL_CHARACTER_REFERENCE; + goto reconsume; + } + + html->error = + HTML_ERROR_ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE; + if (CONSUMED_AS_PART_OF_AN_ATTRIBUTE) { + attr = &NEW_TOKEN_LAST_ATTR; + STR_APPEND(attr->val, attr->val_len, cc); + } else { + html_emit_char_token(html, cc); + } + html->state = html->return_state; + goto reconsume; + case HTML_STATE_DECIMAL_CHARACTER_REFERENCE_START: + case HTML_STATE_HEXADECIMAL_CHARACTER_REFERENCE: + case HTML_STATE_DECIMAL_CHARACTER_REFERENCE: + case HTML_STATE_NUMERIC_CHARACTER_REFERENCE_END: + /* TODO */ + panic("state %d not supported", html->state); + break; + default: + panic("bogus tokenize state %d", html->state); + } + + if (html->state != was_state) + HTML_DEBUG((": exited state %d", html_state_names[html->state])); /* NOTE(review): html_state_names[] holds strings, yet the format above uses %d while the html_error_strings[] message below uses %s - confirm whether %s was intended */ + + if (html->error) { + HTML_DEBUG((": error %s", html_error_strings[html->error])); + html->error = 0; + } + HTML_DEBUG(("\r")); +} + /* html_tokenize_finish: feed EOF to the tokenizer and stop parsing. While html->lookahead_len is non-zero it keeps calling html_tokenize(html, EOF) (presumably each call consumes lookahead - confirm, otherwise the loop cannot end), then makes one more html_tokenize(html, EOF) call and finishes with html_stop_parsing(html). */ +void +html_tokenize_finish(struct html_page *html) +{ + if (html->lookahead_len) { + HTML_DEBUG(("finish requested, tokenizing remaining %d lookahead\r", + html->lookahead_len)); + + while (html->lookahead_len) + html_tokenize(html, EOF); + } + + html_tokenize(html, EOF); + + html_stop_parsing(html); +} + /* html_prep_new_token: zero html->new_token (sizeof(html_token) bytes) and set its type field to token_type. */ +void +html_prep_new_token(struct html_page *html, html_token_type token_type) +{ + memset(&html->new_token, 0, sizeof(html_token)); + html->new_token.type = token_type; +} + /* html_prep_new_attribute: claim the next slot in tag->attrs and return a pointer to it. Panics with "tag attr overflow" when attrs_count has already reached nitems(tag->attrs). The new slot's name_len and val_len are set to 0 and name[0] and val[0] to NUL. The html argument is not read here. */ +struct html_attr * +html_prep_new_attribute(struct html_page *html, struct html_tag *tag) +{ + if (tag->attrs_count >= nitems(tag->attrs)) + panic("tag attr overflow"); + + tag->attrs_count++; + tag->attrs[tag->attrs_count - 1].name_len = 0; + 
tag->attrs[tag->attrs_count - 1].name[0] = '\0'; + tag->attrs[tag->attrs_count - 1].val_len = 0; + tag->attrs[tag->attrs_count - 1].val[0] = '\0'; + + return &tag->attrs[tag->attrs_count - 1]; +} + /* html_appropriate_end_tag_token: returns false when html->open_count <= 0, otherwise true only if html->current_node->name and html->new_token.tag.name compare equal with strcmp(). NOTE(review): the token argument is never read, the comparison always uses html->new_token - confirm callers only pass &html->new_token. The spec text quoted in the body refers to the last emitted start tag, whereas this code compares against html->current_node. */ +bool +html_appropriate_end_tag_token(struct html_page *html, html_token *token) +{ + /* https://html.spec.whatwg.org/multipage/parsing.html#tokenization + * "an end tag token whose tag name matches the tag name of the last start + * tag to have been emitted" + */ + if (html->open_count <= 0) + return false; + + /* TODO: fix */ + + return (strcmp(html->current_node->name, html->new_token.tag.name) == 0); +} + /* html_find_tag_type: case-insensitive (strcasecmp) search for name in html_tag_names[], starting at index 1 and stopping at the NULL entry. Returns the matching index, or 0 after a debug message when nothing matches. */ +html_tag_type +html_find_tag_type(char *name) +{ + short n; + + for (n = 1; html_tag_names[n] != NULL; n++) { + if (strcasecmp(name, html_tag_names[n]) == 0) + return n; + } + + HTML_DEBUG((": html_find_tag_type couldn't find %s", name)); + return 0; +}