/* * Copyright (c) 2024 joshua stein * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include #include #include "stdint.h" #include "util.h" //#define HTML_ENABLE #ifdef HTML_ENABLE void html_output(void *cookie, struct html_page *html, char *str, size_t len); void html_output_margin(void *cookie, struct html_page *html); void html_output_field(void *cookie, struct html_page *html, struct html_element *el); void html_debug(const char *fmt, ...); void html_have_title(void *cookie, struct html_page *html, char *str, size_t len); //#define HTML_ENABLE_DEBUGGING #ifdef HTML_ENABLE_DEBUGGING extern struct html_page *the_html; # define HTML_DEBUG(x) do { html_debug x; } while (0) #else # define HTML_DEBUG(x) {} #endif /* * tunables */ #define HTML_STACK_DEPTH 128 /* this should in theory be the max size of an html_entity but that's huge */ #define HTML_LOOKAHEAD_SIZE 10 #define HTML_OUTPUT_BUF_SIZE 64 #define HTML_TAG_TEXT_CHUNK_SIZE 512 /* * helpers */ #define IS_WHITESPACE(c) ((c) == '\t' || (c) == '\n' || (c) == '\f' || \ (c) == '\r' || (c) == ' ') #define IS_LOWER_ALPHA(c) ((c) >= 'a' && (c) <= 'z') #define IS_UPPER_ALPHA(c) ((c) >= 'A' && (c) <= 'Z') #define IS_ALPHA(c) (IS_LOWER_ALPHA((c)) || IS_UPPER_ALPHA((c))) #define IS_DIGIT(c) (((c) >= '0' && (c) <= '9')) #define IS_ALPHANUMERIC(c) (IS_ALPHA((c)) || IS_DIGIT((c))) #define IS_LOWER_HEX_DIGIT(c) ((c) >= 'a' && (c) <= 'f') #define IS_UPPER_HEX_DIGIT(c) ((c) >= 'A' && (c) <= 'F') #define IS_HEX_DIGIT(c) (IS_LOWER_HEX_DIGIT(c) || IS_UPPER_HEX_DIGIT(c)) #define IS_LEADING_SURROGATE(c) ((c) >= 0xdb00 && (c) <= 0xdbff) #define IS_TRAILING_SURROGATE(c) ((c) >= 0xdc00 && (c) <= 0xdfff) #define IS_SURROGATE(c) (IS_LEADING_SURROGATE(c) || IS_TRAILING_SURROGATE(c)) #define IS_NONCHARACTER(c) (\ ((c) >= 0xfdd0 && (c) <= 0xfdef) || \ (c) == 0xfffe || (c) == 0xffff || \ (c) == 0x1fffe || (c) == 0x1ffff || \ (c) == 0x2fffe || (c) == 0x2ffff || \ (c) == 0x3fffe || (c) == 0x3ffff || \ (c) == 0x4fffe || (c) == 0x4ffff || \ (c) == 0x5fffe || (c) == 0x5ffff || \ (c) == 0x6fffe || (c) == 0x6ffff || \ (c) == 0x7fffe || (c) == 0x7ffff || \ (c) == 0x8fffe || (c) == 0x8ffff || \ (c) == 0x9fffe || (c) == 0x9ffff || \ (c) == 0xafffe || (c) == 0xaffff || \ (c) == 0xbfffe || (c) == 0xbffff || \ (c) == 0xcfffe || (c) == 0xcffff || \ (c) == 0xdfffe || (c) == 0xdffff || \ (c) == 0xefffe || (c) == 0xeffff || \ (c) == 0xffffe || (c) == 0xfffff || \ (c) == 0x10fffe || (c) == 0x10ffff) #define IS_C0_CONTROL(c) ((c) >= 0 && (c) <= 0x1f) #define IS_CONTROL(c) (IS_C0_CONTROL((c)) || ((c) >= 0x7f && (c) <= 0x9f)) #define IS_BLOCK(tag) ((tag) < HTML_TAG_LAST_BLOCK) #define NEW_TOKEN_LAST_ATTR (html->new_token.tag.attrs[html->new_token.tag.attrs_count - 1]) /* only works on fixed-size char arrays */ #define STR_APPEND(field, len, ch) \ if ((len) < sizeof(field)) { \ (field)[(len)++] = (ch); \ (field)[(len)] = '\0'; \ } #define CONSUMED_AS_PART_OF_AN_ATTRIBUTE \ (html->return_state == HTML_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED || \ html->return_state == HTML_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED || \ html->return_state == HTML_STATE_ATTRIBUTE_VALUE_UNQUOTED) #ifndef nitems #define nitems(what) (sizeof((what)) / sizeof((what)[0])) #endif /* insertion mode */ /* https://html.spec.whatwg.org/multipage/parsing.html#the-insertion-mode */ extern const char *html_mode_names[]; typedef enum { HTML_MODE_NONE = 0, HTML_MODE_INITIAL, HTML_MODE_BEFORE_HTML, HTML_MODE_BEFORE_HEAD, HTML_MODE_IN_HEAD, HTML_MODE_IN_HEAD_NOSCRIPT, HTML_MODE_AFTER_HEAD, HTML_MODE_IN_BODY, HTML_MODE_TEXT, HTML_MODE_IN_TABLE, HTML_MODE_IN_TABLE_TEXT, HTML_MODE_IN_CAPTION, HTML_MODE_IN_COLUMN_GROUP, HTML_MODE_IN_TABLE_BODY, HTML_MODE_IN_ROW, HTML_MODE_IN_CELL, HTML_MODE_IN_SELECT, HTML_MODE_IN_SELECT_IN_TABLE, HTML_MODE_IN_TEMPLATE, HTML_MODE_AFTER_BODY, HTML_MODE_IN_FRAMESET, HTML_MODE_AFTER_FRAMESET, HTML_MODE_AFTER_AFTER_BODY, HTML_MODE_AFTER_AFTER_FRAMESET } html_mode; /* tokenization state */ /* https://html.spec.whatwg.org/multipage/parsing.html#tokenization */ extern const char *html_state_names[]; typedef enum { HTML_STATE_NONE = 0, HTML_STATE_DATA, HTML_STATE_RCDATA, HTML_STATE_RAWTEXT, HTML_STATE_SCRIPT_DATA, HTML_STATE_PLAINTEXT, HTML_STATE_TAG_OPEN, HTML_STATE_END_TAG_OPEN, HTML_STATE_TAG_NAME, HTML_STATE_RCDATA_LESS_THAN_SIGN, HTML_STATE_RCDATA_END_TAG_OPEN, HTML_STATE_RCDATA_END_TAG_NAME, HTML_STATE_RAWTEXT_LESS_THAN_SIGN, HTML_STATE_RAWTEXT_END_TAG_OPEN, HTML_STATE_RAWTEXT_END_TAG_NAME, HTML_STATE_SCRIPT_DATA_LESS_THAN_SIGN, HTML_STATE_SCRIPT_DATA_END_TAG_OPEN, HTML_STATE_SCRIPT_DATA_END_TAG_NAME, HTML_STATE_SCRIPT_DATA_ESCAPE_START, HTML_STATE_SCRIPT_DATA_ESCAPE_START_DASH, HTML_STATE_SCRIPT_DATA_ESCAPED, HTML_STATE_SCRIPT_DATA_ESCAPED_DASH, HTML_STATE_SCRIPT_DATA_ESCAPED_DASH_DASH, HTML_STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, HTML_STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN, HTML_STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME, HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START, HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED, HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH, HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END, HTML_STATE_BEFORE_ATTRIBUTE_NAME, HTML_STATE_ATTRIBUTE_NAME, HTML_STATE_AFTER_ATTRIBUTE_NAME, HTML_STATE_BEFORE_ATTRIBUTE_VALUE, HTML_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED, HTML_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED, HTML_STATE_ATTRIBUTE_VALUE_UNQUOTED, HTML_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED, HTML_STATE_SELF_CLOSING_START_TAG, HTML_STATE_BOGUS_COMMENT, HTML_STATE_MARKUP_DECLARATION_OPEN, HTML_STATE_COMMENT_START, HTML_STATE_COMMENT_START_DASH, HTML_STATE_COMMENT, HTML_STATE_COMMENT_LESS_THAN_SIGN, HTML_STATE_COMMENT_LESS_THAN_SIGN_BANG, HTML_STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH, HTML_STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH, HTML_STATE_COMMENT_END_DASH, HTML_STATE_COMMENT_END, HTML_STATE_COMMENT_END_BANG, HTML_STATE_DOCTYPE, HTML_STATE_BEFORE_DOCTYPE_NAME, HTML_STATE_DOCTYPE_NAME, HTML_STATE_AFTER_DOCTYPE_NAME, HTML_STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD, HTML_STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, HTML_STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, HTML_STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, HTML_STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER, HTML_STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, HTML_STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD, HTML_STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, HTML_STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, HTML_STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, HTML_STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER, HTML_STATE_BOGUS_DOCTYPE, HTML_STATE_CDATA_SECTION, HTML_STATE_CDATA_SECTION_BRACKET, HTML_STATE_CDATA_SECTION_END, HTML_STATE_CHARACTER_REFERENCE, HTML_STATE_NAMED_CHARACTER_REFERENCE, HTML_STATE_AMBIGUOUS_AMPERSAND, HTML_STATE_NUMERIC_CHARACTER_REFERENCE, HTML_STATE_HEXADECIMAL_CHARACTER_REFERENCE_START, HTML_STATE_DECIMAL_CHARACTER_REFERENCE_START, HTML_STATE_HEXADECIMAL_CHARACTER_REFERENCE, HTML_STATE_DECIMAL_CHARACTER_REFERENCE, HTML_STATE_NUMERIC_CHARACTER_REFERENCE_END } html_state; /* tokenization output */ /* https://html.spec.whatwg.org/multipage/parsing.html#tokenization */ #ifdef HTML_ENABLE_DEBUGGING extern const char *html_token_names[]; #endif typedef enum { HTML_TOKEN_DOCTYPE = 1, HTML_TOKEN_START_TAG, HTML_TOKEN_END_TAG, HTML_TOKEN_COMMENT, HTML_TOKEN_CHARACTER, HTML_TOKEN_EOF } html_token_type; /* html_process_token return states */ typedef enum { HTML_TOKEN_REPROCESS = 1, HTML_TOKEN_PROCESSED } html_token_act; /* parse errors */ /* https://html.spec.whatwg.org/multipage/parsing.html#parse-errors */ #ifdef HTML_ENABLE_DEBUGGING extern const char *html_error_strings[]; #endif typedef enum { HTML_ERROR_NONE, HTML_ERROR_ABRUPT_CLOSING_OF_EMPTY_COMMENT, HTML_ERROR_ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER, HTML_ERROR_ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER, HTML_ERROR_ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE, HTML_ERROR_CDATA_IN_HTML_CONTENT, HTML_ERROR_CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE, HTML_ERROR_CONTROL_CHARACTER_IN_INPUT_STREAM, HTML_ERROR_CONTROL_CHARACTER_REFERENCE, HTML_ERROR_DUPLICATE_ATTRIBUTE, HTML_ERROR_END_TAG_WITH_ATTRIBUTES, HTML_ERROR_END_TAG_WITH_TRAILING_SOLIDUS, HTML_ERROR_EOF_BEFORE_TAG_NAME, HTML_ERROR_EOF_IN_CDATA, HTML_ERROR_EOF_IN_COMMENT, HTML_ERROR_EOF_IN_DOCTYPE, HTML_ERROR_EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT, HTML_ERROR_EOF_IN_TAG, HTML_ERROR_INCORRECTLY_CLOSED_COMMENT, HTML_ERROR_INCORRECTLY_OPENED_COMMENT, HTML_ERROR_INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME, HTML_ERROR_INVALID_FIRST_CHARACTER_OF_TAG_NAME, HTML_ERROR_MISSING_ATTRIBUTE_VALUE, HTML_ERROR_MISSING_DOCTYPE_NAME, HTML_ERROR_MISSING_DOCTYPE_PUBLIC_IDENTIFIER, HTML_ERROR_MISSING_DOCTYPE_SYSTEM_IDENTIFIER, HTML_ERROR_MISSING_END_TAG_NAME, HTML_ERROR_MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, HTML_ERROR_MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, HTML_ERROR_MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE, HTML_ERROR_MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD, HTML_ERROR_MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD, HTML_ERROR_MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME, HTML_ERROR_MISSING_WHITESPACE_BETWEEN_ATTRIBUTES, HTML_ERROR_MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, HTML_ERROR_NESTED_COMMENT, HTML_ERROR_NONCHARACTER_CHARACTER_REFERENCE, HTML_ERROR_NONCHARACTER_IN_INPUT_STREAM, HTML_ERROR_NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS, HTML_ERROR_NULL_CHARACTER_REFERENCE, HTML_ERROR_SURROGATE_CHARACTER_REFERENCE, HTML_ERROR_SURROGATE_IN_INPUT_STREAM, HTML_ERROR_UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER, HTML_ERROR_UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME, HTML_ERROR_UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE, HTML_ERROR_UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME, HTML_ERROR_UNEXPECTED_NULL_CHARACTER, HTML_ERROR_UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME, HTML_ERROR_UNEXPECTED_SOLIDUS_IN_TAG, HTML_ERROR_UNKNOWN_NAMED_CHARACTER_REFERENCE } html_error; /* keep this in same order as html_tag_names[] */ extern const char *html_tag_names[]; typedef enum { HTML_TAG__NONE = 0, HTML_TAG_A, HTML_TAG_ADDRESS, HTML_TAG_APPLET, HTML_TAG_AREA, HTML_TAG_ARTICLE, HTML_TAG_ASIDE, HTML_TAG_B, HTML_TAG_BASE, HTML_TAG_BASEFONT, HTML_TAG_BGSOUND, HTML_TAG_BIG, HTML_TAG_BLOCKQUOTE, HTML_TAG_BODY, HTML_TAG_BR, HTML_TAG_BUTTON, HTML_TAG_CAPTION, HTML_TAG_CENTER, HTML_TAG_CITE, HTML_TAG_CODE, HTML_TAG_COL, HTML_TAG_COLGROUP, HTML_TAG_DD, HTML_TAG_DETAILS, HTML_TAG_DFN, HTML_TAG_DIALOG, HTML_TAG_DIR, HTML_TAG_DIV, HTML_TAG_DL, HTML_TAG_DT, HTML_TAG_EM, HTML_TAG_EMBED, HTML_TAG_FIELDSET, HTML_TAG_FIGCAPTION, HTML_TAG_FIGURE, HTML_TAG_FONT, HTML_TAG_FOOTER, HTML_TAG_FORM, HTML_TAG_FRAME, HTML_TAG_FRAMESET, HTML_TAG_H1, HTML_TAG_H2, HTML_TAG_H3, HTML_TAG_H4, HTML_TAG_H5, HTML_TAG_H6, HTML_TAG_HEAD, HTML_TAG_HEADER, HTML_TAG_HGROUP, HTML_TAG_HR, HTML_TAG_HTML, HTML_TAG_I, HTML_TAG_IFRAME, HTML_TAG_IMAGE, HTML_TAG_IMG, HTML_TAG_INPUT, HTML_TAG_INS, HTML_TAG_KBD, HTML_TAG_KEYGEN, HTML_TAG_LI, HTML_TAG_LINK, HTML_TAG_LISTING, HTML_TAG_MAIN, HTML_TAG_MARQUEE, HTML_TAG_MATH, HTML_TAG_MENU, HTML_TAG_META, HTML_TAG_NAV, HTML_TAG_NOBR, HTML_TAG_NOEMBED, HTML_TAG_NOFRAMES, HTML_TAG_NOSCRIPT, HTML_TAG_OBJECT, HTML_TAG_OL, HTML_TAG_OPTGROUP, HTML_TAG_OPTION, HTML_TAG_P, HTML_TAG_PARAM, HTML_TAG_PLAINTEXT, HTML_TAG_PRE, HTML_TAG_RB, HTML_TAG_RP, HTML_TAG_RT, HTML_TAG_RTC, HTML_TAG_RUBY, HTML_TAG_S, HTML_TAG_SAMP, HTML_TAG_SCRIPT, HTML_TAG_SEARCH, HTML_TAG_SECTION, HTML_TAG_SELECT, HTML_TAG_SMALL, HTML_TAG_SOURCE, HTML_TAG_SPAN, HTML_TAG_STRIKE, HTML_TAG_STRONG, HTML_TAG_STYLE, HTML_TAG_SUB, HTML_TAG_SUP, HTML_TAG_SUMMARY, HTML_TAG_SVG, HTML_TAG_TABLE, HTML_TAG_TBODY, HTML_TAG_TD, HTML_TAG_TEMPLATE, HTML_TAG_TEXTAREA, HTML_TAG_TFOOT, HTML_TAG_TH, HTML_TAG_THEAD, HTML_TAG_TITLE, HTML_TAG_TR, HTML_TAG_TRACK, HTML_TAG_TT, HTML_TAG_U, HTML_TAG_UL, HTML_TAG_VAR, HTML_TAG_WBR, HTML_TAG_XMP, HTML_TAG_MAX_ID } html_tag_type; typedef enum { HTML_SCOPE_DEFAULT, HTML_SCOPE_LIST_ITEM, HTML_SCOPE_BUTTON, HTML_SCOPE_TABLE, HTML_SCOPE_SELECT } html_scope; typedef enum { HTML_NAMESPACE_HTML, HTML_NAMESPACE_MATHML, HTML_NAMESPACE_SVG, HTML_NAMESPACE_XLINK, HTML_NAMESPACE_XML, HTML_NAMESPACE_XMLNS } html_namespace; typedef struct { const char *entity; uint32_t codepoint; } html_entity; extern const html_entity html_entities[]; struct html_attr { char name[24]; short name_len; char val[128]; short val_len; }; struct html_tag { /* this must be first */ html_tag_type token_type; html_tag_type type; html_namespace ns; char name[16]; short name_len; struct html_attr attrs[16]; short attrs_count; bool emitted; bool self_closing; bool self_closing_acked; }; struct html_element { html_tag_type type; html_namespace ns; char name[16]; short name_len; struct html_attr attrs[8]; short attrs_count; char *text; size_t text_len; size_t text_off; size_t text_size; bool has_height; short margin_top; short margin_bottom; short ol_count; short renders; TEHandle input_te; short refs; struct html_element *next_need_free; }; struct html_comment { /* this must be first */ html_token_type token_type; char data[8]; short len; }; struct html_char { /* this must be first */ html_token_type token_type; char c; }; struct html_doctype { /* this must be first */ html_token_type _pad; char name[16]; short name_len; char public_identifier[16]; short public_identifier_len; char system_identifier[16]; short system_identifier_len; bool system_identifier_found; bool force_quirks; }; /* * THINK C doesn't support anonymous unions so we can't have a * struct html_token with tag/doctype/comment at the root */ union html_token { /* every other type has html_token_type as its first member */ html_token_type type; struct html_tag tag; struct html_doctype doctype; struct html_comment comment; struct html_char ch; }; typedef union html_token html_token; struct html_formatting { bool marker; struct html_element *element; html_token_type token; }; struct html_page { void *cookie; size_t input_pos; bool eof; /* insertion mode */ html_mode mode; html_mode original_mode; html_state state; html_state return_state; html_error error; char *escaped_buf; size_t escaped_size; long char_ref_code; bool parse_last_cr; bool frameset_ok; bool parser_cannot_change_mode; bool foster_parenting; bool quirks_mode; /* rendering */ bool render_in_body; short render_list_depth; char last_output; bool last_margin_top; bool last_margin_bottom; /* configurables */ bool ignore_script_data; bool ignore_comment_data; bool scripting; bool styling; /* if the next character token should be skipped if it's \n */ bool skip_newline_char_token; /* "stack of open elements" */ struct html_element *open[HTML_STACK_DEPTH]; short open_count; struct html_element *current_node; struct html_element *need_free_list; struct html_element *need_free_tail; /* https://html.spec.whatwg.org/multipage/parsing.html#the-list-of-active-formatting-elements */ struct html_formatting active_formatting[HTML_STACK_DEPTH]; short active_formatting_count; /* https://html.spec.whatwg.org/multipage/parsing.html#the-element-pointers */ struct html_element *head; struct html_element *form; union html_token new_token; /* we'll queue some characters up before actually parsing */ char lookahead[HTML_LOOKAHEAD_SIZE]; unsigned char lookahead_len; /* some tokens need a temporary buffer to store text */ char tmp[128]; unsigned char tmp_len; }; #define HTML_REPLACEMENT_CHARACTER 0xff void html_output(void *cookie, struct html_page *html, char *str, size_t len); void html_debug(const char *fmt, ...); void html_have_title(void *cookie, struct html_page *html, char *str, size_t len); /* html.c */ struct html_page * html_init_page(void *cookie); void html_page_finish(struct html_page **htmlp); void html_xfree(struct html_page **htmlp); bool html_parse(struct html_page *html, char *str, size_t len); void html_insert_character(struct html_page *html, short cc); bool html_is_block_tag(struct html_page *html, html_tag_type tag); long html_get_attribute_value(struct html_page *html, struct html_element *element, char *name, char **ret); void html_render_current_node(struct html_page *html, bool popping); void html_parse_error(struct html_page *html); void html_debug(const char *fmt, ...); #if 0 void html_emit_token(struct html_page *html, html_token *token); #else #define html_emit_token(a, b) html_process_token(a, b) #endif void html_buffer_output(struct html_page *html, char *str, size_t len); void html_flush_output_buffer(struct html_page *html); /* html_tokenize.c */ void html_tokenize(struct html_page *html, short cc); void html_prep_new_token(struct html_page *html, html_token_type token_type); struct html_attr * html_prep_new_attribute(struct html_page *html, struct html_tag *tag); void html_tokenize_finish(struct html_page *html); html_token_act html_process_token_in_foreign_content(struct html_page *html, html_token *token); /* html_tree.c */ void html_process_token(struct html_page *html, html_token *token); void html_append_comment(struct html_page *html, struct html_comment *comment); void html_stop_parsing(struct html_page *html); char * html_escape_string(struct html_page *html, char *str, size_t *len, bool attribute_mode); void html_emit_char_token(struct html_page *html, short cc); void html_emit_eof_token(struct html_page *html); void html_emit_comment(struct html_page *html, struct html_comment *comment); #endif /* HTML_ENABLE */