jcs
/detritus
/amendments
/58
html_tree: Import HTML tree constructor written to WHATWG specs
jcs made amendment 58 about 1 year ago
--- html_tree.c Tue Dec 10 23:13:13 2024
+++ html_tree.c Tue Dec 10 23:13:13 2024
@@ -0,0 +1,3567 @@
+/*
+ * Copyright (c) 2024 joshua stein <jcs@jcs.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Tree construction
+ * https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
+ *
+ * html_tokenize() outputs tokens of various types to the html_emit_*token()
+ * functions, which then output them to html_process_token() here for tree
+ * building, tag order manipulation, tag closing, etc.
+ */
+
+#include "html.h"
+
+/* elements and the stack of open elements */
+void html_deref_element(
+    struct html_page *html, struct html_element *element);
+void html_append_element(
+    struct html_page *html, struct html_element *element);
+struct html_element *html_create_element_for_token(
+    struct html_page *html, html_token *token);
+struct html_element *html_append_element_for_token(
+    struct html_page *html, html_token *token, html_namespace ns);
+bool html_remove_open_element(
+    struct html_page *html, struct html_element *element);
+
+/* one handler per insertion mode, dispatched from html_process_token() */
+html_token_act html_process_token_initial(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_before_html(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_before_head(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_in_head(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_in_head_noscript(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_after_head(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_in_body(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_text(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_in_table(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_in_table_text(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_in_caption(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_in_column_group(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_in_table_body(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_in_row(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_in_cell(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_in_select(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_in_select_in_table(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_in_template(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_after_body(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_in_frameset(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_after_frameset(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_after_after_body(
+    struct html_page *html, html_token *token);
+html_token_act html_process_token_after_after_frameset(
+    struct html_page *html, html_token *token);
+
+/* popping open elements and closing tags */
+void html_pop_current_element(struct html_page *html);
+void html_pop_nodes_until_past_tag(
+    struct html_page *html, html_tag_type stop_after);
+void html_pop_nodes_until_past_element(
+    struct html_page *html, struct html_element *element);
+void html_close_p(struct html_page *html);
+void html_generate_implied_end_tags(
+    struct html_page *html, char *except, bool thoroughly);
+
+/* list of active formatting elements */
+void html_push_active_formatting_element(
+    struct html_page *html, struct html_element *element,
+    html_token_type token_type);
+void html_push_active_formatting_marker(
+    struct html_page *html, html_token_type token_type);
+bool html_is_tag_in_active_formatting(
+    struct html_page *html, html_tag_type tag);
+bool html_is_element_in_active_formatting(
+    struct html_page *html, struct html_element *element);
+bool html_remove_active_formatting_element(
+    struct html_page *html, struct html_element *element);
+void html_reconstruct_active_formatting(struct html_page *html);
+void html_clear_active_formatting_to_last_marker(struct html_page *html);
+bool html_run_adoption_agency(struct html_page *html, html_token *token);
+
+/* predicates on elements and on the stack of open elements */
+bool html_is_element_special(
+    struct html_page *html, struct html_element *el);
+bool html_is_element_formatting(
+    struct html_page *html, struct html_element *el);
+bool html_is_element_open(
+    struct html_page *html, struct html_element *el);
+bool html_has_tag_open(struct html_page *html, html_tag_type tag);
+bool html_has_element_in_scope(
+    struct html_page *html, struct html_element *element,
+    html_scope scope);
+bool html_has_element_with_tag_open_in_scope(
+    struct html_page *html, html_tag_type tag, html_scope scope);
+bool html_has_element_or_one_with_tag_open_in_scope(
+    struct html_page *html, struct html_element *element,
+    html_tag_type tag, html_scope scope);
+bool html_element_serializes_as_void(
+    struct html_page *html, struct html_element *element);
+
+/*
+ * Push element onto the stack of open elements and make it the current
+ * node.
+ *
+ * If there already is a current node it is rendered first.  The stack
+ * takes its own reference on element (refs is incremented), and a top and
+ * bottom margin of 1 is assigned for the tag types listed in the switch
+ * below.  Panics when the stack has no room left.
+ */
+void
+html_append_element(struct html_page *html, struct html_element *element)
+{
+	bool nested_list;
+	short n;
+
+	if (html->open_count >= nitems(html->open))
+		panic("ran out of tag stack space");
+
+	if (html->current_node) {
+		HTML_DEBUG((": rendering current before-append <%s>",
+		    html->current_node->name));
+		html_render_current_node(html, false);
+	}
+
+	HTML_DEBUG((": appending element"));
+	if (element->ns != HTML_NAMESPACE_HTML)
+		HTML_DEBUG((" in namespace %d", element->ns));
+	HTML_DEBUG((": %d: <%s>", html->open_count, element->name));
+
+	html->open[html->open_count++] = element;
+	element->refs++;
+	html->current_node = element;
+
+	switch (element->type) {
+	case HTML_TAG_BLOCKQUOTE:
+	case HTML_TAG_CENTER:
+	case HTML_TAG_DL:
+	case HTML_TAG_H1:
+	case HTML_TAG_H2:
+	case HTML_TAG_H3:
+	case HTML_TAG_H4:
+	case HTML_TAG_H5:
+	case HTML_TAG_H6:
+	case HTML_TAG_MENU:
+	case HTML_TAG_P:
+		element->margin_top = 1;
+		element->margin_bottom = 1;
+		break;
+	case HTML_TAG_OL:
+	case HTML_TAG_UL:
+		/*
+		 * Only give margins if not inside another list.  Every
+		 * ancestor (everything below the just-pushed element) is
+		 * checked; a list with no ancestors at all is not nested
+		 * either, so it gets margins too.
+		 */
+		nested_list = false;
+		for (n = html->open_count - 2; n >= 0; n--) {
+			if (html->open[n]->type == HTML_TAG_OL ||
+			    html->open[n]->type == HTML_TAG_UL) {
+				nested_list = true;
+				break;
+			}
+		}
+		if (!nested_list) {
+			element->margin_top = 1;
+			element->margin_bottom = 1;
+		}
+		break;
+	default:
+		break;
+	}
+
+	HTML_DEBUG((": now open: "));
+	for (n = 0; n <= html->open_count - 1; n++)
+		HTML_DEBUG(("<%s>", html->open[n]->name));
+}
+
+/*
+ * Handle a comment token.  This is presently a no-op: the code that would
+ * escape the comment text and output it wrapped in <!-- and --> is
+ * compiled out with #if 0.
+ */
+void
+html_append_comment(struct html_page *html, struct html_comment *comment)
+{
+#if 0
+	/*
+	 * https://html.spec.whatwg.org/multipage/parsing.html#serialising-html-fragments:comment-2
+	 */
+	size_t esclen = comment->len;
+	char *esc = html_escape_string(html, comment->data, &esclen, false);
+
+	html_buffer_output(html, "<!--", 4);
+	html_buffer_output(html, esc, esclen);
+	html_buffer_output(html, "-->", 3);
+#endif
+}
+
+/*
+ * Allocate a zeroed element and fill in its type, name and attributes from
+ * a tag token.  The element is returned with no references and is not put
+ * on the stack of open elements.
+ *
+ * Tokens built by the parser itself carry only a tag type; for those the
+ * name is looked up in html_tag_names[] and written back into the token
+ * before it is copied.
+ */
+struct html_element *
+html_create_element_for_token(struct html_page *html, html_token *token)
+{
+	struct html_element *element;
+	size_t len;
+
+	if (token->tag.name[0] == '\0') {
+		/*
+		 * strlcpy() returns the length of the source string, not the
+		 * number of bytes stored, so clamp it in case the name was
+		 * truncated to fit.
+		 */
+		len = strlcpy(token->tag.name, html_tag_names[token->tag.type],
+		    sizeof(token->tag.name));
+		if (len >= sizeof(token->tag.name))
+			len = sizeof(token->tag.name) - 1;
+		token->tag.name_len = len;
+	}
+
+	/* TODO: do an optimized allocation only the size we need */
+	element = xmalloczero(sizeof(struct html_element));
+	element->type = token->tag.type;
+	/*
+	 * NOTE(review): both copies are sized by the destination, which
+	 * assumes the token's name/attrs arrays are at least as large as
+	 * the element's -- confirm against html.h.
+	 */
+	memcpy(element->name, token->tag.name, sizeof(element->name));
+	element->name_len = token->tag.name_len;
+	memcpy(element->attrs, token->tag.attrs, sizeof(element->attrs));
+	element->attrs_count = token->tag.attrs_count;
+
+	return element;
+}
+
+/*
+ * Drop one reference to element.
+ *
+ * When the last reference goes away the element is not freed right here;
+ * it is linked onto the tail of html->need_free_list, which
+ * html_process_token() drains before handling the next token.
+ *
+ * Dropping a reference that was never taken breaks into the debugger and
+ * otherwise does nothing.  In particular the element is not queued again,
+ * since it would already be on the free list and a second entry would
+ * lead to it being freed twice.
+ */
+void
+html_deref_element(struct html_page *html, struct html_element *element)
+{
+	if (element->refs == 0) {
+		Debugger();
+		return;
+	}
+
+	element->refs--;
+	if (element->refs != 0)
+		return;
+
+	if (html->need_free_list)
+		html->need_free_tail->next_need_free = element;
+	else
+		html->need_free_list = element;
+	html->need_free_tail = element;
+}
+
+/*
+ * Create an element for a tag token, place it in namespace ns, and push it
+ * onto the stack of open elements.  Returns the new element.
+ */
+struct html_element *
+html_append_element_for_token(struct html_page *html, html_token *token,
+    html_namespace ns)
+{
+	struct html_element *created = html_create_element_for_token(html,
+	    token);
+
+	created->ns = ns;
+	html_append_element(html, created);
+
+	return created;
+}
+
+/*
+ * Feed one token into the tree builder.
+ *
+ * Elements whose reference count reached zero earlier are freed first.
+ * The token is then given to the handler for the current insertion mode,
+ * or to the foreign-content handler when the current node is not in the
+ * HTML namespace, and is handed out again for as long as a handler
+ * returns HTML_TOKEN_REPROCESS.
+ */
+void
+html_process_token(struct html_page *html, html_token *token)
+{
+	html_token_act ret;
+	struct html_element *el;
+
+	while (html->need_free_list) {
+		HTML_DEBUG((": freeing deref'd <%s>", html->need_free_list->name));
+		el = html->need_free_list->next_need_free;
+		if (html->need_free_list->text)
+			xfree(&html->need_free_list->text);
+		xfree(&html->need_free_list);
+		html->need_free_list = el;
+		html->need_free_tail = NULL;
+	}
+
+	/*
+	 * https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhtml
+	 */
+
+	/*
+	 * A newline is dropped only when it is the very next token after
+	 * the one that asked for it, so the request is cleared no matter
+	 * what this token turns out to be.
+	 */
+	if (html->skip_newline_char_token) {
+		html->skip_newline_char_token = false;
+		if (token->type == HTML_TOKEN_CHARACTER && token->ch.c == '\n')
+			return;
+	}
+
+	HTML_DEBUG((" => token %s,", html_token_names[token->type]));
+
+reprocess:
+	HTML_DEBUG((" mode %s", html_mode_names[html->mode]));
+
+	if (!(html->current_node == NULL ||
+	    html->current_node->ns == HTML_NAMESPACE_HTML ||
+	    token->type == HTML_TOKEN_EOF)) {
+		/*
+		 * Process the token according to the rules given in the
+		 * section for parsing tokens in foreign content.
+		 */
+
+		/* TODO mathml checks */
+
+		ret = html_process_token_in_foreign_content(html, token);
+		if (ret != HTML_TOKEN_REPROCESS)
+			return;
+
+		HTML_DEBUG((" -R->"));
+		/* fallthrough */
+	}
+
+	/*
+	 * Process the token according to the rules given in the section
+	 * corresponding to the current insertion mode in HTML content.
+	 */
+	switch (html->mode) {
+	case HTML_MODE_INITIAL:
+		ret = html_process_token_initial(html, token);
+		break;
+	case HTML_MODE_BEFORE_HTML:
+		ret = html_process_token_before_html(html, token);
+		break;
+	case HTML_MODE_BEFORE_HEAD:
+		ret = html_process_token_before_head(html, token);
+		break;
+	case HTML_MODE_IN_HEAD:
+		ret = html_process_token_in_head(html, token);
+		break;
+	case HTML_MODE_IN_HEAD_NOSCRIPT:
+		ret = html_process_token_in_head_noscript(html, token);
+		break;
+	case HTML_MODE_AFTER_HEAD:
+		ret = html_process_token_after_head(html, token);
+		break;
+	case HTML_MODE_IN_BODY:
+		ret = html_process_token_in_body(html, token);
+		break;
+	case HTML_MODE_TEXT:
+		ret = html_process_token_text(html, token);
+		break;
+	case HTML_MODE_IN_TABLE:
+		ret = html_process_token_in_table(html, token);
+		break;
+	case HTML_MODE_IN_TABLE_TEXT:
+		ret = html_process_token_in_table_text(html, token);
+		break;
+	case HTML_MODE_IN_CAPTION:
+		ret = html_process_token_in_caption(html, token);
+		break;
+	case HTML_MODE_IN_COLUMN_GROUP:
+		ret = html_process_token_in_column_group(html, token);
+		break;
+	case HTML_MODE_IN_TABLE_BODY:
+		ret = html_process_token_in_table_body(html, token);
+		break;
+	case HTML_MODE_IN_ROW:
+		ret = html_process_token_in_row(html, token);
+		break;
+	case HTML_MODE_IN_CELL:
+		ret = html_process_token_in_cell(html, token);
+		break;
+	case HTML_MODE_IN_SELECT:
+		ret = html_process_token_in_select(html, token);
+		break;
+	case HTML_MODE_IN_SELECT_IN_TABLE:
+		/* this mode has its own handler, distinct from "in table" */
+		ret = html_process_token_in_select_in_table(html, token);
+		break;
+	case HTML_MODE_IN_TEMPLATE:
+		ret = html_process_token_in_template(html, token);
+		break;
+	case HTML_MODE_AFTER_BODY:
+		ret = html_process_token_after_body(html, token);
+		break;
+	case HTML_MODE_IN_FRAMESET:
+		ret = html_process_token_in_frameset(html, token);
+		break;
+	case HTML_MODE_AFTER_FRAMESET:
+		ret = html_process_token_after_frameset(html, token);
+		break;
+	case HTML_MODE_AFTER_AFTER_BODY:
+		ret = html_process_token_after_after_body(html, token);
+		break;
+	case HTML_MODE_AFTER_AFTER_FRAMESET:
+		ret = html_process_token_after_after_frameset(html, token);
+		break;
+	default:
+		panic("bogus mode");
+	}
+
+	if (ret == HTML_TOKEN_REPROCESS) {
+		HTML_DEBUG((" -R->"));
+		goto reprocess;
+	}
+}
+
+/*
+ * "initial" insertion mode: the state before any doctype has been seen.
+ *
+ * https://html.spec.whatwg.org/multipage/parsing.html#the-initial-insertion-mode
+ */
+html_token_act
+html_process_token_initial(struct html_page *html, html_token *token)
+{
+	switch (token->type) {
+	case HTML_TOKEN_CHARACTER:
+		switch (token->ch.c) {
+		case '\t':
+		case '\n':
+		case '\f':
+		case '\r':
+		case ' ':
+			/* ignore */
+			return HTML_TOKEN_PROCESSED;
+		default:
+			break;
+		}
+		break;
+	case HTML_TOKEN_COMMENT:
+		/* XXX: insert as "last child of the Document object" */
+		html_append_comment(html, &token->comment);
+		return HTML_TOKEN_PROCESSED;
+	case HTML_TOKEN_DOCTYPE:
+		/* TODO: handle if doctype is not "html" */
+		html->mode = HTML_MODE_BEFORE_HTML;
+		return HTML_TOKEN_PROCESSED;
+	default:
+		break;
+	}
+
+	/*
+	 * Anything else: there was no doctype.
+	 * TODO: check if "document is not an iframe srcdoc document"
+	 */
+	html_parse_error(html);
+	if (!html->parser_cannot_change_mode)
+		html->quirks_mode = true;
+
+	html->mode = HTML_MODE_BEFORE_HTML;
+	return HTML_TOKEN_REPROCESS;
+}
+
+/*
+ * "before html" insertion mode: nothing is on the stack of open elements
+ * yet.  An <html> start tag creates the root element; most other tokens
+ * cause one to be created from a made-up <html> start tag and are then
+ * reprocessed in "before head".
+ *
+ * https://html.spec.whatwg.org/multipage/parsing.html#the-before-html-insertion-mode
+ */
+html_token_act
+html_process_token_before_html(struct html_page *html, html_token *token)
+{
+	html_token implied;
+
+	switch (token->type) {
+	case HTML_TOKEN_DOCTYPE:
+		/* parse error, ignore */
+		html_parse_error(html);
+		return HTML_TOKEN_PROCESSED;
+	case HTML_TOKEN_COMMENT:
+		html_append_comment(html, &token->comment);
+		return HTML_TOKEN_PROCESSED;
+	case HTML_TOKEN_CHARACTER:
+		if (token->ch.c == '\t' || token->ch.c == '\n' ||
+		    token->ch.c == '\f' || token->ch.c == '\r' ||
+		    token->ch.c == ' ') {
+			/* ignore */
+			return HTML_TOKEN_PROCESSED;
+		}
+		break;
+	case HTML_TOKEN_START_TAG:
+		if (token->tag.type == HTML_TAG_HTML) {
+			html_append_element_for_token(html, token,
+			    HTML_NAMESPACE_HTML);
+			html->mode = HTML_MODE_BEFORE_HEAD;
+			return HTML_TOKEN_PROCESSED;
+		}
+		break;
+	case HTML_TOKEN_END_TAG:
+		/* only these four end tags get the "anything else" handling */
+		if (token->tag.type != HTML_TAG_HEAD &&
+		    token->tag.type != HTML_TAG_BODY &&
+		    token->tag.type != HTML_TAG_HTML &&
+		    token->tag.type != HTML_TAG_BR) {
+			/* parse error, ignore */
+			html_parse_error(html);
+			return HTML_TOKEN_PROCESSED;
+		}
+		break;
+	default:
+		break;
+	}
+
+	/* anything else: create the root element ourselves */
+	memset(&implied, 0, sizeof(html_token));
+	implied.type = HTML_TOKEN_START_TAG;
+	implied.tag.type = HTML_TAG_HTML;
+	html_append_element_for_token(html, &implied, HTML_NAMESPACE_HTML);
+
+	html->mode = HTML_MODE_BEFORE_HEAD;
+	return HTML_TOKEN_REPROCESS;
+}
+
+/*
+ * "before head" insertion mode: the root element exists but no <head>
+ * has been opened.
+ *
+ * Either way a head element ends up open, and html->head is pointed at it:
+ * from a real <head> start tag, or from a made-up one when some other
+ * token arrives first (in which case that token is reprocessed in
+ * "in head").
+ *
+ * https://html.spec.whatwg.org/multipage/parsing.html#the-before-head-insertion-mode
+ */
+html_token_act
+html_process_token_before_head(struct html_page *html, html_token *token)
+{
+	html_token ttoken;
+
+	if (token->type == HTML_TOKEN_CHARACTER &&
+	    (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
+	    token->ch.c == '\r' || token->ch.c == ' ')) {
+		/* ignore */
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_COMMENT) {
+		html_append_comment(html, &token->comment);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_DOCTYPE) {
+		/* parse error, ignore */
+		html_parse_error(html);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_START_TAG &&
+	    token->tag.type == HTML_TAG_HTML) {
+		/* process as "in body" */
+		html_process_token_in_body(html, token);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_START_TAG &&
+	    token->tag.type == HTML_TAG_HEAD) {
+		html->head = html_append_element_for_token(html, token,
+		    HTML_NAMESPACE_HTML);
+		html->mode = HTML_MODE_IN_HEAD;
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_END_TAG &&
+	    !(token->tag.type == HTML_TAG_HEAD ||
+	    token->tag.type == HTML_TAG_BODY ||
+	    token->tag.type == HTML_TAG_HTML ||
+	    token->tag.type == HTML_TAG_BR)) {
+		/* parse error, ignore */
+		html_parse_error(html);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	/*
+	 * Anything else: insert a head element for a made-up <head> start
+	 * tag.  The head element pointer has to be set here as well, since
+	 * "after head" pushes html->head back onto the stack of open
+	 * elements.
+	 */
+	memset(&ttoken, 0, sizeof(html_token));
+	ttoken.type = HTML_TOKEN_START_TAG;
+	ttoken.tag.type = HTML_TAG_HEAD;
+	html->head = html_append_element_for_token(html, &ttoken,
+	    HTML_NAMESPACE_HTML);
+
+	html->mode = HTML_MODE_IN_HEAD;
+	return HTML_TOKEN_REPROCESS;
+}
+
+/*
+ * "in head" insertion mode.
+ *
+ * Handles the metadata elements, switches the tokenizer state and moves to
+ * "text" mode for <title>, <style>, <noframes>, <script> and (with
+ * scripting) <noscript>, and leaves the head on </head> or on any token
+ * that does not belong in it.  Template support is incomplete; see the
+ * TODOs below.
+ *
+ * https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead
+ */
+html_token_act
+html_process_token_in_head(struct html_page *html, html_token *token)
+{
+	if (token->type == HTML_TOKEN_CHARACTER &&
+	    (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
+	    token->ch.c == '\r' || token->ch.c == ' ')) {
+		html_insert_character(html, token->ch.c);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_COMMENT) {
+		html_append_comment(html, &token->comment);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_DOCTYPE) {
+		/* parse error, ignore */
+		html_parse_error(html);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_START_TAG &&
+	    token->tag.type == HTML_TAG_HTML) {
+		/* process as "in body" */
+		html_process_token_in_body(html, token);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_START_TAG &&
+	    (token->tag.type == HTML_TAG_BASE ||
+	    token->tag.type == HTML_TAG_BASEFONT ||
+	    token->tag.type == HTML_TAG_BGSOUND ||
+	    token->tag.type == HTML_TAG_LINK)) {
+		/* these have no content, so the element is popped right away */
+		html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+		html_pop_current_element(html);
+
+		if (token->tag.self_closing)
+			token->tag.self_closing_acked = true;
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_START_TAG &&
+	    token->tag.type == HTML_TAG_META) {
+		html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+		html_pop_current_element(html);
+
+		if (token->tag.self_closing)
+			token->tag.self_closing_acked = true;
+
+		/* TODO: check "charset" and "http-equiv" and change encoding */
+
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_START_TAG &&
+	    token->tag.type == HTML_TAG_TITLE) {
+		/* "RCDATA element parsing algorithm" */
+		html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+		html->state = HTML_STATE_RCDATA;
+		html->original_mode = html->mode;
+		html->mode = HTML_MODE_TEXT;
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_START_TAG &&
+	    ((token->tag.type == HTML_TAG_NOSCRIPT && html->scripting) ||
+	    (token->tag.type == HTML_TAG_NOFRAMES ||
+	    token->tag.type == HTML_TAG_STYLE))) {
+		/* "raw text element parsing algorithm" */
+		html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+		html->state = HTML_STATE_RAWTEXT;
+		html->original_mode = html->mode;
+		html->mode = HTML_MODE_TEXT;
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_START_TAG &&
+	    token->tag.type == HTML_TAG_NOSCRIPT && !html->scripting) {
+		html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+		html->mode = HTML_MODE_IN_HEAD_NOSCRIPT;
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_START_TAG &&
+	    token->tag.type == HTML_TAG_SCRIPT) {
+		/* TODO: more stuff according to docs */
+
+		html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+		html->state = HTML_STATE_SCRIPT_DATA;
+		html->original_mode = html->mode;
+		html->mode = HTML_MODE_TEXT;
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_END_TAG && token->tag.type == HTML_TAG_HEAD) {
+		/* this should be head */
+		html_pop_current_element(html);
+		html->mode = HTML_MODE_AFTER_HEAD;
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_END_TAG &&
+	    (token->tag.type == HTML_TAG_BODY ||
+	    token->tag.type == HTML_TAG_HTML ||
+	    token->tag.type == HTML_TAG_BR)) {
+		goto anything_else;
+	}
+
+	if (token->type == HTML_TOKEN_START_TAG &&
+	    token->tag.type == HTML_TAG_TEMPLATE) {
+		html_push_active_formatting_marker(html, token->type);
+		html->frameset_ok = false;
+		html->mode = HTML_MODE_IN_TEMPLATE;
+
+		/* TODO: draw the rest of the owl */
+
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_END_TAG &&
+	    token->tag.type == HTML_TAG_TEMPLATE) {
+		if (!html_has_tag_open(html, HTML_TAG_TEMPLATE)) {
+			/* parse error, ignore */
+			html_parse_error(html);
+			return HTML_TOKEN_PROCESSED;
+		}
+
+		html_generate_implied_end_tags(html, NULL, true);
+
+		/*
+		 * "If the current node is not a template element, then this
+		 * is a parse error."  The test is on the current node; the
+		 * token is known to be </template> in this branch.
+		 */
+		if (html->current_node == NULL ||
+		    html->current_node->type != HTML_TAG_TEMPLATE) {
+			/* parse error */
+			html_parse_error(html);
+		}
+
+		html_pop_nodes_until_past_tag(html, HTML_TAG_TEMPLATE);
+
+		/*
+		 * TODO: "Clear the list of active formatting elements up to
+		 * the last marker."
+		 */
+
+		/*
+		 * TODO: "Pop the current template insertion mode off the
+		 * stack of template insertion modes."
+		 */
+
+		/* TODO: "Reset the insertion mode appropriately." */
+
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if ((token->type == HTML_TOKEN_START_TAG &&
+	    token->tag.type == HTML_TAG_HEAD) ||
+	    token->type == HTML_TOKEN_END_TAG) {
+		/* parse error, ignore */
+		html_parse_error(html);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+anything_else:
+	/* this should be head */
+	html_pop_current_element(html);
+	html->mode = HTML_MODE_AFTER_HEAD;
+	return HTML_TOKEN_REPROCESS;
+}
+
+/*
+ * "in head noscript" insertion mode: inside a <noscript> that was opened
+ * in the head while scripting is off.
+ *
+ * https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inheadnoscript
+ */
+html_token_act
+html_process_token_in_head_noscript(struct html_page *html, html_token *token)
+{
+	switch (token->type) {
+	case HTML_TOKEN_DOCTYPE:
+		/* parse error, ignore */
+		html_parse_error(html);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_COMMENT:
+		/* process as "in head" */
+		html_process_token_in_head(html, token);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_CHARACTER:
+		if (token->ch.c == '\t' || token->ch.c == '\n' ||
+		    token->ch.c == '\f' || token->ch.c == '\r' ||
+		    token->ch.c == ' ') {
+			/* process as "in head" */
+			html_process_token_in_head(html, token);
+			return HTML_TOKEN_PROCESSED;
+		}
+		break;
+
+	case HTML_TOKEN_START_TAG:
+		switch (token->tag.type) {
+		case HTML_TAG_HTML:
+			/* process as "in body" */
+			html_process_token_in_body(html, token);
+			return HTML_TOKEN_PROCESSED;
+		case HTML_TAG_BASEFONT:
+		case HTML_TAG_BGSOUND:
+		case HTML_TAG_LINK:
+		case HTML_TAG_META:
+		case HTML_TAG_NOFRAMES:
+		case HTML_TAG_STYLE:
+			/* process as "in head" */
+			html_process_token_in_head(html, token);
+			return HTML_TOKEN_PROCESSED;
+		case HTML_TAG_HEAD:
+		case HTML_TAG_NOSCRIPT:
+			/* parse error, ignore */
+			html_parse_error(html);
+			return HTML_TOKEN_PROCESSED;
+		default:
+			break;
+		}
+		break;
+
+	case HTML_TOKEN_END_TAG:
+		if (token->tag.type == HTML_TAG_NOSCRIPT) {
+			/* this should be <noscript> */
+			html_pop_current_element(html);
+			/* current tag should now be <head> */
+			html->mode = HTML_MODE_IN_HEAD;
+			return HTML_TOKEN_PROCESSED;
+		}
+		if (token->tag.type != HTML_TAG_BR) {
+			/* parse error, ignore */
+			html_parse_error(html);
+			return HTML_TOKEN_PROCESSED;
+		}
+		/* </br> is handled as "anything else" */
+		break;
+
+	default:
+		break;
+	}
+
+	/* anything else: parse error, then leave the noscript */
+	html_parse_error(html);
+
+	/* this should be noscript */
+	html_pop_current_element(html);
+	/* current tag should now be <head> */
+	html->mode = HTML_MODE_IN_HEAD;
+	return HTML_TOKEN_REPROCESS;
+}
+
+/*
+ * "after head" insertion mode: the head has been closed and no <body> or
+ * <frameset> has been opened yet.
+ *
+ * Head-only elements that show up late are still handled by the "in head"
+ * rules, with the head element temporarily back on the stack of open
+ * elements.  Anything that is not handled here opens a made-up <body> and
+ * is reprocessed in "in body".
+ *
+ * https://html.spec.whatwg.org/multipage/parsing.html#the-after-head-insertion-mode
+ */
+html_token_act
+html_process_token_after_head(struct html_page *html, html_token *token)
+{
+	html_token ttoken;
+
+	if (token->type == HTML_TOKEN_CHARACTER &&
+	    (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
+	    token->ch.c == '\r' || token->ch.c == ' ')) {
+		html_insert_character(html, token->ch.c);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_COMMENT) {
+		html_append_comment(html, &token->comment);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_DOCTYPE) {
+		/* parse error, ignore */
+		html_parse_error(html);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_START_TAG &&
+	    token->tag.type == HTML_TAG_HTML) {
+		/* process as "in body" */
+		html_process_token_in_body(html, token);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_START_TAG &&
+	    token->tag.type == HTML_TAG_BODY) {
+		html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+		html->frameset_ok = false;
+		html->mode = HTML_MODE_IN_BODY;
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_START_TAG &&
+	    token->tag.type == HTML_TAG_FRAMESET) {
+		html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+		html->mode = HTML_MODE_IN_FRAMESET;
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_START_TAG &&
+	    (token->tag.type == HTML_TAG_BASE ||
+	    token->tag.type == HTML_TAG_BASEFONT ||
+	    token->tag.type == HTML_TAG_BGSOUND ||
+	    token->tag.type == HTML_TAG_LINK ||
+	    token->tag.type == HTML_TAG_META ||
+	    token->tag.type == HTML_TAG_NOFRAMES ||
+	    token->tag.type == HTML_TAG_SCRIPT ||
+	    token->tag.type == HTML_TAG_STYLE ||
+	    token->tag.type == HTML_TAG_TEMPLATE ||
+	    token->tag.type == HTML_TAG_TITLE)) {
+		/* parse error */
+		html_parse_error(html);
+
+		/*
+		 * Put the head element back on the stack while the token is
+		 * handled.  If no head element pointer was recorded there is
+		 * nothing to push, and html_append_element() must not be
+		 * handed NULL, so the token is processed without it.
+		 */
+		if (html->head != NULL)
+			html_append_element(html, html->head);
+
+		/* process as "in head" */
+		html_process_token_in_head(html, token);
+
+		if (html->head != NULL)
+			html_remove_open_element(html, html->head);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_END_TAG &&
+	    token->tag.type == HTML_TAG_TEMPLATE) {
+		/* process as "in head" */
+		html_process_token_in_head(html, token);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_END_TAG &&
+	    (token->tag.type == HTML_TAG_BODY ||
+	    token->tag.type == HTML_TAG_HTML ||
+	    token->tag.type == HTML_TAG_BR))
+		goto anything_else;
+
+	if ((token->type == HTML_TOKEN_START_TAG &&
+	    token->tag.type == HTML_TAG_HEAD) ||
+	    token->type == HTML_TOKEN_END_TAG) {
+		/* parse error, ignore */
+		html_parse_error(html);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+anything_else:
+	/* open a body from a made-up <body> start tag */
+	memset(&ttoken, 0, sizeof(html_token));
+	ttoken.type = HTML_TOKEN_START_TAG;
+	ttoken.tag.type = HTML_TAG_BODY;
+	html_append_element_for_token(html, &ttoken, HTML_NAMESPACE_HTML);
+
+	html->mode = HTML_MODE_IN_BODY;
+	return HTML_TOKEN_REPROCESS;
+}
+
+html_token_act
+html_process_token_in_body(struct html_page *html, html_token *token)
+{
+ html_token ttoken;
+ struct html_element *element, *node;
+ short n;
+
+ /*
+ * https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
+ */
+
+ if (token->type == HTML_TOKEN_CHARACTER && token->ch.c == '\0') {
+ /* parse error, ignore */
+ html_parse_error(html);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_CHARACTER &&
+ (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
+ token->ch.c == '\r' || token->ch.c == ' ')) {
+ if (html->active_formatting_count > 0)
+ html_reconstruct_active_formatting(html);
+ html_insert_character(html, token->ch.c);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_CHARACTER) {
+ if (html->active_formatting_count > 0)
+ html_reconstruct_active_formatting(html);
+ html_insert_character(html, token->ch.c);
+ html->frameset_ok = false;
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_COMMENT) {
+ html_append_comment(html, &token->comment);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_DOCTYPE) {
+ /* parse error, ignore */
+ html_parse_error(html);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_HTML) {
+ /* parse error */
+ html_parse_error(html);
+ if (html_has_tag_open(html, HTML_TAG_TEMPLATE)) {
+ /* ignore */
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ /* TODO: add attrs to first html tag it doesn't already have */
+
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if ((token->type == HTML_TOKEN_START_TAG &&
+ (token->tag.type == HTML_TAG_BASE ||
+ token->tag.type == HTML_TAG_BASEFONT ||
+ token->tag.type == HTML_TAG_BGSOUND ||
+ token->tag.type == HTML_TAG_LINK ||
+ token->tag.type == HTML_TAG_META ||
+ token->tag.type == HTML_TAG_NOFRAMES ||
+ token->tag.type == HTML_TAG_SCRIPT ||
+ token->tag.type == HTML_TAG_STYLE ||
+ token->tag.type == HTML_TAG_TEMPLATE ||
+ token->tag.type == HTML_TAG_TITLE)) ||
+ (token->type == HTML_TOKEN_END_TAG &&
+ token->tag.type == HTML_TAG_TEMPLATE)) {
+ /* process as "in head" */
+ html_process_token_in_head(html, token);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_BODY) {
+ /* parse error */
+ html_parse_error(html);
+ if (html->open_count == 1 || html->open[1]->type != HTML_TAG_BODY ||
+ html_has_tag_open(html, HTML_TAG_TEMPLATE)) {
+ /* ignore */
+ return HTML_TOKEN_PROCESSED;
+ }
+ html->frameset_ok = false;
+
+ /* TODO: add attrs to first body tag it doesn't already have */
+
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_FRAMESET) {
+ /* parse error */
+ html_parse_error(html);
+ if (html->open_count == 1 || html->open[1]->type != HTML_TAG_BODY ||
+ html_has_tag_open(html, HTML_TAG_TEMPLATE)) {
+ /* ignore */
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (!html->frameset_ok) {
+ /* ignore */
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ /* pop all nodes except root html */
+ while (html->open_count != 1)
+ html_pop_current_element(html);
+
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+ html->mode = HTML_MODE_IN_FRAMESET;
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_EOF) {
+ /* supposed to do more here but it all ends up the same */
+ html->eof = true;
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_END_TAG &&
+ (token->tag.type == HTML_TAG_BODY ||
+ token->tag.type == HTML_TAG_HTML)) {
+ if (!html_has_element_with_tag_open_in_scope(html, HTML_TAG_BODY,
+ HTML_SCOPE_DEFAULT)) {
+ /* parse error, ignore */
+ html_parse_error(html);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ for (n = 0; n < html->open_count; n++) {
+ if (html->open[n]->type == HTML_TAG_DD ||
+ html->open[n]->type == HTML_TAG_DT ||
+ html->open[n]->type == HTML_TAG_LI ||
+ html->open[n]->type == HTML_TAG_OPTGROUP ||
+ html->open[n]->type == HTML_TAG_OPTION ||
+ html->open[n]->type == HTML_TAG_P ||
+ html->open[n]->type == HTML_TAG_RB ||
+ html->open[n]->type == HTML_TAG_RP ||
+ html->open[n]->type == HTML_TAG_RT ||
+ html->open[n]->type == HTML_TAG_RTC ||
+ html->open[n]->type == HTML_TAG_TBODY ||
+ html->open[n]->type == HTML_TAG_TD ||
+ html->open[n]->type == HTML_TAG_TFOOT ||
+ html->open[n]->type == HTML_TAG_TH ||
+ html->open[n]->type == HTML_TAG_THEAD ||
+ html->open[n]->type == HTML_TAG_TR ||
+ html->open[n]->type == HTML_TAG_BODY ||
+ html->open[n]->type == HTML_TAG_HTML)
+ continue;
+
+ html_parse_error(html);
+ break;
+ }
+
+ html->mode = HTML_MODE_AFTER_BODY;
+
+ if (token->tag.type == HTML_TAG_HTML)
+ return HTML_TOKEN_REPROCESS;
+
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ (token->tag.type == HTML_TAG_ADDRESS ||
+ token->tag.type == HTML_TAG_ARTICLE ||
+ token->tag.type == HTML_TAG_ASIDE ||
+ token->tag.type == HTML_TAG_BLOCKQUOTE ||
+ token->tag.type == HTML_TAG_CENTER ||
+ token->tag.type == HTML_TAG_DETAILS ||
+ token->tag.type == HTML_TAG_DIALOG ||
+ token->tag.type == HTML_TAG_DIR ||
+ token->tag.type == HTML_TAG_DIV ||
+ token->tag.type == HTML_TAG_DL ||
+ token->tag.type == HTML_TAG_FIELDSET ||
+ token->tag.type == HTML_TAG_FIGCAPTION ||
+ token->tag.type == HTML_TAG_FIGURE ||
+ token->tag.type == HTML_TAG_FOOTER ||
+ token->tag.type == HTML_TAG_HEADER ||
+ token->tag.type == HTML_TAG_HGROUP ||
+ token->tag.type == HTML_TAG_MAIN ||
+ token->tag.type == HTML_TAG_MENU ||
+ token->tag.type == HTML_TAG_NAV ||
+ token->tag.type == HTML_TAG_OL ||
+ token->tag.type == HTML_TAG_P ||
+ token->tag.type == HTML_TAG_SEARCH ||
+ token->tag.type == HTML_TAG_SECTION ||
+ token->tag.type == HTML_TAG_SUMMARY ||
+ token->tag.type == HTML_TAG_UL)) {
+ if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
+ HTML_SCOPE_BUTTON))
+ html_close_p(html);
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ (token->tag.type == HTML_TAG_H1 ||
+ token->tag.type == HTML_TAG_H2 ||
+ token->tag.type == HTML_TAG_H3 ||
+ token->tag.type == HTML_TAG_H4 ||
+ token->tag.type == HTML_TAG_H5 ||
+ token->tag.type == HTML_TAG_H6)) {
+ if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
+ HTML_SCOPE_BUTTON))
+ html_close_p(html);
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ (html->current_node->type == HTML_TAG_H1 ||
+ html->current_node->type == HTML_TAG_H2 ||
+ html->current_node->type == HTML_TAG_H3 ||
+ html->current_node->type == HTML_TAG_H4 ||
+ html->current_node->type == HTML_TAG_H5 ||
+ html->current_node->type == HTML_TAG_H6)) {
+ /* parse error */
+ html_parse_error(html);
+ html_pop_current_element(html);
+ }
+
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ (token->tag.type == HTML_TAG_PRE ||
+ token->tag.type == HTML_TAG_LISTING)) {
+ if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
+ HTML_SCOPE_BUTTON))
+ html_close_p(html);
+
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+
+ html->skip_newline_char_token = true;
+ html->frameset_ok = false;
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_FORM) {
+ if (html->form && !html_has_tag_open(html, HTML_TAG_TEMPLATE)) {
+ /* parse error, ignore */
+ html_parse_error(html);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
+ HTML_SCOPE_BUTTON))
+ html_close_p(html);
+
+ element = html_append_element_for_token(html, token,
+ HTML_NAMESPACE_HTML);
+
+ if (!html_has_tag_open(html, HTML_TAG_TEMPLATE))
+ html->form = element;
+
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_LI) {
+ html->frameset_ok = false;
+
+ /* TODO: docs say to run a loop doing stuff here */
+
+ if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
+ HTML_SCOPE_BUTTON))
+ html_close_p(html);
+
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ (token->tag.type == HTML_TAG_DD ||
+ token->tag.type == HTML_TAG_DT)) {
+ html->frameset_ok = false;
+
+ /* TODO: docs say to run a loop doing stuff here */
+
+ if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
+ HTML_SCOPE_BUTTON))
+ html_close_p(html);
+
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_PLAINTEXT) {
+ if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
+ HTML_SCOPE_BUTTON))
+ html_close_p(html);
+
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+ html->state = HTML_STATE_PLAINTEXT;
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_BUTTON) {
+ if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_BUTTON,
+ HTML_SCOPE_DEFAULT)) {
+ /* parse error */
+ html_parse_error(html);
+ html_generate_implied_end_tags(html, NULL, false);
+ html_pop_nodes_until_past_tag(html, HTML_TAG_BUTTON);
+ }
+
+ if (html->active_formatting_count > 0)
+ html_reconstruct_active_formatting(html);
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+ html->frameset_ok = false;
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_END_TAG &&
+ (token->tag.type == HTML_TAG_ADDRESS ||
+ token->tag.type == HTML_TAG_ARTICLE ||
+ token->tag.type == HTML_TAG_ASIDE ||
+ token->tag.type == HTML_TAG_BLOCKQUOTE ||
+ token->tag.type == HTML_TAG_BUTTON ||
+ token->tag.type == HTML_TAG_CENTER ||
+ token->tag.type == HTML_TAG_DETAILS ||
+ token->tag.type == HTML_TAG_DIALOG ||
+ token->tag.type == HTML_TAG_DIR ||
+ token->tag.type == HTML_TAG_DIV ||
+ token->tag.type == HTML_TAG_DL ||
+ token->tag.type == HTML_TAG_FIELDSET ||
+ token->tag.type == HTML_TAG_FIGCAPTION ||
+ token->tag.type == HTML_TAG_FIGURE ||
+ token->tag.type == HTML_TAG_FOOTER ||
+ token->tag.type == HTML_TAG_HEADER ||
+ token->tag.type == HTML_TAG_HGROUP ||
+ token->tag.type == HTML_TAG_LISTING ||
+ token->tag.type == HTML_TAG_MAIN ||
+ token->tag.type == HTML_TAG_MENU ||
+ token->tag.type == HTML_TAG_NAV ||
+ token->tag.type == HTML_TAG_OL ||
+ token->tag.type == HTML_TAG_PRE ||
+ token->tag.type == HTML_TAG_SEARCH ||
+ token->tag.type == HTML_TAG_SECTION ||
+ token->tag.type == HTML_TAG_SUMMARY ||
+ token->tag.type == HTML_TAG_UL)) {
+ if (!html_has_element_with_tag_open_in_scope(html,
+ token->tag.type, HTML_SCOPE_DEFAULT)) {
+ /* parse error, ignore */
+ html_parse_error(html);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ html_generate_implied_end_tags(html, NULL, false);
+
+ if (!html_has_tag_open(html, token->tag.type)) {
+ /* parse error, ignore */
+ html_parse_error(html);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ html_pop_nodes_until_past_tag(html, token->tag.type);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_END_TAG && token->tag.type == HTML_TAG_FORM) {
+ if (!html_has_tag_open(html, HTML_TAG_TEMPLATE)) {
+ /* TODO */
+ } else {
+ if (!html_has_tag_open(html, HTML_TAG_FORM)) {
+ /* parse error, ignore */
+ html_parse_error(html);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ html_generate_implied_end_tags(html, NULL, false);
+
+ if (html->current_node->type != HTML_TAG_FORM) {
+ /* parse error */
+ html_parse_error(html);
+ }
+
+ html_pop_nodes_until_past_tag(html, HTML_TAG_FORM);
+ }
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_END_TAG && token->tag.type == HTML_TAG_P) {
+ if (!html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
+ HTML_SCOPE_BUTTON)) {
+ /* parse error */
+ html_parse_error(html);
+ memset(&ttoken, 0, sizeof(html_token));
+ ttoken.type = HTML_TOKEN_START_TAG;
+ ttoken.tag.type = HTML_TAG_P;
+ html_append_element_for_token(html, &ttoken, HTML_NAMESPACE_HTML);
+ }
+
+ html_close_p(html);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_END_TAG && token->tag.type == HTML_TAG_LI) {
+ if (!html_has_element_with_tag_open_in_scope(html, HTML_TAG_LI,
+ HTML_SCOPE_LIST_ITEM)) {
+ /* parse error, ignore */
+ html_parse_error(html);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ html_generate_implied_end_tags(html, "li", false);
+
+ if (html->current_node->type != HTML_TAG_LI) {
+ /* parse error */
+ html_parse_error(html);
+ }
+
+ html_pop_nodes_until_past_tag(html, HTML_TAG_LI);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_END_TAG &&
+ (token->tag.type == HTML_TAG_DD ||
+ token->tag.type == HTML_TAG_DT)) {
+ if (!html_has_tag_open(html, token->tag.type)) {
+ /* parse error, ignore */
+ html_parse_error(html);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ html_generate_implied_end_tags(html, token->tag.name, false);
+
+ if (html->current_node->type != token->tag.type) {
+ /* parse error */
+ html_parse_error(html);
+ }
+
+ html_pop_nodes_until_past_tag(html, token->tag.type);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_END_TAG &&
+ (token->tag.type == HTML_TAG_H1 ||
+ token->tag.type == HTML_TAG_H2 ||
+ token->tag.type == HTML_TAG_H3 ||
+ token->tag.type == HTML_TAG_H4 ||
+ token->tag.type == HTML_TAG_H5 ||
+ token->tag.type == HTML_TAG_H6)) {
+ if (!(html_has_element_with_tag_open_in_scope(html, HTML_TAG_H1,
+ HTML_SCOPE_DEFAULT) ||
+ html_has_element_with_tag_open_in_scope(html, HTML_TAG_H2,
+ HTML_SCOPE_DEFAULT) ||
+ html_has_element_with_tag_open_in_scope(html, HTML_TAG_H3,
+ HTML_SCOPE_DEFAULT) ||
+ html_has_element_with_tag_open_in_scope(html, HTML_TAG_H4,
+ HTML_SCOPE_DEFAULT) ||
+ html_has_element_with_tag_open_in_scope(html, HTML_TAG_H5,
+ HTML_SCOPE_DEFAULT) ||
+ html_has_element_with_tag_open_in_scope(html, HTML_TAG_H6,
+ HTML_SCOPE_DEFAULT))) {
+ /* parse error, ignore */
+ html_parse_error(html);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ html_generate_implied_end_tags(html, NULL, false);
+
+ if (html->current_node->type != token->tag.type) {
+ /* parse error */
+ html_parse_error(html);
+ }
+
+ html_pop_nodes_until_past_tag(html, token->tag.type);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_END_TAG &&
+ strcmp(token->tag.name, "sarcasm") == 0) {
+ /* TODO: take a deep breath */
+ goto any_other_end_tag;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG && token->tag.type == HTML_TAG_A) {
+ short last_marker = 0;
+ struct html_element *found_a;
+
+ for (n = 0; n < html->active_formatting_count; n++) {
+ if (html->active_formatting[n].marker) {
+ HTML_DEBUG((": af[%d]=marker", n));
+ } else {
+ HTML_DEBUG((": af[%d]=<%s>", n,
+ html->active_formatting[n].element->name));
+ }
+ }
+
+ /* find last marker, if any */
+ for (n = html->active_formatting_count - 1; n >= 0; n--) {
+ if (html->active_formatting[n].marker) {
+ last_marker = n;
+ break;
+ }
+ }
+
+ /*
+ * "If the list of active formatting elements contains an a element
+ * between the end of the list and the last marker on the list (or the
+ * start of the list if there is no marker on the list), then this is a
+ * parse error;"
+ */
+ for (n = last_marker; n < html->active_formatting_count; n++) {
+ if (!html->active_formatting[n].element ||
+ html->active_formatting[n].element->type != HTML_TAG_A)
+ continue;
+
+ found_a = html->active_formatting[n].element;
+ html_parse_error(html);
+
+ /*
+ * "then remove that element from the list of active formatting
+ * elements and the stack of open elements if the adoption
+ * agency algorithm didn't already remove it"
+ */
+ html_run_adoption_agency(html, token);
+ html_remove_active_formatting_element(html, found_a);
+ break;
+ }
+
+ if (html->active_formatting_count > 0)
+ html_reconstruct_active_formatting(html);
+ element = html_append_element_for_token(html, token,
+ HTML_NAMESPACE_HTML);
+ html_push_active_formatting_element(html, element, token->type);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ (token->tag.type == HTML_TAG_B ||
+ token->tag.type == HTML_TAG_BIG ||
+ token->tag.type == HTML_TAG_CODE ||
+ token->tag.type == HTML_TAG_EM ||
+ token->tag.type == HTML_TAG_FONT ||
+ token->tag.type == HTML_TAG_I ||
+ token->tag.type == HTML_TAG_S ||
+ token->tag.type == HTML_TAG_SMALL ||
+ token->tag.type == HTML_TAG_STRIKE ||
+ token->tag.type == HTML_TAG_STRONG ||
+ token->tag.type == HTML_TAG_TT ||
+ token->tag.type == HTML_TAG_U)) {
+ if (html->active_formatting_count > 0)
+ html_reconstruct_active_formatting(html);
+ element = html_append_element_for_token(html, token,
+ HTML_NAMESPACE_HTML);
+ html_push_active_formatting_element(html, element, token->type);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_NOBR) {
+ if (html->active_formatting_count > 0)
+ html_reconstruct_active_formatting(html);
+ if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_NOBR,
+ HTML_SCOPE_DEFAULT)) {
+ /* parse error */
+ html_parse_error(html);
+ html_run_adoption_agency(html, token);
+ if (html->active_formatting_count > 0)
+ html_reconstruct_active_formatting(html);
+ }
+
+ element = html_append_element_for_token(html, token,
+ HTML_NAMESPACE_HTML);
+ html_push_active_formatting_element(html, element, token->type);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_END_TAG &&
+ (token->tag.type == HTML_TAG_A ||
+ token->tag.type == HTML_TAG_B ||
+ token->tag.type == HTML_TAG_BIG ||
+ token->tag.type == HTML_TAG_CODE ||
+ token->tag.type == HTML_TAG_EM ||
+ token->tag.type == HTML_TAG_FONT ||
+ token->tag.type == HTML_TAG_I ||
+ token->tag.type == HTML_TAG_NOBR ||
+ token->tag.type == HTML_TAG_S ||
+ token->tag.type == HTML_TAG_SMALL ||
+ token->tag.type == HTML_TAG_STRIKE ||
+ token->tag.type == HTML_TAG_STRONG ||
+ token->tag.type == HTML_TAG_TT ||
+ token->tag.type == HTML_TAG_U)) {
+ if (!html_run_adoption_agency(html, token))
+ goto any_other_end_tag;
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ (token->tag.type == HTML_TAG_APPLET ||
+ token->tag.type == HTML_TAG_MARQUEE ||
+ token->tag.type == HTML_TAG_OBJECT)) {
+ if (html->active_formatting_count > 0)
+ html_reconstruct_active_formatting(html);
+ element = html_append_element_for_token(html, token,
+ HTML_NAMESPACE_HTML);
+ html_push_active_formatting_element(html, element, token->type);
+ html->frameset_ok = false;
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_END_TAG &&
+ (token->tag.type == HTML_TAG_APPLET ||
+ token->tag.type == HTML_TAG_MARQUEE ||
+ token->tag.type == HTML_TAG_OBJECT)) {
+ if (!html_has_tag_open(html, token->tag.type)) {
+ /* parse error, ignore */
+ html_parse_error(html);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ html_generate_implied_end_tags(html, NULL, false);
+
+ if (html->current_node->type != token->tag.type) {
+ /* parse error */
+ html_parse_error(html);
+ }
+
+ html_pop_nodes_until_past_tag(html, token->tag.type);
+
+ /* TODO: clear list of active formatting elements up to last marker */
+
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_TABLE) {
+ if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
+ HTML_SCOPE_BUTTON)) {
+ /* TODO: only do this if document is not set to quirks mode */
+ html_close_p(html);
+ }
+
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+ html->frameset_ok = false;
+ html->mode = HTML_MODE_IN_TABLE;
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_END_TAG && token->tag.type == HTML_TAG_BR) {
+ /* parse error, drop attributes and turn into start */
+ html_parse_error(html);
+
+ token->tag.attrs_count = 0;
+ token->type = HTML_TOKEN_START_TAG;
+
+ /* fall through */
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ (token->tag.type == HTML_TAG_AREA ||
+ token->tag.type == HTML_TAG_BR ||
+ token->tag.type == HTML_TAG_EMBED ||
+ token->tag.type == HTML_TAG_IMG ||
+ token->tag.type == HTML_TAG_KEYGEN ||
+ token->tag.type == HTML_TAG_WBR)) {
+ if (html->active_formatting_count > 0)
+ html_reconstruct_active_formatting(html);
+
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+ html_pop_current_element(html);
+
+ if (token->tag.self_closing)
+ token->tag.self_closing_acked = true;
+
+ html->frameset_ok = false;
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_INPUT) {
+ bool found_hidden;
+
+ if (html->active_formatting_count > 0)
+ html_reconstruct_active_formatting(html);
+
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+ html_pop_current_element(html);
+
+ if (token->tag.self_closing)
+ token->tag.self_closing_acked = true;
+
+ for (n = 0, found_hidden = false; n < token->tag.attrs_count; n++) {
+ if (strcasecmp(token->tag.attrs[n].name, "type") == 0 &&
+ strcasecmp(token->tag.attrs[n].val, "hidden") == 0) {
+ found_hidden = true;
+ break;
+ }
+ }
+ if (!found_hidden)
+ html->frameset_ok = false;
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ (token->tag.type == HTML_TAG_PARAM ||
+ token->tag.type == HTML_TAG_SOURCE ||
+ token->tag.type == HTML_TAG_TRACK)) {
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+ html_pop_current_element(html);
+
+ if (token->tag.self_closing)
+ token->tag.self_closing_acked = true;
+
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_HR) {
+ if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
+ HTML_SCOPE_BUTTON))
+ html_close_p(html);
+
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+ html_pop_current_element(html);
+
+ if (token->tag.self_closing)
+ token->tag.self_closing_acked = true;
+
+ html->frameset_ok = false;
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_IMAGE) {
+ /* parse error */
+ html_parse_error(html);
+
+ /* "Don't ask." */
+ token->tag.name_len = strlcpy(token->tag.name, "img",
+ sizeof(token->tag.name));
+ token->tag.type = HTML_TAG_IMG;
+
+ return HTML_TOKEN_REPROCESS;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_TEXTAREA) {
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+
+ html->skip_newline_char_token = true;
+ html->state = HTML_STATE_RCDATA;
+ html->original_mode = html->mode;
+ html->frameset_ok = false;
+ html->mode = HTML_MODE_TEXT;
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_XMP) {
+ if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
+ HTML_SCOPE_BUTTON))
+ html_close_p(html);
+
+ if (html->active_formatting_count > 0)
+ html_reconstruct_active_formatting(html);
+ html->frameset_ok = false;
+
+ /* "raw text element parsing algorithm" */
+ html->state = HTML_STATE_RAWTEXT;
+ html->original_mode = html->mode;
+ html->mode = HTML_MODE_TEXT;
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_IFRAME) {
+ html->frameset_ok = false;
+
+ /* "raw text element parsing algorithm" */
+ html->state = HTML_STATE_RAWTEXT;
+ html->original_mode = html->mode;
+ html->mode = HTML_MODE_TEXT;
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ (token->tag.type == HTML_TAG_NOEMBED ||
+ (token->tag.type == HTML_TAG_NOSCRIPT && html->scripting))) {
+ /* "raw text element parsing algorithm" */
+ html->state = HTML_STATE_RAWTEXT;
+ html->original_mode = html->mode;
+ html->mode = HTML_MODE_TEXT;
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_SELECT) {
+ if (html->active_formatting_count > 0)
+ html_reconstruct_active_formatting(html);
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+ html->frameset_ok = false;
+
+ if (html->mode == HTML_MODE_IN_TABLE ||
+ html->mode == HTML_MODE_IN_CAPTION ||
+ html->mode == HTML_MODE_IN_TABLE_BODY ||
+ html->mode == HTML_MODE_IN_ROW ||
+ html->mode == HTML_MODE_IN_CELL)
+ html->mode = HTML_MODE_IN_SELECT_IN_TABLE;
+ else
+ html->mode = HTML_MODE_IN_SELECT;
+
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ (token->tag.type == HTML_TAG_OPTGROUP ||
+ token->tag.type == HTML_TAG_OPTION)) {
+ if (token->tag.type == HTML_TAG_OPTION)
+ html_pop_current_element(html);
+
+ if (html->active_formatting_count > 0)
+ html_reconstruct_active_formatting(html);
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ (token->tag.type == HTML_TAG_RB ||
+ token->tag.type == HTML_TAG_RTC)) {
+ if (html_has_tag_open(html, HTML_TAG_RUBY)) {
+ html_generate_implied_end_tags(html, "rtc", false);
+
+ if (token->tag.type == HTML_TAG_RUBY) {
+ /* parse error */
+ html_parse_error(html);
+ }
+ }
+
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_MATH) {
+ if (html->active_formatting_count > 0)
+ html_reconstruct_active_formatting(html);
+
+ /* TODO: "adjust MathML attributes" */
+
+ element = html_append_element_for_token(html, token,
+ HTML_NAMESPACE_MATHML);
+
+ if (token->tag.self_closing) {
+ html_pop_current_element(html);
+ token->tag.self_closing_acked = true;
+ }
+
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ token->tag.type == HTML_TAG_SVG) {
+ if (html->active_formatting_count > 0)
+ html_reconstruct_active_formatting(html);
+
+ /* TODO: "adjust SVG attributes" */
+
+ element = html_append_element_for_token(html, token,
+ HTML_NAMESPACE_SVG);
+
+ if (token->tag.self_closing) {
+ html_pop_current_element(html);
+ token->tag.self_closing_acked = true;
+ }
+
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG &&
+ (token->tag.type == HTML_TAG_CAPTION ||
+ token->tag.type == HTML_TAG_COL ||
+ token->tag.type == HTML_TAG_COLGROUP ||
+ token->tag.type == HTML_TAG_FRAME ||
+ token->tag.type == HTML_TAG_HEAD ||
+ token->tag.type == HTML_TAG_TBODY ||
+ token->tag.type == HTML_TAG_TD ||
+ token->tag.type == HTML_TAG_TFOOT ||
+ token->tag.type == HTML_TAG_TH ||
+ token->tag.type == HTML_TAG_THEAD ||
+ token->tag.type == HTML_TAG_TR)) {
+ /* parse error, ignore */
+ html_parse_error(html);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_START_TAG) {
+ /* any other tag */
+ if (html->active_formatting_count > 0)
+ html_reconstruct_active_formatting(html);
+ html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ if (token->type == HTML_TOKEN_END_TAG) {
+any_other_end_tag:
+ /*
+ * 1. Initialize node to be the current node (the bottommost node of
+ * the stack).
+ */
+ /* 2. Loop: */
+ for (n = html->open_count - 1; n >= 0; n--) {
+ node = html->open[n];
+
+ /*
+ * 2. If node is an HTML element with the same tag name as the
+ * token, then:
+ */
+ if (strcmp(node->name, token->tag.name) == 0) {
+ /*
+ * 1. Generate implied end tags, except for HTML elements with
+ * the same tag name as the token.
+ */
+ html_generate_implied_end_tags(html, token->tag.name, false);
+
+ /*
+ * 2. If node is not the current node, then this is a parse
+ * error.
+ */
+ if (node != html->current_node) {
+ html_parse_error(html);
+ }
+
+ /*
+ * 3. Pop all the nodes from the current node up to node,
+ * including node, then stop these steps.
+ */
+ html_pop_nodes_until_past_element(html, node);
+ break;
+ }
+
+ /*
+ * 3. Otherwise, if node is in the special category, then this
+ * is a parse error; ignore the token, and return.
+ */
+ if (node->type && html_is_element_special(html, node)) {
+ html_parse_error(html);
+ /* ignore */
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ /*
+ * 4. Set node to the previous entry in the stack of open
+ * elements.
+ */
+ /* 5. Return to the step labeled loop. */
+ }
+
+ return HTML_TOKEN_PROCESSED;
+ }
+
+ panic("we shouldn't get to default case in 'in body' parser");
+ return HTML_TOKEN_PROCESSED;
+}
+
+/*
+ * "text" insertion mode
+ * https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incdata
+ *
+ * Character tokens are inserted at the current node.  EOF and end tags pop
+ * the current node and switch html->mode back to html->original_mode, which
+ * is then reset to HTML_MODE_NONE.
+ *
+ * Returns HTML_TOKEN_REPROCESS for EOF, so that the restored mode also sees
+ * the EOF token, and HTML_TOKEN_PROCESSED for everything else.
+ */
+html_token_act
+html_process_token_text(struct html_page *html, html_token *token)
+{
+	if (token->type == HTML_TOKEN_CHARACTER) {
+		html_insert_character(html, token->ch.c);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_EOF) {
+		/* parse error */
+		html_parse_error(html);
+
+		/*
+		 * "If the current node is a script element" - this has to look
+		 * at the current node; the token here is an EOF token, so its
+		 * tag fields do not describe a script tag.
+		 */
+		if (html->current_node &&
+		    html->current_node->type == HTML_TAG_SCRIPT) {
+			/* TODO: "set its already started to true" */
+		}
+
+		html_pop_current_element(html);
+
+		html->mode = html->original_mode;
+		html->original_mode = HTML_MODE_NONE;
+		return HTML_TOKEN_REPROCESS;
+	}
+
+	if (token->type == HTML_TOKEN_END_TAG &&
+	    token->tag.type == HTML_TAG_SCRIPT) {
+		html_pop_current_element(html);
+
+		html->mode = html->original_mode;
+		html->original_mode = HTML_MODE_NONE;
+
+		/* TODO: some more stuff related to scripting engine */
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_END_TAG) {
+		/*
+		 * "Any other end tag": pop the current node and switch back to
+		 * the original insertion mode.  The end tag is consumed here;
+		 * the spec does not reprocess it in the restored mode, where it
+		 * would be handled a second time against an element that has
+		 * already been popped.
+		 */
+		html_pop_current_element(html);
+
+		html->mode = html->original_mode;
+		html->original_mode = HTML_MODE_NONE;
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	/* no other token types are handled in this mode */
+	return HTML_TOKEN_PROCESSED;
+}
+
+/*
+ * "in table" insertion mode.
+ *
+ * Not implemented yet: emits an HTML_DEBUG message and reports the token as
+ * processed, so every token reaching this mode is dropped.
+ */
+html_token_act
+html_process_token_in_table(struct html_page *html, html_token *token)
+{
+	/* TODO: implement this insertion mode */
+	HTML_DEBUG(("in_table: TODO"));
+
+	return HTML_TOKEN_PROCESSED;
+}
+
+/*
+ * "in table text" insertion mode.
+ *
+ * Not implemented yet: emits an HTML_DEBUG message and reports the token as
+ * processed, so every token reaching this mode is dropped.
+ */
+html_token_act
+html_process_token_in_table_text(struct html_page *html, html_token *token)
+{
+	/* TODO: implement this insertion mode */
+	HTML_DEBUG(("in_table_text: TODO"));
+
+	return HTML_TOKEN_PROCESSED;
+}
+
+/*
+ * "in caption" insertion mode.
+ *
+ * Not implemented yet: emits an HTML_DEBUG message and reports the token as
+ * processed, so every token reaching this mode is dropped.
+ */
+html_token_act
+html_process_token_in_caption(struct html_page *html, html_token *token)
+{
+	/* TODO: implement this insertion mode */
+	HTML_DEBUG(("in_caption: TODO"));
+
+	return HTML_TOKEN_PROCESSED;
+}
+
+/*
+ * "in column group" insertion mode.
+ *
+ * Not implemented yet: emits an HTML_DEBUG message and reports the token as
+ * processed, so every token reaching this mode is dropped.
+ */
+html_token_act
+html_process_token_in_column_group(struct html_page *html, html_token *token)
+{
+	/* TODO: implement this insertion mode */
+	HTML_DEBUG(("in_column_group: TODO"));
+
+	return HTML_TOKEN_PROCESSED;
+}
+
+/*
+ * "in table body" insertion mode.
+ *
+ * Not implemented yet: emits an HTML_DEBUG message and reports the token as
+ * processed, so every token reaching this mode is dropped.
+ */
+html_token_act
+html_process_token_in_table_body(struct html_page *html, html_token *token)
+{
+	/* TODO: implement this insertion mode */
+	HTML_DEBUG(("in_table_body: TODO"));
+
+	return HTML_TOKEN_PROCESSED;
+}
+
+/*
+ * "in row" insertion mode.
+ *
+ * Not implemented yet: emits an HTML_DEBUG message and reports the token as
+ * processed, so every token reaching this mode is dropped.
+ */
+html_token_act
+html_process_token_in_row(struct html_page *html, html_token *token)
+{
+	/* TODO: implement this insertion mode */
+	HTML_DEBUG(("in_row: TODO"));
+
+	return HTML_TOKEN_PROCESSED;
+}
+
+/*
+ * "in cell" insertion mode.
+ *
+ * Not implemented yet: emits an HTML_DEBUG message and reports the token as
+ * processed, so every token reaching this mode is dropped.
+ */
+html_token_act
+html_process_token_in_cell(struct html_page *html, html_token *token)
+{
+	/* TODO: implement this insertion mode */
+	HTML_DEBUG(("in_cell: TODO"));
+
+	return HTML_TOKEN_PROCESSED;
+}
+
+/*
+ * "in select" insertion mode.
+ *
+ * Not implemented yet: emits an HTML_DEBUG message and reports the token as
+ * processed, so every token reaching this mode is dropped.
+ */
+html_token_act
+html_process_token_in_select(struct html_page *html, html_token *token)
+{
+	/* TODO: implement this insertion mode */
+	HTML_DEBUG(("in_select: TODO"));
+
+	return HTML_TOKEN_PROCESSED;
+}
+
+/*
+ * "in select in table" insertion mode.
+ *
+ * Not implemented yet: emits an HTML_DEBUG message and reports the token as
+ * processed, so every token reaching this mode is dropped.
+ */
+html_token_act
+html_process_token_in_select_in_table(struct html_page *html,
+    html_token *token)
+{
+	/* TODO: implement this insertion mode */
+	HTML_DEBUG(("in_select_in_table: TODO"));
+
+	return HTML_TOKEN_PROCESSED;
+}
+
+/*
+ * "in template" insertion mode.
+ *
+ * Not implemented yet: emits an HTML_DEBUG message and reports the token as
+ * processed, so every token reaching this mode is dropped.
+ */
+html_token_act
+html_process_token_in_template(struct html_page *html, html_token *token)
+{
+	/* TODO: implement this insertion mode */
+	HTML_DEBUG(("in_template: TODO"));
+
+	return HTML_TOKEN_PROCESSED;
+}
+
+/*
+ * "after body" insertion mode
+ * https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-afterbody
+ *
+ * Whitespace and <html> start tags are handed to the "in body" rules,
+ * </html> moves on to "after after body", and EOF stops parsing.  Any token
+ * not handled here is a parse error that switches back to "in body" and
+ * asks the caller to reprocess the token.
+ */
+html_token_act
+html_process_token_after_body(struct html_page *html, html_token *token)
+{
+	switch (token->type) {
+	case HTML_TOKEN_CHARACTER:
+		if (token->ch.c != '\t' && token->ch.c != '\n' &&
+		    token->ch.c != '\f' && token->ch.c != '\r' &&
+		    token->ch.c != ' ')
+			break;
+
+		/* whitespace: process as "in body" */
+		html_process_token_in_body(html, token);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_COMMENT:
+		html_append_comment(html, &token->comment);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_DOCTYPE:
+		/* parse error, ignore */
+		html_parse_error(html);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_START_TAG:
+		if (token->tag.type != HTML_TAG_HTML)
+			break;
+
+		/* process as "in body" */
+		html_process_token_in_body(html, token);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_END_TAG:
+		if (token->tag.type != HTML_TAG_HTML)
+			break;
+
+		html->mode = HTML_MODE_AFTER_AFTER_BODY;
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_EOF:
+		html_stop_parsing(html);
+		return HTML_TOKEN_PROCESSED;
+
+	default:
+		break;
+	}
+
+	/* anything else: parse error, retry the token as "in body" */
+	html_parse_error(html);
+	html->mode = HTML_MODE_IN_BODY;
+	return HTML_TOKEN_REPROCESS;
+}
+
+/*
+ * "in frameset" insertion mode
+ * https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inframeset
+ *
+ * Always returns HTML_TOKEN_PROCESSED; tokens that are not handled here are
+ * a parse error and are ignored.
+ */
+html_token_act
+html_process_token_in_frameset(struct html_page *html, html_token *token)
+{
+	switch (token->type) {
+	case HTML_TOKEN_CHARACTER:
+		/* only whitespace is inserted; other characters are errors */
+		if (token->ch.c != '\t' && token->ch.c != '\n' &&
+		    token->ch.c != '\f' && token->ch.c != '\r' &&
+		    token->ch.c != ' ')
+			break;
+
+		html_insert_character(html, token->ch.c);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_COMMENT:
+		html_append_comment(html, &token->comment);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_DOCTYPE:
+		/* parse error, ignore */
+		html_parse_error(html);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_START_TAG:
+		if (token->tag.type == HTML_TAG_HTML) {
+			/* process as "in body" */
+			html_process_token_in_body(html, token);
+			return HTML_TOKEN_PROCESSED;
+		}
+
+		if (token->tag.type == HTML_TAG_FRAMESET) {
+			html_append_element_for_token(html, token,
+			    HTML_NAMESPACE_HTML);
+			return HTML_TOKEN_PROCESSED;
+		}
+
+		if (token->tag.type == HTML_TAG_FRAME) {
+			/* <frame> never stays open: insert, then pop it */
+			html_append_element_for_token(html, token,
+			    HTML_NAMESPACE_HTML);
+			html_pop_current_element(html);
+			if (token->tag.self_closing)
+				token->tag.self_closing_acked = true;
+			return HTML_TOKEN_PROCESSED;
+		}
+
+		if (token->tag.type == HTML_TAG_NOFRAMES) {
+			/* process as "in head" */
+			html_process_token_in_head(html, token);
+			return HTML_TOKEN_PROCESSED;
+		}
+		break;
+
+	case HTML_TOKEN_END_TAG:
+		if (token->tag.type != HTML_TAG_FRAMESET)
+			break;
+
+		if (html->current_node->type == HTML_TAG_HTML) {
+			/* parse error, ignore */
+			html_parse_error(html);
+			return HTML_TOKEN_PROCESSED;
+		}
+
+		html_pop_current_element(html);
+
+		/* leaving the outermost frameset ends this mode */
+		if (html->current_node->type != HTML_TAG_FRAMESET)
+			html->mode = HTML_MODE_AFTER_FRAMESET;
+
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_EOF:
+		if (html->current_node->type != HTML_TAG_HTML)
+			html_parse_error(html);
+		html_stop_parsing(html);
+		return HTML_TOKEN_PROCESSED;
+
+	default:
+		break;
+	}
+
+	/* anything else: parse error, ignore */
+	html_parse_error(html);
+	return HTML_TOKEN_PROCESSED;
+}
+
+/*
+ * "after frameset" insertion mode
+ * https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-afterframeset
+ *
+ * Always returns HTML_TOKEN_PROCESSED; tokens that are not handled here are
+ * a parse error and are ignored.
+ */
+html_token_act
+html_process_token_after_frameset(struct html_page *html, html_token *token)
+{
+	switch (token->type) {
+	case HTML_TOKEN_CHARACTER:
+		/* only whitespace is inserted; other characters are errors */
+		if (token->ch.c != '\t' && token->ch.c != '\n' &&
+		    token->ch.c != '\f' && token->ch.c != '\r' &&
+		    token->ch.c != ' ')
+			break;
+
+		html_insert_character(html, token->ch.c);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_COMMENT:
+		html_append_comment(html, &token->comment);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_DOCTYPE:
+		/* parse error, ignore */
+		html_parse_error(html);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_START_TAG:
+		if (token->tag.type == HTML_TAG_HTML) {
+			/* process as "in body" */
+			html_process_token_in_body(html, token);
+			return HTML_TOKEN_PROCESSED;
+		}
+
+		if (token->tag.type == HTML_TAG_NOFRAMES) {
+			/* process as "in head" */
+			html_process_token_in_head(html, token);
+			return HTML_TOKEN_PROCESSED;
+		}
+		break;
+
+	case HTML_TOKEN_END_TAG:
+		if (token->tag.type != HTML_TAG_HTML)
+			break;
+
+		html->mode = HTML_MODE_AFTER_AFTER_FRAMESET;
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_EOF:
+		html_stop_parsing(html);
+		return HTML_TOKEN_PROCESSED;
+
+	default:
+		break;
+	}
+
+	/* anything else: parse error, ignore */
+	html_parse_error(html);
+	return HTML_TOKEN_PROCESSED;
+}
+
+/*
+ * "after after body" insertion mode
+ * https://html.spec.whatwg.org/multipage/parsing.html#the-after-after-body-insertion-mode
+ *
+ * Comments are appended, DOCTYPEs, whitespace and <html> start tags are
+ * handed to the "in body" rules, and EOF stops parsing.  Any token not
+ * handled here is a parse error that switches back to "in body" and asks
+ * the caller to reprocess the token.
+ */
+html_token_act
+html_process_token_after_after_body(struct html_page *html, html_token *token)
+{
+	switch (token->type) {
+	case HTML_TOKEN_COMMENT:
+		/* doc says "as the last child of the Document object" */
+		html_append_comment(html, &token->comment);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_DOCTYPE:
+		/* process as "in body" */
+		html_process_token_in_body(html, token);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_CHARACTER:
+		if (token->ch.c != '\t' && token->ch.c != '\n' &&
+		    token->ch.c != '\f' && token->ch.c != '\r' &&
+		    token->ch.c != ' ')
+			break;
+
+		/* whitespace: process as "in body" */
+		html_process_token_in_body(html, token);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_START_TAG:
+		if (token->tag.type != HTML_TAG_HTML)
+			break;
+
+		/* process as "in body" */
+		html_process_token_in_body(html, token);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_EOF:
+		html_stop_parsing(html);
+		return HTML_TOKEN_PROCESSED;
+
+	default:
+		break;
+	}
+
+	/* anything else: parse error, retry the token as "in body" */
+	html_parse_error(html);
+	html->mode = HTML_MODE_IN_BODY;
+	return HTML_TOKEN_REPROCESS;
+}
+
+/*
+ * "after after frameset" insertion mode
+ * https://html.spec.whatwg.org/multipage/parsing.html#the-after-after-frameset-insertion-mode
+ *
+ * Always returns HTML_TOKEN_PROCESSED; tokens that are not handled here are
+ * a parse error and are ignored.
+ */
+html_token_act
+html_process_token_after_after_frameset(struct html_page *html,
+    html_token *token)
+{
+	switch (token->type) {
+	case HTML_TOKEN_COMMENT:
+		/* doc says "as the last child of the Document object" */
+		html_append_comment(html, &token->comment);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_DOCTYPE:
+		/* process as "in body" */
+		html_process_token_in_body(html, token);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_CHARACTER:
+		if (token->ch.c != '\t' && token->ch.c != '\n' &&
+		    token->ch.c != '\f' && token->ch.c != '\r' &&
+		    token->ch.c != ' ')
+			break;
+
+		/* whitespace: process as "in body" */
+		html_process_token_in_body(html, token);
+		return HTML_TOKEN_PROCESSED;
+
+	case HTML_TOKEN_START_TAG:
+		if (token->tag.type == HTML_TAG_HTML) {
+			/* process as "in body" */
+			html_process_token_in_body(html, token);
+			return HTML_TOKEN_PROCESSED;
+		}
+
+		if (token->tag.type == HTML_TAG_NOFRAMES) {
+			/* process as "in head" */
+			html_process_token_in_head(html, token);
+			return HTML_TOKEN_PROCESSED;
+		}
+		break;
+
+	case HTML_TOKEN_EOF:
+		html_stop_parsing(html);
+		return HTML_TOKEN_PROCESSED;
+
+	default:
+		break;
+	}
+
+	/* anything else: parse error, ignore */
+	html_parse_error(html);
+	return HTML_TOKEN_PROCESSED;
+}
+
+/*
+ * Returns true when token is one of the start tags (or the </br> and </p>
+ * end tags) that the "in foreign content" rules treat as a parse error
+ * which pops back out to HTML content.
+ */
+static bool
+html_token_leaves_foreign_content(html_token *token)
+{
+	if (token->type == HTML_TOKEN_END_TAG)
+		return (token->tag.type == HTML_TAG_BR ||
+		    token->tag.type == HTML_TAG_P);
+
+	if (token->type != HTML_TOKEN_START_TAG)
+		return false;
+
+	switch (token->tag.type) {
+	case HTML_TAG_B:
+	case HTML_TAG_BIG:
+	case HTML_TAG_BLOCKQUOTE:
+	case HTML_TAG_BODY:
+	case HTML_TAG_BR:
+	case HTML_TAG_CENTER:
+	case HTML_TAG_CODE:
+	case HTML_TAG_DD:
+	case HTML_TAG_DIV:
+	case HTML_TAG_DL:
+	case HTML_TAG_DT:
+	case HTML_TAG_EM:
+	case HTML_TAG_EMBED:
+	case HTML_TAG_H1:
+	case HTML_TAG_H2:
+	case HTML_TAG_H3:
+	case HTML_TAG_H4:
+	case HTML_TAG_H5:
+	case HTML_TAG_H6:
+	case HTML_TAG_HEAD:
+	case HTML_TAG_HR:
+	case HTML_TAG_I:
+	case HTML_TAG_IMG:
+	case HTML_TAG_LI:
+	case HTML_TAG_LISTING:
+	case HTML_TAG_MENU:
+	case HTML_TAG_META:
+	case HTML_TAG_NOBR:
+	case HTML_TAG_OL:
+	case HTML_TAG_P:
+	case HTML_TAG_PRE:
+	case HTML_TAG_RUBY:
+	case HTML_TAG_S:
+	case HTML_TAG_SMALL:
+	case HTML_TAG_SPAN:
+	case HTML_TAG_STRONG:
+	case HTML_TAG_STRIKE:
+	case HTML_TAG_SUB:
+	case HTML_TAG_SUP:
+	case HTML_TAG_TABLE:
+	case HTML_TAG_TT:
+	case HTML_TAG_U:
+	case HTML_TAG_UL:
+	case HTML_TAG_VAR:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Rules for parsing tokens in foreign (SVG/MathML) content.
+ *
+ * Returns HTML_TOKEN_PROCESSED when the token has been consumed here, or
+ * HTML_TOKEN_REPROCESS when the caller should process the token again
+ * according to the current insertion mode in HTML content.
+ */
+html_token_act
+html_process_token_in_foreign_content(struct html_page *html,
+    html_token *token)
+{
+	struct html_element *node;
+	bool popped_node;
+	short n;
+
+	if (token->type == HTML_TOKEN_CHARACTER) {
+		if (token->ch.c == '\0') {
+			html_parse_error(html);
+			html_insert_character(html, HTML_REPLACEMENT_CHARACTER);
+			return HTML_TOKEN_PROCESSED;
+		}
+
+		html_insert_character(html, token->ch.c);
+
+		/* anything other than whitespace clears frameset-ok */
+		if (!(token->ch.c == '\t' || token->ch.c == '\n' ||
+		    token->ch.c == '\f' || token->ch.c == '\r' ||
+		    token->ch.c == ' '))
+			html->frameset_ok = false;
+
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_COMMENT) {
+		html_append_comment(html, &token->comment);
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_DOCTYPE) {
+		html_parse_error(html);
+		/* ignore */
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (html_token_leaves_foreign_content(token)) {
+		html_parse_error(html);
+
+		/* TODO: check mathml */
+
+		/* stop if the stack empties rather than dereferencing NULL */
+		while (html->current_node != NULL &&
+		    html->current_node->ns != HTML_NAMESPACE_HTML)
+			html_pop_current_element(html);
+
+		/*
+		 * Reprocess the token according to the rules given in the section
+		 * corresponding to the current insertion mode in HTML content.
+		 */
+		return HTML_TOKEN_REPROCESS;
+	}
+
+	if (token->type == HTML_TOKEN_START_TAG) {
+		/* TODO: check mathml */
+
+		if (html->current_node->ns == HTML_NAMESPACE_SVG) {
+			/* TODO: check svg tag name according to a list */
+
+			/* TODO: "adjust SVG attributes" */
+		}
+
+		/* TODO: "adjust foreign attributes" */
+
+		/*
+		 * Insert a foreign element for the token, with adjusted current node's
+		 * namespace and false.
+		 */
+		html_append_element_for_token(html, token, html->current_node->ns);
+
+		if (token->tag.self_closing) {
+			/*
+			 * A self-closing SVG <script/> acts like its end tag (handled
+			 * further below); either way the new element is popped and the
+			 * self-closing flag acknowledged.
+			 */
+			token->tag.self_closing_acked = true;
+			html_pop_current_element(html);
+
+			/* TODO: other things for svg script */
+		}
+
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	/*
+	 * An end tag whose tag name is "script", if the current node is an SVG
+	 * script element
+	 */
+	if (token->type == HTML_TOKEN_END_TAG &&
+	    token->tag.type == HTML_TAG_SCRIPT &&
+	    html->current_node->type == HTML_TAG_SCRIPT &&
+	    html->current_node->ns == HTML_NAMESPACE_SVG) {
+		html_pop_current_element(html);
+
+		/* TODO: other things */
+
+		return HTML_TOKEN_PROCESSED;
+	}
+
+	if (token->type == HTML_TOKEN_END_TAG) {
+		/*
+		 * 1. Initialize node to be the current node (the bottommost node of
+		 * the stack).
+		 */
+		node = html->current_node;
+
+		/*
+		 * 2. If node's tag name, converted to ASCII lowercase, is not the same
+		 * as the tag name of the token, then this is a parse error.
+		 */
+		if (strcasecmp(token->tag.name, node->name) != 0)
+			html_parse_error(html);
+
+		for (;;) {
+			/*
+			 * 3. Loop: If node is the topmost element in the stack of open
+			 * elements, then return. (fragment case)
+			 */
+			if (node == html->open[0])
+				return HTML_TOKEN_PROCESSED;
+
+			/*
+			 * 4. If node's tag name, converted to ASCII lowercase, is the
+			 * same as the tag name of the token, pop elements from the stack
+			 * of open elements until node has been popped from the stack,
+			 * and then return.
+			 *
+			 * Pop by element identity rather than by tag type: elements
+			 * nearer the bottom of the stack may share the token's tag type
+			 * without being node.
+			 */
+			if (strcasecmp(token->tag.name, node->name) == 0) {
+				do {
+					popped_node = (html->current_node == node);
+					html_pop_current_element(html);
+				} while (!popped_node && html->current_node != NULL);
+
+				return HTML_TOKEN_PROCESSED;
+			}
+
+			/*
+			 * 5. Set node to the previous entry in the stack of open
+			 * elements.
+			 */
+			for (n = 1; n < html->open_count; n++) {
+				if (html->open[n] == node) {
+					node = html->open[n - 1];
+					break;
+				}
+			}
+
+			/*
+			 * 6. If node is not an element in the HTML namespace, return to
+			 * the step labeled loop.
+			 */
+			if (node->ns == HTML_NAMESPACE_HTML)
+				break;
+		}
+
+		/*
+		 * 7. Otherwise, process the token according to the rules given in the
+		 * section corresponding to the current insertion mode in HTML content.
+		 */
+		return HTML_TOKEN_REPROCESS;
+	}
+
+	return HTML_TOKEN_PROCESSED;
+}
+
+/*
+ * Stop parsing: pop every element left on the stack of open elements, one
+ * at a time, until there is no current node.
+ */
+void
+html_stop_parsing(struct html_page *html)
+{
+	for (;;) {
+		if (!html->current_node)
+			break;
+
+		html_pop_current_element(html);
+	}
+}
+
+/*
+ * helpers
+ */
+
+/*
+ * Returns true if any element on the stack of open elements has the given
+ * tag type, regardless of scope.
+ */
+bool
+html_has_tag_open(struct html_page *html, html_tag_type tag)
+{
+	short idx = 0;
+
+	while (idx < html->open_count) {
+		if (html->open[idx]->type == tag)
+			return true;
+		idx++;
+	}
+
+	return false;
+}
+
+/*
+ * Returns true if this exact element (compared by pointer) is somewhere on
+ * the stack of open elements.
+ */
+bool
+html_is_element_open(struct html_page *html, struct html_element *el)
+{
+	short remaining;
+
+	for (remaining = html->open_count; remaining > 0; remaining--) {
+		if (html->open[remaining - 1] == el)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Scope check for one specific element (matched by pointer).  Delegates to
+ * html_has_element_or_one_with_tag_open_in_scope() with a tag of 0.
+ */
+bool
+html_has_element_in_scope(struct html_page *html, struct html_element *element,
+    html_scope scope)
+{
+	bool in_scope;
+
+	in_scope = html_has_element_or_one_with_tag_open_in_scope(html, element,
+	    0, scope);
+
+	return in_scope;
+}
+
+/*
+ * Scope check for any open element of the given tag type.  Delegates to
+ * html_has_element_or_one_with_tag_open_in_scope() with a NULL element so
+ * that matching is done by tag.
+ */
+bool
+html_has_element_with_tag_open_in_scope(struct html_page *html,
+    html_tag_type tag, html_scope scope)
+{
+	bool in_scope;
+
+	in_scope = html_has_element_or_one_with_tag_open_in_scope(html, NULL,
+	    tag, scope);
+
+	return in_scope;
+}
+
+/*
+ * "Has an element in the specific scope".
+ *
+ * Walks the stack of open elements from the current node upward.  Returns
+ * true as soon as the target is found: `element` by pointer when it is
+ * non-NULL, otherwise any element whose type equals `tag`.  Returns false
+ * when an element that bounds `scope` is reached first, or when the stack
+ * is exhausted.
+ */
+bool
+html_has_element_or_one_with_tag_open_in_scope(struct html_page *html,
+    struct html_element *element, html_tag_type tag, html_scope scope)
+{
+	struct html_element *oelement;
+	short n;
+
+	for (n = html->open_count - 1; n >= 0; n--) {
+		oelement = html->open[n];
+
+		if (element) {
+			if (oelement == element)
+				return true;
+		} else {
+			if (oelement->type == tag)
+				return true;
+		}
+
+		if (scope == HTML_SCOPE_DEFAULT || scope == HTML_SCOPE_LIST_ITEM ||
+		    scope == HTML_SCOPE_BUTTON) {
+			if (oelement->type == HTML_TAG_APPLET ||
+			    oelement->type == HTML_TAG_CAPTION ||
+			    oelement->type == HTML_TAG_HTML ||
+			    oelement->type == HTML_TAG_TABLE ||
+			    oelement->type == HTML_TAG_TD ||
+			    oelement->type == HTML_TAG_TH ||
+			    oelement->type == HTML_TAG_MARQUEE ||
+			    oelement->type == HTML_TAG_OBJECT ||
+			    oelement->type == HTML_TAG_TEMPLATE) {
+				/* TODO: MathML and SVG tags */
+				return false;
+			}
+		}
+
+		if (scope == HTML_SCOPE_LIST_ITEM) {
+			if (oelement->ns == HTML_NAMESPACE_HTML &&
+			    (oelement->type == HTML_TAG_OL ||
+			    oelement->type == HTML_TAG_UL))
+				return false;
+		}
+
+		if (scope == HTML_SCOPE_BUTTON) {
+			if (oelement->ns == HTML_NAMESPACE_HTML &&
+			    oelement->type == HTML_TAG_BUTTON)
+				return false;
+		}
+
+		if (scope == HTML_SCOPE_TABLE) {
+			if (oelement->ns == HTML_NAMESPACE_HTML &&
+			    (oelement->type == HTML_TAG_HTML ||
+			    oelement->type == HTML_TAG_TABLE ||
+			    oelement->type == HTML_TAG_TEMPLATE))
+				return false;
+		}
+
+		if (scope == HTML_SCOPE_SELECT) {
+			/*
+			 * Select scope is bounded by every element type except HTML
+			 * optgroup and option, so anything outside the HTML namespace
+			 * ends the scope as well.
+			 */
+			if (!(oelement->ns == HTML_NAMESPACE_HTML &&
+			    (oelement->type == HTML_TAG_OPTGROUP ||
+			    oelement->type == HTML_TAG_OPTION)))
+				return false;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Returns true for element types that are serialized without an end tag.
+ * The html argument is not used.
+ */
+bool
+html_element_serializes_as_void(struct html_page *html,
+    struct html_element *element)
+{
+	bool is_void;
+
+	switch (element->type) {
+	/*
+	 * void elements:
+	 * https://html.spec.whatwg.org/multipage/syntax.html#elements-2
+	 */
+	case HTML_TAG_AREA:
+	case HTML_TAG_BASE:
+	case HTML_TAG_BR:
+	case HTML_TAG_COL:
+	case HTML_TAG_EMBED:
+	case HTML_TAG_HR:
+	case HTML_TAG_IMG:
+	case HTML_TAG_INPUT:
+	case HTML_TAG_LINK:
+	case HTML_TAG_META:
+	case HTML_TAG_SOURCE:
+	case HTML_TAG_TRACK:
+	case HTML_TAG_WBR:
+	/*
+	 * additional elements from:
+	 * https://html.spec.whatwg.org/multipage/parsing.html#serialising-html-fragments
+	 */
+	case HTML_TAG_BASEFONT:
+	case HTML_TAG_BGSOUND:
+	case HTML_TAG_FRAME:
+	case HTML_TAG_KEYGEN:
+	case HTML_TAG_PARAM:
+		is_void = true;
+		break;
+	default:
+		is_void = false;
+		break;
+	}
+
+	return is_void;
+}
+
+/*
+ * Returns true if the element's tag type is in the parser's "special"
+ * category.  Only the tag type is examined; the html argument is not used.
+ *
+ * https://html.spec.whatwg.org/multipage/parsing.html#special
+ */
+bool
+html_is_element_special(struct html_page *html, struct html_element *el)
+{
+	switch (el->type) {
+	case HTML_TAG_ADDRESS:
+	case HTML_TAG_APPLET:
+	case HTML_TAG_AREA:
+	case HTML_TAG_ARTICLE:
+	case HTML_TAG_ASIDE:
+	case HTML_TAG_BASE:
+	case HTML_TAG_BASEFONT:
+	case HTML_TAG_BGSOUND:
+	case HTML_TAG_BLOCKQUOTE:
+	case HTML_TAG_BODY:
+	case HTML_TAG_BR:
+	case HTML_TAG_BUTTON:
+	case HTML_TAG_CAPTION:
+	case HTML_TAG_CENTER:
+	case HTML_TAG_COL:
+	case HTML_TAG_COLGROUP:
+	case HTML_TAG_DD:
+	case HTML_TAG_DETAILS:
+	case HTML_TAG_DIR:
+	case HTML_TAG_DIV:
+	case HTML_TAG_DL:
+	case HTML_TAG_DT:
+	case HTML_TAG_EMBED:
+	case HTML_TAG_FIELDSET:
+	case HTML_TAG_FIGCAPTION:
+	case HTML_TAG_FIGURE:
+	case HTML_TAG_FOOTER:
+	case HTML_TAG_FORM:
+	case HTML_TAG_FRAME:
+	case HTML_TAG_FRAMESET:
+	case HTML_TAG_H1:
+	case HTML_TAG_H2:
+	case HTML_TAG_H3:
+	case HTML_TAG_H4:
+	case HTML_TAG_H5:
+	case HTML_TAG_H6:
+	case HTML_TAG_HEAD:
+	case HTML_TAG_HEADER:
+	case HTML_TAG_HGROUP:
+	case HTML_TAG_HR:
+	case HTML_TAG_HTML:
+	case HTML_TAG_IFRAME:
+	case HTML_TAG_IMG:
+	case HTML_TAG_INPUT:
+	case HTML_TAG_KEYGEN:
+	case HTML_TAG_LI:
+	case HTML_TAG_LINK:
+	case HTML_TAG_LISTING:
+	case HTML_TAG_MAIN:
+	case HTML_TAG_MARQUEE:
+	case HTML_TAG_MENU:
+	case HTML_TAG_META:
+	case HTML_TAG_NAV:
+	case HTML_TAG_NOEMBED:
+	case HTML_TAG_NOFRAMES:
+	case HTML_TAG_NOSCRIPT:
+	case HTML_TAG_OBJECT:
+	case HTML_TAG_OL:
+	case HTML_TAG_P:
+	case HTML_TAG_PARAM:
+	case HTML_TAG_PLAINTEXT:
+	case HTML_TAG_PRE:
+	case HTML_TAG_SCRIPT:
+	case HTML_TAG_SEARCH:
+	case HTML_TAG_SECTION:
+	case HTML_TAG_SELECT:
+	case HTML_TAG_SOURCE:
+	case HTML_TAG_STYLE:
+	case HTML_TAG_SUMMARY:
+	case HTML_TAG_TABLE:
+	case HTML_TAG_TBODY:
+	case HTML_TAG_TD:
+	case HTML_TAG_TEMPLATE:
+	case HTML_TAG_TEXTAREA:
+	case HTML_TAG_TFOOT:
+	case HTML_TAG_TH:
+	case HTML_TAG_THEAD:
+	case HTML_TAG_TITLE:
+	case HTML_TAG_TR:
+	case HTML_TAG_TRACK:
+	case HTML_TAG_UL:
+	case HTML_TAG_WBR:
+	/*
+	 * xmp is the final entry in the spec's list.
+	 * NOTE(review): assumes HTML_TAG_XMP is defined alongside the other
+	 * tag types in html.h -- confirm.
+	 */
+	case HTML_TAG_XMP:
+		/* TODO: MathML and SVG */
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Returns true if the element's tag type is one of the formatting tags
+ * listed below.  The html argument is not used.
+ */
+bool
+html_is_element_formatting(struct html_page *html, struct html_element *el)
+{
+	bool formatting = false;
+
+	switch (el->type) {
+	case HTML_TAG_A:
+	case HTML_TAG_B:
+	case HTML_TAG_BIG:
+	case HTML_TAG_CODE:
+	case HTML_TAG_EM:
+	case HTML_TAG_FONT:
+	case HTML_TAG_I:
+	case HTML_TAG_NOBR:
+	case HTML_TAG_S:
+	case HTML_TAG_SMALL:
+	case HTML_TAG_STRIKE:
+	case HTML_TAG_STRONG:
+	case HTML_TAG_TT:
+	case HTML_TAG_U:
+		formatting = true;
+		break;
+	default:
+		break;
+	}
+
+	return formatting;
+}
+
+/*
+ * Escape a string for serialization.
+ * https://html.spec.whatwg.org/multipage/parsing.html#escapingString
+ *
+ * '&' becomes "&amp;" and byte 0xa0 becomes "&nbsp;".  In attribute mode
+ * '"' becomes "&quot;"; otherwise '<' and '>' become "&lt;" and "&gt;".
+ * Every other byte is copied through unchanged.
+ *
+ * str is read for *len bytes.  On return *len holds the escaped length
+ * (not counting the terminating NUL) and the result is html->escaped_buf,
+ * which is reused (and possibly reallocated) by the next call.
+ */
+char *
+html_escape_string(struct html_page *html, char *str, size_t *len,
+    bool attribute_mode)
+{
+	const char *repl;
+	size_t len_escaped, repl_len, n;
+	short append;
+
+	/* first pass measures the output, second pass writes it */
+	for (append = 0, len_escaped = 0; append <= 1; append++) {
+		if (append) {
+			if (html->escaped_buf == NULL ||
+			    html->escaped_size < len_escaped + 1) {
+				html->escaped_size = len_escaped + 1;
+				HTML_DEBUG((": reallocing escaped to %ld",
+				    html->escaped_size));
+				if (html->escaped_buf)
+					xfree(&html->escaped_buf);
+				html->escaped_buf = xmalloc(html->escaped_size);
+			}
+
+			if (html->escaped_buf == NULL)
+				panic("escaped_buf is null");
+
+			len_escaped = 0;
+		}
+
+		/* size_t index: a short would wrap on inputs over 32767 bytes */
+		for (n = 0; n < *len; n++) {
+			repl = NULL;
+
+			/*
+			 * Each case ends in its own break; when the mode does not
+			 * call for escaping, repl stays NULL and the byte is copied
+			 * as-is ('"' must never be written as "&lt;").
+			 */
+			switch ((unsigned char)str[n]) {
+			case '&':
+				repl = "&amp;";
+				break;
+			case 0xa0:
+				repl = "&nbsp;";
+				break;
+			case '"':
+				if (attribute_mode)
+					repl = "&quot;";
+				break;
+			case '<':
+				if (!attribute_mode)
+					repl = "&lt;";
+				break;
+			case '>':
+				if (!attribute_mode)
+					repl = "&gt;";
+				break;
+			default:
+				break;
+			}
+
+			if (repl == NULL) {
+				if (append)
+					html->escaped_buf[len_escaped] = str[n];
+				len_escaped++;
+				continue;
+			}
+
+			repl_len = strlen(repl);
+			if (append)
+				memcpy(html->escaped_buf + len_escaped, repl, repl_len);
+			len_escaped += repl_len;
+		}
+	}
+
+	html->escaped_buf[len_escaped] = '\0';
+	HTML_DEBUG((": escaped '%s' to [%ld] '%s'", str, len_escaped,
+	    html->escaped_buf));
+	*len = len_escaped;
+	return html->escaped_buf;
+}
+
+/*
+ * Pop the current node off the stack of open elements.
+ *
+ * The node is rendered with html_render_current_node(html, true), the
+ * stack's reference to it is dropped with html_deref_element(), and
+ * html->current_node is set to the new bottom of the stack (or NULL if the
+ * stack is now empty).  Panics if the stack is already empty.
+ */
+void
+html_pop_current_element(struct html_page *html)
+{
+	short n;
+
+	if (html->open_count <= 0)
+		panic("bogus open count %d", html->open_count);
+
+	HTML_DEBUG((": rendering current <%s>", html->current_node->name));
+
+	html_render_current_node(html, true);
+
+	/*
+	 * Log the name before dropping our reference; html_deref_element() may
+	 * release the element, after which its name must not be read.
+	 */
+	HTML_DEBUG((": popping current <%s>", html->current_node->name));
+
+	html_deref_element(html, html->current_node);
+
+	html->open_count--;
+	if (html->open_count)
+		html->current_node = html->open[html->open_count - 1];
+	else
+		html->current_node = NULL;
+
+	HTML_DEBUG((": still open: "));
+	for (n = 0; n <= html->open_count - 1; n++)
+		HTML_DEBUG(("<%s>", html->open[n]->name));
+}
+
+/*
+ * Pop open elements, starting from the bottom of the stack, until an
+ * element of type stop_after has itself been popped.  If no such element
+ * is open, the whole stack ends up popped.
+ */
+void
+html_pop_nodes_until_past_tag(struct html_page *html, html_tag_type stop_after)
+{
+	bool reached;
+
+	HTML_DEBUG((": popping until past <%s>", html_tag_names[stop_after]));
+
+	while (html->open_count > 0) {
+		/* check the bottommost entry before it gets popped */
+		reached = (html->open[html->open_count - 1]->type == stop_after);
+
+		html_pop_current_element(html);
+
+		if (reached)
+			return;
+	}
+
+	/* closed a tag that was never open? */
+	HTML_DEBUG(("popped tags all the way to root looking for %s",
+	    html_tag_names[stop_after]));
+}
+
+/*
+ * Pop open elements, starting from the bottom of the stack, until the
+ * given element (compared by pointer) has itself been popped.  If it is not
+ * on the stack, the whole stack ends up popped.
+ */
+void
+html_pop_nodes_until_past_element(struct html_page *html,
+    struct html_element *element)
+{
+	bool reached;
+
+	while (html->open_count > 0) {
+		/* check the bottommost entry before it gets popped */
+		reached = (html->open[html->open_count - 1] == element);
+
+		html_pop_current_element(html);
+
+		if (reached)
+			return;
+	}
+}
+
+/*
+ * Generate implied end tags: keep popping the current node while it is a
+ * dd, dt, li, optgroup, option, p, rb, rp, rt or rtc element.
+ *
+ * If except is non-NULL, popping stops at a current node with that name.
+ * If thoroughly is true, caption, colgroup, tbody, td, tfoot, th, thead and
+ * tr elements are popped as well.
+ */
+void
+html_generate_implied_end_tags(struct html_page *html, char *except,
+    bool thoroughly)
+{
+	struct html_element *cur;
+
+	HTML_DEBUG((": html_generate_implied_end_tags"));
+	if (except)
+		HTML_DEBUG((" except <%s>", except));
+
+	for (cur = html->current_node; cur != NULL; cur = html->current_node) {
+		if (except != NULL && strcmp(cur->name, except) == 0)
+			return;
+
+		switch (cur->type) {
+		case HTML_TAG_DD:
+		case HTML_TAG_DT:
+		case HTML_TAG_LI:
+		case HTML_TAG_OPTGROUP:
+		case HTML_TAG_OPTION:
+		case HTML_TAG_P:
+		case HTML_TAG_RB:
+		case HTML_TAG_RP:
+		case HTML_TAG_RT:
+		case HTML_TAG_RTC:
+			break;
+		case HTML_TAG_CAPTION:
+		case HTML_TAG_COLGROUP:
+		case HTML_TAG_TBODY:
+		case HTML_TAG_TD:
+		case HTML_TAG_TFOOT:
+		case HTML_TAG_TH:
+		case HTML_TAG_THEAD:
+		case HTML_TAG_TR:
+			/* only implied when closing thoroughly */
+			if (!thoroughly)
+				return;
+			break;
+		default:
+			return;
+		}
+
+		html_pop_current_element(html);
+	}
+}
+
+/*
+ * Remove the first entry for element from the list of active formatting
+ * elements, closing the gap and dropping the list's reference to it.
+ * Returns true if an entry was removed, false if element was not listed.
+ */
+bool
+html_remove_active_formatting_element(struct html_page *html,
+    struct html_element *element)
+{
+	short idx, last;
+
+	/* locate the first matching entry */
+	idx = 0;
+	while (idx < html->active_formatting_count &&
+	    html->active_formatting[idx].element != element)
+		idx++;
+
+	if (idx >= html->active_formatting_count)
+		return false;
+
+	/* slide every later entry down by one slot */
+	last = html->active_formatting_count - 1;
+	while (idx < last) {
+		html->active_formatting[idx] = html->active_formatting[idx + 1];
+		idx++;
+	}
+
+	html->active_formatting_count--;
+	html_deref_element(html, element);
+
+	return true;
+}
+
+/*
+ * Close a p element: generate implied end tags except for p, report a
+ * parse error if the current node is then not a p, and pop elements until a
+ * p has been popped.
+ */
+void
+html_close_p(struct html_page *html)
+{
+	bool current_is_p;
+
+	html_generate_implied_end_tags(html, "p", false);
+
+	current_is_p = (html->current_node->type == HTML_TAG_P);
+	if (!current_is_p)
+		html_parse_error(html);
+
+	html_pop_nodes_until_past_tag(html, HTML_TAG_P);
+}
+
+/*
+ * Remove element (compared by pointer) from anywhere in the stack of open
+ * elements, closing the gap and dropping the stack's reference to it.
+ * Unlike html_pop_current_element(), the element is not rendered.
+ *
+ * Returns true if the element was found and removed, false otherwise.
+ */
+bool
+html_remove_open_element(struct html_page *html, struct html_element *element)
+{
+	short n;
+
+	for (n = 0; n < html->open_count; n++) {
+		if (html->open[n] != element)
+			continue;
+
+		for (; n < html->open_count - 1; n++)
+			html->open[n] = html->open[n + 1];
+		html->open_count--;
+
+		/*
+		 * Keep current_node pointing at the bottom of the stack; if the
+		 * removed element was the current node it would otherwise be left
+		 * dangling.
+		 */
+		if (html->open_count)
+			html->current_node = html->open[html->open_count - 1];
+		else
+			html->current_node = NULL;
+
+		html_deref_element(html, element);
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Returns true if any entry in the list of active formatting elements
+ * refers to an element of the given tag type.  Entries with no element
+ * (markers) are skipped.
+ */
+bool
+html_is_tag_in_active_formatting(struct html_page *html, html_tag_type tag)
+{
+	short n;
+
+	/* scan every entry, including the most recently added one */
+	for (n = 0; n < html->active_formatting_count; n++) {
+		if (html->active_formatting[n].element &&
+		    html->active_formatting[n].element->type == tag)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Returns true if this exact element (compared by pointer) has an entry in
+ * the list of active formatting elements.
+ */
+bool
+html_is_element_in_active_formatting(struct html_page *html,
+    struct html_element *element)
+{
+	short idx = 0;
+
+	while (idx < html->active_formatting_count) {
+		if (html->active_formatting[idx].element == element)
+			return true;
+		idx++;
+	}
+
+	return false;
+}
+
+/*
+ * Reconstruct the active formatting elements.
+ *
+ * Starting from the last entry in the list, rewinds to the nearest earlier
+ * entry that is a marker or is still on the stack of open elements, then
+ * walks forward again, inserting a new element for every entry after that
+ * point and replacing each entry's element with the new one.
+ */
+void
+html_reconstruct_active_formatting(struct html_page *html)
+{
+	struct html_formatting *entry, *newest;
+	struct html_element *new_element;
+	short n, entry_n;
+	html_token token;
+
+	HTML_DEBUG((": reconstructing AF"));
+
+	/* 1. Nothing in the list means nothing to reconstruct. */
+	if (html->active_formatting_count == 0)
+		return;
+
+	/*
+	 * 2. Likewise if the most recently added entry is a marker or is an
+	 * element that is still open.
+	 */
+	newest = &html->active_formatting[html->active_formatting_count - 1];
+	if (newest->marker || html_is_element_open(html, newest->element))
+		return;
+
+	/* 3. Let entry be the last non-marker entry in the list. */
+	entry = NULL;
+	entry_n = -1;
+	for (n = html->active_formatting_count - 1; n >= 0 && entry_n == -1;
+	    n--) {
+		if (!html->active_formatting[n].marker) {
+			entry = &html->active_formatting[n];
+			entry_n = n;
+		}
+	}
+	if (entry_n == -1)
+		panic("html_reconstruct_active_formatting: no last element");
+
+	/*
+	 * 4-6. Rewind: step back one entry at a time until reaching the start
+	 * of the list, a marker, or an element that is still open.
+	 *
+	 * 7. Advance: when stopped by a marker or an open element, creation
+	 * starts at the entry just after it; when the start of the list was
+	 * reached instead, creation starts at the first entry.
+	 */
+	while (entry_n > 0) {
+		entry_n--;
+		entry = &html->active_formatting[entry_n];
+
+		if (entry->marker || html_is_element_open(html, entry->element)) {
+			entry_n++;
+			break;
+		}
+	}
+
+	for (;;) {
+		entry = &html->active_formatting[entry_n];
+
+		/*
+		 * 8. Create: Insert an HTML element for the token for which the
+		 * element entry was created, to obtain new element.
+		 */
+		memset(&token, 0, sizeof(html_token));
+		token.type = entry->token;
+		token.tag.type = entry->element->type;
+		memcpy(&token.tag.name, entry->element->name,
+		    sizeof(token.tag.name));
+		token.tag.name_len = entry->element->name_len;
+		memcpy(&token.tag.attrs, entry->element->attrs,
+		    sizeof(token.tag.attrs));
+		token.tag.attrs_count = entry->element->attrs_count;
+		new_element = html_append_element_for_token(html, &token,
+		    HTML_NAMESPACE_HTML);
+
+		/*
+		 * 9. Replace the entry for entry in the list with an entry for new
+		 * element.
+		 */
+		html_deref_element(html, entry->element);
+		entry->element = new_element;
+		new_element->refs++;
+
+		HTML_DEBUG((": AF created new <%s>", new_element->name));
+
+		/*
+		 * 10. Keep advancing until the entry just replaced is the last one
+		 * in the list.
+		 */
+		if (entry_n + 1 == html->active_formatting_count)
+			break;
+
+		entry_n++;
+	}
+}
+
+/*
+ * Push element onto the list of active formatting elements, taking a
+ * reference to it.  token_type is stored with the entry.
+ *
+ * https://html.spec.whatwg.org/multipage/parsing.html#the-list-of-active-formatting-elements
+ *
+ * Before pushing, the "Noah's Ark clause" is applied: if three entries
+ * after the last marker (or anywhere in the list when there is no marker)
+ * already match element, the earliest of them is removed.  Panics if the
+ * list is full.
+ */
+void
+html_push_active_formatting_element(struct html_page *html,
+    struct html_element *element, html_token_type token_type)
+{
+	struct html_formatting *entry;
+	struct html_element *earliest = NULL;
+	short first = 0;
+	short found = 0, n;
+
+	/* start after the last marker, or at the beginning if there is none */
+	for (n = html->active_formatting_count - 1; n >= 0; n--) {
+		if (html->active_formatting[n].marker) {
+			first = n + 1;
+			break;
+		}
+	}
+
+	/*
+	 * "This is the Noah's Ark clause. But with three per family instead of
+	 * two."
+	 *
+	 * 1. If there are already three elements in the list of active formatting
+	 * elements after the last marker, if any, or anywhere in the list if there
+	 * are no markers, that have the same tag name, namespace, and attributes
+	 * as element, then remove the earliest such element from the list of
+	 * active formatting elements.
+	 */
+	for (n = first; n < html->active_formatting_count; n++) {
+		if (html->active_formatting[n].marker)
+			panic("shouldn't have a marker after last marker in active "
+			    "formatting list");
+
+		if (html->active_formatting[n].element->type != element->type)
+			continue;
+		if (html->active_formatting[n].element->ns != element->ns)
+			continue;
+
+		/* TODO: also compare attribute names and values */
+
+		if (earliest == NULL)
+			earliest = html->active_formatting[n].element;
+		found++;
+	}
+
+	if (found >= 3) {
+		HTML_DEBUG(("push_active_formatting_element shifting out tag "
+		    "%s\r", earliest->name));
+
+		/* shifts the later entries down and drops the list's reference */
+		html_remove_active_formatting_element(html, earliest);
+	}
+
+	/* 2. Add element to the list of active formatting elements. */
+	if (html->active_formatting_count >= nitems(html->active_formatting))
+		panic("active formatting overflow");
+
+	entry = &html->active_formatting[html->active_formatting_count];
+	entry->marker = false;
+	entry->token = token_type;
+	entry->element = element;
+	html->active_formatting_count++;
+	element->refs++;
+}
+
+/*
+ * Append a marker entry (no element) to the list of active formatting
+ * elements.  token_type is stored with the entry.  Panics if the list is
+ * full.
+ */
+void
+html_push_active_formatting_marker(struct html_page *html,
+    html_token_type token_type)
+{
+	struct html_formatting *entry;
+
+	if (html->active_formatting_count >= nitems(html->active_formatting))
+		panic("active formatting overflow");
+
+	/*
+	 * Fill in the first unused slot and only then grow the list, so the
+	 * previous last entry is left untouched and an empty list is handled.
+	 */
+	entry = &html->active_formatting[html->active_formatting_count];
+	entry->token = token_type;
+	entry->element = NULL;
+	entry->marker = true;
+	html->active_formatting_count++;
+}
+
+bool
+html_run_adoption_agency(struct html_page *html, html_token *token)
+{
+ /*
+ * https://html.spec.whatwg.org/multipage/parsing.html#adoption-agency-algorithm
+ */
+ char *subject;
+ short olc, ilc, n;
+ struct html_element *formatting_element, *before_fe, *after_fe,
+ *furthest_block, *common_ancestor, *node, *last_node, *before_node,
+ *element;
+ html_token ttoken;
+ bool found;
+
+ HTML_DEBUG((": AAA for <%s>: AF tags", token->tag.name));
+ for (n = 0; n < html->active_formatting_count; n++) {
+ HTML_DEBUG((" <%s>", html->active_formatting[n].element->name));
+ }
+ HTML_DEBUG((": open nodes "));
+ for (n = 0; n < html->open_count; n++) {
+ HTML_DEBUG(("<%s>", html->open[n]->name));
+ }
+
+ /* 1. Let subject be token's tag name. */
+ subject = token->tag.name;
+
+ /*
+ * 2. If the current node is an HTML element whose tag name is subject, and
+ * the current node is not in the list of active formatting elements, then
+ * pop the current node off the stack of open elements and return.
+ */
+ if (strcmp(html->current_node->name, subject) == 0 &&
+ !html_is_element_in_active_formatting(html, html->current_node)) {
+ html_pop_current_element(html);
+ return true;
+ }
+
+ /* 3. Let outerLoopCounter be 0. */
+ olc = 0;
+
+ /* 4. While true: */
+ for (;;) {
+ /* 1. If outerLoopCounter is greater than or equal to 8, then return. */
+ if (olc >= 8)
+ return true;
+
+ /* 2. Increment outerLoopCounter by 1. */
+ olc++;
+
+ /*
+ * 3. Let formattingElement be the last element in the list of active
+ * formatting elements that:
+ *
+ * - is between the end of the list and the last marker in the list, if
+ * any, or the start of the list otherwise, and
+ * - has the tag name /subject/.
+ */
+ formatting_element = NULL;
+ for (n = html->active_formatting_count - 1; n >= 0; n--) {
+ if (html->active_formatting[n].marker || n == 0) {
+ if (html->active_formatting[n].marker)
+ n++;
+ for (; n < html->active_formatting_count; n++) {
+ if (strcmp(html->active_formatting[n].element->name,
+ subject) == 0) {
+ formatting_element = html->active_formatting[n].element;
+ break;
+ }
+ }
+ break;
+ }
+ }
+
+ /*
+ * If there is no such element, then return and instead act as
+ * described in the "any other end tag" entry above.
+ * (we'll return false to indicate that)
+ */
+ if (formatting_element == NULL)
+ return false;
+
+ /*
+ * 4. If formattingElement is not in the stack of open elements, then
+ * this is a parse error; remove the element from the list, and return.
+ */
+ found = false;
+ for (n = 0; n < html->open_count; n++) {
+ if (html->open[n] == formatting_element) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ html_parse_error(html);
+ html_remove_active_formatting_element(html, formatting_element);
+ return true;
+ }
+
+ /*
+ * 5. If formattingElement is in the stack of open elements, but the
+ * element is not in scope, then this is a parse error; return.
+ */
+ if (!html_has_element_in_scope(html, formatting_element,
+ HTML_SCOPE_DEFAULT)) {
+ html_parse_error(html);
+ return true;
+ }
+
+ /*
+ * 6. If formattingElement is not the current node, this is a parse
+ * error. (But do not return.)
+ */
+ if (formatting_element != html->current_node)
+ html_parse_error(html);
+
+ /*
+ * 7. Let furthestBlock be the topmost node in the stack of open
+ * elements that is lower in the stack than formattingElement, and is
+ * an element in the special category. There might not be one.
+ */
+ furthest_block = NULL;
+ for (n = 0; n < html->active_formatting_count; n++) {
+ if (html->active_formatting[n].element != formatting_element)
+ continue;
+
+ for (n = n + 1; n < html->active_formatting_count; n++) {
+ if (html_is_element_special(html,
+ html->active_formatting[n].element)) {
+ furthest_block = html->active_formatting[n].element;
+ break;
+ }
+ }
+ }
+
+ /*
+ * 8. If there is no furthestBlock, then the UA must first pop all the
+ * nodes from the bottom of the stack of open elements, from the
+ * current node up to and including formattingElement, then remove
+ * formattingElement from the list of active formatting elements, and
+ * finally return.
+ */
+ if (furthest_block == NULL) {
+ while (html->current_node != formatting_element)
+ html_pop_current_element(html);
+ if (html->current_node == formatting_element)
+ html_pop_current_element(html);
+
+ html_remove_active_formatting_element(html, formatting_element);
+ return true;
+ }
+
+ /*
+ * 9. Let commonAncestor be the element immediately above
+ * formattingElement in the stack of open elements.
+ */
+ for (n = 0; n < html->active_formatting_count - 1; n++) {
+ if (html->active_formatting[n + 1].element == formatting_element) {
+ common_ancestor = html->active_formatting[n].element;
+ break;
+ }
+ }
+
+ /*
+ * 10. Let a bookmark note the position of formattingElement in the
+ * list of active formatting elements relative to the elements on
+ * either side of it in the list.
+ */
+ for (n = 0; n < html->active_formatting_count; n++) {
+ if (html->active_formatting[n].element == formatting_element) {
+ before_fe = html->active_formatting[n - 1].element;
+ after_fe = html->active_formatting[n + 1].element;
+ break;
+ }
+ }
+
+ /* 11. Let node and lastNode be furthestBlock. */
+ node = furthest_block;
+ last_node = furthest_block;
+
+ before_node = NULL;
+ for (n = 1; n < html->open_count; n++) {
+ if (html->open[n] == node) {
+ before_node = html->open[n - 1];
+ break;
+ }
+ }
+
+ /* 12. Let innerLoopCounter be 0. */
+ ilc = 0;
+
+ /* 13. While true: */
+ for (;;) {
+ /* 1. Increment innerLoopCounter by 1. */
+ ilc++;
+
+ /*
+ * 2. Let /node/ be the element immediately above /node/ in the
+ * stack of open elements, or if node is no longer in the stack of
+ * open elements (e.g. because it got removed by this algorithm),
+ * the element that was immediately above node in the stack of open
+ * elements before node was removed.
+ */
+ node = before_node;
+
+ /* 3. If node is formattingElement, then break. */
+ if (node == formatting_element)
+ break;
+
+ /*
+ * 4. If innerLoopCounter is greater than 3 and node is in the list
+ * of active formatting elements, then remove node from the list of
+ * active formatting elements.
+ */
+ if (ilc > 3)
+ html_remove_active_formatting_element(html, node);
+
+ /*
+ * 5. If node is not in the list of active formatting elements,
+ * then remove node from the stack of open elements and continue.
+ */
+ found = false;
+ for (n = 0; n < html->active_formatting_count; n++) {
+ if (html->active_formatting[n].element == node) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ before_node = NULL;
+ for (n = 1; n < html->open_count; n++) {
+ if (html->open[n] == node) {
+ before_node = html->open[n - 1];
+ break;
+ }
+ }
+
+ html_remove_open_element(html, node);
+ continue;
+ }
+
+ /*
+ * 6. Create an element for the token for which the element node
+ * was created, in the HTML namespace, with commonAncestor as the
+ * intended parent; replace the entry for node in the list of
+ * active formatting elements with an entry for the new element,
+ * replace the entry for node in the stack of open elements with an
+ * entry for the new element, and let node be the new element.
+ */
+ memset(&ttoken, 0, sizeof(html_token));
+ ttoken.type = HTML_TOKEN_START_TAG;
+ ttoken.tag.type = node->type;
+ element = html_create_element_for_token(html, &ttoken);
+
+ for (n = 0; n < html->active_formatting_count; n++) {
+ if (html->active_formatting[n].element == node) {
+ html_deref_element(html, node);
+ html->active_formatting[n].element = element;
+ element->refs++;
+ break;
+ }
+ }
+
+ for (n = 0; n < html->open_count; n++) {
+ if (html->open[n] == node) {
+ html_deref_element(html, node);
+ html->open[n] = element;
+ element->refs++;
+ break;
+ }
+ }
+
+ node = element;
+ before_node = NULL;
+ for (n = 1; n < html->open_count; n++) {
+ if (html->open[n] == node) {
+ before_node = html->open[n - 1];
+ break;
+ }
+ }
+
+ /*
+ * 7. If /last node/ is furthestBlock, then move the aforementioned
+ * bookmark to be immediately after the new node in the list of
+ * active formatting elements.
+ */
+ if (last_node == furthest_block) {
+ for (n = 0; n < html->active_formatting_count; n++) {
+ if (html->active_formatting[n].element != element)
+ continue;
+
+ before_fe = html->active_formatting[n - 1].element;
+ after_fe = html->active_formatting[n + 1].element;
+ }
+ }
+
+ /* 8. Append lastNode to node. */
+ /* TODO */
+
+ /* 9. Set lastNode to node. */
+ last_node = node;
+ }
+
+ /*
+ * 14. Insert whatever lastNode ended up being in the previous step at
+ * the appropriate place for inserting a node, but using commonAncestor
+ * as the override target.
+ */
+ /* TODO */
+
+ /*
+ * 15. Create an element for the token for which formattingElement was
+ * created, in the HTML namespace, with furthestBlock as the intended
+ * parent.
+ */
+ /* TODO */
+
+ /*
+ * 16. Take all of the child nodes of furthestBlock and append them to
+ * the element created in the last step.
+ */
+ /* TODO */
+
+ /* 17. Append that new element to furthestBlock. */
+ /* TODO */
+
+ /*
+ * 18. Remove formattingElement from the list of active formatting
+ * elements, and insert the new element into the list of active
+ * formatting elements at the position of the aforementioned bookmark.
+ */
+ /* TODO */
+
+ /*
+ * 19. Remove formattingElement from the stack of open elements, and
+ * insert the new element into the stack of open elements immediately
+ * below the position of furthestBlock in that stack.
+ */
+ /* TODO */
+ }
+}
+
+/*
+ * emitters
+ */
+
+/*
+ * Scratch token reused by the html_emit_*() functions below: each emitter
+ * fills in the fields for its token type and then passes a pointer to this
+ * object to html_process_token().  Objects with static storage duration
+ * start out zeroed, so no explicit initializer is required.
+ */
+static html_token emittok;
+
+/*
+ * Emit a character token: mark the shared token as HTML_TOKEN_CHARACTER,
+ * store cc as its character, and pass it to html_process_token().
+ */
+void
+html_emit_char_token(struct html_page *html, short cc)
+{
+	html_token *tok = &emittok;
+
+	tok->type = HTML_TOKEN_CHARACTER;
+	tok->ch.c = cc;
+
+	html_process_token(html, tok);
+}
+
+/*
+ * Emit an end-of-file token: only the type of the shared token is set
+ * before it is passed to html_process_token().
+ */
+void
+html_emit_eof_token(struct html_page *html)
+{
+	html_token *tok = &emittok;
+
+	tok->type = HTML_TOKEN_EOF;
+
+	html_process_token(html, tok);
+}
+
+/*
+ * Emit a comment token built from comment.
+ *
+ * At most sizeof(emittok.comment.data) - 1 bytes of comment->data are
+ * copied into the shared token; a longer comment is silently truncated.
+ * The copy is always NUL-terminated, and emittok.comment.len records the
+ * number of bytes kept, not counting the terminator.  The finished token
+ * is then passed to html_process_token().
+ */
+void
+html_emit_comment(struct html_page *html, struct html_comment *comment)
+{
+	html_token *tok = &emittok;
+	size_t room, keep;
+
+	tok->type = HTML_TOKEN_COMMENT;
+
+	/* the final byte of the destination is reserved for the terminator */
+	room = sizeof(tok->comment.data) - 1;
+
+	keep = comment->len;
+	if (keep > room)
+		keep = room;
+	tok->comment.len = keep;
+
+	memcpy(tok->comment.data, comment->data, keep);
+	tok->comment.data[keep] = '\0';
+
+	html_process_token(html, tok);
+}