jcs
/detritus
/amendments
/59
html: Import (basic) HTML render parsing the output of tree builder
jcs made amendment 59 about 1 year ago
--- html.c Thu Dec 12 12:08:30 2024
+++ html.c Thu Dec 12 12:08:30 2024
@@ -0,0 +1,496 @@
+/*
+ * Copyright (c) 2024 joshua stein <jcs@jcs.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Glue for tying browser_page to the tokenizer, and then on the other end
+ * handling tags and text output by the tree builder.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "html.h"
+
+#ifdef HTML_ENABLE_DEBUGGING
+struct html_page *the_html = NULL;
+#endif
+
+/*
+ * Allocate and set up the parser state for one page.
+ *
+ * cookie is stored untouched and later passed to the output callbacks
+ * (html_output(), html_output_new_line(), html_have_title()).
+ * Returns NULL when the allocation fails.
+ */
+struct html_page *
+html_init_page(void *cookie)
+{
+	struct html_page *page;
+
+	/* the tag enum and its name table must line up; spot-check the last tag */
+	if (strcmp(html_tag_names[HTML_TAG_XMP], "xmp") != 0)
+		panic("html_tag_names is out of sync with HTML_TAGs");
+
+	page = xmalloczero(sizeof(struct html_page));
+	if (page == NULL)
+		return NULL;
+
+#ifdef HTML_ENABLE_DEBUGGING
+	the_html = page;
+#endif
+
+	page->cookie = cookie;
+	page->frameset_ok = true;
+	page->state = HTML_STATE_DATA;
+	page->mode = HTML_MODE_INITIAL;
+
+	/* NOTE(review): -1 presumably means "identifier missing" -- confirm */
+	page->new_token.doctype.system_identifier_len = -1;
+	page->new_token.doctype.public_identifier_len = -1;
+
+	return page;
+}
+
+/*
+ * Run len bytes of str through the tokenizer, one character at a time.
+ *
+ * Newlines are normalized first: a CR LF pair and a lone CR both reach
+ * html_tokenize() as a single LF.  A CR at the end of str is remembered in
+ * html->parse_last_cr, so a pair split across two calls still collapses.
+ *
+ * Returns false once html->eof is set (the rest of str is skipped), true
+ * otherwise.
+ */
+bool
+html_parse(struct html_page *html, char *str, size_t len)
+{
+	size_t pos = 0;
+	register char ch;
+
+	while (pos < len) {
+		/* https://infra.spec.whatwg.org/#normalize-newlines */
+		if (html->parse_last_cr) {
+			/*
+			 * The pending CR always turns into an LF.  An LF right
+			 * behind it is swallowed; anything else stays in place
+			 * for the next pass.
+			 */
+			html->parse_last_cr = false;
+			if (str[pos] == '\n')
+				pos++;
+			ch = '\n';
+		} else {
+			ch = str[pos++];
+			if (ch == '\r') {
+				html->parse_last_cr = true;
+				continue;
+			}
+		}
+
+		html_tokenize(html, ch);
+
+		if (html->eof) {
+			HTML_DEBUG(("\rEOF\r"));
+			break;
+		}
+	}
+
+	return html->eof ? false : true;
+}
+
+/*
+ * End of input: let the tokenizer finish up, then release the page
+ * through html_xfree().
+ */
+void
+html_page_finish(struct html_page **htmlp)
+{
+	html_tokenize_finish(*htmlp);
+	html_xfree(htmlp);
+}
+
+/*
+ * Free a page created by html_init_page() and clear the caller's pointer.
+ *
+ * htmlp is the address of the caller's own pointer.  It is NULL on return,
+ * so "!= NULL" checks made later do not see a dangling pointer and a second
+ * call is a no-op.  A NULL htmlp or *htmlp is ignored.
+ *
+ * NOTE(review): only escaped_buf and the page itself are released here;
+ * confirm the elements on the open stack are freed elsewhere.
+ */
+void
+html_xfree(struct html_page **htmlp)
+{
+	struct html_page *html;
+
+	if (htmlp == NULL || *htmlp == NULL)
+		return;
+	html = *htmlp;
+
+	if (html->escaped_buf)
+		xfree(&html->escaped_buf);
+
+	/*
+	 * Free through the caller's pointer.  Freeing a local copy would leave
+	 * *htmlp pointing at released memory.
+	 */
+	xfree(htmlp);
+	*htmlp = NULL;
+}
+
+/*
+ * Note a parse error at the current input position.  This only produces
+ * output when HTML_ENABLE_DEBUGGING is defined.
+ */
+void
+html_parse_error(struct html_page *html)
+{
+	/* input_pos is a size_t, which "%d" does not describe */
+	HTML_DEBUG((": [[PARSE ERROR at %lu]]",
+	    (unsigned long)html->input_pos));
+}
+
+#if 0
+/*
+ * Compiled out: html.h defines html_emit_token() as a macro that calls
+ * html_process_token() directly while this function is disabled.
+ */
+void
+html_emit_token(struct html_page *html, html_token *token)
+{
+ /*
+ * html_tokenize handles each byte of html and runs it through the state
+ * machine, possibly emitting a token to us here.
+ *
+ * https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
+ */
+
+ /*
+ * At some point we might collect tags and process them all at once, or
+ * maybe just keep a buffer of a few, before handing them off to the tree
+ * constructor.
+ *
+ * But for now, just feed them all to the tree constructor as soon as we get
+ * them.
+ */
+
+ html_process_token(html, token);
+}
+#endif
+
+/*
+ * Append one character of text to the current node, growing the node's
+ * buffer in HTML_TAG_TEXT_CHUNK_SIZE steps.
+ *
+ * Outside <textarea> and <pre>, tab/LF/FF/CR become a space, and a space is
+ * dropped when the buffered text already ends in one.  With no current node
+ * the character is discarded after a Debugger() break.
+ */
+void
+html_insert_character(struct html_page *html, short cc)
+{
+	struct html_element *node = html->current_node;
+	register unsigned char c = cc;
+	bool verbatim;
+
+	if (node == NULL) {
+		Debugger();
+		return;
+	}
+
+	/* room is made before deciding whether the character is kept */
+	if (node->text == NULL) {
+		node->text_size = HTML_TAG_TEXT_CHUNK_SIZE;
+		node->text = xmalloc(HTML_TAG_TEXT_CHUNK_SIZE);
+	} else if (node->text_len >= node->text_size) {
+		node->text_size += HTML_TAG_TEXT_CHUNK_SIZE;
+		node->text = xrealloc(node->text, node->text_size);
+	}
+	if (node->text == NULL)
+		panic("OOM");
+
+	/* TODO: still remove leading newlines */
+	verbatim = (node->type == HTML_TAG_TEXTAREA ||
+	    node->type == HTML_TAG_PRE);
+
+	if (!verbatim) {
+		switch (c) {
+		case '\t':
+		case '\n':
+		case '\f':
+		case '\r':
+			c = ' ';
+			break;
+		default:
+			break;
+		}
+
+		/* collapse multiple whitespaces */
+		if (c == ' ' && node->text_len &&
+		    node->text[node->text_len - 1] == ' ')
+			return;
+	}
+
+	node->text[node->text_len++] = c;
+}
+
+/*
+ * Report whether tag is one of the elements the renderer treats as a
+ * block.  The html argument is not consulted.
+ */
+bool
+html_is_block_tag(struct html_page *html, html_tag_type tag)
+{
+	bool is_block = false;
+
+	/* https://html.spec.whatwg.org/multipage/sections.html#sections */
+	switch (tag) {
+	/* sectioning */
+	case HTML_TAG_BODY:
+	case HTML_TAG_ARTICLE:
+	case HTML_TAG_SECTION:
+	case HTML_TAG_NAV:
+	case HTML_TAG_ASIDE:
+	case HTML_TAG_HEADER:
+	case HTML_TAG_FOOTER:
+	case HTML_TAG_HGROUP:
+	case HTML_TAG_ADDRESS:
+	case HTML_TAG_MAIN:
+	case HTML_TAG_SEARCH:
+	/* headings */
+	case HTML_TAG_H1:
+	case HTML_TAG_H2:
+	case HTML_TAG_H3:
+	case HTML_TAG_H4:
+	case HTML_TAG_H5:
+	case HTML_TAG_H6:
+	/* lists */
+	case HTML_TAG_OL:
+	case HTML_TAG_UL:
+	case HTML_TAG_MENU:
+	case HTML_TAG_LI:
+	case HTML_TAG_DL:
+	case HTML_TAG_DT:
+	case HTML_TAG_DD:
+	/* grouping content */
+	case HTML_TAG_P:
+	case HTML_TAG_PRE:
+	case HTML_TAG_DIV:
+	case HTML_TAG_BLOCKQUOTE:
+	case HTML_TAG_HR:
+	case HTML_TAG_FIGURE:
+	case HTML_TAG_FIGCAPTION:
+		is_block = true;
+		break;
+	default:
+		break;
+	}
+
+	return is_block;
+}
+
+/*
+ * Look up an attribute of element by name, ignoring case.
+ *
+ * On a match *ret points at the value stored inside the element (not a
+ * copy) and the value's length is returned.  A present-but-empty attribute
+ * therefore gives a non-NULL *ret with a return of 0.  When there is no
+ * such attribute *ret is NULL and 0 is returned.  The html argument is not
+ * consulted.
+ */
+long
+html_get_attribute_value(struct html_page *html, struct html_element *element,
+    char *name, char **ret)
+{
+	size_t namelen = strlen(name);
+	short n;
+
+	for (n = 0; n < element->attrs_count; n++) {
+		if ((size_t)element->attrs[n].name_len != namelen)
+			continue;
+
+		/*
+		 * The lengths already match, so compare exactly that many
+		 * bytes instead of trusting the fixed-size name buffer to be
+		 * NUL-terminated.
+		 */
+		if (strncasecmp(element->attrs[n].name, name, namelen) == 0) {
+			*ret = (char *)&element->attrs[n].val;
+			return element->attrs[n].val_len;
+		}
+	}
+
+	*ret = NULL;
+	return 0;
+}
+
+/*
+ * Render html->current_node: block and margin spacing, list markers,
+ * placeholders for <input> and <img>, and whatever text is buffered on the
+ * node.
+ *
+ * An element can be rendered more than once (el->renders counts the calls).
+ * Spacing before the element and the <input>/<img> placeholders are only
+ * produced on the first call.  When popping is set, trailing spaces are
+ * trimmed first and the closing newline/margin handling runs at the end
+ * (presumably the element is being popped off the open stack -- confirm
+ * with the tree builder).  The buffered text is consumed: text_off and
+ * text_len are reset before returning.
+ *
+ * Until a <body> is found on the open stack nothing is drawn; a <title>'s
+ * text is handed to html_have_title() instead.  A NULL current node is
+ * ignored.
+ */
+void
+html_render_current_node(struct html_page *html, bool popping)
+{
+	struct html_element *el = html->current_node;
+	struct html_element *list_parent;
+	short n, len;
+	char ol_li[10];
+	char *val;
+	bool have_height = false;
+
+	if (el == NULL)
+		return;
+
+	el->renders++;
+
+	/* trim trailing whitespace */
+	if (popping) {
+		while (el->text_len && el->text[el->text_len - 1] == ' ')
+			el->text_len--;
+	}
+
+	/* ignore non-title tags before <body> */
+	if (!html->render_in_body) {
+		for (n = 0; n < html->open_count; n++) {
+			if (html->open[n]->type == HTML_TAG_BODY) {
+				html->render_in_body = true;
+				break;
+			}
+
+			/* reached the top of the stack without finding <body> */
+			if (n == html->open_count - 1) {
+				if (el->type == HTML_TAG_TITLE)
+					html_have_title(html->cookie, html, el->text,
+					    el->text_len);
+				return;
+			}
+		}
+	}
+
+	if (el->renders == 1) {
+		/* block elements should start on a new line */
+		if (html->last_output != '\r' && html->last_output != '\0' &&
+		    html_is_block_tag(html, el->type)) {
+			HTML_DEBUG(("[block-separate:%s\\r]", html_tag_names[el->type]));
+			html_output_new_line(html->cookie, html);
+		}
+
+		/* if the element has a top margin, add more space */
+		if (el->margin_top) {
+			/* unless the last element had a bottom margin */
+			if (html->last_margin_bottom || html->last_output == '\0') {
+				HTML_DEBUG(("[margin-top-but-merging:%s]",
+				    html_tag_names[el->type]));
+			} else {
+				HTML_DEBUG(("[margin-top:%s\\r]", html_tag_names[el->type]));
+				html_output_new_line(html->cookie, html);
+			}
+			html->last_margin_bottom = 0;
+		}
+
+		html->last_margin_top = el->margin_top;
+
+		switch (el->type) {
+		case HTML_TAG_OL:
+		case HTML_TAG_UL:
+			html->render_list_depth++;
+			break;
+		case HTML_TAG_INPUT:
+			have_height = true;
+
+			html_output(html->cookie, html, "[ input type=", 13);
+
+			len = html_get_attribute_value(html, el, "type", &val);
+			if (val)
+				html_output(html->cookie, html, val, len);
+			else
+				html_output(html->cookie, html, "(none)", 6);
+			html_output(html->cookie, html, " ]", 2);
+			break;
+		case HTML_TAG_IMG:
+			have_height = true;
+			html_output(html->cookie, html, "[ img: ", 7);
+			/* show img alt text */
+			len = html_get_attribute_value(html, el, "alt", &val);
+			if (!val || !len)
+				/* try img title */
+				len = html_get_attribute_value(html, el, "title", &val);
+			if (val && len)
+				html_output(html->cookie, html, val, len);
+			else {
+				/* last resort, show img src filename */
+				len = html_get_attribute_value(html, el, "src", &val);
+				if (val && len) {
+					/*
+					 * Stop on the last '/', or at -1 when there is
+					 * none so the whole src counts as the filename.
+					 * The scan starts at the last byte of the
+					 * value, not one past it.
+					 */
+					for (n = len - 1; n >= 0; n--) {
+						if (val[n] == '/')
+							break;
+					}
+					if (n < len - 1)
+						html_output(html->cookie, html, val + n + 1,
+						    len - n - 1);
+				}
+			}
+			html_output(html->cookie, html, " ]", 2);
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* remove leading whitespace */
+	if (el->text_len &&
+	    (html->last_output == ' ' || html->last_output == '\r' ||
+	    html->last_output == '\0')) {
+		while (el->text_len && el->text[el->text_off] == ' ') {
+			el->text_off++;
+			el->text_len--;
+		}
+	}
+
+	if (html->render_list_depth) {
+		if (el->type == HTML_TAG_LI && el->renders == 1) {
+			for (n = 1; n < html->render_list_depth; n++)
+				html_output(html->cookie, html, "\t", 1);
+
+			/* nearest enclosing list decides bullet vs. number */
+			list_parent = NULL;
+			for (n = html->open_count - 1; n >= 0; n--) {
+				if (html->open[n]->type == HTML_TAG_OL ||
+				    html->open[n]->type == HTML_TAG_UL) {
+					list_parent = html->open[n];
+					break;
+				}
+			}
+
+			/*
+			 * NOTE(review): each marker below is passed with a length
+			 * of 5; confirm the literals really are 5 bytes long.
+			 */
+			if (list_parent && list_parent->type == HTML_TAG_UL) {
+				if (html->render_list_depth == 1)
+					html_output(html->cookie, html, " •\t", 5);
+				else if (html->render_list_depth == 2)
+					html_output(html->cookie, html, " o\t", 5);
+				else
+					html_output(html->cookie, html, " ◊\t", 5);
+			} else if (list_parent && list_parent->type == HTML_TAG_OL) {
+				list_parent->ol_count++;
+				len = snprintf(ol_li, sizeof(ol_li), "% 4d.\t",
+				    list_parent->ol_count);
+				html_output(html->cookie, html, ol_li, len);
+			}
+
+			html->last_output = ' ';
+			have_height = true;
+		} else if (el->text_len) {
+			/* in a list but not a direct child of <li>, what are we in? */
+			for (n = html->open_count - 1; n >= 0; n--) {
+				/*
+				 * The inner loops reuse n; that is safe only because
+				 * each is followed directly by a break.
+				 */
+				if (html->open[n]->type == HTML_TAG_OL ||
+				    html->open[n]->type == HTML_TAG_UL) {
+					/* text in root of list not in an li, indent it */
+					for (n = 0; n < html->render_list_depth; n++)
+						html_output(html->cookie, html, "\t", 1);
+					break;
+				}
+
+				if (html->open[n]->type == HTML_TAG_LI) {
+					if (html->last_output == '\r') {
+						/* text after a <br> inside an <li>, re-indent */
+						for (n = 0; n < html->render_list_depth; n++)
+							html_output(html->cookie, html, "\t", 1);
+					}
+					break;
+				}
+			}
+
+			html->last_output = ' ';
+		}
+	}
+
+	/* print inner text */
+	if (el->text_len) {
+		html_output(html->cookie, html, el->text + el->text_off,
+		    el->text_len);
+		have_height = true;
+	}
+
+	/* brrrr */
+	if (el->type == HTML_TAG_BR) {
+		HTML_DEBUG(("[br\\r]"));
+		html_output_new_line(html->cookie, html);
+		have_height = true;
+	}
+
+	/* mark this block (or its nearest parent block) as having height */
+	if (have_height) {
+		if (html_is_block_tag(html, el->type))
+			el->has_height = true;
+		else {
+			/* find parent block */
+			for (n = html->open_count - 1; n >= 0; n--) {
+				if (html_is_block_tag(html, html->open[n]->type)) {
+					html->open[n]->has_height = true;
+					break;
+				}
+			}
+		}
+	}
+
+	if (popping) {
+		/* block elements that had text (or br) get a separating newline */
+		if (el->has_height &&
+		    !(el->type == HTML_TAG_OL || el->type == HTML_TAG_UL) &&
+		    !(el->type == HTML_TAG_LI && html->last_output == '\r')) {
+			HTML_DEBUG(("[end-block:/%s\\r]", html_tag_names[el->type]));
+			html_output_new_line(html->cookie, html);
+		}
+
+		if (el->margin_bottom) {
+			/* unless the last element had a bottom margin */
+			if (!html->last_margin_bottom) {
+				HTML_DEBUG(("[margin-bottom\\r]"));
+				html_output_new_line(html->cookie, html);
+				html->last_margin_bottom = el->margin_bottom;
+			}
+		}
+
+		if (el->has_height) {
+			HTML_DEBUG(("[new-last-margin-bottom:%d]", el->margin_bottom));
+			html->last_margin_bottom = el->margin_bottom;
+		}
+
+		if (el->type == HTML_TAG_OL || el->type == HTML_TAG_UL)
+			html->render_list_depth--;
+	}
+
+	/* the buffered text has been consumed */
+	el->text_off = 0;
+	el->text_len = 0;
+}
+
+#ifdef HTML_ENABLE_DEBUGGING
+/*
+ * printf-style hook behind HTML_DEBUG(): format into a static buffer and
+ * write it through html_output() of the page recorded in the_html.
+ * Messages that do not fit the buffer are truncated.  Nothing is written
+ * while the_html is NULL or when formatting fails.
+ */
+void
+html_debug(const char *fmt, ...)
+{
+	static char buf[512];
+	va_list args;
+	long len;
+
+	/* no page to write to until html_init_page() has run */
+	if (the_html == NULL)
+		return;
+
+	va_start(args, fmt);
+	len = vsnprintf(buf, sizeof(buf), fmt, args);
+	va_end(args);
+
+	/* negative means the format failed; keep it out of a size_t */
+	if (len <= 0)
+		return;
+
+	/*
+	 * The return value is the untruncated length.  At most
+	 * sizeof(buf) - 1 bytes were stored ahead of the terminating NUL.
+	 */
+	if ((size_t)len >= sizeof(buf))
+		len = sizeof(buf) - 1;
+
+	html_output(the_html->cookie, the_html, buf, len);
+}
+#endif
\ No newline at end of file
--- html.h Wed Dec 11 11:24:31 2024
+++ html.h Wed Dec 11 11:24:31 2024
@@ -0,0 +1,648 @@
+/*
+ * Copyright (c) 2024 joshua stein <jcs@jcs.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include "stdint.h"
+
+/* external functions the caller needs to provide */
+#if 0
+extern void panic(const char *format, ...);
+extern void * xmalloc(size_t);
+extern void xfree(void *ptrptr);
+extern void * xmalloczero(size_t);
+extern void * xrealloc(void *src, size_t size);
+extern size_t strlcpy(char *dst, const char *src, size_t dsize);
+extern size_t strlcat(char *dst, const char *src, size_t dsize);
+extern short snprintf(char *s, size_t size, const char *fmt, ...);
+extern short strcasecmp(const char *s1, const char *s2);
+extern short strncasecmp(const char *s1, const char *s2, size_t n);
+#else
+#include "util.h"
+#endif
+/*
+ * Declared up front so the prototypes below refer to the file-scope type.
+ * Without it, "struct html_page" first appearing inside a parameter list is
+ * scoped to that one prototype and differs from the struct defined later.
+ */
+struct html_page;
+
+/* callbacks supplied by the embedding program */
+void html_output(void *cookie, struct html_page *html, char *str,
+ size_t len);
+void html_output_new_line(void *cookie, struct html_page *html);
+void html_debug(const char *fmt, ...);
+void html_have_title(void *cookie, struct html_page *html, char *str,
+ size_t len);
+
+/*
+ * HTML_DEBUG((fmt, ...)) -- note the double parentheses: the whole
+ * argument list is handed to html_debug() as a single macro argument.
+ * Both variants expand to one statement that needs a trailing semicolon,
+ * so the macro is safe in an unbraced if/else.
+ */
+//#define HTML_ENABLE_DEBUGGING
+#ifdef HTML_ENABLE_DEBUGGING
+extern struct html_page *the_html;
+# define HTML_DEBUG(x) do { html_debug x; } while (0)
+#else
+# define HTML_DEBUG(x) do { } while (0)
+#endif
+
+/*
+ * tunables
+ */
+
+/* capacity of struct html_page's open[] and active_formatting[] arrays */
+#define HTML_STACK_DEPTH 128
+
+/* this should in theory be the max size of an html_entity but that's huge */
+/* (capacity of struct html_page's lookahead[] buffer) */
+#define HTML_LOOKAHEAD_SIZE 10
+
+/* NOTE(review): not referenced in the code visible here -- confirm use */
+#define HTML_OUTPUT_BUF_SIZE 64
+/* html_insert_character() grows an element's text buffer by this much */
+#define HTML_TAG_TEXT_CHUNK_SIZE 512
+
+/*
+ * helpers
+ *
+ * The IS_* character tests evaluate their argument more than once, so pass
+ * a plain variable rather than an expression with side effects.
+ */
+
+#define IS_WHITESPACE(c) ((c) == '\t' || (c) == '\n' || (c) == '\f' || \
+ (c) == '\r' || (c) == ' ')
+#define IS_LOWER_ALPHA(c) ((c) >= 'a' && (c) <= 'z')
+#define IS_UPPER_ALPHA(c) ((c) >= 'A' && (c) <= 'Z')
+#define IS_ALPHA(c) (IS_LOWER_ALPHA((c)) || IS_UPPER_ALPHA((c)))
+#define IS_NUMERIC(c) (((c) >= '0' && (c) <= '9'))
+#define IS_ALPHANUMERIC(c) (IS_ALPHA((c)) || IS_NUMERIC((c)))
+#define IS_HEX_DIGIT(c) (IS_NUMERIC((c)) || ((c) >= 'a' && (c) <= 'f') || \
+ ((c) >= 'A' && (c) <= 'F'))
+
+/*
+ * NOTE(review): HTML_TAG_LAST_BLOCK is not among the html_tag_type
+ * constants declared in this header; html_is_block_tag() looks like the
+ * live check -- confirm whether this macro is still used.
+ */
+#define IS_BLOCK(tag) ((tag) < HTML_TAG_LAST_BLOCK)
+
+/*
+ * Last attribute of the tag token being built.  Expands to an expression
+ * using a variable named "html" from the surrounding scope and indexes
+ * attrs_count - 1, so the token needs at least one attribute.
+ */
+#define NEW_TOKEN_LAST_ATTR (html->new_token.tag.attrs[html->new_token.tag.attrs_count - 1])
+
+/* only works on fixed-size char arrays */
+#define STR_APPEND(field, len, ch) \
+ if ((len) < sizeof(field)) { \
+ (field)[(len)++] = (ch); \
+ (field)[(len)] = '\0'; \
+ }
+
+/*
+ * True when html->return_state is one of the three attribute-value states.
+ * Uses a variable named "html" from the surrounding scope.
+ */
+#define CONSUMED_AS_PART_OF_AN_ATTRIBUTE \
+ (html->return_state == HTML_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED || \
+ html->return_state == HTML_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED || \
+ html->return_state == HTML_STATE_ATTRIBUTE_VALUE_UNQUOTED)
+
+/* element count of a real array (not a pointer); kept if already defined */
+#ifndef nitems
+#define nitems(what) (sizeof((what)) / sizeof((what)[0]))
+#endif
+
+/* insertion mode */
+/* https://html.spec.whatwg.org/multipage/parsing.html#the-insertion-mode */
+/*
+ * html_init_page() starts every page in HTML_MODE_INITIAL.
+ * NOTE(review): html_mode_names[] is presumably indexed by these values;
+ * keep the two in step -- confirm against its definition.
+ */
+extern const char *html_mode_names[];
+typedef enum {
+ HTML_MODE_NONE = 0,
+ HTML_MODE_INITIAL,
+ HTML_MODE_BEFORE_HTML,
+ HTML_MODE_BEFORE_HEAD,
+ HTML_MODE_IN_HEAD,
+ HTML_MODE_IN_HEAD_NOSCRIPT,
+ HTML_MODE_AFTER_HEAD,
+ HTML_MODE_IN_BODY,
+ HTML_MODE_TEXT,
+ HTML_MODE_IN_TABLE,
+ HTML_MODE_IN_TABLE_TEXT,
+ HTML_MODE_IN_CAPTION,
+ HTML_MODE_IN_COLUMN_GROUP,
+ HTML_MODE_IN_TABLE_BODY,
+ HTML_MODE_IN_ROW,
+ HTML_MODE_IN_CELL,
+ HTML_MODE_IN_SELECT,
+ HTML_MODE_IN_SELECT_IN_TABLE,
+ HTML_MODE_IN_TEMPLATE,
+ HTML_MODE_AFTER_BODY,
+ HTML_MODE_IN_FRAMESET,
+ HTML_MODE_AFTER_FRAMESET,
+ HTML_MODE_AFTER_AFTER_BODY,
+ HTML_MODE_AFTER_AFTER_FRAMESET
+} html_mode;
+
+/* tokenization state */
+/* https://html.spec.whatwg.org/multipage/parsing.html#tokenization */
+/*
+ * html_init_page() starts the tokenizer in HTML_STATE_DATA.  struct
+ * html_page keeps one of these in both its state and return_state fields.
+ */
+extern const char *html_state_names[];
+typedef enum {
+ HTML_STATE_NONE = 0,
+ HTML_STATE_DATA,
+ HTML_STATE_RCDATA,
+ HTML_STATE_RAWTEXT,
+ HTML_STATE_SCRIPT_DATA,
+ HTML_STATE_PLAINTEXT,
+ HTML_STATE_TAG_OPEN,
+ HTML_STATE_END_TAG_OPEN,
+ HTML_STATE_TAG_NAME,
+ HTML_STATE_RCDATA_LESS_THAN_SIGN,
+ HTML_STATE_RCDATA_END_TAG_OPEN,
+ HTML_STATE_RCDATA_END_TAG_NAME,
+ HTML_STATE_RAWTEXT_LESS_THAN_SIGN,
+ HTML_STATE_RAWTEXT_END_TAG_OPEN,
+ HTML_STATE_RAWTEXT_END_TAG_NAME,
+ HTML_STATE_SCRIPT_DATA_LESS_THAN_SIGN,
+ HTML_STATE_SCRIPT_DATA_END_TAG_OPEN,
+ HTML_STATE_SCRIPT_DATA_END_TAG_NAME,
+ HTML_STATE_SCRIPT_DATA_ESCAPE_START,
+ HTML_STATE_SCRIPT_DATA_ESCAPE_START_DASH,
+ HTML_STATE_SCRIPT_DATA_ESCAPED,
+ HTML_STATE_SCRIPT_DATA_ESCAPED_DASH,
+ HTML_STATE_SCRIPT_DATA_ESCAPED_DASH_DASH,
+ HTML_STATE_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
+ HTML_STATE_SCRIPT_DATA_ESCAPED_END_TAG_OPEN,
+ HTML_STATE_SCRIPT_DATA_ESCAPED_END_TAG_NAME,
+ HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPE_START,
+ HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED,
+ HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH,
+ HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH,
+ HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
+ HTML_STATE_SCRIPT_DATA_DOUBLE_ESCAPE_END,
+ HTML_STATE_BEFORE_ATTRIBUTE_NAME,
+ HTML_STATE_ATTRIBUTE_NAME,
+ HTML_STATE_AFTER_ATTRIBUTE_NAME,
+ HTML_STATE_BEFORE_ATTRIBUTE_VALUE,
+ HTML_STATE_ATTRIBUTE_VALUE_DOUBLE_QUOTED,
+ HTML_STATE_ATTRIBUTE_VALUE_SINGLE_QUOTED,
+ HTML_STATE_ATTRIBUTE_VALUE_UNQUOTED,
+ HTML_STATE_AFTER_ATTRIBUTE_VALUE_QUOTED,
+ HTML_STATE_SELF_CLOSING_START_TAG,
+ HTML_STATE_BOGUS_COMMENT,
+ HTML_STATE_MARKUP_DECLARATION_OPEN,
+ HTML_STATE_COMMENT_START,
+ HTML_STATE_COMMENT_START_DASH,
+ HTML_STATE_COMMENT,
+ HTML_STATE_COMMENT_LESS_THAN_SIGN,
+ HTML_STATE_COMMENT_LESS_THAN_SIGN_BANG,
+ HTML_STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH,
+ HTML_STATE_COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH,
+ HTML_STATE_COMMENT_END_DASH,
+ HTML_STATE_COMMENT_END,
+ HTML_STATE_COMMENT_END_BANG,
+ HTML_STATE_DOCTYPE,
+ HTML_STATE_BEFORE_DOCTYPE_NAME,
+ HTML_STATE_DOCTYPE_NAME,
+ HTML_STATE_AFTER_DOCTYPE_NAME,
+ HTML_STATE_AFTER_DOCTYPE_PUBLIC_KEYWORD,
+ HTML_STATE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER,
+ HTML_STATE_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED,
+ HTML_STATE_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED,
+ HTML_STATE_AFTER_DOCTYPE_PUBLIC_IDENTIFIER,
+ HTML_STATE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
+ HTML_STATE_AFTER_DOCTYPE_SYSTEM_KEYWORD,
+ HTML_STATE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER,
+ HTML_STATE_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
+ HTML_STATE_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
+ HTML_STATE_AFTER_DOCTYPE_SYSTEM_IDENTIFIER,
+ HTML_STATE_BOGUS_DOCTYPE,
+ HTML_STATE_CDATA_SECTION,
+ HTML_STATE_CDATA_SECTION_BRACKET,
+ HTML_STATE_CDATA_SECTION_END,
+ HTML_STATE_CHARACTER_REFERENCE,
+ HTML_STATE_NAMED_CHARACTER_REFERENCE,
+ HTML_STATE_AMBIGUOUS_AMPERSAND,
+ HTML_STATE_NUMERIC_CHARACTER_REFERENCE,
+ HTML_STATE_HEXADECIMAL_CHARACTER_REFERENCE_START,
+ HTML_STATE_DECIMAL_CHARACTER_REFERENCE_START,
+ HTML_STATE_HEXADECIMAL_CHARACTER_REFERENCE,
+ HTML_STATE_DECIMAL_CHARACTER_REFERENCE,
+ HTML_STATE_NUMERIC_CHARACTER_REFERENCE_END
+} html_state;
+
+/* tokenization output */
+/* https://html.spec.whatwg.org/multipage/parsing.html#tokenization */
+/*
+ * Kind of token held in a union html_token.  Values start at 1, so a
+ * zeroed token has no valid type.
+ */
+#ifdef HTML_ENABLE_DEBUGGING
+extern const char *html_token_names[];
+#endif
+typedef enum {
+ HTML_TOKEN_DOCTYPE = 1,
+ HTML_TOKEN_START_TAG,
+ HTML_TOKEN_END_TAG,
+ HTML_TOKEN_COMMENT,
+ HTML_TOKEN_CHARACTER,
+ HTML_TOKEN_EOF
+} html_token_type;
+
+/*
+ * token processing results: returned by
+ * html_process_token_in_foreign_content() (html_process_token() itself is
+ * declared void in this header)
+ */
+typedef enum {
+ HTML_TOKEN_REPROCESS = 1,
+ HTML_TOKEN_PROCESSED
+} html_token_act;
+
+/* parse errors */
+/* https://html.spec.whatwg.org/multipage/parsing.html#parse-errors */
+/*
+ * HTML_ERROR_NONE is 0.  struct html_page carries one of these in its
+ * error field.  html_error_strings[] only exists in debugging builds.
+ */
+#ifdef HTML_ENABLE_DEBUGGING
+extern const char *html_error_strings[];
+#endif
+typedef enum {
+ HTML_ERROR_NONE,
+ HTML_ERROR_ABRUPT_CLOSING_OF_EMPTY_COMMENT,
+ HTML_ERROR_ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER,
+ HTML_ERROR_ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER,
+ HTML_ERROR_ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
+ HTML_ERROR_CDATA_IN_HTML_CONTENT,
+ HTML_ERROR_CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE,
+ HTML_ERROR_CONTROL_CHARACTER_IN_INPUT_STREAM,
+ HTML_ERROR_CONTROL_CHARACTER_REFERENCE,
+ HTML_ERROR_DUPLICATE_ATTRIBUTE,
+ HTML_ERROR_END_TAG_WITH_ATTRIBUTES,
+ HTML_ERROR_END_TAG_WITH_TRAILING_SOLIDUS,
+ HTML_ERROR_EOF_BEFORE_TAG_NAME,
+ HTML_ERROR_EOF_IN_CDATA,
+ HTML_ERROR_EOF_IN_COMMENT,
+ HTML_ERROR_EOF_IN_DOCTYPE,
+ HTML_ERROR_EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT,
+ HTML_ERROR_EOF_IN_TAG,
+ HTML_ERROR_INCORRECTLY_CLOSED_COMMENT,
+ HTML_ERROR_INCORRECTLY_OPENED_COMMENT,
+ HTML_ERROR_INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME,
+ HTML_ERROR_INVALID_FIRST_CHARACTER_OF_TAG_NAME,
+ HTML_ERROR_MISSING_ATTRIBUTE_VALUE,
+ HTML_ERROR_MISSING_DOCTYPE_NAME,
+ HTML_ERROR_MISSING_DOCTYPE_PUBLIC_IDENTIFIER,
+ HTML_ERROR_MISSING_DOCTYPE_SYSTEM_IDENTIFIER,
+ HTML_ERROR_MISSING_END_TAG_NAME,
+ HTML_ERROR_MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER,
+ HTML_ERROR_MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER,
+ HTML_ERROR_MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
+ HTML_ERROR_MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD,
+ HTML_ERROR_MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD,
+ HTML_ERROR_MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME,
+ HTML_ERROR_MISSING_WHITESPACE_BETWEEN_ATTRIBUTES,
+ HTML_ERROR_MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
+ HTML_ERROR_NESTED_COMMENT,
+ HTML_ERROR_NONCHARACTER_CHARACTER_REFERENCE,
+ HTML_ERROR_NONCHARACTER_IN_INPUT_STREAM,
+ HTML_ERROR_NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS,
+ HTML_ERROR_NULL_CHARACTER_REFERENCE,
+ HTML_ERROR_SURROGATE_CHARACTER_REFERENCE,
+ HTML_ERROR_SURROGATE_IN_INPUT_STREAM,
+ HTML_ERROR_UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER,
+ HTML_ERROR_UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
+ HTML_ERROR_UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
+ HTML_ERROR_UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
+ HTML_ERROR_UNEXPECTED_NULL_CHARACTER,
+ HTML_ERROR_UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME,
+ HTML_ERROR_UNEXPECTED_SOLIDUS_IN_TAG,
+ HTML_ERROR_UNKNOWN_NAMED_CHARACTER_REFERENCE
+} html_error;
+
+/* keep this in same order as html_tag_names[] */
+/*
+ * Values start at 1 and HTML_TAG_MAX_ID is one past the last tag.
+ * html_init_page() checks that html_tag_names[HTML_TAG_XMP] is "xmp" to
+ * catch the two lists drifting apart.  The list is not strictly
+ * alphabetical (SUB, SUP, SUMMARY), so do not re-sort one list alone.
+ */
+extern const char *html_tag_names[];
+typedef enum {
+ HTML_TAG_A = 1,
+ HTML_TAG_ADDRESS,
+ HTML_TAG_APPLET,
+ HTML_TAG_AREA,
+ HTML_TAG_ARTICLE,
+ HTML_TAG_ASIDE,
+ HTML_TAG_B,
+ HTML_TAG_BASE,
+ HTML_TAG_BASEFONT,
+ HTML_TAG_BGSOUND,
+ HTML_TAG_BIG,
+ HTML_TAG_BLOCKQUOTE,
+ HTML_TAG_BODY,
+ HTML_TAG_BR,
+ HTML_TAG_BUTTON,
+ HTML_TAG_CAPTION,
+ HTML_TAG_CENTER,
+ HTML_TAG_CITE,
+ HTML_TAG_CODE,
+ HTML_TAG_COL,
+ HTML_TAG_COLGROUP,
+ HTML_TAG_DD,
+ HTML_TAG_DETAILS,
+ HTML_TAG_DFN,
+ HTML_TAG_DIALOG,
+ HTML_TAG_DIR,
+ HTML_TAG_DIV,
+ HTML_TAG_DL,
+ HTML_TAG_DT,
+ HTML_TAG_EM,
+ HTML_TAG_EMBED,
+ HTML_TAG_FIELDSET,
+ HTML_TAG_FIGCAPTION,
+ HTML_TAG_FIGURE,
+ HTML_TAG_FONT,
+ HTML_TAG_FOOTER,
+ HTML_TAG_FORM,
+ HTML_TAG_FRAME,
+ HTML_TAG_FRAMESET,
+ HTML_TAG_H1,
+ HTML_TAG_H2,
+ HTML_TAG_H3,
+ HTML_TAG_H4,
+ HTML_TAG_H5,
+ HTML_TAG_H6,
+ HTML_TAG_HEAD,
+ HTML_TAG_HEADER,
+ HTML_TAG_HGROUP,
+ HTML_TAG_HR,
+ HTML_TAG_HTML,
+ HTML_TAG_I,
+ HTML_TAG_IFRAME,
+ HTML_TAG_IMAGE,
+ HTML_TAG_IMG,
+ HTML_TAG_INPUT,
+ HTML_TAG_INS,
+ HTML_TAG_KBD,
+ HTML_TAG_KEYGEN,
+ HTML_TAG_LI,
+ HTML_TAG_LINK,
+ HTML_TAG_LISTING,
+ HTML_TAG_MAIN,
+ HTML_TAG_MARQUEE,
+ HTML_TAG_MATH,
+ HTML_TAG_MENU,
+ HTML_TAG_META,
+ HTML_TAG_NAV,
+ HTML_TAG_NOBR,
+ HTML_TAG_NOEMBED,
+ HTML_TAG_NOFRAMES,
+ HTML_TAG_NOSCRIPT,
+ HTML_TAG_OBJECT,
+ HTML_TAG_OL,
+ HTML_TAG_OPTGROUP,
+ HTML_TAG_OPTION,
+ HTML_TAG_P,
+ HTML_TAG_PARAM,
+ HTML_TAG_PLAINTEXT,
+ HTML_TAG_PRE,
+ HTML_TAG_RB,
+ HTML_TAG_RP,
+ HTML_TAG_RT,
+ HTML_TAG_RTC,
+ HTML_TAG_RUBY,
+ HTML_TAG_S,
+ HTML_TAG_SAMP,
+ HTML_TAG_SCRIPT,
+ HTML_TAG_SEARCH,
+ HTML_TAG_SECTION,
+ HTML_TAG_SELECT,
+ HTML_TAG_SMALL,
+ HTML_TAG_SOURCE,
+ HTML_TAG_SPAN,
+ HTML_TAG_STRIKE,
+ HTML_TAG_STRONG,
+ HTML_TAG_STYLE,
+ HTML_TAG_SUB,
+ HTML_TAG_SUP,
+ HTML_TAG_SUMMARY,
+ HTML_TAG_SVG,
+ HTML_TAG_TABLE,
+ HTML_TAG_TBODY,
+ HTML_TAG_TD,
+ HTML_TAG_TEMPLATE,
+ HTML_TAG_TEXTAREA,
+ HTML_TAG_TFOOT,
+ HTML_TAG_TH,
+ HTML_TAG_THEAD,
+ HTML_TAG_TITLE,
+ HTML_TAG_TR,
+ HTML_TAG_TRACK,
+ HTML_TAG_TT,
+ HTML_TAG_U,
+ HTML_TAG_UL,
+ HTML_TAG_VAR,
+ HTML_TAG_WBR,
+ HTML_TAG_XMP,
+
+ HTML_TAG_MAX_ID
+} html_tag_type;
+
+/*
+ * NOTE(review): these look like the "has an element in ... scope" variants
+ * of the HTML parsing spec; no user is visible here -- confirm in
+ * html_tree.c.
+ */
+typedef enum {
+ HTML_SCOPE_DEFAULT,
+ HTML_SCOPE_LIST_ITEM,
+ HTML_SCOPE_BUTTON,
+ HTML_SCOPE_TABLE,
+ HTML_SCOPE_SELECT
+} html_scope;
+
+/*
+ * Namespace recorded in the ns field of struct html_tag and struct
+ * html_element.  HTML_NAMESPACE_HTML is 0, so zeroed structures default to
+ * it.
+ */
+typedef enum {
+ HTML_NAMESPACE_HTML,
+ HTML_NAMESPACE_MATHML,
+ HTML_NAMESPACE_SVG,
+ HTML_NAMESPACE_XLINK,
+ HTML_NAMESPACE_XML,
+ HTML_NAMESPACE_XMLNS
+} html_namespace;
+
+/*
+ * One entry of the html_entities[] table: an entity string paired with a
+ * code point.  NOTE(review): presumably the named character references of
+ * the HTML spec -- confirm how the table is terminated and searched.
+ */
+typedef struct {
+ const char *entity;
+ uint32_t codepoint;
+} html_entity;
+
+extern const html_entity html_entities[];
+
+/*
+ * One attribute, stored inline in fixed-size buffers.
+ * html_get_attribute_value() matches on name/name_len and hands back a
+ * pointer to val together with val_len.
+ */
+struct html_attr {
+ char name[24];
+ short name_len;
+ char val[128];
+ short val_len;
+};
+
+/*
+ * Start/end tag token as built by the tokenizer; the "tag" member of
+ * union html_token.
+ */
+struct html_tag {
+	/*
+	 * this must be first: it overlays union html_token's "type" member,
+	 * so it has to be an html_token_type like in every other variant
+	 */
+	html_token_type token_type;
+
+	/* which element this tag names */
+	html_tag_type type;
+	html_namespace ns;
+	char name[16];
+	short name_len;
+	/* TODO: make this dynamic so it's not so many KB on the stack */
+	struct html_attr attrs[8];
+	short attrs_count;
+	bool emitted;
+	bool self_closing;
+	bool self_closing_acked;
+};
+
+/*
+ * An element on the stack of open elements, plus the text and layout
+ * state the renderer keeps for it.
+ */
+struct html_element {
+ html_tag_type type;
+
+ html_namespace ns;
+ char name[16];
+ short name_len;
+ struct html_attr attrs[8];
+ short attrs_count;
+
+ /*
+ * Pending text: html_insert_character() appends to text (capacity
+ * text_size) and html_render_current_node() outputs text_len bytes
+ * starting at text_off, then resets text_off and text_len to 0.
+ */
+ char *text;
+ size_t text_len;
+ size_t text_off;
+ size_t text_size;
+ /* set by the renderer once this block has produced output */
+ bool has_height;
+ short margin_top;
+ short margin_bottom;
+ /* on an <ol>: incremented for each <li> rendered under it */
+ short ol_count;
+ /* number of html_render_current_node() calls made for this element */
+ short renders;
+
+ /* NOTE(review): refs/next_need_free are managed outside this view */
+ short refs;
+ struct html_element *next_need_free;
+};
+
+/* Comment token; only the first few bytes of comment data are kept. */
+struct html_comment {
+ /* this must be first */
+ html_token_type token_type;
+
+ char data[8];
+ short len;
+};
+
+/* Character token carrying a single char. */
+struct html_char {
+ /* this must be first */
+ html_token_type token_type;
+
+ char c;
+};
+
+/*
+ * DOCTYPE token.  html_init_page() presets both identifier lengths to -1.
+ */
+struct html_doctype {
+ /* this must be first */
+ /* (the slot the other token variants call token_type) */
+ html_token_type _pad;
+
+ char name[32];
+ short name_len;
+ char public_identifier[32];
+ short public_identifier_len;
+ char system_identifier[32];
+ short system_identifier_len;
+ bool system_identifier_found;
+ bool force_quirks;
+};
+
+/*
+ * THINK C doesn't support anonymous unions so we can't have a
+ * struct html_token with tag/doctype/comment at the root
+ *
+ * Read "type" to find out which of the other members is the live one.
+ */
+union html_token {
+ /* every other type has html_token_type as its first member */
+ html_token_type type;
+
+ struct html_tag tag;
+ struct html_doctype doctype;
+ struct html_comment comment;
+ struct html_char ch;
+};
+typedef union html_token html_token;
+
+/*
+ * One entry of struct html_page's active_formatting[] list.
+ * NOTE(review): presumably either a marker or an element entry, per the
+ * spec's list of active formatting elements -- confirm in html_tree.c.
+ */
+struct html_formatting {
+ bool marker;
+ struct html_element *element;
+ html_token_type token;
+};
+
+/*
+ * All tokenizer, tree-builder and renderer state for one page.  Created
+ * zero-filled by html_init_page() and released by html_xfree().
+ */
+struct html_page {
+ /* caller's value from html_init_page(), passed back to the callbacks */
+ void *cookie;
+
+ size_t input_pos;
+ /* once set, html_parse() stops consuming input and returns false */
+ bool eof;
+
+ /* insertion mode */
+ html_mode mode;
+ html_mode original_mode;
+
+ html_state state;
+ html_state return_state;
+
+ html_error error;
+
+ /* freed by html_xfree() when non-NULL */
+ char *escaped_buf;
+ size_t escaped_size;
+
+ /* html_parse() saw a CR and has not yet emitted its LF */
+ bool parse_last_cr;
+ bool frameset_ok;
+ bool parser_cannot_change_mode;
+ bool foster_parenting;
+ bool quirks_mode;
+
+ /* rendering */
+ /* set once <body> has been found on the open stack */
+ bool render_in_body;
+ /* nesting depth of <ol>/<ul> currently being rendered */
+ short render_list_depth;
+ char last_output;
+ /* NOTE(review): declared bool but assigned the short margin values */
+ bool last_margin_top;
+ bool last_margin_bottom;
+
+ /* configurables */
+ bool ignore_script_data;
+ bool ignore_comment_data;
+ bool scripting;
+
+ /* if the next character token should be skipped if it's \n */
+ bool skip_newline_char_token;
+
+ /* "stack of open elements" */
+ struct html_element *open[HTML_STACK_DEPTH];
+ short open_count;
+ struct html_element *current_node;
+ struct html_element *need_free_list;
+ struct html_element *need_free_tail;
+
+ /* https://html.spec.whatwg.org/multipage/parsing.html#the-list-of-active-formatting-elements */
+ struct html_formatting active_formatting[HTML_STACK_DEPTH];
+ short active_formatting_count;
+
+ /* https://html.spec.whatwg.org/multipage/parsing.html#the-element-pointers */
+ struct html_element *head;
+ struct html_element *form;
+
+ /* token under construction by the tokenizer */
+ union html_token new_token;
+
+ /* we'll queue some characters up before actually parsing */
+ char lookahead[HTML_LOOKAHEAD_SIZE];
+ unsigned char lookahead_len;
+
+ /* some tokens need a temporary buffer to store text */
+ char tmp[128];
+ unsigned char tmp_len;
+};
+
+#define HTML_REPLACEMENT_CHARACTER 0xff
+
+void html_output(void *cookie, struct html_page *html, char *str,
+ size_t len);
+void html_debug(const char *fmt, ...);
+void html_have_title(void *cookie, struct html_page *html, char *str,
+ size_t len);
+
+/* html.c */
+struct html_page * html_init_page(void *cookie);
+void html_page_finish(struct html_page **htmlp);
+void html_xfree(struct html_page **htmlp);
+bool html_parse(struct html_page *html, char *str, size_t len);
+void html_insert_character(struct html_page *html, short cc);
+bool html_is_block_tag(struct html_page *html, html_tag_type tag);
+long html_get_attribute_value(struct html_page *html,
+ struct html_element *element, char *name, char **ret);
+void html_render_current_node(struct html_page *html, bool popping);
+void html_parse_error(struct html_page *html);
+void html_debug(const char *fmt, ...);
+#if 0
+void html_emit_token(struct html_page *html, html_token *token);
+#else
+#define html_emit_token(a, b) html_process_token(a, b)
+#endif
+void html_buffer_output(struct html_page *html, char *str, size_t len);
+void html_flush_output_buffer(struct html_page *html);
+
+/* html_tokenize.c */
+void html_tokenize(struct html_page *html, short cc);
+void html_prep_new_token(struct html_page *html, html_token_type token_type);
+struct html_attr * html_prep_new_attribute(struct html_page *html,
+ struct html_tag *tag);
+void html_tokenize_finish(struct html_page *html);
+html_token_act html_process_token_in_foreign_content(struct html_page *html,
+ html_token *token);
+
+/* html_tree.c */
+void html_process_token(struct html_page *html, html_token *token);
+void html_append_comment(struct html_page *html, struct html_comment *comment);
+void html_stop_parsing(struct html_page *html);
+char * html_escape_string(struct html_page *html, char *str, size_t *len,
+ bool attribute_mode);
+void html_emit_char_token(struct html_page *html, short cc);
+void html_emit_eof_token(struct html_page *html);
+void html_emit_comment(struct html_page *html, struct html_comment *comment);
--- http.c Thu Nov 21 16:27:58 2024
+++ http.c Thu Dec 12 21:38:17 2024
@@ -19,6 +19,7 @@
#include <string.h>
#include "detritus.h"
+#include "html.h"
#define HTTP_REQUEST_BUF_SIZE 512
@@ -28,8 +29,9 @@ enum {
PARSE_STATE_DOWNLOAD
};
-extern bool html_print(struct page *page);
-extern void html_free(struct page *page);
+/*
+ * NOTE(review): not referenced in the visible hunks; html_parse_page()
+ * stores the struct html_page pointer directly in page->handler_cookie --
+ * confirm whether this wrapper is still needed.
+ */
+struct http_page {
+ struct html_page *html;
+};
bool http_accept_uri(struct URI *uri);
bool http_request_init(page_handle pageh);
@@ -37,6 +39,7 @@ bool http_process(page_handle pageh);
void http_reset(page_handle pageh);
void http_free(page_handle pageh);
+bool html_parse_page(page_handle pageh);
static void print_plaintext(struct page *page);
struct page_handler http_handler = {
@@ -146,6 +149,7 @@ http_process(page_handle pageh)
strncasecmp(page->content_type, "text/html", 9) == 0) {
page->parse_state = PARSE_STATE_BODY;
browser_commit_to_loading_page(page->browser);
+ TVTabStop(page->browser->output_tv, 28);
} else {
page->parse_state = PARSE_STATE_DOWNLOAD;
@@ -172,13 +176,48 @@ http_process(page_handle pageh)
return true;
if (strncasecmp(page->content_type, "text/html", 9) == 0) {
- html_print(page);
+ html_parse_page(pageh);
return PAGE_CAN_READ_MORE(page);
}
return page_print_plaintext(pageh);
}
+/*
+ * Feed any not-yet-consumed page content to the HTML parser.
+ *
+ * The parser is created on first use and kept in page->handler_cookie,
+ * with script and comment data ignored.
+ *
+ * Returns true when content was parsed or more may still arrive, and
+ * false when the parser could not be allocated or when there is nothing
+ * left to read and the parser has been told to finish.
+ */
+bool
+html_parse_page(page_handle pageh)
+{
+	struct page *page = *pageh;
+	struct html_page *html = (struct html_page *)page->handler_cookie;
+	size_t pending;
+
+	if (html == NULL) {
+		/* first call for this page: set up a parser */
+		html = html_init_page(pageh);
+		if (html == NULL) {
+			warn("Out of memory");
+			return false;
+		}
+		html->ignore_script_data = true;
+		html->ignore_comment_data = true;
+		page->handler_cookie = html;
+	}
+
+	pending = page->content_len - page->content_pos;
+	if (pending == 0) {
+		if (PAGE_CAN_READ_MORE(page))
+			return true;
+
+		/*
+		 * NOTE(review): only the local pointer is handed to
+		 * html_page_finish; page->handler_cookie keeps its old
+		 * value -- confirm html_page_finish does not free it.
+		 */
+		html_page_finish(&html);
+		return false;
+	}
+
+	html_parse(html, page->content + page->content_pos, pending);
+	TVUpdateScrollbar(page->browser->output_tv,
+	    page->browser->output_tv_scroller);
+	page->content_pos += pending;
+	return true;
+}
+
+
void
http_reset(page_handle pageh)
{
@@ -187,6 +226,9 @@ http_reset(page_handle pageh)
/* restart at body */
page->parse_state = PARSE_STATE_BODY;
page->content_pos = page->header_len;
+
+ if (page->handler_cookie != NULL)
+ html_xfree((struct html_page **)&page->handler_cookie);
}
void
@@ -194,6 +236,154 @@ http_free(page_handle pageh)
{
struct page *page = *pageh;
- if (page->handler_cookie)
- html_free(page);
-}
+ if (page->handler_cookie != NULL)
+ html_xfree((struct html_page **)&page->handler_cookie);
+}
+
+/*
+ * Text output callback: append len bytes of str to the browser's output
+ * text view, styled according to the parser's currently open elements.
+ *
+ * cookie is dereferenced as a page_handle.  Starting from Geneva 10
+ * plain, html->open[] is walked in array order: style bits (bold,
+ * italic, underline) accumulate, while a font or size set by a later
+ * entry replaces one set by an earlier entry.  The final size is never
+ * allowed below 8.
+ *
+ * Panics if TVAppend() fails.  html->last_output is set to the last byte
+ * of str, and is left unchanged when len is 0.
+ */
+void
+html_output(void *cookie, struct html_page *html, char *str, size_t len)
+{
+	struct page *page = *((page_handle)cookie);
+	struct TVStyle style = { 0 };
+	short n;
+
+	style.font = geneva;
+	style.size = 10;
+	style.style = 0;
+
+	for (n = 0; n < html->open_count; n++) {
+		switch (html->open[n]->type) {
+		case HTML_TAG_A:
+		case HTML_TAG_INS:
+		case HTML_TAG_U:
+			style.style |= underline;
+			break;
+		case HTML_TAG_ADDRESS:
+		case HTML_TAG_CITE:
+		case HTML_TAG_DFN:
+		case HTML_TAG_EM:
+		case HTML_TAG_I:
+		case HTML_TAG_VAR:
+			style.style |= italic;
+			break;
+		case HTML_TAG_B:
+		case HTML_TAG_STRONG:
+		case HTML_TAG_TH:
+			style.style |= bold | condense;
+			break;
+		case HTML_TAG_CODE:
+		case HTML_TAG_KBD:
+		case HTML_TAG_PRE:
+		case HTML_TAG_SAMP:
+			style.font = monaco;
+			style.size = 9;
+			break;
+		case HTML_TAG_H1:
+			/* 2em */
+			style.size = 16;
+			style.style |= bold | condense;
+			break;
+		case HTML_TAG_H2:
+			/* 1.5em */
+			style.size = 14;
+			style.style |= bold | condense;
+			break;
+		case HTML_TAG_H3:
+			/* 1.17em */
+			style.size = 12;
+			style.style |= bold | condense;
+			break;
+		case HTML_TAG_H4:
+			/* 1em */
+			style.size = 10;
+			style.style |= bold | condense;
+			break;
+		case HTML_TAG_H5:
+		case HTML_TAG_H6:
+			/* 0.83em and 0.67em both end up at the 8 point floor */
+			style.size = 8;
+			style.style |= bold | condense;
+			break;
+		case HTML_TAG_S:
+		case HTML_TAG_STRIKE:
+			/* TODO: line-through */
+			break;
+		case HTML_TAG_SMALL:
+		case HTML_TAG_SUP:
+			style.size -= 2;
+			break;
+		default:
+			/* every other element leaves the style alone */
+			break;
+		}
+	}
+
+	if (style.size < 8)
+		style.size = 8;
+
+	if (!TVAppend(page->browser->output_tv, &style, str, len))
+		panic("out of memory in TVAppend");
+
+	/* an empty write has no last byte; str[len - 1] would underrun */
+	if (len > 0)
+		html->last_output = str[len - 1];
+}
+
+/*
+ * Line break callback: append a single carriage return to the browser's
+ * output text view in the base style (Geneva 10, plain) and record it as
+ * the last character output.
+ *
+ * cookie is dereferenced as a page_handle.  Panics if TVAppend() fails.
+ */
+void
+html_output_new_line(void *cookie, struct html_page *html)
+{
+	page_handle pageh = (page_handle)cookie;
+	struct page *page = *pageh;
+	struct TVStyle plain = { 0 };
+
+	plain.style = 0;
+	plain.size = 10;
+	plain.font = geneva;
+
+	if (!TVAppend(page->browser->output_tv, &plain, "\r", 1))
+		panic("out of memory in TVAppend");
+
+	html->last_output = '\r';
+}
+
+/*
+ * Title callback: set the browser window's title from the len bytes at
+ * str.
+ *
+ * cookie is dereferenced as a page_handle.  The text is copied into a
+ * Str255 (length byte followed by the characters), so at most 255 bytes
+ * are used and longer titles are truncated.
+ */
+void
+html_have_title(void *cookie, struct html_page *html, char *str, size_t len)
+{
+	Str255 pstr;
+	struct page *page = *((page_handle)cookie);
+	short plen;
+
+	plen = MIN(len, 255);
+	/* copy the clamped length; copying len bytes could overrun pstr */
+	memcpy((char *)pstr + 1, str, plen);
+	pstr[0] = (unsigned char)plen;
+	SetWTitle(page->browser->win, pstr);
+}
\ No newline at end of file