jcs
/detritus
/amendments
/68
html: Put all of this behind HTML_ENABLE
jcs made amendment 68 about 1 year ago
--- html.c Fri Dec 20 20:16:04 2024
+++ html.c Tue Mar 4 15:04:26 2025
@@ -25,6 +25,8 @@
#include "html.h"
+#ifdef HTML_ENABLE
+
#ifdef HTML_ENABLE_DEBUGGING
struct html_page *the_html = NULL;
#endif
@@ -487,4 +489,6 @@ html_debug(const char *fmt, ...)
html_output(the_html->cookie, the_html, buf, len);
}
-#endif
\ No newline at end of file
+#endif
+
+#endif /* HTML_ENABLE */
\ No newline at end of file
--- html.h Fri Dec 20 23:47:01 2024
+++ html.h Tue Mar 4 15:04:09 2025
@@ -22,6 +22,9 @@
#include "util.h"
+//#define HTML_ENABLE
+
+#ifdef HTML_ENABLE
void html_output(void *cookie, struct html_page *html, char *str,
size_t len);
void html_output_margin(void *cookie, struct html_page *html);
@@ -60,11 +63,35 @@ extern struct html_page *the_html;
#define IS_LOWER_ALPHA(c) ((c) >= 'a' && (c) <= 'z')
#define IS_UPPER_ALPHA(c) ((c) >= 'A' && (c) <= 'Z')
#define IS_ALPHA(c) (IS_LOWER_ALPHA((c)) || IS_UPPER_ALPHA((c)))
-#define IS_NUMERIC(c) (((c) >= '0' && (c) <= '9'))
-#define IS_ALPHANUMERIC(c) (IS_ALPHA((c)) || IS_NUMERIC((c)))
-#define IS_HEX_DIGIT(c) (IS_NUMERIC((c)) || ((c) >= 'a' && (c) <= 'f') || \
- ((c) >= 'A' && (c) <= 'F'))
-
+#define IS_DIGIT(c) (((c) >= '0' && (c) <= '9'))
+#define IS_ALPHANUMERIC(c) (IS_ALPHA((c)) || IS_DIGIT((c)))
+#define IS_LOWER_HEX_DIGIT(c) ((c) >= 'a' && (c) <= 'f')
+#define IS_UPPER_HEX_DIGIT(c) ((c) >= 'A' && (c) <= 'F')
+#define IS_HEX_DIGIT(c) (IS_DIGIT(c) || IS_LOWER_HEX_DIGIT(c) || IS_UPPER_HEX_DIGIT(c)) /* 0-9, a-f, A-F */
+#define IS_LEADING_SURROGATE(c) ((c) >= 0xd800 && (c) <= 0xdbff) /* U+D800..U+DBFF */
+#define IS_TRAILING_SURROGATE(c) ((c) >= 0xdc00 && (c) <= 0xdfff)
+#define IS_SURROGATE(c) (IS_LEADING_SURROGATE(c) || IS_TRAILING_SURROGATE(c))
+#define IS_NONCHARACTER(c) (\
+ ((c) >= 0xfdd0 && (c) <= 0xfdef) || \
+ (c) == 0xfffe || (c) == 0xffff || \
+ (c) == 0x1fffe || (c) == 0x1ffff || \
+ (c) == 0x2fffe || (c) == 0x2ffff || \
+ (c) == 0x3fffe || (c) == 0x3ffff || \
+ (c) == 0x4fffe || (c) == 0x4ffff || \
+ (c) == 0x5fffe || (c) == 0x5ffff || \
+ (c) == 0x6fffe || (c) == 0x6ffff || \
+ (c) == 0x7fffe || (c) == 0x7ffff || \
+ (c) == 0x8fffe || (c) == 0x8ffff || \
+ (c) == 0x9fffe || (c) == 0x9ffff || \
+ (c) == 0xafffe || (c) == 0xaffff || \
+ (c) == 0xbfffe || (c) == 0xbffff || \
+ (c) == 0xcfffe || (c) == 0xcffff || \
+ (c) == 0xdfffe || (c) == 0xdffff || \
+ (c) == 0xefffe || (c) == 0xeffff || \
+ (c) == 0xffffe || (c) == 0xfffff || \
+ (c) == 0x10fffe || (c) == 0x10ffff)
+#define IS_C0_CONTROL(c) ((c) >= 0 && (c) <= 0x1f)
+#define IS_CONTROL(c) (IS_C0_CONTROL((c)) || ((c) >= 0x7f && (c) <= 0x9f))
#define IS_BLOCK(tag) ((tag) < HTML_TAG_LAST_BLOCK)
#define NEW_TOKEN_LAST_ATTR (html->new_token.tag.attrs[html->new_token.tag.attrs_count - 1])
@@ -283,7 +310,8 @@ typedef enum {
/* keep this in same order as html_tag_names[] */
extern const char *html_tag_names[];
typedef enum {
- HTML_TAG_A = 1,
+ HTML_TAG__NONE = 0,
+ HTML_TAG_A,
HTML_TAG_ADDRESS,
HTML_TAG_APPLET,
HTML_TAG_AREA,
@@ -542,6 +570,8 @@ struct html_page {
char *escaped_buf;
size_t escaped_size;
+
+ long char_ref_code;
bool parse_last_cr;
bool frameset_ok;
@@ -637,3 +667,5 @@ char * html_escape_string(struct html_page *html, char
void html_emit_char_token(struct html_page *html, short cc);
void html_emit_eof_token(struct html_page *html);
void html_emit_comment(struct html_page *html, struct html_comment *comment);
+
+#endif /* HTML_ENABLE */
\ No newline at end of file
--- html_data.c Wed Dec 11 11:20:37 2024
+++ html_data.c Tue Mar 4 15:14:23 2025
@@ -16,6 +16,8 @@
#include "html.h"
+#ifdef HTML_ENABLE
+
#ifdef HTML_ENABLE_DEBUGGING
const char *html_mode_names[] = {
"NONE",
@@ -195,9 +197,8 @@ const char *html_token_names[] = {
/* this doesn't have to list all tags, just ones the docs reference */
const char *html_tag_names[] = {
- NULL,
-
- "a", /* 1 */
+ "\1", /* bogus just so 0 isn't a valid tag id */
+ "a",
"address",
"applet",
"area",
@@ -317,3 +318,5 @@ const char *html_tag_names[] = {
NULL
};
+
+#endif /* HTML_ENABLE */
--- html_entities.c Tue Dec 10 22:39:10 2024
+++ html_entities.c Tue Mar 4 15:14:42 2025
@@ -16,6 +16,8 @@
#include "html.h"
+#ifdef HTML_ENABLE
+
/* https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references */
const html_entity html_entities[] = {
/* super common ones to speed up searching */
@@ -2298,3 +2300,5 @@ const html_entity html_entities[] = {
{ NULL, 0 },
};
+
+#endif /* HTML_ENABLE */
--- html_tokenize.c Mon Dec 23 19:48:17 2024
+++ html_tokenize.c Tue Mar 4 15:15:23 2025
@@ -24,6 +24,8 @@
#include "html.h"
+#ifdef HTML_ENABLE
+
void html_tokenize(struct html_page *html, short cc);
bool html_appropriate_end_tag_token(struct html_page *html, html_token *token);
html_tag_type html_find_tag_type(char *tag_name);
@@ -2288,13 +2290,14 @@ html_appropriate_end_tag_token(struct html_page *html,
html_tag_type
html_find_tag_type(char *name)
{
- short n;
-
- for (n = 1; html_tag_names[n] != NULL; n++) {
- if (strcasecmp(name, html_tag_names[n]) == 0)
- return n;
- }
-
+ long idx;
+
+ idx = strcaseidx(name, html_tag_names);
+ if (idx >= 0)
+ return idx;
+
HTML_DEBUG((": html_find_tag_type couldn't find %s", name));
return 0;
}
+
+#endif /* HTML_ENABLE */
\ No newline at end of file
--- html_tree.c Sat Dec 21 10:07:45 2024
+++ html_tree.c Tue Mar 4 15:15:04 2025
@@ -25,6 +25,8 @@
#include "html.h"
+#ifdef HTML_ENABLE
+
void html_deref_element(struct html_page *html,
struct html_element *element);
void html_append_element(struct html_page *html,
@@ -3569,3 +3571,5 @@ html_emit_comment(struct html_page *html, struct html_
html_process_token(html, &emittok);
}
+
+#endif /* HTML_ENABLE */
--- http.c Fri Dec 20 20:44:39 2024
+++ http.c Tue Mar 4 15:42:55 2025
@@ -30,7 +30,11 @@ enum {
};
struct http_page {
+#ifdef HTML_ENABLE
struct html_page *html;
+#else
+ void *filler;
+#endif
};
bool http_accept_uri(struct URI *uri);
@@ -40,9 +44,11 @@ void http_update(page_handle pageh);
void http_reset(page_handle pageh);
void http_free(page_handle pageh);
-bool html_parse_page(page_handle pageh);
static void print_plaintext(struct page *page);
+#ifdef HTML_ENABLE
+bool html_parse_page(page_handle pageh);
void html_compute_style(struct page *page, struct html_page *html);
+#endif
struct page_handler http_handler = {
http_accept_uri,
@@ -67,7 +73,7 @@ bool
http_request_init(page_handle pageh)
{
struct page *page = *pageh;
- char *output;
+ char *output, *mac;
size_t output_len;
bool is_tls;
@@ -82,19 +88,27 @@ http_request_init(page_handle pageh)
return false;
}
- /* pretend to be curl so we seem like a downloader, not a browser */
+ mac = gestalt_machine_type();
output_len = snprintf(output,
HTTP_REQUEST_BUF_SIZE,
- "GET %s HTTP/1.0\r\n"
+ "GET %s HTTP/1.0\r\n" /* 1.1 may get chunked responses we can't grok */
"Host: %s\r\n"
- "User-Agent: Mozilla/5.0 (Macintosh) %s/%s\r\n"
+ "User-Agent: Mozilla/5.0 (%s%s) %s/%s\r\n"
+ "Accept: %s*/*;q=0.8\r\n"
"Accept-Language: en-US,en;q=0.5\r\n"
- "Accept: text/html;q=0.9,*/*;q=0.8\r\n"
"Connection: close\r\n"
"\r\n",
page->uri->path,
page->uri->hostname,
- PROGRAM_NAME, get_version(false));
+ mac ? "Macintosh " : "Unknown Macintosh",
+ mac ? mac : "",
+ PROGRAM_NAME, get_version(false),
+#ifdef HTML_ENABLE
+ "text/html;q=0.9,"
+#else
+ ""
+#endif
+ );
if (output_len >= HTTP_REQUEST_BUF_SIZE) {
warn("Not enough room for HTTP request (%ld)", output_len);
@@ -132,24 +146,36 @@ http_process(page_handle pageh)
continue;
if (strncasecmp(line, "HTTP/1.0 ", 9) == 0 ||
- strncasecmp(line, "HTTP/1.1 ", 9) == 0)
+ strncasecmp(line, "HTTP/1.1 ", 9) == 0) {
page->server_status = atoi(line + 9);
- else if (strncasecmp(line, "content-type: ", 14) == 0) {
+ } else if (strncasecmp(line, "content-type: ", 14) == 0) {
/* "application/octet-stream" or "text/html; charset..." */
memcpy(page->content_type, line + 14,
MIN(sizeof(page->content_type),
n - page->content_pos - 2));
page->content_type[sizeof(page->content_type) - 1] = '\0';
- } else if (strncasecmp(line, "content-length: ", 16) == 0)
+ } else if (strncasecmp(line, "content-length: ", 16) == 0) {
page->server_content_len = atol(line + 16);
- else if (line[0] == '\r' && line[1] == '\n') {
+ } else if (strncasecmp(line, "location: ", 10) == 0 &&
+ (page->server_status == 301 || page->server_status == 302 ||
+ page->server_status == 307)) {
+ /* TODO: for 307, preserve POST method */
+ page->redir_to = build_relative_uri(page->uri, line + 10,
+ n - page->content_pos - 10 - 1);
+ if (page->redir_to == NULL)
+ browser_statusf(page->browser, "Error: Out of memory");
+ return false;
+ } else if (line[0] == '\r' && line[1] == '\n') {
/* \r\n on a line by itself */
- page->header_len = n + 1;
+ page->header_len = n + 2;
/* TODO: if status is not 200, bail */
- if (strncasecmp(page->content_type, "text/plain", 10) == 0 ||
- strncasecmp(page->content_type, "text/html", 9) == 0) {
+ if (strncasecmp(page->content_type, "text/plain", 10) == 0
+#ifdef HTML_ENABLE
+ || strncasecmp(page->content_type, "text/html", 9) == 0
+#endif
+ ) {
page->parse_state = PARSE_STATE_BODY;
browser_commit_to_loading_page(page->browser);
TVTabStop(page->browser->output_tv, 28);
@@ -177,15 +203,18 @@ http_process(page_handle pageh)
if (page->parse_state != PARSE_STATE_BODY)
return true;
-
+
+#ifdef HTML_ENABLE
if (strncasecmp(page->content_type, "text/html", 9) == 0) {
html_parse_page(pageh);
return PAGE_CAN_READ_MORE(page);
}
+#endif
return page_print_plaintext(pageh);
}
+#ifdef HTML_ENABLE
bool
html_parse_page(page_handle pageh)
{
@@ -220,6 +249,7 @@ html_parse_page(page_handle pageh)
html_page_finish(&html);
return false;
}
+#endif
void
http_reset(page_handle pageh)
@@ -230,8 +260,10 @@ http_reset(page_handle pageh)
page->parse_state = PARSE_STATE_BODY;
page->content_pos = page->header_len;
+#ifdef HTML_ENABLE
if (page->handler_cookie != NULL)
html_xfree((struct html_page **)&page->handler_cookie);
+#endif
}
void
@@ -239,11 +271,19 @@ http_free(page_handle pageh)
{
struct page *page = *pageh;
+#ifdef HTML_ENABLE
if (page->handler_cookie != NULL)
html_xfree((struct html_page **)&page->handler_cookie);
+#endif
}
void
+http_update(page_handle pageh)
+{
+}
+
+#ifdef HTML_ENABLE
+void
html_compute_style(struct page *page, struct html_page *html)
{
short n, j;
@@ -395,7 +435,7 @@ html_output_margin(void *cookie, struct html_page *htm
struct page *page = *((page_handle)cookie);
html_compute_style(page, html);
- page->cur_style.size = (page->cur_style.size * 75) / 100;
+ page->cur_style.size /= 2;
if (!TVAppend(page->browser->output_tv, &page->cur_style, "\r", 1))
panic("out of memory in TVAppend");
@@ -459,11 +499,6 @@ html_output_field(void *cookie, struct html_page *html
}
void
-http_update(page_handle pageh)
-{
-}
-
-void
html_have_title(void *cookie, struct html_page *html, char *str, size_t len)
{
Str255 pstr;
@@ -474,4 +509,5 @@ html_have_title(void *cookie, struct html_page *html,
memcpy((char *)pstr + 1, str, len);
pstr[0] = (unsigned char)plen;
SetWTitle(page->browser->win, pstr);
-}
\ No newline at end of file
+}
+#endif
\ No newline at end of file