AmendHub

Download:

jcs

/

detritus

/

amendments

/

68

html: Put all of this behind HTML_ENABLE


jcs made amendment 68 about 1 year ago
--- html.c Fri Dec 20 20:16:04 2024 +++ html.c Tue Mar 4 15:04:26 2025 @@ -25,6 +25,8 @@ #include "html.h" +#ifdef HTML_ENABLE + #ifdef HTML_ENABLE_DEBUGGING struct html_page *the_html = NULL; #endif @@ -487,4 +489,6 @@ html_debug(const char *fmt, ...) html_output(the_html->cookie, the_html, buf, len); } -#endif \ No newline at end of file +#endif + +#endif /* HTML_ENABLE */ \ No newline at end of file --- html.h Fri Dec 20 23:47:01 2024 +++ html.h Tue Mar 4 15:04:09 2025 @@ -22,6 +22,9 @@ #include "util.h" +//#define HTML_ENABLE + +#ifdef HTML_ENABLE void html_output(void *cookie, struct html_page *html, char *str, size_t len); void html_output_margin(void *cookie, struct html_page *html); @@ -60,11 +63,35 @@ extern struct html_page *the_html; #define IS_LOWER_ALPHA(c) ((c) >= 'a' && (c) <= 'z') #define IS_UPPER_ALPHA(c) ((c) >= 'A' && (c) <= 'Z') #define IS_ALPHA(c) (IS_LOWER_ALPHA((c)) || IS_UPPER_ALPHA((c))) -#define IS_NUMERIC(c) (((c) >= '0' && (c) <= '9')) -#define IS_ALPHANUMERIC(c) (IS_ALPHA((c)) || IS_NUMERIC((c))) -#define IS_HEX_DIGIT(c) (IS_NUMERIC((c)) || ((c) >= 'a' && (c) <= 'f') || \ - ((c) >= 'A' && (c) <= 'F')) - +#define IS_DIGIT(c) (((c) >= '0' && (c) <= '9')) +#define IS_ALPHANUMERIC(c) (IS_ALPHA((c)) || IS_DIGIT((c))) +#define IS_LOWER_HEX_DIGIT(c) ((c) >= 'a' && (c) <= 'f') +#define IS_UPPER_HEX_DIGIT(c) ((c) >= 'A' && (c) <= 'F') +#define IS_HEX_DIGIT(c) (IS_LOWER_HEX_DIGIT(c) || IS_UPPER_HEX_DIGIT(c)) +#define IS_LEADING_SURROGATE(c) ((c) >= 0xdb00 && (c) <= 0xdbff) +#define IS_TRAILING_SURROGATE(c) ((c) >= 0xdc00 && (c) <= 0xdfff) +#define IS_SURROGATE(c) (IS_LEADING_SURROGATE(c) || IS_TRAILING_SURROGATE(c)) +#define IS_NONCHARACTER(c) (\ + ((c) >= 0xfdd0 && (c) <= 0xfdef) || \ + (c) == 0xfffe || (c) == 0xffff || \ + (c) == 0x1fffe || (c) == 0x1ffff || \ + (c) == 0x2fffe || (c) == 0x2ffff || \ + (c) == 0x3fffe || (c) == 0x3ffff || \ + (c) == 0x4fffe || (c) == 0x4ffff || \ + (c) == 0x5fffe || (c) == 0x5ffff || \ + (c) == 0x6fffe || (c) == 0x6ffff || \ + (c) == 0x7fffe || (c) == 0x7ffff || \ + (c) == 0x8fffe || (c) == 0x8ffff || \ + (c) == 0x9fffe || (c) == 0x9ffff || \ + (c) == 0xafffe || (c) == 0xaffff || \ + (c) == 0xbfffe || (c) == 0xbffff || \ + (c) == 0xcfffe || (c) == 0xcffff || \ + (c) == 0xdfffe || (c) == 0xdffff || \ + (c) == 0xefffe || (c) == 0xeffff || \ + (c) == 0xffffe || (c) == 0xfffff || \ + (c) == 0x10fffe || (c) == 0x10ffff) +#define IS_C0_CONTROL(c) ((c) >= 0 && (c) <= 0x1f) +#define IS_CONTROL(c) (IS_C0_CONTROL((c)) || ((c) >= 0x7f && (c) <= 0x9f)) #define IS_BLOCK(tag) ((tag) < HTML_TAG_LAST_BLOCK) #define NEW_TOKEN_LAST_ATTR (html->new_token.tag.attrs[html->new_token.tag.attrs_count - 1]) @@ -283,7 +310,8 @@ typedef enum { /* keep this in same order as html_tag_names[] */ extern const char *html_tag_names[]; typedef enum { - HTML_TAG_A = 1, + HTML_TAG__NONE = 0, + HTML_TAG_A, HTML_TAG_ADDRESS, HTML_TAG_APPLET, HTML_TAG_AREA, @@ -542,6 +570,8 @@ struct html_page { char *escaped_buf; size_t escaped_size; + + long char_ref_code; bool parse_last_cr; bool frameset_ok; @@ -637,3 +667,5 @@ char * html_escape_string(struct html_page *html, char void html_emit_char_token(struct html_page *html, short cc); void html_emit_eof_token(struct html_page *html); void html_emit_comment(struct html_page *html, struct html_comment *comment); + +#endif /* HTML_ENABLE */ \ No newline at end of file --- html_data.c Wed Dec 11 11:20:37 2024 +++ html_data.c Tue Mar 4 15:14:23 2025 @@ -16,6 +16,8 @@ #include "html.h" +#ifdef HTML_ENABLE + #ifdef HTML_ENABLE_DEBUGGING const char *html_mode_names[] = { "NONE", @@ -195,9 +197,8 @@ const char *html_token_names[] = { /* this doesn't have to list all tags, just ones the docs reference */ const char *html_tag_names[] = { - NULL, - - "a", /* 1 */ + "\1", /* bogus just so 0 isn't a valid tag id */ + "a", "address", "applet", "area", @@ -317,3 +318,5 @@ const char *html_tag_names[] = { NULL }; + +#endif /* HTML_ENABLE */ --- html_entities.c Tue Dec 10 22:39:10 2024 +++ html_entities.c Tue Mar 4 15:14:42 2025 @@ -16,6 +16,8 @@ #include "html.h" +#ifdef HTML_ENABLE + /* https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references */ const html_entity html_entities[] = { /* super common ones to speed up searching */ @@ -2298,3 +2300,5 @@ const html_entity html_entities[] = { { NULL, 0 }, }; + +#endif /* HTML_ENABLE */ --- html_tokenize.c Mon Dec 23 19:48:17 2024 +++ html_tokenize.c Tue Mar 4 15:15:23 2025 @@ -24,6 +24,8 @@ #include "html.h" +#ifdef HTML_ENABLE + void html_tokenize(struct html_page *html, short cc); bool html_appropriate_end_tag_token(struct html_page *html, html_token *token); html_tag_type html_find_tag_type(char *tag_name); @@ -2288,13 +2290,14 @@ html_appropriate_end_tag_token(struct html_page *html, html_tag_type html_find_tag_type(char *name) { - short n; - - for (n = 1; html_tag_names[n] != NULL; n++) { - if (strcasecmp(name, html_tag_names[n]) == 0) - return n; - } - + long idx; + + idx = strcaseidx(name, html_tag_names); + if (idx >= 0) + return idx; + HTML_DEBUG((": html_find_tag_type couldn't find %s", name)); return 0; } + +#endif /* HTML_ENABLE */ \ No newline at end of file --- html_tree.c Sat Dec 21 10:07:45 2024 +++ html_tree.c Tue Mar 4 15:15:04 2025 @@ -25,6 +25,8 @@ #include "html.h" +#ifdef HTML_ENABLE + void html_deref_element(struct html_page *html, struct html_element *element); void html_append_element(struct html_page *html, @@ -3569,3 +3571,5 @@ html_emit_comment(struct html_page *html, struct html_ html_process_token(html, &emittok); } + +#endif /* HTML_ENABLE */ --- http.c Fri Dec 20 20:44:39 2024 +++ http.c Tue Mar 4 15:42:55 2025 @@ -30,7 +30,11 @@ enum { }; struct http_page { +#ifdef HTML_ENABLE struct html_page *html; +#else + void *filler; +#endif }; bool http_accept_uri(struct URI *uri); @@ -40,9 +44,11 @@ void http_update(page_handle pageh); void http_reset(page_handle pageh); void http_free(page_handle pageh); -bool html_parse_page(page_handle pageh); static void print_plaintext(struct page *page); +#ifdef HTML_ENABLE +bool html_parse_page(page_handle pageh); void html_compute_style(struct page *page, struct html_page *html); +#endif struct page_handler http_handler = { http_accept_uri, @@ -67,7 +73,7 @@ bool http_request_init(page_handle pageh) { struct page *page = *pageh; - char *output; + char *output, *mac; size_t output_len; bool is_tls; @@ -82,19 +88,27 @@ http_request_init(page_handle pageh) return false; } - /* pretend to be curl so we seem like a downloader, not a browser */ + mac = gestalt_machine_type(); output_len = snprintf(output, HTTP_REQUEST_BUF_SIZE, - "GET %s HTTP/1.0\r\n" + "GET %s HTTP/1.0\r\n" /* 1.1 may get chunked responses we can't grok */ "Host: %s\r\n" - "User-Agent: Mozilla/5.0 (Macintosh) %s/%s\r\n" + "User-Agent: Mozilla/5.0 (%s%s) %s/%s\r\n" + "Accept: %s*/*;q=0.8\r\n" "Accept-Language: en-US,en;q=0.5\r\n" - "Accept: text/html;q=0.9,*/*;q=0.8\r\n" "Connection: close\r\n" "\r\n", page->uri->path, page->uri->hostname, - PROGRAM_NAME, get_version(false)); + mac ? "Macintosh " : "Unknown Macintosh", + mac ? mac : "", + PROGRAM_NAME, get_version(false), +#ifdef HTML_ENABLE + "text/html;q=0.9," +#else + "" +#endif + ); if (output_len >= HTTP_REQUEST_BUF_SIZE) { warn("Not enough room for HTTP request (%ld)", output_len); @@ -132,24 +146,36 @@ http_process(page_handle pageh) continue; if (strncasecmp(line, "HTTP/1.0 ", 9) == 0 || - strncasecmp(line, "HTTP/1.1 ", 9) == 0) + strncasecmp(line, "HTTP/1.1 ", 9) == 0) { page->server_status = atoi(line + 9); - else if (strncasecmp(line, "content-type: ", 14) == 0) { + } else if (strncasecmp(line, "content-type: ", 14) == 0) { /* "application/octet-stream" or "text/html; charset..." */ memcpy(page->content_type, line + 14, MIN(sizeof(page->content_type), n - page->content_pos - 2)); page->content_type[sizeof(page->content_type) - 1] = '\0'; - } else if (strncasecmp(line, "content-length: ", 16) == 0) + } else if (strncasecmp(line, "content-length: ", 16) == 0) { page->server_content_len = atol(line + 16); - else if (line[0] == '\r' && line[1] == '\n') { + } else if (strncasecmp(line, "location: ", 10) == 0 && + (page->server_status == 301 || page->server_status == 302 || + page->server_status == 307)) { + /* TODO: for 307, preserve POST method */ + page->redir_to = build_relative_uri(page->uri, line + 10, + n - page->content_pos - 10 - 1); + if (page->redir_to == NULL) + browser_statusf(page->browser, "Error: Out of memory"); + return false; + } else if (line[0] == '\r' && line[1] == '\n') { /* \r\n on a line by itself */ - page->header_len = n + 1; + page->header_len = n + 2; /* TODO: if status is not 200, bail */ - if (strncasecmp(page->content_type, "text/plain", 10) == 0 || - strncasecmp(page->content_type, "text/html", 9) == 0) { + if (strncasecmp(page->content_type, "text/plain", 10) == 0 +#ifdef HTML_ENABLE + || strncasecmp(page->content_type, "text/html", 9) == 0 +#endif + ) { page->parse_state = PARSE_STATE_BODY; browser_commit_to_loading_page(page->browser); TVTabStop(page->browser->output_tv, 28); @@ -177,15 +203,18 @@ http_process(page_handle pageh) if (page->parse_state != PARSE_STATE_BODY) return true; - + +#ifdef HTML_ENABLE if (strncasecmp(page->content_type, "text/html", 9) == 0) { html_parse_page(pageh); return PAGE_CAN_READ_MORE(page); } +#endif return page_print_plaintext(pageh); } +#ifdef HTML_ENABLE bool html_parse_page(page_handle pageh) { @@ -220,6 +249,7 @@ html_parse_page(page_handle pageh) html_page_finish(&html); return false; } +#endif void http_reset(page_handle pageh) @@ -230,8 +260,10 @@ http_reset(page_handle pageh) page->parse_state = PARSE_STATE_BODY; page->content_pos = page->header_len; +#ifdef HTML_ENABLE if (page->handler_cookie != NULL) html_xfree((struct html_page **)&page->handler_cookie); +#endif } void @@ -239,11 +271,19 @@ http_free(page_handle pageh) { struct page *page = *pageh; +#ifdef HTML_ENABLE if (page->handler_cookie != NULL) html_xfree((struct html_page **)&page->handler_cookie); +#endif } void +http_update(page_handle pageh) +{ +} + +#ifdef HTML_ENABLE +void html_compute_style(struct page *page, struct html_page *html) { short n, j; @@ -395,7 +435,7 @@ html_output_margin(void *cookie, struct html_page *htm struct page *page = *((page_handle)cookie); html_compute_style(page, html); - page->cur_style.size = (page->cur_style.size * 75) / 100; + page->cur_style.size /= 2; if (!TVAppend(page->browser->output_tv, &page->cur_style, "\r", 1)) panic("out of memory in TVAppend"); @@ -459,11 +499,6 @@ html_output_field(void *cookie, struct html_page *html } void -http_update(page_handle pageh) -{ -} - -void html_have_title(void *cookie, struct html_page *html, char *str, size_t len) { Str255 pstr; @@ -474,4 +509,5 @@ html_have_title(void *cookie, struct html_page *html, memcpy((char *)pstr + 1, str, len); pstr[0] = (unsigned char)plen; SetWTitle(page->browser->win, pstr); -} \ No newline at end of file +} +#endif \ No newline at end of file