/* * Copyright (c) 2024 joshua stein * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include #include "detritus.h" #include "html.h" #define HTTP_REQUEST_BUF_SIZE 512 enum { PARSE_STATE_HEADERS, PARSE_STATE_BODY, PARSE_STATE_DOWNLOAD }; struct http_page { #ifdef HTML_ENABLE struct html_page *html; #else void *filler; #endif }; bool http_accept_uri(struct URI *uri); bool http_request_init(page_handle pageh); bool http_process(page_handle pageh); void http_update(page_handle pageh); void http_reset(page_handle pageh); void http_free(page_handle pageh); static void print_plaintext(struct page *page); #ifdef HTML_ENABLE bool html_parse_page(page_handle pageh); void html_compute_style(struct page *page, struct html_page *html); #endif struct page_handler http_handler = { http_accept_uri, http_request_init, page_queue_output, page_consume_data, page_request_cleanup, http_process, http_update, http_reset, http_free }; bool http_accept_uri(struct URI *uri) { return (strcasecmp(uri->scheme, "https") == 0 || strcasecmp(uri->scheme, "http") == 0); } bool http_request_init(page_handle pageh) { struct page *page = *pageh; char *output, *mac; size_t output_len; bool is_tls; is_tls = (strcasecmp(page->uri->scheme, "https") == 0); if (page->uri->port == 0) page->uri->port = (is_tls ? HTTPS_PORT : HTTP_PORT); output = xmalloc(HTTP_REQUEST_BUF_SIZE); if (output == NULL) { warn("Out of memory"); return false; } mac = gestalt_machine_type(); output_len = snprintf(output, HTTP_REQUEST_BUF_SIZE, "GET %s HTTP/1.0\r\n" /* 1.1 may get chunked responses we can't grok */ "Host: %s\r\n" "User-Agent: Mozilla/5.0 (%s%s) %s/%s\r\n" "Accept: %s*/*;q=0.8\r\n" "Accept-Language: en-US,en;q=0.5\r\n" "Connection: close\r\n" "\r\n", page->uri->path, page->uri->hostname, mac ? "Macintosh " : "Unknown Macintosh", mac ? mac : "", PROGRAM_NAME, get_version(false), #ifdef HTML_ENABLE "text/html;q=0.9," #else "" #endif ); if (output_len >= HTTP_REQUEST_BUF_SIZE) { warn("Not enough room for HTTP request (%ld)", output_len); return false; } page->request = request_connect(page->browser, page->uri->hostname, page->uri->port, is_tls, 0); if (page->request == NULL) { xfree(&output); return false; } page->request->output_len = output_len; page->request->output = output; return true; } static bool http_process(page_handle pageh) { struct page *page = *pageh; char c, *filename, *line; size_t n; if (page->content_pos == page->content_len) return PAGE_CAN_READ_MORE(page); if (page->parse_state == PARSE_STATE_HEADERS) { line = page->content + page->content_pos; for (n = page->content_pos; n < page->content_len; n++) { c = page->content[n]; if (!(c == '\n' && n && page->content[n - 1] == '\r')) continue; if (strncasecmp(line, "HTTP/1.0 ", 9) == 0 || strncasecmp(line, "HTTP/1.1 ", 9) == 0) { page->server_status = atoi(line + 9); } else if (strncasecmp(line, "content-type: ", 14) == 0) { /* "application/octet-stream" or "text/html; charset..." */ memcpy(page->content_type, line + 14, MIN(sizeof(page->content_type), n - page->content_pos - 2)); page->content_type[sizeof(page->content_type) - 1] = '\0'; } else if (strncasecmp(line, "content-length: ", 16) == 0) { page->server_content_len = atol(line + 16); } else if (strncasecmp(line, "location: ", 10) == 0 && (page->server_status == 301 || page->server_status == 302 || page->server_status == 307)) { /* TODO: for 307, preserve POST method */ page->redir_to = build_relative_uri(page->uri, line + 10, n - page->content_pos - 10 - 1); if (page->redir_to == NULL) browser_statusf(page->browser, "Error: Out of memory"); return false; } else if (line[0] == '\r' && line[1] == '\n') { /* \r\n on a line by itself */ page->header_len = n + 2; /* TODO: if status is not 200, bail */ if (strncasecmp(page->content_type, "text/plain", 10) == 0 #ifdef HTML_ENABLE || strncasecmp(page->content_type, "text/html", 9) == 0 #endif ) { page->parse_state = PARSE_STATE_BODY; browser_commit_to_loading_page(page->browser); TVTabStop(page->browser->output_tv, 28); } else { page->parse_state = PARSE_STATE_DOWNLOAD; filename = strrchr(page->uri->path, '/'); if (filename && filename[0] == '/') filename++; if (!browser_start_download(page->browser, filename, page->content + page->header_len, page->content_len - page->header_len)) return false; } } page->content_pos = n + 1; line = page->content + page->content_pos; if (page->parse_state != PARSE_STATE_HEADERS) break; } } if (page->parse_state != PARSE_STATE_BODY) return true; #ifdef HTML_ENABLE if (strncasecmp(page->content_type, "text/html", 9) == 0) { html_parse_page(pageh); return PAGE_CAN_READ_MORE(page); } #endif return page_print_plaintext(pageh); } #ifdef HTML_ENABLE bool html_parse_page(page_handle pageh) { struct page *page = *pageh; struct html_page *html; size_t len; if (page->handler_cookie == NULL) { html = html_init_page(pageh); if (html == NULL) { warn("Out of memory"); return false; } html->ignore_script_data = true; html->ignore_comment_data = true; page->handler_cookie = html; } else html = (struct html_page *)page->handler_cookie; len = page->content_len - page->content_pos; if (len) { html_parse(html, page->content + page->content_pos, len); TVUpdateScrollbar(page->browser->output_tv, page->browser->output_tv_scroller); page->content_pos += len; return true; } if (PAGE_CAN_READ_MORE(page)) return true; html_page_finish(&html); return false; } #endif void http_reset(page_handle pageh) { struct page *page = *pageh; /* restart at body */ page->parse_state = PARSE_STATE_BODY; page->content_pos = page->header_len; #ifdef HTML_ENABLE if (page->handler_cookie != NULL) html_xfree((struct html_page **)&page->handler_cookie); #endif } void http_free(page_handle pageh) { struct page *page = *pageh; #ifdef HTML_ENABLE if (page->handler_cookie != NULL) html_xfree((struct html_page **)&page->handler_cookie); #endif } void http_update(page_handle pageh) { } #ifdef HTML_ENABLE void html_compute_style(struct page *page, struct html_page *html) { short n, j; char *val; page->cur_style.font = geneva; page->cur_style.size = 10; page->cur_style.style = 0; page->cur_style.align = TV_ALIGN_LEFT; for (n = 0; n < html->open_count; n++) { switch (html->open[n]->type) { case HTML_TAG_A: page->cur_style.style |= underline; break; case HTML_TAG_ADDRESS: page->cur_style.style |= italic; break; case HTML_TAG_B: page->cur_style.style |= bold | condense; break; case HTML_TAG_CENTER: page->cur_style.align = TV_ALIGN_CENTER; break; case HTML_TAG_CITE: page->cur_style.style |= italic; break; case HTML_TAG_CODE: page->cur_style.font = courier; page->cur_style.size = 9; break; case HTML_TAG_DFN: page->cur_style.style |= italic; break; case HTML_TAG_EM: page->cur_style.style |= italic; break; case HTML_TAG_H1: /* 2em */ page->cur_style.size = 20; page->cur_style.style |= bold; break; case HTML_TAG_H2: /* 1.5em */ page->cur_style.size = 14; page->cur_style.style |= bold; break; case HTML_TAG_H3: /* 1.17em */ page->cur_style.size = 12; page->cur_style.style |= bold; break; case HTML_TAG_H4: /* 1em */ page->cur_style.size = 10; page->cur_style.style |= bold; break; case HTML_TAG_H5: /* 0.83em */ page->cur_style.size = 9; page->cur_style.style |= bold; break; case HTML_TAG_H6: /* 0.67em */ page->cur_style.size = 9; page->cur_style.style |= bold | condense; break; case HTML_TAG_I: page->cur_style.style |= italic; break; case HTML_TAG_INS: page->cur_style.style |= underline; break; case HTML_TAG_KBD: page->cur_style.font = courier; page->cur_style.size = 9; break; case HTML_TAG_PRE: page->cur_style.font = courier; page->cur_style.size = 9; break; case HTML_TAG_S: /* TODO: line-through */ break; case HTML_TAG_SAMP: page->cur_style.font = courier; page->cur_style.size = 9; break; case HTML_TAG_SMALL: page->cur_style.size -= 2; break; case HTML_TAG_STRIKE: /* TODO: line-through */ break; case HTML_TAG_STRONG: page->cur_style.style |= bold | condense; break; case HTML_TAG_SUP: page->cur_style.size -= 2; break; case HTML_TAG_TH: page->cur_style.style |= bold | condense; break; case HTML_TAG_U: page->cur_style.style |= underline; break; case HTML_TAG_VAR: page->cur_style.style |= italic; break; } /* honor the "align" attribute on certain block elements */ switch (html->open[n]->type) { case HTML_TAG_DIV: case HTML_TAG_P: if (html_get_attribute_value(html, html->open[n], "align", &val)) { if (strcasecmp(val, "left") == 0) page->cur_style.align = TV_ALIGN_LEFT; else if (strcasecmp(val, "center") == 0) page->cur_style.align = TV_ALIGN_CENTER; else if (strcasecmp(val, "right") == 0) page->cur_style.align = TV_ALIGN_RIGHT; } break; } } if (page->cur_style.size < 9) page->cur_style.size = 9; } void html_output(void *cookie, struct html_page *html, char *str, size_t len) { struct page *page = *((page_handle)cookie); html_compute_style(page, html); if (!TVAppend(page->browser->output_tv, &page->cur_style, str, len)) panic("out of memory in TVAppend"); html->last_output = str[len - 1]; } void html_output_margin(void *cookie, struct html_page *html) { struct page *page = *((page_handle)cookie); html_compute_style(page, html); page->cur_style.size /= 2; if (!TVAppend(page->browser->output_tv, &page->cur_style, "\r", 1)) panic("out of memory in TVAppend"); html->last_output = '\r'; } void html_output_field(void *cookie, struct html_page *html, struct html_element *el) { struct page *page = *((page_handle)cookie); size_t len; char *val, *filler; short osize, isize; BigRect brect; Rect rect; len = html_get_attribute_value(html, el, "type", &val); if (val && strcasecmp(val, "text") == 0) { page->cur_style.tag = (unsigned long)el; osize = page->cur_style.size; isize = 0; if (html_get_attribute_value(html, el, "size", &val)) isize = atoi(val); if (isize < 3) isize = 3; filler = xmalloczero(isize + 4); if (filler == NULL) return; page->cur_style.size = 12; TVAppend(page->browser->output_tv, &page->cur_style, filler, isize + 3); xfree(&filler); browser_find_tag_rect(page->browser, page->cur_style.tag, &brect); TVBigRectToLocalRect(page->browser->output_tv, &brect, &rect); el->input_te = TENew(&rect, &rect); FrameRect(&rect); isize = html_get_attribute_value(html, el, "value", &val); if (isize) { TESetText(val, isize, el->input_te); } else TESetText("hello", 5, el->input_te); page->cur_style.tag = 0; page->cur_style.size = osize; } else { html_output(html->cookie, html, "[ input type=", 13); if (val) html_output(html->cookie, html, val, len); else html_output(html->cookie, html, "(none)", 6); html_output(html->cookie, html, " ]", 2); } } void html_have_title(void *cookie, struct html_page *html, char *str, size_t len) { Str255 pstr; struct page *page = *((page_handle)cookie); short plen; plen = MIN(len, 255); memcpy((char *)pstr + 1, str, len); pstr[0] = (unsigned char)plen; SetWTitle(page->browser->win, pstr); } #endif