AmendHub

Download:

jcs

/

detritus

/

amendments

/

32

http: Add HTTP(s) module

For now this just downloads files that aren't text/plain or
text/markdown. Plain text is displayed as-is, but the Markdown
parsing isn't implemented yet.
 
Also stop having every handler re-parse the URI; instead, parse it
once and ask each handler whether it accepts it.

jcs made amendment 32 about 1 year ago
--- browser.c Mon Nov 11 16:05:39 2024 +++ browser.c Mon Nov 11 23:31:50 2024 @@ -41,7 +41,7 @@ struct page_handler * page_handlers[] = { &finger_handler, &gemini_handler, &gopher_handler, - //&http_handler, + &http_handler, }; bool browser_close(struct focusable *focusable); @@ -314,12 +314,8 @@ browser_finished_loading(struct browser *browser) browser_stop_loading_page(browser); browser->redirs = 0; - if (download) + if (download || !committed) return; - - if (browser->current_page == NULL) - /* browser_commit_to_loading_page never called */ - Debugger(); /* release some wasted memory if we can */ if ((*(browser->current_page))->content_size > @@ -721,10 +717,14 @@ browser_create_page(struct browser *browser, char *uri browser_stop_loading_page(browser); + uri = parse_uri(uristr); + if (uri == NULL) + goto fail; + for (n = 0; n < nitems(page_handlers); n++) { handler = page_handlers[n]; - if ((uri = handler->parse_uri(uristr)) == NULL) + if (!handler->accept(uri)) continue; pageh = (page_handle)NewHandleClear(sizeof(struct page) + @@ -759,6 +759,9 @@ browser_create_page(struct browser *browser, char *uri return pageh; } +fail: + if (uri) + xfree(&uri); warn("Could not parse URI \"%s\"", uristr); return NULL; } @@ -1031,20 +1034,20 @@ bool page_queue_output(struct request *request, void *cookie, char **buf, size_t *len, bool did_write) { + struct page *page = *((page_handle)cookie); + if (did_write == false) { - *len = request->output_len; - *buf = request->output; + if (request->output_pos == 0) + browser_statusf(page->browser, + "Connected to %s, sending request...", page->uri->hostname); + + *len = request->output_len - request->output_pos; + *buf = request->output + request->output_pos; return true; } - if (*len == 0) - return true; + request->output_pos += *len; - if (*len < request->output_len) - memmove(request->output, request->output + *len, - request->output_len - *len); - request->output_len -= *len; - return true; } @@ -1090,5 +1093,6 @@ 
page_request_cleanup(page_handle pageh) { struct page *page = *pageh; - request_xfree(&page->request); + if (page->request) + request_xfree(&page->request); } --- browser.h Mon Nov 11 16:03:43 2024 +++ browser.h Mon Nov 11 23:13:28 2024 @@ -101,8 +101,8 @@ struct browser { }; struct page_handler { - /* return a URI object if this handler can process this URI string */ - struct URI * (*parse_uri)(char *uristr); + /* can this handler process this URI? */ + bool (*accept)(struct URI *uri); /* build a request to acquire content */ bool (*request_init)(page_handle pageh); --- finger.c Mon Nov 11 15:53:45 2024 +++ finger.c Mon Nov 11 23:11:30 2024 @@ -22,12 +22,12 @@ #define FINGER_PORT 79 -struct URI * finger_parse_uri(char *uristr); +bool finger_accept_uri(struct URI *uri); bool finger_request_init(page_handle pageh); bool finger_process(page_handle pageh); struct page_handler finger_handler = { - finger_parse_uri, + finger_accept_uri, finger_request_init, page_queue_output, page_consume_data, @@ -36,10 +36,10 @@ struct page_handler finger_handler = { NULL, }; -struct URI * -finger_parse_uri(char *uristr) +bool +finger_accept_uri(struct URI *uri) { - return parse_uri(uristr, "finger"); + return (strcasecmp(uri->scheme, "finger") == 0); } bool @@ -47,6 +47,8 @@ finger_request_init(page_handle pageh) { struct page *page = *pageh; size_t path_len; + char *output; + size_t output_len; if (page->uri->port == 0) page->uri->port = FINGER_PORT; @@ -57,27 +59,24 @@ finger_request_init(page_handle pageh) else path_len = strlen(page->uri->path + 1); + output_len = path_len + 2; + output = xmalloc(output_len + 1); + if (output == NULL) { + warn("Out of memory"); + return false; + } + snprintf(output, output_len + 1, "%s\r\n", + path_len ? 
page->uri->path + 1 : ""); + page->request = request_connect(page->browser, page->uri->hostname, page->uri->port, false, 0); - if (page->request == NULL) + if (page->request == NULL) { + xfree(&output); return false; - - page->request->output_len = path_len + 2; - page->request->output = xmalloc(page->request->output_len + 1); - if (page->request->output == NULL) { - xfree(&page->request); - warn("Out of memory"); - return false; } + page->request->output_len = output_len; + page->request->output = output; - snprintf(page->request->output, page->request->output_len + 1, - "%s\r\n", path_len ? page->uri->path + 1 : ""); - - browser_statusf(page->browser, "Connected to %s, sending request...", - page->uri->hostname); - - browser_commit_to_loading_page(page->browser); - return true; } @@ -90,6 +89,9 @@ finger_process(page_handle pageh) if (page->content_pos == page->content_len) return (page->request != NULL); + + if (page->content_pos == 0) + browser_commit_to_loading_page(page->browser); page->browser->style = STYLE_PRE; --- gemini.c Mon Nov 11 16:08:27 2024 +++ gemini.c Mon Nov 11 23:12:07 2024 @@ -28,7 +28,7 @@ enum { PARSE_STATE_DOWNLOAD }; -struct URI * gemini_parse_uri(char *uristr); +bool gemini_accept_uri(struct URI *uri); bool gemini_request_init(page_handle pageh); bool gemini_process(page_handle pageh); void gemini_reset(page_handle pageh); @@ -36,7 +36,7 @@ void gemini_reset(page_handle pageh); static bool parse_header(struct page *page, char *str, size_t len); struct page_handler gemini_handler = { - gemini_parse_uri, + gemini_accept_uri, gemini_request_init, page_queue_output, page_consume_data, @@ -45,36 +45,38 @@ struct page_handler gemini_handler = { gemini_reset, }; -struct URI * -gemini_parse_uri(char *uristr) +bool +gemini_accept_uri(struct URI *uri) { - return parse_uri(uristr, "gemini"); + return (strcasecmp(uri->scheme, "gemini") == 0); } bool gemini_request_init(page_handle pageh) { struct page *page = *pageh; - struct gemini_request *gemini = 
NULL; - + char *output; + size_t output_len; + if (page->uri->port == 0) page->uri->port = GEMINI_PORT; - page->request = request_connect(page->browser, page->uri->hostname, - page->uri->port, true, BLUESCSI_TLS_INIT_REQUEST_FLAG_NO_VERIFY); - if (page->request == NULL) - return false; - - page->request->output_len = strlen(page->uri->str) + 2; - page->request->output = xmalloc(page->request->output_len + 1); + output_len = strlen(page->uri->str) + 2; + output = xmalloc(output_len + 1); if (page->request->output == NULL) { - xfree(&page->request); warn("Out of memory"); return false; } - - snprintf(page->request->output, page->request->output_len + 1, - "%s\r\n", page->uri->str); + snprintf(output, output_len + 1, "%s\r\n", page->uri->str); + + page->request = request_connect(page->browser, page->uri->hostname, + page->uri->port, true, BLUESCSI_TLS_INIT_REQUEST_FLAG_NO_VERIFY); + if (page->request == NULL) { + xfree(&output); + return false; + } + page->request->output = output; + page->request->output_len = output_len; return true; } --- gopher.c Mon Nov 11 16:03:56 2024 +++ gopher.c Mon Nov 11 23:12:46 2024 @@ -27,7 +27,7 @@ static const char showable_types[] = "013i"; -struct URI * gopher_parse_uri(char *uristr); +bool gopher_accept_uri(struct URI *uri); bool gopher_request_init(page_handle pageh); bool gopher_process(page_handle pageh); void gopher_reset(page_handle pageh); @@ -35,7 +35,7 @@ void gopher_reset(page_handle pageh); static void gopher_print_menu(struct page *page, char *line, size_t len); struct page_handler gopher_handler = { - gopher_parse_uri, + gopher_accept_uri, gopher_request_init, page_queue_output, page_consume_data, @@ -43,18 +43,18 @@ struct page_handler gopher_handler = { gopher_process, }; -struct URI * -gopher_parse_uri(char *uristr) +bool +gopher_accept_uri(struct URI *uri) { - return parse_uri(uristr, "gopher"); + return (strcasecmp(uri->scheme, "gopher") == 0); } bool gopher_request_init(page_handle pageh) { struct page *page = 
*pageh; - size_t selector_len; - char *filename; + size_t selector_len, output_len; + char *filename, *output; if (page->uri->port == 0) page->uri->port = GOPHER_PORT; @@ -73,22 +73,26 @@ gopher_request_init(page_handle pageh) selector_len = strlen(page->uri->path + 2); } + output_len = selector_len + 2; + output = xmalloc(output_len + 1); + if (output == NULL) { + warn("Out of memory"); + return false; + } + snprintf(output, output_len + 1, "%s\r\n", + selector_len ? page->uri->path + 2 : ""); + page->request = request_connect(page->browser, page->uri->hostname, page->uri->port, false, 0); - if (page->request == NULL) + if (page->request == NULL) { + xfree(&output); return false; - - page->request->output_len = selector_len + 2; - page->request->output = xmalloc(page->request->output_len + 1); - if (page->request->output == NULL) { - xfree(&page->request); - warn("Out of memory"); - return false; } - - snprintf(page->request->output, page->request->output_len + 1, - "%s\r\n", selector_len ? page->uri->path + 2 : ""); + page->request->output_len = output_len; + page->request->output = output; + /* XXX: try to detect server responding with "3" for error? */ + if (strchr(showable_types, page->content_type[0]) == NULL) { if (selector_len == 2) filename = NULL; --- http.c Mon Nov 11 23:32:46 2024 +++ http.c Mon Nov 11 23:32:46 2024 @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2024 joshua stein <jcs@jcs.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <stdarg.h> +#include <stdio.h> +#include <string.h> + +#include "detritus.h" + +#define HTTP_PORT 80 +#define HTTPS_PORT 443 +#define HTTP_REQUEST_BUF_SIZE 512 + +enum { + PARSE_STATE_HEADERS, + PARSE_STATE_BODY, + PARSE_STATE_DOWNLOAD +}; + +bool http_accept_uri(struct URI *uri); +bool http_request_init(page_handle pageh); +bool http_process(page_handle pageh); +void http_reset(page_handle pageh); + +static bool parse_header(struct page *page, char *str, size_t len); +static void print_plaintext(struct page *page); + +struct page_handler http_handler = { + http_accept_uri, + http_request_init, + page_queue_output, + page_consume_data, + page_request_cleanup, + http_process, + http_reset, +}; + +bool +http_accept_uri(struct URI *uri) +{ + return (strcasecmp(uri->scheme, "https") == 0 || + strcasecmp(uri->scheme, "http") == 0); +} + +bool +http_request_init(page_handle pageh) +{ + struct page *page = *pageh; + char *output; + size_t output_len; + bool is_tls; + + is_tls = (strcasecmp(page->uri->scheme, "https") == 0); + + if (page->uri->port == 0) + page->uri->port = (is_tls ? 
HTTPS_PORT : HTTP_PORT); + + output = xmalloc(HTTP_REQUEST_BUF_SIZE); + if (output == NULL) { + warn("Out of memory"); + return false; + } + + /* pretend to be curl so we seem like a downloader, not a browser */ + output_len = snprintf(output, + HTTP_REQUEST_BUF_SIZE, + "GET %s HTTP/1.0\r\n" + "Host: %s\r\n" + "User-Agent: curl/8.10.1 (%s %s)\r\n" + "Accept: text/markdown,text/html;q=0.9,*/*;q=0.8\r\n" + "\r\n", + page->uri->path, + page->uri->hostname, + PROGRAM_NAME, get_version(false)); + + if (output_len >= HTTP_REQUEST_BUF_SIZE) { + warn("Not enough room for HTTP request (%ld)", output_len); + return false; + } + + page->request = request_connect(page->browser, page->uri->hostname, + page->uri->port, is_tls, 0); + if (page->request == NULL) { + xfree(&output); + return false; + } + page->request->output_len = output_len; + page->request->output = output; + + return true; +} + +static bool +http_process(page_handle pageh) +{ + struct page *page = *pageh; + char c, *filename, *line; + size_t n; + + if (page->content_pos == page->content_len) + return (page->request != NULL); + + if (page->parse_state == PARSE_STATE_HEADERS) { + line = page->content + page->content_pos; + + for (n = page->content_pos; n < page->content_len; n++) { + c = page->content[n]; + if (!(c == '\n' && n && page->content[n - 1] == '\r')) + continue; + + if (strncasecmp(line, "HTTP/1.0 ", 9) == 0 || + strncasecmp(line, "HTTP/1.1 ", 9) == 0) { + page->server_status = atoi(line + 9); + } + else if (strncasecmp(line, "content-type: ", 14) == 0) { + /* "application/octet-stream" or "text/html; charset..." 
*/ + memcpy(page->content_type, line + 14, + MIN(sizeof(page->content_type), + n - page->content_pos - 2)); + page->content_type[sizeof(page->content_type) - 1] = '\0'; + } + else if (strncasecmp(line, "content-length: ", 16) == 0) { + page->server_content_len = atol(line + 16); + } + else if (line[0] == '\r' && line[1] == '\n') { + /* \r\n on a line by itself */ + page->header_len = n + 1; + + /* TODO: if status is not 200, bail */ + + if (strncasecmp(page->content_type, "text/plain", 10) == 0 || + strncasecmp(page->content_type, "text/markdown", 13) == 0) { + page->parse_state = PARSE_STATE_BODY; + browser_commit_to_loading_page(page->browser); + } else { + page->parse_state = PARSE_STATE_DOWNLOAD; + + filename = strrchr(page->uri->path, '/'); + if (filename && filename[0] == '/') + filename++; + + if (!browser_start_download(page->browser, filename, + page->content + page->header_len, + page->content_len - page->header_len)) + return false; + } + } + + page->content_pos = n + 1; + line = page->content + page->content_pos; + + if (page->parse_state != PARSE_STATE_HEADERS) + break; + } + } + + if (page->parse_state != PARSE_STATE_BODY) + return true; + + if (strncasecmp(page->content_type, "text/plain", 10) == 0) + print_plaintext(page); + else if (strncasecmp(page->content_type, "text/markdown", 13) == 0) + ; /* TODO: print_markdown(page); */ + + if (page->request == NULL && page->content_pos < page->content_len) { + browser_print(page->browser, page->content + page->content_pos, + page->content_len - page->content_pos, false); + page->content_pos = page->content_len; + } + + if (page->content_pos == page->content_len) + return (page->request != NULL); +} + +void +http_reset(page_handle pageh) +{ + struct page *page = *pageh; + + /* restart at body */ + page->parse_state = PARSE_STATE_BODY; + page->content_pos = page->header_len; + + browser_commit_to_loading_page(page->browser); +} + +void +print_plaintext(struct page *page) +{ + size_t n, trail, skip, len, 
tlen, j; + bool newline; + + for (n = page->content_pos; n < page->content_len; n++) { + if (page->content[n] != '\n' && + !(n == page->content_len - 1 && page->request == NULL)) + continue; + + len = n - page->content_pos + 1; + trail = 0; + skip = 0; + newline = false; + + if (page->content[n] == '\n') { + len--; + trail = 1; + newline = true; + + if (n > 0 && page->content[n - 1] == '\r') { + len--; + trail++; + } + } else if (page->request != NULL) + /* no newline at the end and fetching, so wait for more data */ + return; + +print_line: + if (len) + browser_print(page->browser, + page->content + page->content_pos + skip, len, newline); + + page->content_pos += skip + len + trail; + } +} \ No newline at end of file --- main.c Thu Nov 7 21:42:43 2024 +++ main.c Mon Nov 11 22:21:29 2024 @@ -60,7 +60,7 @@ main(void) edit_menu = GetMHandle(EDIT_MENU_ID); bookmarks_menu = GetMHandle(BOOKMARKS_MENU_ID); menu_defaults(); - + AppendMenu(bookmarks_menu, "\p."); SetItem(bookmarks_menu, 1, "\pgemini://geminiprotocol.net/history/"); AppendMenu(bookmarks_menu, "\p."); --- request.c Mon Nov 11 13:08:11 2024 +++ request.c Mon Nov 11 23:23:29 2024 @@ -43,7 +43,7 @@ bool request_tls_read_plaintext(struct request *reques request_data_consumer consumer, void *cookie); struct URI * -parse_uri(char *uristr, char *restrict_scheme) +parse_uri(char *uristr) { static char scheme[URI_MAX_SCHEME_LEN + 1]; static char hostname[URI_MAX_HOSTNAME_LEN + 1]; @@ -91,9 +91,6 @@ parse_uri(char *uristr, char *restrict_scheme) return NULL; parse_ok: - if (restrict_scheme != NULL && strcasecmp(restrict_scheme, scheme) != 0) - return NULL; - scheme_len = strlen(scheme); hostname_len = strlen(hostname); path_len = strlen(path); @@ -153,7 +150,7 @@ build_relative_uri(struct URI *uri, char *relative, si } str[slen] = '\0'; - return parse_uri(str, NULL); + return parse_uri(str); } /* http://a.b/c + /d/e -> http://a.b/d/e */ @@ -168,7 +165,7 @@ build_relative_uri(struct URI *uri, char *relative, si } 
str[slen] = '\0'; - return parse_uri(str, NULL); + return parse_uri(str); } for (n = 0; n <= len; n++) { @@ -180,7 +177,7 @@ build_relative_uri(struct URI *uri, char *relative, si len = sizeof(str) - 1; memcpy(str, relative, len); str[len] = '\0'; - return parse_uri(str, NULL); + return parse_uri(str); } /* http://a.b/c/d.html + e/f.html -> http://a.b/c/e/f.html */ @@ -205,13 +202,13 @@ build_relative_uri(struct URI *uri, char *relative, si path[plen + len] = '\0'; snprintf(str, sizeof(str), "%s://%s%s", uri->scheme, uri->hostname, path); - return parse_uri(str, NULL); + return parse_uri(str); } } /* what the heck is this? */ Debugger(); - return parse_uri(relative, NULL); + return parse_uri(relative); } struct request * --- request.h Mon Nov 11 12:05:52 2024 +++ request.h Mon Nov 11 23:17:18 2024 @@ -54,6 +54,7 @@ struct request { char *output; unsigned long output_len; + unsigned long output_pos; TCPiopb iopb; StreamPtr stream; @@ -73,7 +74,7 @@ struct request { struct browser *browser; }; -struct URI * parse_uri(char *uristr, char *restrict_scheme); +struct URI * parse_uri(char *uristr); struct URI * build_relative_uri(struct URI *uri, char *relative, size_t len); /*