jcs
/detritus
/amendments
/32
http: Add HTTP(s) module
For now this just displays text/plain responses and downloads everything
else; text/markdown is recognized, but Markdown parsing isn't implemented yet.
Also stop having every handler re-parse the URI; just parse it once
and ask each handler if it wants it.
jcs made amendment 32 about 1 year ago
--- browser.c Mon Nov 11 16:05:39 2024
+++ browser.c Mon Nov 11 23:31:50 2024
@@ -41,7 +41,7 @@ struct page_handler * page_handlers[] = {
&finger_handler,
&gemini_handler,
&gopher_handler,
- //&http_handler,
+ &http_handler,
};
bool browser_close(struct focusable *focusable);
@@ -314,12 +314,8 @@ browser_finished_loading(struct browser *browser)
browser_stop_loading_page(browser);
browser->redirs = 0;
- if (download)
+ if (download || !committed)
return;
-
- if (browser->current_page == NULL)
- /* browser_commit_to_loading_page never called */
- Debugger();
/* release some wasted memory if we can */
if ((*(browser->current_page))->content_size >
@@ -721,10 +717,14 @@ browser_create_page(struct browser *browser, char *uri
browser_stop_loading_page(browser);
+ uri = parse_uri(uristr);
+ if (uri == NULL)
+ goto fail;
+
for (n = 0; n < nitems(page_handlers); n++) {
handler = page_handlers[n];
- if ((uri = handler->parse_uri(uristr)) == NULL)
+ if (!handler->accept(uri))
continue;
pageh = (page_handle)NewHandleClear(sizeof(struct page) +
@@ -759,6 +759,9 @@ browser_create_page(struct browser *browser, char *uri
return pageh;
}
+fail:
+ if (uri)
+ xfree(&uri);
warn("Could not parse URI \"%s\"", uristr);
return NULL;
}
@@ -1031,20 +1034,20 @@ bool
page_queue_output(struct request *request, void *cookie, char **buf,
size_t *len, bool did_write)
{
+ struct page *page = *((page_handle)cookie);
+
if (did_write == false) {
- *len = request->output_len;
- *buf = request->output;
+ if (request->output_pos == 0)
+ browser_statusf(page->browser,
+ "Connected to %s, sending request...", page->uri->hostname);
+
+ *len = request->output_len - request->output_pos;
+ *buf = request->output + request->output_pos;
return true;
}
- if (*len == 0)
- return true;
+ request->output_pos += *len;
- if (*len < request->output_len)
- memmove(request->output, request->output + *len,
- request->output_len - *len);
- request->output_len -= *len;
-
return true;
}
@@ -1090,5 +1093,6 @@ page_request_cleanup(page_handle pageh)
{
struct page *page = *pageh;
- request_xfree(&page->request);
+ if (page->request)
+ request_xfree(&page->request);
}
--- browser.h Mon Nov 11 16:03:43 2024
+++ browser.h Mon Nov 11 23:13:28 2024
@@ -101,8 +101,8 @@ struct browser {
};
struct page_handler {
- /* return a URI object if this handler can process this URI string */
- struct URI * (*parse_uri)(char *uristr);
+ /* can this handler process this URI? */
+ bool (*accept)(struct URI *uri);
/* build a request to acquire content */
bool (*request_init)(page_handle pageh);
--- finger.c Mon Nov 11 15:53:45 2024
+++ finger.c Mon Nov 11 23:11:30 2024
@@ -22,12 +22,12 @@
#define FINGER_PORT 79
-struct URI * finger_parse_uri(char *uristr);
+bool finger_accept_uri(struct URI *uri);
bool finger_request_init(page_handle pageh);
bool finger_process(page_handle pageh);
struct page_handler finger_handler = {
- finger_parse_uri,
+ finger_accept_uri,
finger_request_init,
page_queue_output,
page_consume_data,
@@ -36,10 +36,10 @@ struct page_handler finger_handler = {
NULL,
};
-struct URI *
-finger_parse_uri(char *uristr)
+bool
+finger_accept_uri(struct URI *uri)
{
- return parse_uri(uristr, "finger");
+ return (strcasecmp(uri->scheme, "finger") == 0);
}
bool
@@ -47,6 +47,8 @@ finger_request_init(page_handle pageh)
{
struct page *page = *pageh;
size_t path_len;
+ char *output;
+ size_t output_len;
if (page->uri->port == 0)
page->uri->port = FINGER_PORT;
@@ -57,27 +59,24 @@ finger_request_init(page_handle pageh)
else
path_len = strlen(page->uri->path + 1);
+ output_len = path_len + 2;
+ output = xmalloc(output_len + 1);
+ if (output == NULL) {
+ warn("Out of memory");
+ return false;
+ }
+ snprintf(output, output_len + 1, "%s\r\n",
+ path_len ? page->uri->path + 1 : "");
+
page->request = request_connect(page->browser, page->uri->hostname,
page->uri->port, false, 0);
- if (page->request == NULL)
+ if (page->request == NULL) {
+ xfree(&output);
return false;
-
- page->request->output_len = path_len + 2;
- page->request->output = xmalloc(page->request->output_len + 1);
- if (page->request->output == NULL) {
- xfree(&page->request);
- warn("Out of memory");
- return false;
}
+ page->request->output_len = output_len;
+ page->request->output = output;
- snprintf(page->request->output, page->request->output_len + 1,
- "%s\r\n", path_len ? page->uri->path + 1 : "");
-
- browser_statusf(page->browser, "Connected to %s, sending request...",
- page->uri->hostname);
-
- browser_commit_to_loading_page(page->browser);
-
return true;
}
@@ -90,6 +89,9 @@ finger_process(page_handle pageh)
if (page->content_pos == page->content_len)
return (page->request != NULL);
+
+ if (page->content_pos == 0)
+ browser_commit_to_loading_page(page->browser);
page->browser->style = STYLE_PRE;
--- gemini.c Mon Nov 11 16:08:27 2024
+++ gemini.c Mon Nov 11 23:12:07 2024
@@ -28,7 +28,7 @@ enum {
PARSE_STATE_DOWNLOAD
};
-struct URI * gemini_parse_uri(char *uristr);
+bool gemini_accept_uri(struct URI *uri);
bool gemini_request_init(page_handle pageh);
bool gemini_process(page_handle pageh);
void gemini_reset(page_handle pageh);
@@ -36,7 +36,7 @@ void gemini_reset(page_handle pageh);
static bool parse_header(struct page *page, char *str, size_t len);
struct page_handler gemini_handler = {
- gemini_parse_uri,
+ gemini_accept_uri,
gemini_request_init,
page_queue_output,
page_consume_data,
@@ -45,36 +45,38 @@ struct page_handler gemini_handler = {
gemini_reset,
};
-struct URI *
-gemini_parse_uri(char *uristr)
+bool
+gemini_accept_uri(struct URI *uri)
{
- return parse_uri(uristr, "gemini");
+ return (strcasecmp(uri->scheme, "gemini") == 0);
}
+/*
+ * Build the Gemini request line (the full URI followed by CRLF) and start
+ * a TLS connection to the server.  The request text is built before
+ * connecting, so an allocation failure never leaves a half-made request.
+ * Returns false if the buffer can't be allocated or the connection can't
+ * be started.
+ */
bool
gemini_request_init(page_handle pageh)
{
struct page *page = *pageh;
- struct gemini_request *gemini = NULL;
-
+ char *output;
+ size_t output_len;
+
if (page->uri->port == 0)
page->uri->port = GEMINI_PORT;
- page->request = request_connect(page->browser, page->uri->hostname,
- page->uri->port, true, BLUESCSI_TLS_INIT_REQUEST_FLAG_NO_VERIFY);
- if (page->request == NULL)
- return false;
-
- page->request->output_len = strlen(page->uri->str) + 2;
- page->request->output = xmalloc(page->request->output_len + 1);
+ output_len = strlen(page->uri->str) + 2;
+ output = xmalloc(output_len + 1);
- if (page->request->output == NULL) {
+ /* page->request is not set up yet, so check the local buffer */
+ if (output == NULL) {
- xfree(&page->request);
warn("Out of memory");
return false;
}
-
- snprintf(page->request->output, page->request->output_len + 1,
- "%s\r\n", page->uri->str);
+ snprintf(output, output_len + 1, "%s\r\n", page->uri->str);
+
+ page->request = request_connect(page->browser, page->uri->hostname,
+ page->uri->port, true, BLUESCSI_TLS_INIT_REQUEST_FLAG_NO_VERIFY);
+ if (page->request == NULL) {
+ xfree(&output);
+ return false;
+ }
+ page->request->output = output;
+ page->request->output_len = output_len;
return true;
}
--- gopher.c Mon Nov 11 16:03:56 2024
+++ gopher.c Mon Nov 11 23:12:46 2024
@@ -27,7 +27,7 @@
static const char showable_types[] = "013i";
-struct URI * gopher_parse_uri(char *uristr);
+bool gopher_accept_uri(struct URI *uri);
bool gopher_request_init(page_handle pageh);
bool gopher_process(page_handle pageh);
void gopher_reset(page_handle pageh);
@@ -35,7 +35,7 @@ void gopher_reset(page_handle pageh);
static void gopher_print_menu(struct page *page, char *line, size_t len);
struct page_handler gopher_handler = {
- gopher_parse_uri,
+ gopher_accept_uri,
gopher_request_init,
page_queue_output,
page_consume_data,
@@ -43,18 +43,18 @@ struct page_handler gopher_handler = {
gopher_process,
};
-struct URI *
-gopher_parse_uri(char *uristr)
+bool
+gopher_accept_uri(struct URI *uri)
{
- return parse_uri(uristr, "gopher");
+ return (strcasecmp(uri->scheme, "gopher") == 0);
}
bool
gopher_request_init(page_handle pageh)
{
struct page *page = *pageh;
- size_t selector_len;
- char *filename;
+ size_t selector_len, output_len;
+ char *filename, *output;
if (page->uri->port == 0)
page->uri->port = GOPHER_PORT;
@@ -73,22 +73,26 @@ gopher_request_init(page_handle pageh)
selector_len = strlen(page->uri->path + 2);
}
+ output_len = selector_len + 2;
+ output = xmalloc(output_len + 1);
+ if (output == NULL) {
+ warn("Out of memory");
+ return false;
+ }
+ snprintf(output, output_len + 1, "%s\r\n",
+ selector_len ? page->uri->path + 2 : "");
+
page->request = request_connect(page->browser, page->uri->hostname,
page->uri->port, false, 0);
- if (page->request == NULL)
+ if (page->request == NULL) {
+ xfree(&output);
return false;
-
- page->request->output_len = selector_len + 2;
- page->request->output = xmalloc(page->request->output_len + 1);
- if (page->request->output == NULL) {
- xfree(&page->request);
- warn("Out of memory");
- return false;
}
-
- snprintf(page->request->output, page->request->output_len + 1,
- "%s\r\n", selector_len ? page->uri->path + 2 : "");
+ page->request->output_len = output_len;
+ page->request->output = output;
+ /* XXX: try to detect server responding with "3" for error? */
+
if (strchr(showable_types, page->content_type[0]) == NULL) {
if (selector_len == 2)
filename = NULL;
--- http.c Mon Nov 11 23:32:46 2024
+++ http.c Mon Nov 11 23:32:46 2024
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2024 joshua stein <jcs@jcs.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "detritus.h"
+
+#define HTTP_PORT 80
+#define HTTPS_PORT 443
+#define HTTP_REQUEST_BUF_SIZE 512
+
+/*
+ * Values stored in page->parse_state while an HTTP response is processed.
+ */
+enum {
+ /* still reading the status line and header lines */
+ PARSE_STATE_HEADERS,
+ /* headers are done and the body is text to be printed in the browser */
+ PARSE_STATE_BODY,
+ /* headers are done and the body was passed to browser_start_download() */
+ PARSE_STATE_DOWNLOAD
+};
+
+/* page_handler callbacks implemented in this file */
+bool http_accept_uri(struct URI *uri);
+bool http_request_init(page_handle pageh);
+bool http_process(page_handle pageh);
+void http_reset(page_handle pageh);
+
+/* file-local helper that prints a text/plain body line by line */
+static void print_plaintext(struct page *page);
+
+/*
+ * Page handler for http:// and https:// URIs, referenced from the
+ * page_handlers[] table in browser.c.  Members are filled in positionally,
+ * in the order struct page_handler declares them in browser.h: the first
+ * slot is the accept callback and the second builds the request.
+ */
+struct page_handler http_handler = {
+ http_accept_uri,
+ http_request_init,
+ /* generic page_* helpers; two of them appear in the browser.c changes */
+ page_queue_output,
+ page_consume_data,
+ page_request_cleanup,
+ /* HTTP-specific response parsing and re-display */
+ http_process,
+ http_reset,
+};
+
+/*
+ * Tell the browser whether this handler takes the given URI: true when
+ * the scheme is "https" or "http", compared without regard to case.
+ */
+bool
+http_accept_uri(struct URI *uri)
+{
+	if (strcasecmp(uri->scheme, "https") == 0)
+		return true;
+
+	return (strcasecmp(uri->scheme, "http") == 0);
+}
+
+/*
+ * Build an HTTP/1.0 GET request for the page's URI and start connecting
+ * to the server, using TLS when the scheme is "https".  A port of 0 in
+ * the URI is replaced with the scheme's default port.
+ *
+ * On success the request text is stored in page->request->output and true
+ * is returned.  On failure (out of memory, request too long for the
+ * buffer, or the connection could not be started) the request buffer is
+ * freed and false is returned.
+ */
+bool
+http_request_init(page_handle pageh)
+{
+	struct page *page = *pageh;
+	char *output, *path;
+	size_t output_len;
+	int written;
+	bool is_tls;
+
+	is_tls = (strcasecmp(page->uri->scheme, "https") == 0);
+
+	if (page->uri->port == 0)
+		page->uri->port = (is_tls ? HTTPS_PORT : HTTP_PORT);
+
+	/* the request line needs a target even if the URI had no path */
+	path = page->uri->path;
+	if (path == NULL || path[0] == '\0')
+		path = "/";
+
+	output = xmalloc(HTTP_REQUEST_BUF_SIZE);
+	if (output == NULL) {
+		warn("Out of memory");
+		return false;
+	}
+
+	/* pretend to be curl so we seem like a downloader, not a browser */
+	written = snprintf(output, HTTP_REQUEST_BUF_SIZE,
+	    "GET %s HTTP/1.0\r\n"
+	    "Host: %s\r\n"
+	    "User-Agent: curl/8.10.1 (%s %s)\r\n"
+	    "Accept: text/markdown,text/html;q=0.9,*/*;q=0.8\r\n"
+	    "\r\n",
+	    path,
+	    page->uri->hostname,
+	    PROGRAM_NAME, get_version(false));
+
+	/*
+	 * snprintf reports the length it wanted to write (or a negative
+	 * value on error), so anything outside the buffer means truncation.
+	 */
+	if (written < 0 || written >= HTTP_REQUEST_BUF_SIZE) {
+		warn("Not enough room for HTTP request (%ld)", (long)written);
+		xfree(&output);
+		return false;
+	}
+	output_len = (size_t)written;
+
+	page->request = request_connect(page->browser, page->uri->hostname,
+	    page->uri->port, is_tls, 0);
+	if (page->request == NULL) {
+		xfree(&output);
+		return false;
+	}
+	page->request->output_len = output_len;
+	page->request->output = output;
+
+	return true;
+}
+
+/*
+ * Process response data that has arrived since the last call.
+ *
+ * While in PARSE_STATE_HEADERS, each complete CRLF-terminated line is
+ * examined: the status line sets page->server_status, Content-Type and
+ * Content-Length are recorded, and the empty line ends the headers.  At
+ * that point text/plain and text/markdown responses switch to
+ * PARSE_STATE_BODY and everything else is handed to
+ * browser_start_download().  In PARSE_STATE_BODY, text/plain content is
+ * printed line by line.
+ *
+ * Returns false if a download could not be started.  Otherwise returns
+ * true, except that once the request is gone and no unprocessed content
+ * remains it returns false.
+ */
+bool
+http_process(page_handle pageh)
+{
+	struct page *page = *pageh;
+	char c, *filename, *line;
+	size_t n, value_len;
+
+	if (page->content_pos == page->content_len)
+		return (page->request != NULL);
+
+	if (page->parse_state == PARSE_STATE_HEADERS) {
+		line = page->content + page->content_pos;
+
+		for (n = page->content_pos; n < page->content_len; n++) {
+			/* only act once a full line ending in \r\n is present */
+			c = page->content[n];
+			if (!(c == '\n' && n && page->content[n - 1] == '\r'))
+				continue;
+
+			if (strncasecmp(line, "HTTP/1.0 ", 9) == 0 ||
+			    strncasecmp(line, "HTTP/1.1 ", 9) == 0) {
+				page->server_status = atoi(line + 9);
+			}
+			else if (strncasecmp(line, "content-type: ", 14) == 0) {
+				/* "application/octet-stream" or "text/html; charset..." */
+				/*
+				 * The value runs from just past the 14-byte header
+				 * name up to, but not including, the \r at n - 1.
+				 */
+				value_len = (n - 1) - (page->content_pos + 14);
+				if (value_len > sizeof(page->content_type) - 1)
+					value_len = sizeof(page->content_type) - 1;
+				memcpy(page->content_type, line + 14, value_len);
+				page->content_type[value_len] = '\0';
+			}
+			else if (strncasecmp(line, "content-length: ", 16) == 0) {
+				page->server_content_len = atol(line + 16);
+			}
+			else if (line[0] == '\r' && line[1] == '\n') {
+				/* \r\n on a line by itself */
+				page->header_len = n + 1;
+
+				/* TODO: if status is not 200, bail */
+
+				if (strncasecmp(page->content_type, "text/plain", 10) == 0 ||
+				    strncasecmp(page->content_type, "text/markdown", 13) == 0) {
+					page->parse_state = PARSE_STATE_BODY;
+					browser_commit_to_loading_page(page->browser);
+				} else {
+					page->parse_state = PARSE_STATE_DOWNLOAD;
+
+					/* suggest the last path component as the file name */
+					filename = strrchr(page->uri->path, '/');
+					if (filename && filename[0] == '/')
+						filename++;
+
+					if (!browser_start_download(page->browser, filename,
+					    page->content + page->header_len,
+					    page->content_len - page->header_len))
+						return false;
+				}
+			}
+
+			page->content_pos = n + 1;
+			line = page->content + page->content_pos;
+
+			if (page->parse_state != PARSE_STATE_HEADERS)
+				break;
+		}
+	}
+
+	if (page->parse_state != PARSE_STATE_BODY)
+		return true;
+
+	if (strncasecmp(page->content_type, "text/plain", 10) == 0)
+		print_plaintext(page);
+	/* TODO: print_markdown(page) for text/markdown */
+
+	/* request finished: flush anything that was not printed above */
+	if (page->request == NULL && page->content_pos < page->content_len) {
+		browser_print(page->browser, page->content + page->content_pos,
+		    page->content_len - page->content_pos, false);
+		page->content_pos = page->content_len;
+	}
+
+	/*
+	 * Unprocessed content can only remain here while the request is
+	 * still open, so this one expression covers both the "all consumed"
+	 * and the "waiting for the rest of a line" cases.
+	 */
+	return (page->request != NULL);
+}
+
+/*
+ * Rewind the page to the first byte after the response headers and put
+ * it back in body-parsing state, then commit the browser to loading the
+ * page again.
+ */
+void
+http_reset(page_handle pageh)
+{
+	struct page *pg = *pageh;
+
+	pg->content_pos = pg->header_len;
+	pg->parse_state = PARSE_STATE_BODY;
+
+	browser_commit_to_loading_page(pg->browser);
+}
+
+/*
+ * Print every complete line of unprocessed content to the browser and
+ * advance page->content_pos past each one.  A trailing "\n" or "\r\n" is
+ * consumed but not printed.  A final line without a newline is printed
+ * only once the request has finished (page->request == NULL); until then
+ * it is left in place to wait for more data.
+ */
+static void
+print_plaintext(struct page *page)
+{
+	size_t n, len, trail;
+	bool newline;
+
+	for (n = page->content_pos; n < page->content_len; n++) {
+		/*
+		 * Keep scanning unless this byte ends a line, or it is the
+		 * very last byte and no more data will arrive.
+		 */
+		if (page->content[n] != '\n' &&
+		    !(n == page->content_len - 1 && page->request == NULL))
+			continue;
+
+		len = n - page->content_pos + 1;
+		trail = 0;
+		newline = false;
+
+		if (page->content[n] == '\n') {
+			len--;
+			trail = 1;
+			newline = true;
+
+			/* only strip a \r that belongs to this line */
+			if (len > 0 && page->content[n - 1] == '\r') {
+				len--;
+				trail++;
+			}
+		}
+
+		if (len)
+			browser_print(page->browser,
+			    page->content + page->content_pos, len, newline);
+
+		page->content_pos += len + trail;
+	}
+}
\ No newline at end of file
--- main.c Thu Nov 7 21:42:43 2024
+++ main.c Mon Nov 11 22:21:29 2024
@@ -60,7 +60,7 @@ main(void)
edit_menu = GetMHandle(EDIT_MENU_ID);
bookmarks_menu = GetMHandle(BOOKMARKS_MENU_ID);
menu_defaults();
-
+
AppendMenu(bookmarks_menu, "\p.");
SetItem(bookmarks_menu, 1, "\pgemini://geminiprotocol.net/history/");
AppendMenu(bookmarks_menu, "\p.");
--- request.c Mon Nov 11 13:08:11 2024
+++ request.c Mon Nov 11 23:23:29 2024
@@ -43,7 +43,7 @@ bool request_tls_read_plaintext(struct request *reques
request_data_consumer consumer, void *cookie);
struct URI *
-parse_uri(char *uristr, char *restrict_scheme)
+parse_uri(char *uristr)
{
static char scheme[URI_MAX_SCHEME_LEN + 1];
static char hostname[URI_MAX_HOSTNAME_LEN + 1];
@@ -91,9 +91,6 @@ parse_uri(char *uristr, char *restrict_scheme)
return NULL;
parse_ok:
- if (restrict_scheme != NULL && strcasecmp(restrict_scheme, scheme) != 0)
- return NULL;
-
scheme_len = strlen(scheme);
hostname_len = strlen(hostname);
path_len = strlen(path);
@@ -153,7 +150,7 @@ build_relative_uri(struct URI *uri, char *relative, si
}
str[slen] = '\0';
- return parse_uri(str, NULL);
+ return parse_uri(str);
}
/* http://a.b/c + /d/e -> http://a.b/d/e */
@@ -168,7 +165,7 @@ build_relative_uri(struct URI *uri, char *relative, si
}
str[slen] = '\0';
- return parse_uri(str, NULL);
+ return parse_uri(str);
}
for (n = 0; n <= len; n++) {
@@ -180,7 +177,7 @@ build_relative_uri(struct URI *uri, char *relative, si
len = sizeof(str) - 1;
memcpy(str, relative, len);
str[len] = '\0';
- return parse_uri(str, NULL);
+ return parse_uri(str);
}
/* http://a.b/c/d.html + e/f.html -> http://a.b/c/e/f.html */
@@ -205,13 +202,13 @@ build_relative_uri(struct URI *uri, char *relative, si
path[plen + len] = '\0';
snprintf(str, sizeof(str), "%s://%s%s", uri->scheme,
uri->hostname, path);
- return parse_uri(str, NULL);
+ return parse_uri(str);
}
}
/* what the heck is this? */
Debugger();
- return parse_uri(relative, NULL);
+ return parse_uri(relative);
}
struct request *
--- request.h Mon Nov 11 12:05:52 2024
+++ request.h Mon Nov 11 23:17:18 2024
@@ -54,6 +54,7 @@ struct request {
char *output;
unsigned long output_len;
+ unsigned long output_pos;
TCPiopb iopb;
StreamPtr stream;
@@ -73,7 +74,7 @@ struct request {
struct browser *browser;
};
-struct URI * parse_uri(char *uristr, char *restrict_scheme);
+struct URI * parse_uri(char *uristr);
struct URI * build_relative_uri(struct URI *uri, char *relative, size_t len);
/*