AmendHub

Download:

jcs

/

detritus

/

amendments

/

35

request: Rewrite parse_uri, add URI.query, loop request_data_shuffle

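Returning to the idle handler for every back-and-forth of a request is slow,
so request_data_shuffle now loops tightly until there is nothing left to do
or we detect mouse or keyboard input from the user (Command-period cancels
the request; any other event is left for the main event loop to handle).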

jcs made amendment 35 about 1 year ago
--- request.c Mon Nov 11 23:23:29 2024 +++ request.c Fri Nov 15 13:53:58 2024 @@ -26,6 +26,16 @@ enum { REQ_STATE_PARSING_RESPONSE }; +struct default_port { + char *scheme; + unsigned short port; +} default_ports[] = { + { "finger", FINGER_PORT }, + { "gemini", GEMINI_PORT }, + { "http", HTTP_PORT }, + { "https", HTTPS_PORT }, +}; + /* TCP functions */ bool request_tcp_connect(struct request *request); bool request_tcp_send(struct request *request, char *data, size_t len); @@ -35,74 +45,199 @@ ssize_t request_tcp_read(struct request *request, char bool request_tls_init(struct request *request); void request_tls_cleanup(struct tls_request *request); -bool request_tls_read_tls_ciphertext(struct request *request); -bool request_tls_read_tcp_ciphertext(struct request *request, short space); -bool request_tls_send_plaintext(struct request *request, +ssize_t request_tls_read_tls_ciphertext(struct request *request); +ssize_t request_tls_read_tcp_ciphertext(struct request *request, + short space); +ssize_t request_tls_send_plaintext(struct request *request, size_t space, request_data_queuer queuer, void *cookie); -bool request_tls_read_plaintext(struct request *request, +ssize_t request_tls_read_plaintext(struct request *request, request_data_consumer consumer, void *cookie); +enum { + URI_STATE_SCHEME, + URI_STATE_HOSTNAME, + URI_STATE_PORT, + URI_STATE_PATH, + URI_STATE_QUERY +}; + struct URI * parse_uri(char *uristr) { + register char c; static char scheme[URI_MAX_SCHEME_LEN + 1]; static char hostname[URI_MAX_HOSTNAME_LEN + 1]; + static char sport[URI_MAX_PORT_LEN + 1]; static char path[URI_MAX_PATH_LEN + 1]; + static char query[URI_MAX_QUERY_LEN + 1]; static char str[URI_MAX_STR_LEN + 1]; struct URI *uri; char *data; - size_t scheme_len, hostname_len, path_len, str_len, size; - short count; + size_t n, uri_len, scheme_len, hostname_len, sport_len, path_len, + query_len, str_len, cat_len, size; + short state, count, pos; + long lport; unsigned short port; /* TODO: 
handle host:port */ /* TODO: handle //user:pass@host */ - /* TODO: split path at ? and store query */ - /* scheme://host/path */ - if (count = 0, sscanf(uristr, - "%" STR(URI_MAX_SCHEME_LEN) "[^:]://%" - STR(URI_MAX_HOSTNAME_LEN) "[^/]%" - STR(URI_MAX_PATH_LEN) "[ -~]%n", /* %s stops at whitespace, use all vis */ - &scheme, &hostname, &path, &count) == 3 && count > 10) - goto parse_ok; + uri_len = strlen(uristr); + scheme[0] = '\0'; + hostname[0] = '\0'; + port = 0; + sport[0] = '\0'; + path[0] = '\0'; + query[0] = '\0'; + str[0] = '\0'; + state = URI_STATE_SCHEME; + pos = 0; - /* scheme://host/ */ - if (count = 0, sscanf(uristr, - "%" STR(URI_MAX_SCHEME_LEN) "[^:]://%" - STR(URI_MAX_HOSTNAME_LEN) "[^/]/%n", - &scheme, &hostname, &count) == 2 && count > 10) { - path[0] = '/'; - path[1] = '\0'; - goto parse_ok; + for (n = 0; n < uri_len; n++) { + c = uristr[n]; + + switch (state) { + case URI_STATE_SCHEME: + if (pos == sizeof(scheme)) + return NULL; + if (c == ':') { + if (uristr[n + 1] != '/' || uristr[n + 2] != '/') + /* TODO: support "mailto:" type URIs? */ + return NULL; + scheme[pos] = '\0'; + pos = 0; + n += 2; + state = URI_STATE_HOSTNAME; + break; + } + if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { + scheme[pos++] = c; + scheme[pos] = '\0'; + } else + return NULL; + break; + case URI_STATE_HOSTNAME: + if (pos == sizeof(hostname)) + return NULL; + if (c == '/' || c == '?') { + hostname[pos] = '\0'; + pos = 0; + if (c == '?') { + state = URI_STATE_QUERY; + n++; + } else { + state = URI_STATE_PATH; + path[0] = '/'; + path[1] = '\0'; + pos = 1; + } + break; + } + if (c == ':') { + hostname[pos] = '\0'; + pos = 0; + state = URI_STATE_PORT; + break; + } + if ((c >= '0' && c <= '9') || c == '.' || + (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || + ((c == '.' 
|| c == '-') && pos != 0)) { + hostname[pos++] = c; + hostname[pos] = '\0'; + } else + return NULL; + break; + case URI_STATE_PORT: + if (pos == sizeof(sport)) + return NULL; + if (c == '/' || c == '?') { + sport[pos] = '\0'; + pos = 0; + if (c == '?') + state = URI_STATE_QUERY; + else { + state = URI_STATE_PATH; + path[0] = '/'; + path[1] = '\0'; + pos = 1; + } + break; + } + if (c >= '0' && c <= '9') { + sport[pos++] = c; + sport[pos] = '\0'; + } else + return NULL; + break; + case URI_STATE_PATH: + if (pos == sizeof(path)) + return NULL; + if (c == '?') { + path[pos] = '\0'; + pos = 0; + state = URI_STATE_QUERY; + break; + } + /* XXX: should we encode non-ascii here? */ + path[pos++] = c; + path[pos] = '\0'; + break; + case URI_STATE_QUERY: + if (pos == sizeof(path)) + return NULL; + query[pos++] = c; + query[pos] = '\0'; + break; + } } - /* gemini://host */ - if (count = 0, sscanf(uristr, - "%" STR(URI_MAX_SCHEME_LEN) "[^:]://%" - STR(URI_MAX_HOSTNAME_LEN) "[^/]%n", - &scheme, &hostname, &count) == 2 && count > 10) { + if (state == URI_STATE_SCHEME) + return NULL; + + if (hostname[0] == '\0') + return NULL; + + if (sport[0]) { + lport = atol(sport); + if (lport == 0 || lport > USHRT_MAX) + return NULL; + port = lport; + } + + /* if the uri has a port but it's the default for the scheme, drop it */ + for (n = 0; n < sizeof(default_ports) / sizeof(default_ports[0]); n++) { + if (strcasecmp(default_ports[n].scheme, scheme) == 0) { + if (port == default_ports[n].port) { + port = 0; + sport[0] = '\0'; + } + break; + } + } + + if (path[0] == '\0') { path[0] = '/'; path[1] = '\0'; - goto parse_ok; } - /* failed */ - return NULL; - -parse_ok: scheme_len = strlen(scheme); hostname_len = strlen(hostname); + sport_len = strlen(sport); path_len = strlen(path); + query_len = strlen(query); str_len = scheme_len + 3 + hostname_len + path_len; - + if (port) + str_len += 1 + sport_len; + if (query_len) + str_len += 1 + query_len; + size = sizeof(struct URI) + scheme_len + 1 + 
hostname_len + 1 + - path_len + 1 + str_len + 1; + path_len + 1 + query_len + 1 + str_len + 1; uri = xmalloc(size); if (uri == NULL) return NULL; - uri->port = 0; + uri->port = port; data = (char *)uri + sizeof(struct URI); @@ -115,18 +250,32 @@ parse_ok: memcpy(uri->hostname, hostname, hostname_len); uri->hostname[hostname_len] = '\0'; data += hostname_len + 1; - + uri->path = data; memcpy(uri->path, path, path_len); uri->path[path_len] = '\0'; data += path_len + 1; + + uri->query = data; + memcpy(uri->query, query, query_len); + uri->query[query_len] = '\0'; + data += query_len + 1; uri->str = data; - data += snprintf(uri->str, str_len + 1, "%s://%s%s", - uri->scheme, uri->hostname, uri->path); - data++; + cat_len = strlcpy(data, uri->scheme, str_len + 1); + cat_len = strlcat(data, "://", str_len + 1); + cat_len = strlcat(data, uri->hostname, str_len + 1); + if (port) { + cat_len = strlcat(data, ":", str_len + 1); + cat_len = strlcat(data, sport, str_len + 1); + } + cat_len = strlcat(data, path, str_len + 1); + if (query_len) { + cat_len = strlcat(data, "?", str_len + 1); + cat_len = strlcat(data, query, str_len + 1); + } - if (data > (char *)uri + size) + if (cat_len > str_len) panic("URI overflow"); return uri; @@ -137,49 +286,67 @@ build_relative_uri(struct URI *uri, char *relative, si { static char path[URI_MAX_PATH_LEN + 1]; static char str[URI_MAX_STR_LEN + 1]; - size_t slen, plen, n; - + size_t plen, n; + ssize_t slen; + /* http://a.b/c + //d/e -> http://d/e */ if (relative[0] == '/' && relative[1] == '/') { - /* retain scheme, new host and path */ - slen = snprintf(str, sizeof(str), "%s://", uri->scheme); - while (slen < sizeof(str) - 1 && len) { - str[slen - 1] = *relative++; - slen++; - len--; + /* retain scheme, new host, port, path, query */ + slen = snprintf(str, sizeof(str), "%s:", uri->scheme); + if (len > sizeof(str) - slen - 1) + return NULL; + memcpy(str + slen, relative, len); + str[slen + len] = '\0'; + return parse_uri(str); + } + + /* if we 
can find :// before /, this looks like a whole new uri */ + if (relative[0] != '/' && relative[0] != '?') { + for (n = 0; n <= len; n++) { + if (relative[n] == ':' && relative[n + 1] == '/' && + relative[n + 2] == '/') { + /* scheme, not relative */ + if (len > sizeof(str) - 1) + return NULL; + memcpy(str, relative, len); + str[len] = '\0'; + return parse_uri(str); + } + + if (relative[n] == '/') + break; } - str[slen] = '\0'; - + } + + /* retain scheme, host, port, any following will set new path/query */ + slen = snprintf(str, sizeof(str), "%s://%s", uri->scheme, + uri->hostname); + if (uri->port) + slen += snprintf(str + slen, sizeof(str) - slen, ":%d", uri->port); + if (len > sizeof(str) - slen - 1) + return NULL; + + /* http://a.b/c + ?goose -> http://a.b/c?goose */ + if (relative[0] == '?') { + slen = strlcat(str, uri->path, sizeof(str)); + if (len + 1 > sizeof(str) - slen - 1) + return NULL; + memcpy(str + slen, relative, len); + str[slen + len] = '\0'; return parse_uri(str); } /* http://a.b/c + /d/e -> http://a.b/d/e */ if (relative[0] == '/') { - /* retain scheme and host, new path */ - slen = snprintf(str, sizeof(str), "%s://%s", uri->scheme, - uri->hostname); - while (slen < sizeof(str) - 1 && len) { - str[slen] = *relative++; - slen++; - len--; - } - str[slen] = '\0'; - + /* new path and query */ + if (len > sizeof(str) - slen - 1) + return NULL; + memcpy(str + slen, relative, len); + str[slen + len] = '\0'; return parse_uri(str); } for (n = 0; n <= len; n++) { - /* http://a.b/c + gemini://d/e -> gemini://d/e */ - if (n < len - 2 && relative[n] == ':' && relative[n + 1] == '/' && - relative[n + 2] == '/') { - /* scheme found, this isn't relative */ - if (len >= sizeof(str)) - len = sizeof(str) - 1; - memcpy(str, relative, len); - str[len] = '\0'; - return parse_uri(str); - } - /* http://a.b/c/d.html + e/f.html -> http://a.b/c/e/f.html */ if (relative[n] == '/' || n == len) { /* remove last component in uri path up to slash */ @@ -211,6 +378,46 @@ 
build_relative_uri(struct URI *uri, char *relative, si return parse_uri(relative); } +char * +uri_encode(unsigned char *str) +{ + char *ret = NULL; + size_t len, n; + bool encode = false; + char a, b; + +encode: + for (n = 0, len = 0; str[n] != '\0'; n++) { + if ((str[n] >= 'A' && str[n] <= 'Z') || + (str[n] >= 'a' && str[n] <= 'z') || + (str[n] >= '0' && str[n] <= '9') || + (str[n] == '-' || str[n] == '_' || str[n] == '.' || + str[n] == '~')) { + if (ret) + ret[len] = str[n]; + len++; + } else { + if (ret) { + sprintf(ret + len, "%%%02X", str[n]); + } + len += 3; + } + } + + if (ret) { + ret[len] = '\0'; + return ret; + } + + ret = xmalloc(len + 1); + if (ret == NULL) { + warn("Failed allocating %ld", len + 1); + return NULL; + } + len = 0; + goto encode; +} + struct request * request_connect(struct browser *browser, char *hostname, unsigned short port, bool tls, unsigned char tls_flags) @@ -226,14 +433,14 @@ request_connect(struct browser *browser, char *hostnam request->port = port; if (!request_tcp_connect(request)) { - xfree(&request); + request_xfree(&request); return NULL; } if (tls) { request->tls_flags = tls_flags; if (!request_tls_init(request)) { - xfree(&request); + request_xfree(&request); return NULL; } } @@ -269,106 +476,118 @@ request_data_shuffle(struct request *request, request_ ssize_t slen; char *data; short status, cipherspace, plainspace, tls_error; - bool final; + EventRecord event; - if (request->tls_id) { - status = scsi_tls_status(request->tls_id, &cipherspace, &plainspace, - &tls_error); + do { + if (request->tls_id) { + status = scsi_tls_status(request->tls_id, &cipherspace, + &plainspace, &tls_error); - final = false; - while (status != 0) { - if (status & 0x1) { - /* closed */ - if (final) { - if (tls_error != 0) - browser_statusf(request->browser, - "Error: TLS handshake failed: %d (TLS status " - "0x%x)", tls_error, status); - return false; - } - final = true; - } - - if ((status & 0x10) || final) { + if ((status & 0x10) || (status & 
0x1)) { /* tls has plaintext data for us */ - if (!request_tls_read_plaintext(request, consumer, - consumer_cookie)) + slen = request_tls_read_plaintext(request, consumer, + consumer_cookie); + if (slen < 0) return false; - status &= ~0x10; + if (slen == 0) + status &= ~0x10; } if (status & 0x2) { /* tls has ciphertext for tcp */ - if (!request_tls_read_tls_ciphertext(request)) + slen = request_tls_read_tls_ciphertext(request); + if (slen < 0) return false; - status &= ~0x2; + if (slen == 0) + status &= ~0x2; } if (status & 0x8) { /* tls can read plaintext from us */ - if (!request_tls_send_plaintext(request, plainspace, - queuer, queuer_cookie)) + slen = request_tls_send_plaintext(request, plainspace, + queuer, queuer_cookie); + if (slen < 0) return false; - status &= ~0x8; + if (slen == 0) + status &= ~0x8; } if (status & 0x4) { /* tls can read ciphertext from tcp */ - if (!request_tls_read_tcp_ciphertext(request, cipherspace)) + slen = request_tls_read_tcp_ciphertext(request, + cipherspace); + if (slen < 0) return false; - status &= ~0x4; + if (slen == 0) + status &= ~0x4; } - if (final) - continue; - - if (status) { - browser_statusf(request->browser, - "Error: TLS status is 0x%x?", status); + /* only do this when we have no other statuses */ + if (status == 0x1) { + /* closed */ + if (tls_error != 0) + browser_statusf(request->browser, + "Error: TLS handshake failed: %d (TLS status " + "0x%x)", tls_error, status); return false; } - } - } else { - /* let the caller send out anything it has */ - if (!queuer(request, queuer_cookie, &data, &len, false)) - return false; + } else { + status = 0; - if (len > 0 && data != NULL) { - if (!request_tcp_send(request, data, len)) + /* let the caller send out anything it has */ + if (!queuer(request, queuer_cookie, &data, &len, false)) return false; + + if (len > 0 && data != NULL) { + status = 1; + if (!request_tcp_send(request, data, len)) + return false; - /* inform that we wrote len bytes */ - if (!queuer(request, 
queuer_cookie, NULL, &len, true)) - return false; - } - - /* receive data and send it to the consumer */ - slen = request_tcp_avail(request); - if (slen > 0) { - data = NULL; + /* inform that we wrote len bytes */ + if (!queuer(request, queuer_cookie, NULL, &len, true)) + return false; + } - /* get the consumer's buf and make sure it can handle slen */ - len = slen; - if (!consumer(request, consumer_cookie, &data, &len, false)) - return false; - - /* read into their buf */ - slen = request_tcp_read(request, data, len); + /* receive data and send it to the consumer */ + slen = request_tcp_avail(request); if (slen < 0) return false; if (slen > 0) { - /* and let them know we read it */ + status = 1; + + /* get the consumer's buf to make sure it can handle slen */ + data = NULL; len = slen; - if (!consumer(request, consumer_cookie, &data, &len, true)) + if (!consumer(request, consumer_cookie, &data, &len, false)) return false; + + /* read into their buf */ + slen = request_tcp_read(request, data, len); + if (slen < 0) + return false; + if (slen > 0) { + /* and let them know we read it */ + len = slen; + if (!consumer(request, consumer_cookie, &data, &len, + true)) + return false; + } } - } else if (slen < 0) + } + + if (request->tcp_done_reading && request->input_len == 0) return false; - } + + if (CommandPeriodPressed()) { + browser_statusf(request->browser, "Request canceled"); + return false; + } + + /* if the user did anything else, let main event loop handle it */ + if (GetNextEvent(everyEvent & ~updateMask, &event)) + break; + } while (status != 0); - if (request->tcp_done_reading && request->input_len == 0) - return false; - return true; } @@ -430,7 +649,7 @@ request_tcp_avail(struct request *request) { short err; - if (request->iopb.ioResult > 0 || CommandPeriodPressed()) { + if (request->iopb.ioResult > 0) { BROWSER_DEBUGF((request->browser, "TCP I/O Result %d, disconnecting", request->iopb.ioResult)); return -1; @@ -532,7 +751,7 @@ request_tls_init(struct 
request *request) return true; } -bool +ssize_t request_tls_read_tls_ciphertext(struct request *request) { size_t len; @@ -541,21 +760,20 @@ request_tls_read_tls_ciphertext(struct request *reques /* read ciphertext from TLS and send it out TCP */ if (request->tcp_done_reading) - return true; + return 0; if (request_tcp_avail(request) < 0) { request->tcp_done_reading = true; - return true; + return 0; } /* this will point buf to scsi's static buffer */ buf = NULL; len = scsi_tls_read(request->tls_id, &buf, 0, true); - if (len == 0 || buf == NULL) { - browser_statusf(request->browser, - "Error: No ciphertext read from TLS when expected to"); - return false; - } + if (len == 0) + return 0; + if (buf == NULL) + return -1; BROWSER_DEBUGF((request->browser, "Read %lu bytes of TLS ciphertext, forwarding to TCP", len)); @@ -563,10 +781,10 @@ request_tls_read_tls_ciphertext(struct request *reques /* result ignored? */ request_tcp_send(request, (char *)buf, len); - return true; + return len; } -bool +ssize_t request_tls_read_tcp_ciphertext(struct request *request, short space) { size_t len, n; @@ -603,7 +821,7 @@ request_tls_read_tcp_ciphertext(struct request *reques forward_ciphertext: if (!request->input_len || !space) - return true; + return 0; slen = MIN(request->input_len, space); BROWSER_DEBUGF((request->browser, @@ -613,7 +831,7 @@ forward_ciphertext: if (len == 0) { browser_statusf(request->browser, "Error: Failed forwarding %ld bytes of ciphertext to TLS", slen); - return false; + return -1; } if (len == request->input_len) @@ -626,10 +844,10 @@ forward_ciphertext: BROWSER_DEBUGF((request->browser, "Wrote %ld bytes of TCP ciphertext to TLS, %ld left", len, request->input_len)); - return true; + return len; } -bool +ssize_t request_tls_send_plaintext(struct request *request, size_t space, request_data_queuer queuer, void *cookie) { @@ -642,9 +860,9 @@ request_tls_send_plaintext(struct request *request, si /* queuer will set data and len to message content */ len = 
space; if (!queuer(request, cookie, &data, &len, false)) - return false; + return -1; if (len == 0 || data == NULL) - return true; + return 0; if (request->tls_state == REQ_STATE_NEGOTIATING) request->tls_state = REQ_STATE_SENDING_REQUEST; @@ -652,7 +870,7 @@ request_tls_send_plaintext(struct request *request, si browser_statusf(request->browser, "Error: bogus state (%d) instead of SENDING_REQUEST, " "disconnecting", request->tls_state); - return false; + return -1; } olen = MIN(len, space); @@ -661,19 +879,19 @@ request_tls_send_plaintext(struct request *request, si if (!len) { browser_statusf(request->browser, "Error: Failed sending %ld bytes of plaintext to TLS", olen); - return false; + return -1; } /* inform that we wrote len bytes */ if (!queuer(request, cookie, NULL, &len, true)) - return false; + return -1; BROWSER_DEBUGF((request->browser, "Wrote %ld bytes of plaintext to TLS", len)); - return true; + return len; } -bool +ssize_t request_tls_read_plaintext(struct request *request, request_data_consumer consumer, void *cookie) { @@ -684,14 +902,14 @@ request_tls_read_plaintext(struct request *request, len = 1024; if (!consumer(request, cookie, &buf, &len, false)) - return false; + return -1; len = scsi_tls_read(request->tls_id, (unsigned char **)&buf, len, false); if (len == 0) - return true; + return 0; if (!consumer(request, cookie, &buf, &len, true)) - return false; + return -1; - return true; + return len; } --- request.h Mon Nov 11 23:17:18 2024 +++ request.h Thu Nov 14 23:00:22 2024 @@ -25,24 +25,28 @@ #include "browser.h" #include "tcp.h" -extern uint8_t tls_req_last_id; +#define GOPHER_PORT 70 +#define FINGER_PORT 79 +#define HTTP_PORT 80 +#define HTTPS_PORT 443 +#define GEMINI_PORT 1965 -struct tls_init_request { - uint8_t flags[2]; -#define BLUESCSI_TLS_INIT_REQUEST_FLAG_NO_VERIFY (1 << 0) - uint8_t unix_time[4]; - char hostname[256]; -}; +#define URI_MAX_SCHEME_LEN 20 +#define URI_MAX_HOSTNAME_LEN 255 +#define URI_MAX_PORT_LEN 5 +#define 
URI_MAX_PATH_LEN 512 +#define URI_MAX_QUERY_LEN 512 +#define URI_MAX_STR_LEN (URI_MAX_SCHEME_LEN + 3 + \ + URI_MAX_HOSTNAME_LEN + \ + 1 + URI_MAX_PORT_LEN + \ + URI_MAX_PATH_LEN + \ + 1 + URI_MAX_QUERY_LEN) struct URI { -#define URI_MAX_SCHEME_LEN 20 char *scheme; -#define URI_MAX_HOSTNAME_LEN 255 char *hostname; -#define URI_MAX_PATH_LEN 512 char *path; -#define URI_MAX_STR_LEN (URI_MAX_SCHEME_LEN + 3 + URI_MAX_HOSTNAME_LEN + \ - URI_MAX_PATH_LEN) + char *query; char *str; unsigned short port; }; @@ -74,8 +78,18 @@ struct request { struct browser *browser; }; +extern uint8_t tls_req_last_id; + +struct tls_init_request { + uint8_t flags[2]; +#define BLUESCSI_TLS_INIT_REQUEST_FLAG_NO_VERIFY (1 << 0) + uint8_t unix_time[4]; + char hostname[256]; +}; + struct URI * parse_uri(char *uristr); struct URI * build_relative_uri(struct URI *uri, char *relative, size_t len); +char * uri_encode(unsigned char *str); /* * queuer is called with wrote=false to set buf and len, then data is