AmendHub

Download:

jcs / detritus / amendments / 44

http: Claim to be a Mozilla/5.0 browser for... reasons


jcs made amendment 44 about 1 year ago
--- http.c	Fri Nov 15 11:43:32 2024
+++ http.c	Wed Nov 20 13:35:04 2024
@@ -28,6 +28,8 @@ enum {
 	PARSE_STATE_DOWNLOAD
 };
 
+extern bool print_html(struct page *page);
+
 bool http_accept_uri(struct URI *uri);
 bool http_request_init(page_handle pageh);
 bool http_process(page_handle pageh);
@@ -76,8 +78,10 @@ http_request_init(page_handle pageh)
 	    HTTP_REQUEST_BUF_SIZE,
 	    "GET %s HTTP/1.0\r\n"
 	    "Host: %s\r\n"
-	    "User-Agent: curl/8.10.1 (%s %s)\r\n"
-	    "Accept: text/markdown,text/html;q=0.9,*/*;q=0.8\r\n"
+	    "User-Agent: Mozilla/5.0 (Macintosh) %s/%s\r\n"
+	    "Accept-Language: en-US,en;q=0.5\r\n"
+	    "Accept: text/html;q=0.9,*/*;q=0.8\r\n"
+	    "Connection: close\r\n"
 	    "\r\n",
 	    page->uri->path,
 	    page->uri->hostname,
@@ -108,7 +112,7 @@ http_process(page_handle pageh)
 	size_t n;
 
 	if (page->content_pos == page->content_len)
-		return (page->request != NULL);
+		return PAGE_CAN_READ_MORE(page);
 
 	if (page->parse_state == PARSE_STATE_HEADERS) {
 		line = page->content + page->content_pos;
@@ -119,19 +123,16 @@ http_process(page_handle pageh)
 				continue;
 
 			if (strncasecmp(line, "HTTP/1.0 ", 9) == 0 ||
-			    strncasecmp(line, "HTTP/1.1 ", 9) == 0) {
+			    strncasecmp(line, "HTTP/1.1 ", 9) == 0)
 				page->server_status = atoi(line + 9);
-			}
 			else if (strncasecmp(line, "content-type: ", 14) == 0) {
 				/* "application/octet-stream" or "text/html; charset..." */
 				memcpy(page->content_type, line + 14,
 				    MIN(sizeof(page->content_type),
 				    n - page->content_pos - 2));
 				page->content_type[sizeof(page->content_type) - 1] = '\0';
-			}
-			else if (strncasecmp(line, "content-length: ", 16) == 0) {
+			} else if (strncasecmp(line, "content-length: ", 16) == 0)
 				page->server_content_len = atol(line + 16);
-			}
 			else if (line[0] == '\r' && line[1] == '\n') {
 				/* \r\n on a line by itself */
 				page->header_len = n + 1;
@@ -139,7 +140,7 @@ http_process(page_handle pageh)
 				/* TODO: if status is not 200, bail */
 
 				if (strncasecmp(page->content_type, "text/plain", 10) == 0 ||
-				    strncasecmp(page->content_type, "text/markdown", 13) == 0) {
+				    strncasecmp(page->content_type, "text/html", 9) == 0) {
 					page->parse_state = PARSE_STATE_BODY;
 					browser_commit_to_loading_page(page->browser);
 				} else {
@@ -167,19 +168,12 @@ http_process(page_handle pageh)
 	if (page->parse_state != PARSE_STATE_BODY)
 		return true;
 
-	if (strncasecmp(page->content_type, "text/plain", 10) == 0)
+	if (strncasecmp(page->content_type, "text/html", 9) == 0)
+		print_html(page);
+	else
 		print_plaintext(page);
-	else if (strncasecmp(page->content_type, "text/markdown", 13) == 0)
-		; /* TODO: print_markdown(page); */
 
-	if (page->request == NULL && page->content_pos < page->content_len) {
-		browser_print(page->browser, page->content + page->content_pos,
-		    page->content_len - page->content_pos, false);
-		page->content_pos = page->content_len;
-	}
-
-	if (page->content_pos == page->content_len)
-		return (page->request != NULL);
+	return PAGE_CAN_READ_MORE(page);
 }
 
 void
@@ -200,7 +194,7 @@ print_plaintext(struct page *page)
 
 	for (n = page->content_pos; n < page->content_len; n++) {
 		if (page->content[n] != '\n' &&
-		    !(n == page->content_len - 1 && page->request == NULL))
+		    !(n == page->content_len - 1 && !PAGE_CAN_READ_MORE(page)))
 			continue;
 
 		len = n - page->content_pos + 1;
@@ -228,4 +222,9 @@ print_line:
 
 		page->content_pos += skip + len + trail;
 	}
+
+	if (!PAGE_CAN_READ_MORE(page) && page->content_pos < page->content_len)
+		browser_print(page->browser,
+		    page->content + page->content_pos,
+		    page->content_len - page->content_pos, false);
 }
\ No newline at end of file