AmendHub

Download

jcs

/

detritus

/

http.c

 

(View History)

jcs   html: Put all of this behind HTML_ENABLE Latest amendment: 68 on 2025-03-04

1 /*
2 * Copyright (c) 2024 joshua stein <jcs@jcs.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 #include <stdarg.h>
18 #include <stdio.h>
19 #include <string.h>
20
21 #include "detritus.h"
22 #include "html.h"
23
/* size of the buffer the outgoing GET request is formatted into */
#define HTTP_REQUEST_BUF_SIZE 512

/* values stored in page->parse_state while a response is being handled */
enum {
	/* still reading status/header lines */
	PARSE_STATE_HEADERS = 0,
	/* headers done; body is rendered as plain text (or HTML) */
	PARSE_STATE_BODY = 1,
	/* headers done; body was handed to browser_start_download() */
	PARSE_STATE_DOWNLOAD = 2
};
31
/*
 * Per-page HTTP state.  When HTML support is compiled out the struct
 * only carries a placeholder member so that it is never empty.
 */
struct http_page {
#ifndef HTML_ENABLE
	void *filler;
#else
	struct html_page *html;
#endif
};
39
40 bool http_accept_uri(struct URI *uri);
41 bool http_request_init(page_handle pageh);
42 bool http_process(page_handle pageh);
43 void http_update(page_handle pageh);
44 void http_reset(page_handle pageh);
45 void http_free(page_handle pageh);
46
47 static void print_plaintext(struct page *page);
48 #ifdef HTML_ENABLE
49 bool html_parse_page(page_handle pageh);
50 void html_compute_style(struct page *page, struct html_page *html);
51 #endif
52
53 struct page_handler http_handler = {
54 http_accept_uri,
55 http_request_init,
56 page_queue_output,
57 page_consume_data,
58 page_request_cleanup,
59 http_process,
60 http_update,
61 http_reset,
62 http_free
63 };
64
65 bool
66 http_accept_uri(struct URI *uri)
67 {
68 return (strcasecmp(uri->scheme, "https") == 0 ||
69 strcasecmp(uri->scheme, "http") == 0);
70 }
71
72 bool
73 http_request_init(page_handle pageh)
74 {
75 struct page *page = *pageh;
76 char *output, *mac;
77 size_t output_len;
78 bool is_tls;
79
80 is_tls = (strcasecmp(page->uri->scheme, "https") == 0);
81
82 if (page->uri->port == 0)
83 page->uri->port = (is_tls ? HTTPS_PORT : HTTP_PORT);
84
85 output = xmalloc(HTTP_REQUEST_BUF_SIZE);
86 if (output == NULL) {
87 warn("Out of memory");
88 return false;
89 }
90
91 mac = gestalt_machine_type();
92 output_len = snprintf(output,
93 HTTP_REQUEST_BUF_SIZE,
94 "GET %s HTTP/1.0\r\n" /* 1.1 may get chunked responses we can't grok */
95 "Host: %s\r\n"
96 "User-Agent: Mozilla/5.0 (%s%s) %s/%s\r\n"
97 "Accept: %s*/*;q=0.8\r\n"
98 "Accept-Language: en-US,en;q=0.5\r\n"
99 "Connection: close\r\n"
100 "\r\n",
101 page->uri->path,
102 page->uri->hostname,
103 mac ? "Macintosh " : "Unknown Macintosh",
104 mac ? mac : "",
105 PROGRAM_NAME, get_version(false),
106 #ifdef HTML_ENABLE
107 "text/html;q=0.9,"
108 #else
109 ""
110 #endif
111 );
112
113 if (output_len >= HTTP_REQUEST_BUF_SIZE) {
114 warn("Not enough room for HTTP request (%ld)", output_len);
115 return false;
116 }
117
118 page->request = request_connect(page->browser, page->uri->hostname,
119 page->uri->port, is_tls, 0);
120 if (page->request == NULL) {
121 xfree(&output);
122 return false;
123 }
124 page->request->output_len = output_len;
125 page->request->output = output;
126
127 return true;
128 }
129
130 static bool
131 http_process(page_handle pageh)
132 {
133 struct page *page = *pageh;
134 char c, *filename, *line;
135 size_t n;
136
137 if (page->content_pos == page->content_len)
138 return PAGE_CAN_READ_MORE(page);
139
140 if (page->parse_state == PARSE_STATE_HEADERS) {
141 line = page->content + page->content_pos;
142
143 for (n = page->content_pos; n < page->content_len; n++) {
144 c = page->content[n];
145 if (!(c == '\n' && n && page->content[n - 1] == '\r'))
146 continue;
147
148 if (strncasecmp(line, "HTTP/1.0 ", 9) == 0 ||
149 strncasecmp(line, "HTTP/1.1 ", 9) == 0) {
150 page->server_status = atoi(line + 9);
151 } else if (strncasecmp(line, "content-type: ", 14) == 0) {
152 /* "application/octet-stream" or "text/html; charset..." */
153 memcpy(page->content_type, line + 14,
154 MIN(sizeof(page->content_type),
155 n - page->content_pos - 2));
156 page->content_type[sizeof(page->content_type) - 1] = '\0';
157 } else if (strncasecmp(line, "content-length: ", 16) == 0) {
158 page->server_content_len = atol(line + 16);
159 } else if (strncasecmp(line, "location: ", 10) == 0 &&
160 (page->server_status == 301 || page->server_status == 302 ||
161 page->server_status == 307)) {
162 /* TODO: for 307, preserve POST method */
163 page->redir_to = build_relative_uri(page->uri, line + 10,
164 n - page->content_pos - 10 - 1);
165 if (page->redir_to == NULL)
166 browser_statusf(page->browser, "Error: Out of memory");
167 return false;
168 } else if (line[0] == '\r' && line[1] == '\n') {
169 /* \r\n on a line by itself */
170 page->header_len = n + 2;
171
172 /* TODO: if status is not 200, bail */
173
174 if (strncasecmp(page->content_type, "text/plain", 10) == 0
175 #ifdef HTML_ENABLE
176 || strncasecmp(page->content_type, "text/html", 9) == 0
177 #endif
178 ) {
179 page->parse_state = PARSE_STATE_BODY;
180 browser_commit_to_loading_page(page->browser);
181 TVTabStop(page->browser->output_tv, 28);
182 } else {
183 page->parse_state = PARSE_STATE_DOWNLOAD;
184
185 filename = strrchr(page->uri->path, '/');
186 if (filename && filename[0] == '/')
187 filename++;
188
189 if (!browser_start_download(page->browser, filename,
190 page->content + page->header_len,
191 page->content_len - page->header_len))
192 return false;
193 }
194 }
195
196 page->content_pos = n + 1;
197 line = page->content + page->content_pos;
198
199 if (page->parse_state != PARSE_STATE_HEADERS)
200 break;
201 }
202 }
203
204 if (page->parse_state != PARSE_STATE_BODY)
205 return true;
206
207 #ifdef HTML_ENABLE
208 if (strncasecmp(page->content_type, "text/html", 9) == 0) {
209 html_parse_page(pageh);
210 return PAGE_CAN_READ_MORE(page);
211 }
212 #endif
213
214 return page_print_plaintext(pageh);
215 }
216
217 #ifdef HTML_ENABLE
218 bool
219 html_parse_page(page_handle pageh)
220 {
221 struct page *page = *pageh;
222 struct html_page *html;
223 size_t len;
224
225 if (page->handler_cookie == NULL) {
226 html = html_init_page(pageh);
227 if (html == NULL) {
228 warn("Out of memory");
229 return false;
230 }
231 html->ignore_script_data = true;
232 html->ignore_comment_data = true;
233 page->handler_cookie = html;
234 } else
235 html = (struct html_page *)page->handler_cookie;
236
237 len = page->content_len - page->content_pos;
238 if (len) {
239 html_parse(html, page->content + page->content_pos, len);
240 TVUpdateScrollbar(page->browser->output_tv,
241 page->browser->output_tv_scroller);
242 page->content_pos += len;
243 return true;
244 }
245
246 if (PAGE_CAN_READ_MORE(page))
247 return true;
248
249 html_page_finish(&html);
250 return false;
251 }
252 #endif
253
254 void
255 http_reset(page_handle pageh)
256 {
257 struct page *page = *pageh;
258
259 /* restart at body */
260 page->parse_state = PARSE_STATE_BODY;
261 page->content_pos = page->header_len;
262
263 #ifdef HTML_ENABLE
264 if (page->handler_cookie != NULL)
265 html_xfree((struct html_page **)&page->handler_cookie);
266 #endif
267 }
268
269 void
270 http_free(page_handle pageh)
271 {
272 struct page *page = *pageh;
273
274 #ifdef HTML_ENABLE
275 if (page->handler_cookie != NULL)
276 html_xfree((struct html_page **)&page->handler_cookie);
277 #endif
278 }
279
280 void
281 http_update(page_handle pageh)
282 {
283 }
284
285 #ifdef HTML_ENABLE
286 void
287 html_compute_style(struct page *page, struct html_page *html)
288 {
289 short n, j;
290 char *val;
291
292 page->cur_style.font = geneva;
293 page->cur_style.size = 10;
294 page->cur_style.style = 0;
295 page->cur_style.align = TV_ALIGN_LEFT;
296
297 for (n = 0; n < html->open_count; n++) {
298 switch (html->open[n]->type) {
299 case HTML_TAG_A:
300 page->cur_style.style |= underline;
301 break;
302 case HTML_TAG_ADDRESS:
303 page->cur_style.style |= italic;
304 break;
305 case HTML_TAG_B:
306 page->cur_style.style |= bold | condense;
307 break;
308 case HTML_TAG_CENTER:
309 page->cur_style.align = TV_ALIGN_CENTER;
310 break;
311 case HTML_TAG_CITE:
312 page->cur_style.style |= italic;
313 break;
314 case HTML_TAG_CODE:
315 page->cur_style.font = courier;
316 page->cur_style.size = 9;
317 break;
318 case HTML_TAG_DFN:
319 page->cur_style.style |= italic;
320 break;
321 case HTML_TAG_EM:
322 page->cur_style.style |= italic;
323 break;
324 case HTML_TAG_H1:
325 /* 2em */
326 page->cur_style.size = 20;
327 page->cur_style.style |= bold;
328 break;
329 case HTML_TAG_H2:
330 /* 1.5em */
331 page->cur_style.size = 14;
332 page->cur_style.style |= bold;
333 break;
334 case HTML_TAG_H3:
335 /* 1.17em */
336 page->cur_style.size = 12;
337 page->cur_style.style |= bold;
338 break;
339 case HTML_TAG_H4:
340 /* 1em */
341 page->cur_style.size = 10;
342 page->cur_style.style |= bold;
343 break;
344 case HTML_TAG_H5:
345 /* 0.83em */
346 page->cur_style.size = 9;
347 page->cur_style.style |= bold;
348 break;
349 case HTML_TAG_H6:
350 /* 0.67em */
351 page->cur_style.size = 9;
352 page->cur_style.style |= bold | condense;
353 break;
354 case HTML_TAG_I:
355 page->cur_style.style |= italic;
356 break;
357 case HTML_TAG_INS:
358 page->cur_style.style |= underline;
359 break;
360 case HTML_TAG_KBD:
361 page->cur_style.font = courier;
362 page->cur_style.size = 9;
363 break;
364 case HTML_TAG_PRE:
365 page->cur_style.font = courier;
366 page->cur_style.size = 9;
367 break;
368 case HTML_TAG_S:
369 /* TODO: line-through */
370 break;
371 case HTML_TAG_SAMP:
372 page->cur_style.font = courier;
373 page->cur_style.size = 9;
374 break;
375 case HTML_TAG_SMALL:
376 page->cur_style.size -= 2;
377 break;
378 case HTML_TAG_STRIKE:
379 /* TODO: line-through */
380 break;
381 case HTML_TAG_STRONG:
382 page->cur_style.style |= bold | condense;
383 break;
384 case HTML_TAG_SUP:
385 page->cur_style.size -= 2;
386 break;
387 case HTML_TAG_TH:
388 page->cur_style.style |= bold | condense;
389 break;
390 case HTML_TAG_U:
391 page->cur_style.style |= underline;
392 break;
393 case HTML_TAG_VAR:
394 page->cur_style.style |= italic;
395 break;
396 }
397
398 /* honor the "align" attribute on certain block elements */
399 switch (html->open[n]->type) {
400 case HTML_TAG_DIV:
401 case HTML_TAG_P:
402 if (html_get_attribute_value(html, html->open[n], "align",
403 &val)) {
404 if (strcasecmp(val, "left") == 0)
405 page->cur_style.align = TV_ALIGN_LEFT;
406 else if (strcasecmp(val, "center") == 0)
407 page->cur_style.align = TV_ALIGN_CENTER;
408 else if (strcasecmp(val, "right") == 0)
409 page->cur_style.align = TV_ALIGN_RIGHT;
410 }
411 break;
412 }
413 }
414
415 if (page->cur_style.size < 9)
416 page->cur_style.size = 9;
417 }
418
419 void
420 html_output(void *cookie, struct html_page *html, char *str, size_t len)
421 {
422 struct page *page = *((page_handle)cookie);
423
424 html_compute_style(page, html);
425
426 if (!TVAppend(page->browser->output_tv, &page->cur_style, str, len))
427 panic("out of memory in TVAppend");
428
429 html->last_output = str[len - 1];
430 }
431
432 void
433 html_output_margin(void *cookie, struct html_page *html)
434 {
435 struct page *page = *((page_handle)cookie);
436
437 html_compute_style(page, html);
438 page->cur_style.size /= 2;
439
440 if (!TVAppend(page->browser->output_tv, &page->cur_style, "\r", 1))
441 panic("out of memory in TVAppend");
442
443 html->last_output = '\r';
444 }
445
446 void
447 html_output_field(void *cookie, struct html_page *html,
448 struct html_element *el)
449 {
450 struct page *page = *((page_handle)cookie);
451 size_t len;
452 char *val, *filler;
453 short osize, isize;
454 BigRect brect;
455 Rect rect;
456
457 len = html_get_attribute_value(html, el, "type", &val);
458 if (val && strcasecmp(val, "text") == 0) {
459 page->cur_style.tag = (unsigned long)el;
460 osize = page->cur_style.size;
461
462 isize = 0;
463 if (html_get_attribute_value(html, el, "size", &val))
464 isize = atoi(val);
465 if (isize < 3)
466 isize = 3;
467
468 filler = xmalloczero(isize + 4);
469 if (filler == NULL)
470 return;
471
472 page->cur_style.size = 12;
473 TVAppend(page->browser->output_tv, &page->cur_style, filler,
474 isize + 3);
475 xfree(&filler);
476
477 browser_find_tag_rect(page->browser, page->cur_style.tag, &brect);
478 TVBigRectToLocalRect(page->browser->output_tv, &brect, &rect);
479
480 el->input_te = TENew(&rect, &rect);
481 FrameRect(&rect);
482 isize = html_get_attribute_value(html, el, "value", &val);
483 if (isize) {
484 TESetText(val, isize, el->input_te);
485 } else
486 TESetText("hello", 5, el->input_te);
487
488 page->cur_style.tag = 0;
489 page->cur_style.size = osize;
490 } else {
491 html_output(html->cookie, html, "[ input type=", 13);
492
493 if (val)
494 html_output(html->cookie, html, val, len);
495 else
496 html_output(html->cookie, html, "(none)", 6);
497 html_output(html->cookie, html, " ]", 2);
498 }
499 }
500
501 void
502 html_have_title(void *cookie, struct html_page *html, char *str, size_t len)
503 {
504 Str255 pstr;
505 struct page *page = *((page_handle)cookie);
506 short plen;
507
508 plen = MIN(len, 255);
509 memcpy((char *)pstr + 1, str, len);
510 pstr[0] = (unsigned char)plen;
511 SetWTitle(page->browser->win, pstr);
512 }
513 #endif