AmendHub

Download

jcs

/

detritus

/

html_tree.c

 

(View History)

jcs   html: Put all of this behind HTML_ENABLE Latest amendment: 68 on 2025-03-04

1 /*
2 * Copyright (c) 2024 joshua stein <jcs@jcs.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 /*
18 * Tree construction
19 * https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
20 *
21 * html_tokenize() outputs tokens of various types to the html_emit_*token()
22 * functions, which then output them to html_process_token() here for tree
23 * building, tag order manipulation, tag closing, etc.
24 */
25
26 #include "html.h"
27
28 #ifdef HTML_ENABLE
29
30 void html_deref_element(struct html_page *html,
31 struct html_element *element);
32 void html_append_element(struct html_page *html,
33 struct html_element *element);
34 struct html_element * html_create_element_for_token(struct html_page *html,
35 html_token *token);
36 struct html_element * html_append_element_for_token(struct html_page *html,
37 html_token *token, html_namespace ns);
38 bool html_remove_open_element(struct html_page *html,
39 struct html_element *element);
40
41 html_token_act html_process_token_initial(struct html_page *html,
42 html_token *token);
43 html_token_act html_process_token_before_html(struct html_page *html,
44 html_token *token);
45 html_token_act html_process_token_before_head(struct html_page *html,
46 html_token *token);
47 html_token_act html_process_token_in_head(struct html_page *html,
48 html_token *token);
49 html_token_act html_process_token_in_head_noscript(struct html_page *html,
50 html_token *token);
51 html_token_act html_process_token_after_head(struct html_page *html,
52 html_token *token);
53 html_token_act html_process_token_in_body(struct html_page *html,
54 html_token *token);
55 html_token_act html_process_token_text(struct html_page *html,
56 html_token *token);
57 html_token_act html_process_token_in_table(struct html_page *html,
58 html_token *token);
59 html_token_act html_process_token_in_table_text(struct html_page *html,
60 html_token *token);
61 html_token_act html_process_token_in_caption(struct html_page *html,
62 html_token *token);
63 html_token_act html_process_token_in_column_group(struct html_page *html,
64 html_token *token);
65 html_token_act html_process_token_in_table_body(struct html_page *html,
66 html_token *token);
67 html_token_act html_process_token_in_row(struct html_page *html,
68 html_token *token);
69 html_token_act html_process_token_in_cell(struct html_page *html,
70 html_token *token);
71 html_token_act html_process_token_in_select(struct html_page *html,
72 html_token *token);
73 html_token_act html_process_token_in_select_in_table(struct html_page *html,
74 html_token *token);
75 html_token_act html_process_token_in_template(struct html_page *html,
76 html_token *token);
77 html_token_act html_process_token_after_body(struct html_page *html,
78 html_token *token);
79 html_token_act html_process_token_in_frameset(struct html_page *html,
80 html_token *token);
81 html_token_act html_process_token_after_frameset(struct html_page *html,
82 html_token *token);
83 html_token_act html_process_token_after_after_body(struct html_page *html,
84 html_token *token);
85 html_token_act html_process_token_after_after_frameset(struct html_page *html,
86 html_token *token);
87
88 void html_pop_current_element(struct html_page *html);
89 void html_pop_nodes_until_past_tag(struct html_page *html,
90 html_tag_type stop_after);
91 void html_pop_nodes_until_past_element(struct html_page *html,
92 struct html_element *element);
93 void html_close_p(struct html_page *html);
94 void html_generate_implied_end_tags(struct html_page *html, char *except,
95 bool thoroughly);
96
97 /* active formatting */
98 void html_push_active_formatting_element(struct html_page *html,
99 struct html_element *element, html_token_type token_type);
100 void html_push_active_formatting_marker(struct html_page *html,
101 html_token_type token_type);
102 bool html_is_tag_in_active_formatting(struct html_page *html,
103 html_tag_type tag);
104 bool html_is_element_in_active_formatting(struct html_page *html,
105 struct html_element *element);
106 bool html_remove_active_formatting_element(struct html_page *html,
107 struct html_element *element);
108 void html_reconstruct_active_formatting(struct html_page *html);
109 void html_clear_active_formatting_to_last_marker(struct html_page *html);
110 bool html_run_adoption_agency(struct html_page *html, html_token *token);
111
112 /* helpers */
113 bool html_is_element_special(struct html_page *html, struct html_element *el);
114 bool html_is_element_formatting(struct html_page *html,
115 struct html_element *el);
116 bool html_is_element_open(struct html_page *html, struct html_element *el);
117 bool html_has_tag_open(struct html_page *html, html_tag_type tag);
118 bool html_has_element_in_scope(struct html_page *html,
119 struct html_element *element, html_scope scope);
120 bool html_has_element_with_tag_open_in_scope(struct html_page *html,
121 html_tag_type tag, html_scope scope);
122 bool html_has_element_or_one_with_tag_open_in_scope(struct html_page *html,
123 struct html_element *element, html_tag_type tag, html_scope scope);
124 bool html_element_serializes_as_void(struct html_page *html,
125 struct html_element *element);
126
127 void
128 html_append_element(struct html_page *html, struct html_element *element)
129 {
130 short n;
131
132 if (html->open_count >= nitems(html->open))
133 panic("ran out of tag stack space");
134
135 if (html->current_node) {
136 HTML_DEBUG((": rendering current before-append <%s>",
137 html->current_node->name));
138 html_render_current_node(html, false);
139 }
140
141 HTML_DEBUG((": appending element"));
142 if (element->ns != HTML_NAMESPACE_HTML)
143 HTML_DEBUG((" in namespace %d", element->ns));
144 HTML_DEBUG((": %d: <%s>", html->open_count, element->name));
145
146 html->open[html->open_count++] = element;
147 element->refs++;
148 html->current_node = element;
149
150 switch (element->type) {
151 case HTML_TAG_BLOCKQUOTE:
152 case HTML_TAG_CENTER:
153 case HTML_TAG_DL:
154 case HTML_TAG_H1:
155 case HTML_TAG_H2:
156 case HTML_TAG_H3:
157 case HTML_TAG_H4:
158 case HTML_TAG_H5:
159 case HTML_TAG_H6:
160 case HTML_TAG_MENU:
161 case HTML_TAG_P:
162 element->margin_top = 1;
163 element->margin_bottom = 1;
164 break;
165 case HTML_TAG_OL:
166 case HTML_TAG_UL:
167 /* only give margins if not inside another list */
168 for (n = html->open_count - 2; n >= 0; n--) {
169 if (html->open[n]->type == HTML_TAG_OL ||
170 html->open[n]->type == HTML_TAG_UL)
171 break;
172
173 if (n == 0) {
174 element->margin_top = 1;
175 element->margin_bottom = 1;
176 }
177 }
178 break;
179 }
180
181 HTML_DEBUG((": now open: "));
182 for (n = 0; n <= html->open_count - 1; n++)
183 HTML_DEBUG(("<%s>", html->open[n]->name));
184 }
185
/*
 * Handle a comment token during tree construction.
 *
 * Comments are currently dropped: the serialisation code below is compiled
 * out, so this function has no effect.
 */
void
html_append_comment(struct html_page *html, struct html_comment *comment)
{
#if 0
	/*
	 * https://html.spec.whatwg.org/multipage/parsing.html#serialising-html-fragments:comment-2
	 */
	char *esc;
	size_t esclen = comment->len;

	esc = html_escape_string(html, comment->data, &esclen, false);

	html_buffer_output(html, "<!--", 4);
	html_buffer_output(html, esc, esclen);
	html_buffer_output(html, "-->", 3);
#else
	/* nothing to do; arguments intentionally unused */
	(void)html;
	(void)comment;
#endif
}
204
205 struct html_element *
206 html_create_element_for_token(struct html_page *html, html_token *token)
207 {
208 struct html_element *element;
209
210 if (token->tag.name[0] == '\0')
211 token->tag.name_len = strlcpy(token->tag.name,
212 html_tag_names[token->tag.type], sizeof(token->tag.name));
213
214 /* TODO: do an optimized allocation only the size we need */
215 element = xmalloczero(sizeof(struct html_element));
216 element->type = token->tag.type;
217 memcpy(element->name, token->tag.name, sizeof(element->name));
218 element->name_len = token->tag.name_len;
219 memcpy(element->attrs, token->tag.attrs, sizeof(element->attrs));
220 element->attrs_count = token->tag.attrs_count;
221
222 return element;
223 }
224
225 void
226 html_deref_element(struct html_page *html, struct html_element *element)
227 {
228 if (element->refs == 0)
229 Debugger();
230 else
231 element->refs--;
232
233 if (element->refs == 0) {
234 if (html->need_free_list) {
235 html->need_free_tail->next_need_free = element;
236 html->need_free_tail = element;
237 } else {
238 html->need_free_list = element;
239 html->need_free_tail = element;
240 }
241 }
242 }
243
244 struct html_element *
245 html_append_element_for_token(struct html_page *html, html_token *token,
246 html_namespace ns)
247 {
248 struct html_element *element;
249
250 element = html_create_element_for_token(html, token);
251 element->ns = ns;
252 html_append_element(html, element);
253 return element;
254 }
255
256 void
257 html_process_token(struct html_page *html, html_token *token)
258 {
259 html_token_act ret;
260 struct html_element *el;
261
262 while (html->need_free_list) {
263 HTML_DEBUG((": freeing deref'd <%s>", html->need_free_list->name));
264 el = html->need_free_list->next_need_free;
265 if (html->need_free_list->text)
266 xfree(&html->need_free_list->text);
267 xfree(&html->need_free_list);
268 html->need_free_list = el;
269 html->need_free_tail = NULL;
270 }
271
272 /*
273 * https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhtml
274 */
275
276 if (token->type == HTML_TOKEN_CHARACTER && token->ch.c == '\n' &&
277 html->skip_newline_char_token) {
278 html->skip_newline_char_token = false;
279 return;
280 }
281
282 HTML_DEBUG((" => token %s,", html_token_names[token->type]));
283
284 reprocess:
285 HTML_DEBUG((" mode %s", html_mode_names[html->mode]));
286
287 if (!(html->current_node == NULL ||
288 html->current_node->ns == HTML_NAMESPACE_HTML ||
289 token->type == HTML_TOKEN_EOF)) {
290 /*
291 * Process the token according to the rules given in the section for
292 * parsing tokens in foreign content.
293 */
294
295 /* TODO mathml checks */
296
297 ret = html_process_token_in_foreign_content(html, token);
298 if (ret != HTML_TOKEN_REPROCESS)
299 return;
300
301 HTML_DEBUG((" -R->"));
302 /* fallthrough */
303 }
304
305 /*
306 * Process the token according to the rules given in the section
307 * corresponding to the current insertion mode in HTML content.
308 */
309 switch (html->mode) {
310 case HTML_MODE_INITIAL:
311 ret = html_process_token_initial(html, token);
312 break;
313 case HTML_MODE_BEFORE_HTML:
314 ret = html_process_token_before_html(html, token);
315 break;
316 case HTML_MODE_BEFORE_HEAD:
317 ret = html_process_token_before_head(html, token);
318 break;
319 case HTML_MODE_IN_HEAD:
320 ret = html_process_token_in_head(html, token);
321 break;
322 case HTML_MODE_IN_HEAD_NOSCRIPT:
323 ret = html_process_token_in_head_noscript(html, token);
324 break;
325 case HTML_MODE_AFTER_HEAD:
326 ret = html_process_token_after_head(html, token);
327 break;
328 case HTML_MODE_IN_BODY:
329 ret = html_process_token_in_body(html, token);
330 break;
331 case HTML_MODE_TEXT:
332 ret = html_process_token_text(html, token);
333 break;
334 case HTML_MODE_IN_TABLE:
335 ret = html_process_token_in_table(html, token);
336 break;
337 case HTML_MODE_IN_TABLE_TEXT:
338 ret = html_process_token_in_table_text(html, token);
339 break;
340 case HTML_MODE_IN_CAPTION:
341 ret = html_process_token_in_caption(html, token);
342 break;
343 case HTML_MODE_IN_COLUMN_GROUP:
344 ret = html_process_token_in_column_group(html, token);
345 break;
346 case HTML_MODE_IN_TABLE_BODY:
347 ret = html_process_token_in_table_body(html, token);
348 break;
349 case HTML_MODE_IN_ROW:
350 ret = html_process_token_in_row(html, token);
351 break;
352 case HTML_MODE_IN_CELL:
353 ret = html_process_token_in_cell(html, token);
354 break;
355 case HTML_MODE_IN_SELECT:
356 ret = html_process_token_in_select(html, token);
357 break;
358 case HTML_MODE_IN_SELECT_IN_TABLE:
359 ret = html_process_token_in_table(html, token);
360 break;
361 case HTML_MODE_IN_TEMPLATE:
362 ret = html_process_token_in_template(html, token);
363 break;
364 case HTML_MODE_AFTER_BODY:
365 ret = html_process_token_after_body(html, token);
366 break;
367 case HTML_MODE_IN_FRAMESET:
368 ret = html_process_token_in_frameset(html, token);
369 break;
370 case HTML_MODE_AFTER_FRAMESET:
371 ret = html_process_token_after_frameset(html, token);
372 break;
373 case HTML_MODE_AFTER_AFTER_BODY:
374 ret = html_process_token_after_after_body(html, token);
375 break;
376 case HTML_MODE_AFTER_AFTER_FRAMESET:
377 ret = html_process_token_after_after_frameset(html, token);
378 break;
379 default:
380 panic("bogus mode");
381 }
382
383 if (ret == HTML_TOKEN_REPROCESS) {
384 HTML_DEBUG((" -R->"));
385 goto reprocess;
386 }
387 }
388
389 html_token_act
390 html_process_token_initial(struct html_page *html, html_token *token)
391 {
392 /*
393 * https://html.spec.whatwg.org/multipage/parsing.html#the-initial-insertion-mode
394 */
395
396 if (token->type == HTML_TOKEN_CHARACTER &&
397 (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
398 token->ch.c == '\r' || token->ch.c == ' ')) {
399 /* ignore */
400 return HTML_TOKEN_PROCESSED;
401 }
402
403 if (token->type == HTML_TOKEN_COMMENT) {
404 /* XXX: insert as "last child of the Document object" */
405 html_append_comment(html, &token->comment);
406 return HTML_TOKEN_PROCESSED;
407 }
408
409 if (token->type == HTML_TOKEN_DOCTYPE) {
410 /* TODO: handle if doctype is not "html" */
411
412 html->mode = HTML_MODE_BEFORE_HTML;
413 return HTML_TOKEN_PROCESSED;
414 }
415
416 /* TODO: check if "document is not an iframe srcdoc document" */
417 if (true) {
418 html_parse_error(html);
419 if (!html->parser_cannot_change_mode)
420 html->quirks_mode = true;
421 }
422
423 html->mode = HTML_MODE_BEFORE_HTML;
424 return HTML_TOKEN_REPROCESS;
425 }
426
427 html_token_act
428 html_process_token_before_html(struct html_page *html, html_token *token)
429 {
430 html_token ttoken;
431
432 /*
433 * https://html.spec.whatwg.org/multipage/parsing.html#the-before-html-insertion-mode
434 */
435
436 if (token->type == HTML_TOKEN_DOCTYPE) {
437 /* parse error, ignore */
438 html_parse_error(html);
439 return HTML_TOKEN_PROCESSED;
440 }
441
442 if (token->type == HTML_TOKEN_COMMENT) {
443 html_append_comment(html, &token->comment);
444 return HTML_TOKEN_PROCESSED;
445 }
446
447 if (token->type == HTML_TOKEN_CHARACTER &&
448 (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
449 token->ch.c == '\r' || token->ch.c == ' ')) {
450 /* ignore */
451 return HTML_TOKEN_PROCESSED;
452 }
453
454 if (token->type == HTML_TOKEN_START_TAG &&
455 token->tag.type == HTML_TAG_HTML) {
456 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
457 html->mode = HTML_MODE_BEFORE_HEAD;
458 return HTML_TOKEN_PROCESSED;
459 }
460
461 if (token->type == HTML_TOKEN_END_TAG &&
462 (token->tag.type == HTML_TAG_HEAD ||
463 token->tag.type == HTML_TAG_BODY ||
464 token->tag.type == HTML_TAG_HTML ||
465 token->tag.type == HTML_TAG_BR)) {
466 goto anything_else;
467 }
468
469 if (token->type == HTML_TOKEN_END_TAG) {
470 /* parse error, ignore */
471 html_parse_error(html);
472 return HTML_TOKEN_PROCESSED;
473 }
474
475 anything_else:
476 memset(&ttoken, 0, sizeof(html_token));
477 ttoken.type = HTML_TOKEN_START_TAG;
478 ttoken.tag.type = HTML_TAG_HTML;
479 html_append_element_for_token(html, &ttoken, HTML_NAMESPACE_HTML);
480
481 html->mode = HTML_MODE_BEFORE_HEAD;
482 return HTML_TOKEN_REPROCESS;
483 }
484
485 html_token_act
486 html_process_token_before_head(struct html_page *html, html_token *token)
487 {
488 html_token ttoken;
489
490 /*
491 * https://html.spec.whatwg.org/multipage/parsing.html#the-before-head-insertion-mode
492 */
493
494 if (token->type == HTML_TOKEN_CHARACTER &&
495 (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
496 token->ch.c == '\r' || token->ch.c == ' ')) {
497 /* ignore */
498 return HTML_TOKEN_PROCESSED;
499 }
500
501 if (token->type == HTML_TOKEN_COMMENT) {
502 html_append_comment(html, &token->comment);
503 return HTML_TOKEN_PROCESSED;
504 }
505
506 if (token->type == HTML_TOKEN_DOCTYPE) {
507 /* parse error, ignore */
508 html_parse_error(html);
509 return HTML_TOKEN_PROCESSED;
510 }
511
512 if (token->type == HTML_TOKEN_START_TAG &&
513 token->tag.type == HTML_TAG_HTML) {
514 /* process as "in body" */
515 html_process_token_in_body(html, token);
516 return HTML_TOKEN_PROCESSED;
517 }
518
519 if (token->type == HTML_TOKEN_START_TAG &&
520 token->tag.type == HTML_TAG_HEAD) {
521 html->head = html_append_element_for_token(html, token,
522 HTML_NAMESPACE_HTML);
523 html->mode = HTML_MODE_IN_HEAD;
524 return HTML_TOKEN_PROCESSED;
525 }
526
527 if (token->type == HTML_TOKEN_END_TAG &&
528 !(token->tag.type == HTML_TAG_HEAD ||
529 token->tag.type == HTML_TAG_BODY ||
530 token->tag.type == HTML_TAG_HTML ||
531 token->tag.type == HTML_TAG_BR)) {
532 /* parse error, ignore */
533 html_parse_error(html);
534 return HTML_TOKEN_PROCESSED;
535 }
536
537 memset(&ttoken, 0, sizeof(html_token));
538 ttoken.type = HTML_TOKEN_START_TAG;
539 ttoken.tag.type = HTML_TAG_HEAD;
540 html_append_element_for_token(html, &ttoken, HTML_NAMESPACE_HTML);
541
542 html->mode = HTML_MODE_IN_HEAD;
543 return HTML_TOKEN_REPROCESS;
544 }
545
546 html_token_act
547 html_process_token_in_head(struct html_page *html, html_token *token)
548 {
549 /*
550 * https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead
551 */
552
553 if (token->type == HTML_TOKEN_CHARACTER &&
554 (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
555 token->ch.c == '\r' || token->ch.c == ' ')) {
556 html_insert_character(html, token->ch.c);
557 return HTML_TOKEN_PROCESSED;
558 }
559
560 if (token->type == HTML_TOKEN_COMMENT) {
561 html_append_comment(html, &token->comment);
562 return HTML_TOKEN_PROCESSED;
563 }
564
565 if (token->type == HTML_TOKEN_DOCTYPE) {
566 /* parse error, ignore */
567 html_parse_error(html);
568 return HTML_TOKEN_PROCESSED;
569 }
570
571 if (token->type == HTML_TOKEN_START_TAG &&
572 token->tag.type == HTML_TAG_HTML) {
573 /* process as "in body" */
574 html_process_token_in_body(html, token);
575 return HTML_TOKEN_PROCESSED;
576 }
577
578 if (token->type == HTML_TOKEN_START_TAG &&
579 (token->tag.type == HTML_TAG_BASE ||
580 token->tag.type == HTML_TAG_BASEFONT ||
581 token->tag.type == HTML_TAG_BGSOUND ||
582 token->tag.type == HTML_TAG_LINK)) {
583 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
584 html_pop_current_element(html);
585
586 if (token->tag.self_closing)
587 token->tag.self_closing_acked = true;
588 return HTML_TOKEN_PROCESSED;
589 }
590
591 if (token->type == HTML_TOKEN_START_TAG &&
592 token->tag.type == HTML_TAG_META) {
593 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
594 html_pop_current_element(html);
595
596 if (token->tag.self_closing)
597 token->tag.self_closing_acked = true;
598
599 /* TODO: check "charset" and "http-equiv" and change encoding */
600
601 return HTML_TOKEN_PROCESSED;
602 }
603
604 if (token->type == HTML_TOKEN_START_TAG &&
605 token->tag.type == HTML_TAG_TITLE) {
606 /* "RCDATA element parsing algorithm" */
607 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
608 html->state = HTML_STATE_RCDATA;
609 html->original_mode = html->mode;
610 html->mode = HTML_MODE_TEXT;
611 return HTML_TOKEN_PROCESSED;
612 }
613
614 if (token->type == HTML_TOKEN_START_TAG &&
615 ((token->tag.type == HTML_TAG_NOSCRIPT && html->scripting) ||
616 (token->tag.type == HTML_TAG_NOFRAMES ||
617 token->tag.type == HTML_TAG_STYLE))) {
618 /* "raw text element parsing algorithm" */
619 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
620 html->state = HTML_STATE_RAWTEXT;
621 html->original_mode = html->mode;
622 html->mode = HTML_MODE_TEXT;
623 return HTML_TOKEN_PROCESSED;
624 }
625
626 if (token->type == HTML_TOKEN_START_TAG &&
627 token->tag.type == HTML_TAG_NOSCRIPT && !html->scripting) {
628 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
629 html->mode = HTML_MODE_IN_HEAD_NOSCRIPT;
630 return HTML_TOKEN_PROCESSED;
631 }
632
633 if (token->type == HTML_TOKEN_START_TAG &&
634 token->tag.type == HTML_TAG_SCRIPT) {
635 /* TODO: more stuff according to docs */
636
637 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
638 html->state = HTML_STATE_SCRIPT_DATA;
639 html->original_mode = html->mode;
640 html->mode = HTML_MODE_TEXT;
641 return HTML_TOKEN_PROCESSED;
642 }
643
644 if (token->type == HTML_TOKEN_END_TAG && token->tag.type == HTML_TAG_HEAD) {
645 /* this should be head */
646 html_pop_current_element(html);
647 html->mode = HTML_MODE_AFTER_HEAD;
648 return HTML_TOKEN_PROCESSED;
649 }
650
651 if (token->type == HTML_TOKEN_END_TAG &&
652 (token->tag.type == HTML_TAG_BODY ||
653 token->tag.type == HTML_TAG_HTML ||
654 token->tag.type == HTML_TAG_BR)) {
655 goto anything_else;
656 }
657
658 if (token->type == HTML_TOKEN_START_TAG &&
659 token->tag.type == HTML_TAG_TEMPLATE) {
660 html_push_active_formatting_marker(html, token->type);
661 html->frameset_ok = false;
662 html->mode = HTML_MODE_IN_TEMPLATE;
663
664 /* TODO: draw the rest of the owl */
665
666 return HTML_TOKEN_PROCESSED;
667 }
668
669 if (token->type == HTML_TOKEN_END_TAG &&
670 token->tag.type == HTML_TAG_TEMPLATE) {
671 if (!html_has_tag_open(html, HTML_TAG_TEMPLATE)) {
672 /* parse error, ignore */
673 html_parse_error(html);
674 return HTML_TOKEN_PROCESSED;
675 }
676
677 html_generate_implied_end_tags(html, NULL, true);
678
679 if (token->tag.type != HTML_TAG_TEMPLATE) {
680 /* parse error */
681 html_parse_error(html);
682 }
683
684 html_pop_nodes_until_past_tag(html, HTML_TAG_TEMPLATE);
685
686 /*
687 * TODO: "Clear the list of active formatting elements up to the last
688 * marker."
689 */
690
691 /*
692 * TODO: "Pop the current template insertion mode off the stack of
693 * template insertion modes."
694 */
695
696 /* TODO: "Reset the insertion mode appropriately." */
697
698 return HTML_TOKEN_PROCESSED;
699 }
700
701 if ((token->type == HTML_TOKEN_START_TAG &&
702 token->tag.type == HTML_TAG_HEAD) ||
703 token->type == HTML_TOKEN_END_TAG) {
704 /* parse error, ignore */
705 html_parse_error(html);
706 return HTML_TOKEN_PROCESSED;
707 }
708
709 anything_else:
710 /* this should be head */
711 html_pop_current_element(html);
712 html->mode = HTML_MODE_AFTER_HEAD;
713 return HTML_TOKEN_REPROCESS;
714 }
715
716 html_token_act
717 html_process_token_in_head_noscript(struct html_page *html, html_token *token)
718 {
719 /*
720 * https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inheadnoscript
721 */
722
723 if (token->type == HTML_TOKEN_DOCTYPE) {
724 /* parse error, ignore */
725 html_parse_error(html);
726 return HTML_TOKEN_PROCESSED;
727 }
728
729 if (token->type == HTML_TOKEN_START_TAG &&
730 token->tag.type == HTML_TAG_HTML) {
731 /* process as "in body" */
732 html_process_token_in_body(html, token);
733 return HTML_TOKEN_PROCESSED;
734 }
735
736 if (token->type == HTML_TOKEN_END_TAG &&
737 token->tag.type == HTML_TAG_NOSCRIPT) {
738 /* this should be <noscript> */
739 html_pop_current_element(html);
740 /* current tag should now be <head> */
741 html->mode = HTML_MODE_IN_HEAD;
742 return HTML_TOKEN_PROCESSED;
743 }
744
745 if ((token->type == HTML_TOKEN_CHARACTER &&
746 (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
747 token->ch.c == '\r' || token->ch.c == ' ')) ||
748 (token->type == HTML_TOKEN_COMMENT) ||
749 (token->type == HTML_TOKEN_START_TAG &&
750 (token->tag.type == HTML_TAG_BASEFONT ||
751 token->tag.type == HTML_TAG_BGSOUND ||
752 token->tag.type == HTML_TAG_LINK ||
753 token->tag.type == HTML_TAG_META ||
754 token->tag.type == HTML_TAG_NOFRAMES ||
755 token->tag.type == HTML_TAG_STYLE))) {
756 /* process as "in head" */
757 html_process_token_in_head(html, token);
758 return HTML_TOKEN_PROCESSED;
759 }
760
761 if (token->type == HTML_TOKEN_END_TAG && token->tag.type == HTML_TAG_BR)
762 goto anything_else;
763
764 if ((token->type == HTML_TOKEN_START_TAG &&
765 (token->tag.type == HTML_TAG_HEAD ||
766 token->tag.type == HTML_TAG_NOSCRIPT)) ||
767 token->type == HTML_TOKEN_END_TAG) {
768 /* parse error, ignore */
769 html_parse_error(html);
770 return HTML_TOKEN_PROCESSED;
771 }
772
773 anything_else:
774 /* parse error */
775 html_parse_error(html);
776
777 /* this should be noscript */
778 html_pop_current_element(html);
779 /* current tag should now be <head> */
780 html->mode = HTML_MODE_IN_HEAD;
781 return HTML_TOKEN_REPROCESS;
782 }
783
784 html_token_act
785 html_process_token_after_head(struct html_page *html, html_token *token)
786 {
787 html_token ttoken;
788
789 /*
790 * https://html.spec.whatwg.org/multipage/parsing.html#the-after-head-insertion-mode
791 */
792
793 if (token->type == HTML_TOKEN_CHARACTER &&
794 (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
795 token->ch.c == '\r' || token->ch.c == ' ')) {
796 html_insert_character(html, token->ch.c);
797 return HTML_TOKEN_PROCESSED;
798 }
799
800 if (token->type == HTML_TOKEN_COMMENT) {
801 html_append_comment(html, &token->comment);
802 return HTML_TOKEN_PROCESSED;
803 }
804
805 if (token->type == HTML_TOKEN_DOCTYPE) {
806 /* parse error, ignore */
807 html_parse_error(html);
808 return HTML_TOKEN_PROCESSED;
809 }
810
811 if (token->type == HTML_TOKEN_START_TAG &&
812 token->tag.type == HTML_TAG_HTML) {
813 /* process as "in body" */
814 html_process_token_in_body(html, token);
815 return HTML_TOKEN_PROCESSED;
816 }
817
818 if (token->type == HTML_TOKEN_START_TAG &&
819 token->tag.type == HTML_TAG_BODY) {
820 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
821 html->frameset_ok = false;
822 html->mode = HTML_MODE_IN_BODY;
823 return HTML_TOKEN_PROCESSED;
824 }
825
826 if (token->type == HTML_TOKEN_START_TAG &&
827 token->tag.type == HTML_TAG_FRAMESET) {
828 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
829 html->mode = HTML_MODE_IN_FRAMESET;
830 return HTML_TOKEN_PROCESSED;
831 }
832
833 if (token->type == HTML_TOKEN_START_TAG &&
834 (token->tag.type == HTML_TAG_BASE ||
835 token->tag.type == HTML_TAG_BASEFONT ||
836 token->tag.type == HTML_TAG_BGSOUND ||
837 token->tag.type == HTML_TAG_LINK ||
838 token->tag.type == HTML_TAG_META ||
839 token->tag.type == HTML_TAG_NOFRAMES ||
840 token->tag.type == HTML_TAG_SCRIPT ||
841 token->tag.type == HTML_TAG_STYLE ||
842 token->tag.type == HTML_TAG_TEMPLATE ||
843 token->tag.type == HTML_TAG_TITLE)) {
844 /* parse error */
845 html_parse_error(html);
846
847 html_append_element(html, html->head);
848
849 /* process as "in head" */
850 html_process_token_in_head(html, token);
851
852 html_remove_open_element(html, html->head);
853 return HTML_TOKEN_PROCESSED;
854 }
855
856 if (token->type == HTML_TOKEN_END_TAG &&
857 token->tag.type == HTML_TAG_TEMPLATE) {
858 /* process as "in head" */
859 html_process_token_in_head(html, token);
860 return HTML_TOKEN_PROCESSED;
861 }
862
863 if (token->type == HTML_TOKEN_END_TAG &&
864 (token->tag.type == HTML_TAG_BODY ||
865 token->tag.type == HTML_TAG_HTML ||
866 token->tag.type == HTML_TAG_BR))
867 goto anything_else;
868
869 if ((token->type == HTML_TOKEN_START_TAG &&
870 token->tag.type == HTML_TAG_HEAD) ||
871 token->type == HTML_TOKEN_END_TAG) {
872 /* parse error, ignore */
873 html_parse_error(html);
874 return HTML_TOKEN_PROCESSED;
875 }
876
877 anything_else:
878 memset(&ttoken, 0, sizeof(html_token));
879 ttoken.type = HTML_TOKEN_START_TAG;
880 ttoken.tag.type = HTML_TAG_BODY;
881 html_append_element_for_token(html, &ttoken, HTML_NAMESPACE_HTML);
882
883 html->mode = HTML_MODE_IN_BODY;
884 return HTML_TOKEN_REPROCESS;
885 }
886
887 html_token_act
888 html_process_token_in_body(struct html_page *html, html_token *token)
889 {
890 html_token ttoken;
891 struct html_element *element, *node;
892 short n;
893
894 /*
895 * https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
896 */
897
898 if (token->type == HTML_TOKEN_CHARACTER && token->ch.c == '\0') {
899 /* parse error, ignore */
900 html_parse_error(html);
901 return HTML_TOKEN_PROCESSED;
902 }
903
904 if (token->type == HTML_TOKEN_CHARACTER &&
905 (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
906 token->ch.c == '\r' || token->ch.c == ' ')) {
907 if (html->active_formatting_count > 0)
908 html_reconstruct_active_formatting(html);
909 html_insert_character(html, token->ch.c);
910 return HTML_TOKEN_PROCESSED;
911 }
912
913 if (token->type == HTML_TOKEN_CHARACTER) {
914 if (html->active_formatting_count > 0)
915 html_reconstruct_active_formatting(html);
916 html_insert_character(html, token->ch.c);
917 html->frameset_ok = false;
918 return HTML_TOKEN_PROCESSED;
919 }
920
921 if (token->type == HTML_TOKEN_COMMENT) {
922 html_append_comment(html, &token->comment);
923 return HTML_TOKEN_PROCESSED;
924 }
925
926 if (token->type == HTML_TOKEN_DOCTYPE) {
927 /* parse error, ignore */
928 html_parse_error(html);
929 return HTML_TOKEN_PROCESSED;
930 }
931
932 if (token->type == HTML_TOKEN_START_TAG &&
933 token->tag.type == HTML_TAG_HTML) {
934 /* parse error */
935 html_parse_error(html);
936 if (html_has_tag_open(html, HTML_TAG_TEMPLATE)) {
937 /* ignore */
938 return HTML_TOKEN_PROCESSED;
939 }
940
941 /* TODO: add attrs to first html tag it doesn't already have */
942
943 return HTML_TOKEN_PROCESSED;
944 }
945
946 if ((token->type == HTML_TOKEN_START_TAG &&
947 (token->tag.type == HTML_TAG_BASE ||
948 token->tag.type == HTML_TAG_BASEFONT ||
949 token->tag.type == HTML_TAG_BGSOUND ||
950 token->tag.type == HTML_TAG_LINK ||
951 token->tag.type == HTML_TAG_META ||
952 token->tag.type == HTML_TAG_NOFRAMES ||
953 token->tag.type == HTML_TAG_SCRIPT ||
954 token->tag.type == HTML_TAG_STYLE ||
955 token->tag.type == HTML_TAG_TEMPLATE ||
956 token->tag.type == HTML_TAG_TITLE)) ||
957 (token->type == HTML_TOKEN_END_TAG &&
958 token->tag.type == HTML_TAG_TEMPLATE)) {
959 /* process as "in head" */
960 html_process_token_in_head(html, token);
961 return HTML_TOKEN_PROCESSED;
962 }
963
964 if (token->type == HTML_TOKEN_START_TAG &&
965 token->tag.type == HTML_TAG_BODY) {
966 /* parse error */
967 html_parse_error(html);
968 if (html->open_count == 1 || html->open[1]->type != HTML_TAG_BODY ||
969 html_has_tag_open(html, HTML_TAG_TEMPLATE)) {
970 /* ignore */
971 return HTML_TOKEN_PROCESSED;
972 }
973 html->frameset_ok = false;
974
975 /* TODO: add attrs to first body tag it doesn't already have */
976
977 return HTML_TOKEN_PROCESSED;
978 }
979
980 if (token->type == HTML_TOKEN_START_TAG &&
981 token->tag.type == HTML_TAG_FRAMESET) {
982 /* parse error */
983 html_parse_error(html);
984 if (html->open_count == 1 || html->open[1]->type != HTML_TAG_BODY ||
985 html_has_tag_open(html, HTML_TAG_TEMPLATE)) {
986 /* ignore */
987 return HTML_TOKEN_PROCESSED;
988 }
989
990 if (!html->frameset_ok) {
991 /* ignore */
992 return HTML_TOKEN_PROCESSED;
993 }
994
995 /* pop all nodes except root html */
996 while (html->open_count != 1)
997 html_pop_current_element(html);
998
999 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1000 html->mode = HTML_MODE_IN_FRAMESET;
1001 return HTML_TOKEN_PROCESSED;
1002 }
1003
1004 if (token->type == HTML_TOKEN_EOF) {
1005 /* supposed to do more here but it all ends up the same */
1006 html->eof = true;
1007 return HTML_TOKEN_PROCESSED;
1008 }
1009
1010 if (token->type == HTML_TOKEN_END_TAG &&
1011 (token->tag.type == HTML_TAG_BODY ||
1012 token->tag.type == HTML_TAG_HTML)) {
1013 if (!html_has_element_with_tag_open_in_scope(html, HTML_TAG_BODY,
1014 HTML_SCOPE_DEFAULT)) {
1015 /* parse error, ignore */
1016 html_parse_error(html);
1017 return HTML_TOKEN_PROCESSED;
1018 }
1019
1020 for (n = 0; n < html->open_count; n++) {
1021 if (html->open[n]->type == HTML_TAG_DD ||
1022 html->open[n]->type == HTML_TAG_DT ||
1023 html->open[n]->type == HTML_TAG_LI ||
1024 html->open[n]->type == HTML_TAG_OPTGROUP ||
1025 html->open[n]->type == HTML_TAG_OPTION ||
1026 html->open[n]->type == HTML_TAG_P ||
1027 html->open[n]->type == HTML_TAG_RB ||
1028 html->open[n]->type == HTML_TAG_RP ||
1029 html->open[n]->type == HTML_TAG_RT ||
1030 html->open[n]->type == HTML_TAG_RTC ||
1031 html->open[n]->type == HTML_TAG_TBODY ||
1032 html->open[n]->type == HTML_TAG_TD ||
1033 html->open[n]->type == HTML_TAG_TFOOT ||
1034 html->open[n]->type == HTML_TAG_TH ||
1035 html->open[n]->type == HTML_TAG_THEAD ||
1036 html->open[n]->type == HTML_TAG_TR ||
1037 html->open[n]->type == HTML_TAG_BODY ||
1038 html->open[n]->type == HTML_TAG_HTML)
1039 continue;
1040
1041 html_parse_error(html);
1042 break;
1043 }
1044
1045 html->mode = HTML_MODE_AFTER_BODY;
1046
1047 if (token->tag.type == HTML_TAG_HTML)
1048 return HTML_TOKEN_REPROCESS;
1049
1050 return HTML_TOKEN_PROCESSED;
1051 }
1052
1053 if (token->type == HTML_TOKEN_START_TAG &&
1054 (token->tag.type == HTML_TAG_ADDRESS ||
1055 token->tag.type == HTML_TAG_ARTICLE ||
1056 token->tag.type == HTML_TAG_ASIDE ||
1057 token->tag.type == HTML_TAG_BLOCKQUOTE ||
1058 token->tag.type == HTML_TAG_CENTER ||
1059 token->tag.type == HTML_TAG_DETAILS ||
1060 token->tag.type == HTML_TAG_DIALOG ||
1061 token->tag.type == HTML_TAG_DIR ||
1062 token->tag.type == HTML_TAG_DIV ||
1063 token->tag.type == HTML_TAG_DL ||
1064 token->tag.type == HTML_TAG_FIELDSET ||
1065 token->tag.type == HTML_TAG_FIGCAPTION ||
1066 token->tag.type == HTML_TAG_FIGURE ||
1067 token->tag.type == HTML_TAG_FOOTER ||
1068 token->tag.type == HTML_TAG_HEADER ||
1069 token->tag.type == HTML_TAG_HGROUP ||
1070 token->tag.type == HTML_TAG_MAIN ||
1071 token->tag.type == HTML_TAG_MENU ||
1072 token->tag.type == HTML_TAG_NAV ||
1073 token->tag.type == HTML_TAG_OL ||
1074 token->tag.type == HTML_TAG_P ||
1075 token->tag.type == HTML_TAG_SEARCH ||
1076 token->tag.type == HTML_TAG_SECTION ||
1077 token->tag.type == HTML_TAG_SUMMARY ||
1078 token->tag.type == HTML_TAG_UL)) {
1079 if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
1080 HTML_SCOPE_BUTTON))
1081 html_close_p(html);
1082 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1083 return HTML_TOKEN_PROCESSED;
1084 }
1085
1086 if (token->type == HTML_TOKEN_START_TAG &&
1087 (token->tag.type == HTML_TAG_H1 ||
1088 token->tag.type == HTML_TAG_H2 ||
1089 token->tag.type == HTML_TAG_H3 ||
1090 token->tag.type == HTML_TAG_H4 ||
1091 token->tag.type == HTML_TAG_H5 ||
1092 token->tag.type == HTML_TAG_H6)) {
1093 if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
1094 HTML_SCOPE_BUTTON))
1095 html_close_p(html);
1096
1097 if (token->type == HTML_TOKEN_START_TAG &&
1098 (html->current_node->type == HTML_TAG_H1 ||
1099 html->current_node->type == HTML_TAG_H2 ||
1100 html->current_node->type == HTML_TAG_H3 ||
1101 html->current_node->type == HTML_TAG_H4 ||
1102 html->current_node->type == HTML_TAG_H5 ||
1103 html->current_node->type == HTML_TAG_H6)) {
1104 /* parse error */
1105 html_parse_error(html);
1106 html_pop_current_element(html);
1107 }
1108
1109 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1110 return HTML_TOKEN_PROCESSED;
1111 }
1112
1113 if (token->type == HTML_TOKEN_START_TAG &&
1114 (token->tag.type == HTML_TAG_PRE ||
1115 token->tag.type == HTML_TAG_LISTING)) {
1116 if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
1117 HTML_SCOPE_BUTTON))
1118 html_close_p(html);
1119
1120 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1121
1122 html->skip_newline_char_token = true;
1123 html->frameset_ok = false;
1124 return HTML_TOKEN_PROCESSED;
1125 }
1126
1127 if (token->type == HTML_TOKEN_START_TAG &&
1128 token->tag.type == HTML_TAG_FORM) {
1129 if (html->form && !html_has_tag_open(html, HTML_TAG_TEMPLATE)) {
1130 /* parse error, ignore */
1131 html_parse_error(html);
1132 return HTML_TOKEN_PROCESSED;
1133 }
1134
1135 if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
1136 HTML_SCOPE_BUTTON))
1137 html_close_p(html);
1138
1139 element = html_append_element_for_token(html, token,
1140 HTML_NAMESPACE_HTML);
1141
1142 if (!html_has_tag_open(html, HTML_TAG_TEMPLATE))
1143 html->form = element;
1144
1145 return HTML_TOKEN_PROCESSED;
1146 }
1147
1148 if (token->type == HTML_TOKEN_START_TAG &&
1149 token->tag.type == HTML_TAG_LI) {
1150 html->frameset_ok = false;
1151
1152 /* TODO: docs say to run a loop doing stuff here */
1153
1154 if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
1155 HTML_SCOPE_BUTTON))
1156 html_close_p(html);
1157
1158 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1159 return HTML_TOKEN_PROCESSED;
1160 }
1161
1162 if (token->type == HTML_TOKEN_START_TAG &&
1163 (token->tag.type == HTML_TAG_DD ||
1164 token->tag.type == HTML_TAG_DT)) {
1165 html->frameset_ok = false;
1166
1167 /* TODO: docs say to run a loop doing stuff here */
1168
1169 if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
1170 HTML_SCOPE_BUTTON))
1171 html_close_p(html);
1172
1173 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1174 return HTML_TOKEN_PROCESSED;
1175 }
1176
1177 if (token->type == HTML_TOKEN_START_TAG &&
1178 token->tag.type == HTML_TAG_PLAINTEXT) {
1179 if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
1180 HTML_SCOPE_BUTTON))
1181 html_close_p(html);
1182
1183 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1184 html->state = HTML_STATE_PLAINTEXT;
1185 return HTML_TOKEN_PROCESSED;
1186 }
1187
1188 if (token->type == HTML_TOKEN_START_TAG &&
1189 token->tag.type == HTML_TAG_BUTTON) {
1190 if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_BUTTON,
1191 HTML_SCOPE_DEFAULT)) {
1192 /* parse error */
1193 html_parse_error(html);
1194 html_generate_implied_end_tags(html, NULL, false);
1195 html_pop_nodes_until_past_tag(html, HTML_TAG_BUTTON);
1196 }
1197
1198 if (html->active_formatting_count > 0)
1199 html_reconstruct_active_formatting(html);
1200 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1201 html->frameset_ok = false;
1202 return HTML_TOKEN_PROCESSED;
1203 }
1204
1205 if (token->type == HTML_TOKEN_END_TAG &&
1206 (token->tag.type == HTML_TAG_ADDRESS ||
1207 token->tag.type == HTML_TAG_ARTICLE ||
1208 token->tag.type == HTML_TAG_ASIDE ||
1209 token->tag.type == HTML_TAG_BLOCKQUOTE ||
1210 token->tag.type == HTML_TAG_BUTTON ||
1211 token->tag.type == HTML_TAG_CENTER ||
1212 token->tag.type == HTML_TAG_DETAILS ||
1213 token->tag.type == HTML_TAG_DIALOG ||
1214 token->tag.type == HTML_TAG_DIR ||
1215 token->tag.type == HTML_TAG_DIV ||
1216 token->tag.type == HTML_TAG_DL ||
1217 token->tag.type == HTML_TAG_FIELDSET ||
1218 token->tag.type == HTML_TAG_FIGCAPTION ||
1219 token->tag.type == HTML_TAG_FIGURE ||
1220 token->tag.type == HTML_TAG_FOOTER ||
1221 token->tag.type == HTML_TAG_HEADER ||
1222 token->tag.type == HTML_TAG_HGROUP ||
1223 token->tag.type == HTML_TAG_LISTING ||
1224 token->tag.type == HTML_TAG_MAIN ||
1225 token->tag.type == HTML_TAG_MENU ||
1226 token->tag.type == HTML_TAG_NAV ||
1227 token->tag.type == HTML_TAG_OL ||
1228 token->tag.type == HTML_TAG_PRE ||
1229 token->tag.type == HTML_TAG_SEARCH ||
1230 token->tag.type == HTML_TAG_SECTION ||
1231 token->tag.type == HTML_TAG_SUMMARY ||
1232 token->tag.type == HTML_TAG_UL)) {
1233 if (!html_has_element_with_tag_open_in_scope(html,
1234 token->tag.type, HTML_SCOPE_DEFAULT)) {
1235 /* parse error, ignore */
1236 html_parse_error(html);
1237 return HTML_TOKEN_PROCESSED;
1238 }
1239
1240 html_generate_implied_end_tags(html, NULL, false);
1241
1242 if (!html_has_tag_open(html, token->tag.type)) {
1243 /* parse error, ignore */
1244 html_parse_error(html);
1245 return HTML_TOKEN_PROCESSED;
1246 }
1247
1248 html_pop_nodes_until_past_tag(html, token->tag.type);
1249 return HTML_TOKEN_PROCESSED;
1250 }
1251
1252 if (token->type == HTML_TOKEN_END_TAG && token->tag.type == HTML_TAG_FORM) {
1253 if (!html_has_tag_open(html, HTML_TAG_TEMPLATE)) {
1254 /* TODO */
1255 } else {
1256 if (!html_has_tag_open(html, HTML_TAG_FORM)) {
1257 /* parse error, ignore */
1258 html_parse_error(html);
1259 return HTML_TOKEN_PROCESSED;
1260 }
1261
1262 html_generate_implied_end_tags(html, NULL, false);
1263
1264 if (html->current_node->type != HTML_TAG_FORM) {
1265 /* parse error */
1266 html_parse_error(html);
1267 }
1268
1269 html_pop_nodes_until_past_tag(html, HTML_TAG_FORM);
1270 }
1271 return HTML_TOKEN_PROCESSED;
1272 }
1273
1274 if (token->type == HTML_TOKEN_END_TAG && token->tag.type == HTML_TAG_P) {
1275 if (!html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
1276 HTML_SCOPE_BUTTON)) {
1277 /* parse error */
1278 html_parse_error(html);
1279 memset(&ttoken, 0, sizeof(html_token));
1280 ttoken.type = HTML_TOKEN_START_TAG;
1281 ttoken.tag.type = HTML_TAG_P;
1282 html_append_element_for_token(html, &ttoken, HTML_NAMESPACE_HTML);
1283 }
1284
1285 html_close_p(html);
1286 return HTML_TOKEN_PROCESSED;
1287 }
1288
1289 if (token->type == HTML_TOKEN_END_TAG && token->tag.type == HTML_TAG_LI) {
1290 if (!html_has_element_with_tag_open_in_scope(html, HTML_TAG_LI,
1291 HTML_SCOPE_LIST_ITEM)) {
1292 /* parse error, ignore */
1293 html_parse_error(html);
1294 return HTML_TOKEN_PROCESSED;
1295 }
1296
1297 html_generate_implied_end_tags(html, "li", false);
1298
1299 if (html->current_node->type != HTML_TAG_LI) {
1300 /* parse error */
1301 html_parse_error(html);
1302 }
1303
1304 html_pop_nodes_until_past_tag(html, HTML_TAG_LI);
1305 return HTML_TOKEN_PROCESSED;
1306 }
1307
1308 if (token->type == HTML_TOKEN_END_TAG &&
1309 (token->tag.type == HTML_TAG_DD ||
1310 token->tag.type == HTML_TAG_DT)) {
1311 if (!html_has_tag_open(html, token->tag.type)) {
1312 /* parse error, ignore */
1313 html_parse_error(html);
1314 return HTML_TOKEN_PROCESSED;
1315 }
1316
1317 html_generate_implied_end_tags(html, token->tag.name, false);
1318
1319 if (html->current_node->type != token->tag.type) {
1320 /* parse error */
1321 html_parse_error(html);
1322 }
1323
1324 html_pop_nodes_until_past_tag(html, token->tag.type);
1325 return HTML_TOKEN_PROCESSED;
1326 }
1327
1328 if (token->type == HTML_TOKEN_END_TAG &&
1329 (token->tag.type == HTML_TAG_H1 ||
1330 token->tag.type == HTML_TAG_H2 ||
1331 token->tag.type == HTML_TAG_H3 ||
1332 token->tag.type == HTML_TAG_H4 ||
1333 token->tag.type == HTML_TAG_H5 ||
1334 token->tag.type == HTML_TAG_H6)) {
1335 if (!(html_has_element_with_tag_open_in_scope(html, HTML_TAG_H1,
1336 HTML_SCOPE_DEFAULT) ||
1337 html_has_element_with_tag_open_in_scope(html, HTML_TAG_H2,
1338 HTML_SCOPE_DEFAULT) ||
1339 html_has_element_with_tag_open_in_scope(html, HTML_TAG_H3,
1340 HTML_SCOPE_DEFAULT) ||
1341 html_has_element_with_tag_open_in_scope(html, HTML_TAG_H4,
1342 HTML_SCOPE_DEFAULT) ||
1343 html_has_element_with_tag_open_in_scope(html, HTML_TAG_H5,
1344 HTML_SCOPE_DEFAULT) ||
1345 html_has_element_with_tag_open_in_scope(html, HTML_TAG_H6,
1346 HTML_SCOPE_DEFAULT))) {
1347 /* parse error, ignore */
1348 html_parse_error(html);
1349 return HTML_TOKEN_PROCESSED;
1350 }
1351
1352 html_generate_implied_end_tags(html, NULL, false);
1353
1354 if (html->current_node->type != token->tag.type) {
1355 /* parse error */
1356 html_parse_error(html);
1357 }
1358
1359 html_pop_nodes_until_past_tag(html, token->tag.type);
1360 return HTML_TOKEN_PROCESSED;
1361 }
1362
1363 if (token->type == HTML_TOKEN_END_TAG &&
1364 strcmp(token->tag.name, "sarcasm") == 0) {
1365 /* TODO: take a deep breath */
1366 goto any_other_end_tag;
1367 }
1368
1369 if (token->type == HTML_TOKEN_START_TAG && token->tag.type == HTML_TAG_A) {
1370 short last_marker = 0;
1371 struct html_element *found_a;
1372
1373 for (n = 0; n < html->active_formatting_count; n++) {
1374 if (html->active_formatting[n].marker) {
1375 HTML_DEBUG((": af[%d]=marker", n));
1376 } else {
1377 HTML_DEBUG((": af[%d]=<%s>", n,
1378 html->active_formatting[n].element->name));
1379 }
1380 }
1381
1382 /* find last marker, if any */
1383 for (n = html->active_formatting_count - 1; n >= 0; n--) {
1384 if (html->active_formatting[n].marker) {
1385 last_marker = n;
1386 break;
1387 }
1388 }
1389
1390 /*
1391 * "If the list of active formatting elements contains an a element
1392 * between the end of the list and the last marker on the list (or the
1393 * start of the list if there is no marker on the list), then this is a
1394 * parse error;"
1395 */
1396 for (n = last_marker; n < html->active_formatting_count; n++) {
1397 if (!html->active_formatting[n].element ||
1398 html->active_formatting[n].element->type != HTML_TAG_A)
1399 continue;
1400
1401 found_a = html->active_formatting[n].element;
1402 html_parse_error(html);
1403
1404 /*
1405 * "then remove that element from the list of active formatting
1406 * elements and the stack of open elements if the adoption
1407 * agency algorithm didn't already remove it"
1408 */
1409 html_run_adoption_agency(html, token);
1410 html_remove_active_formatting_element(html, found_a);
1411 break;
1412 }
1413
1414 if (html->active_formatting_count > 0)
1415 html_reconstruct_active_formatting(html);
1416 element = html_append_element_for_token(html, token,
1417 HTML_NAMESPACE_HTML);
1418 html_push_active_formatting_element(html, element, token->type);
1419 return HTML_TOKEN_PROCESSED;
1420 }
1421
1422 if (token->type == HTML_TOKEN_START_TAG &&
1423 (token->tag.type == HTML_TAG_B ||
1424 token->tag.type == HTML_TAG_BIG ||
1425 token->tag.type == HTML_TAG_CODE ||
1426 token->tag.type == HTML_TAG_EM ||
1427 token->tag.type == HTML_TAG_FONT ||
1428 token->tag.type == HTML_TAG_I ||
1429 token->tag.type == HTML_TAG_S ||
1430 token->tag.type == HTML_TAG_SMALL ||
1431 token->tag.type == HTML_TAG_STRIKE ||
1432 token->tag.type == HTML_TAG_STRONG ||
1433 token->tag.type == HTML_TAG_TT ||
1434 token->tag.type == HTML_TAG_U)) {
1435 if (html->active_formatting_count > 0)
1436 html_reconstruct_active_formatting(html);
1437 element = html_append_element_for_token(html, token,
1438 HTML_NAMESPACE_HTML);
1439 html_push_active_formatting_element(html, element, token->type);
1440 return HTML_TOKEN_PROCESSED;
1441 }
1442
1443 if (token->type == HTML_TOKEN_START_TAG &&
1444 token->tag.type == HTML_TAG_NOBR) {
1445 if (html->active_formatting_count > 0)
1446 html_reconstruct_active_formatting(html);
1447 if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_NOBR,
1448 HTML_SCOPE_DEFAULT)) {
1449 /* parse error */
1450 html_parse_error(html);
1451 html_run_adoption_agency(html, token);
1452 if (html->active_formatting_count > 0)
1453 html_reconstruct_active_formatting(html);
1454 }
1455
1456 element = html_append_element_for_token(html, token,
1457 HTML_NAMESPACE_HTML);
1458 html_push_active_formatting_element(html, element, token->type);
1459 return HTML_TOKEN_PROCESSED;
1460 }
1461
1462 if (token->type == HTML_TOKEN_END_TAG &&
1463 (token->tag.type == HTML_TAG_A ||
1464 token->tag.type == HTML_TAG_B ||
1465 token->tag.type == HTML_TAG_BIG ||
1466 token->tag.type == HTML_TAG_CODE ||
1467 token->tag.type == HTML_TAG_EM ||
1468 token->tag.type == HTML_TAG_FONT ||
1469 token->tag.type == HTML_TAG_I ||
1470 token->tag.type == HTML_TAG_NOBR ||
1471 token->tag.type == HTML_TAG_S ||
1472 token->tag.type == HTML_TAG_SMALL ||
1473 token->tag.type == HTML_TAG_STRIKE ||
1474 token->tag.type == HTML_TAG_STRONG ||
1475 token->tag.type == HTML_TAG_TT ||
1476 token->tag.type == HTML_TAG_U)) {
1477 if (!html_run_adoption_agency(html, token))
1478 goto any_other_end_tag;
1479 return HTML_TOKEN_PROCESSED;
1480 }
1481
1482 if (token->type == HTML_TOKEN_START_TAG &&
1483 (token->tag.type == HTML_TAG_APPLET ||
1484 token->tag.type == HTML_TAG_MARQUEE ||
1485 token->tag.type == HTML_TAG_OBJECT)) {
1486 if (html->active_formatting_count > 0)
1487 html_reconstruct_active_formatting(html);
1488 element = html_append_element_for_token(html, token,
1489 HTML_NAMESPACE_HTML);
1490 html_push_active_formatting_element(html, element, token->type);
1491 html->frameset_ok = false;
1492 return HTML_TOKEN_PROCESSED;
1493 }
1494
1495 if (token->type == HTML_TOKEN_END_TAG &&
1496 (token->tag.type == HTML_TAG_APPLET ||
1497 token->tag.type == HTML_TAG_MARQUEE ||
1498 token->tag.type == HTML_TAG_OBJECT)) {
1499 if (!html_has_tag_open(html, token->tag.type)) {
1500 /* parse error, ignore */
1501 html_parse_error(html);
1502 return HTML_TOKEN_PROCESSED;
1503 }
1504
1505 html_generate_implied_end_tags(html, NULL, false);
1506
1507 if (html->current_node->type != token->tag.type) {
1508 /* parse error */
1509 html_parse_error(html);
1510 }
1511
1512 html_pop_nodes_until_past_tag(html, token->tag.type);
1513
1514 /* TODO: clear list of active formatting elements up to last marker */
1515
1516 return HTML_TOKEN_PROCESSED;
1517 }
1518
1519 if (token->type == HTML_TOKEN_START_TAG &&
1520 token->tag.type == HTML_TAG_TABLE) {
1521 if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
1522 HTML_SCOPE_BUTTON)) {
1523 /* TODO: only do this if document is not set to quirks mode */
1524 html_close_p(html);
1525 }
1526
1527 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1528 html->frameset_ok = false;
1529 html->mode = HTML_MODE_IN_TABLE;
1530 return HTML_TOKEN_PROCESSED;
1531 }
1532
1533 if (token->type == HTML_TOKEN_END_TAG && token->tag.type == HTML_TAG_BR) {
1534 /* parse error, drop attributes and turn into start */
1535 html_parse_error(html);
1536
1537 token->tag.attrs_count = 0;
1538 token->type = HTML_TOKEN_START_TAG;
1539
1540 /* fall through */
1541 }
1542
1543 if (token->type == HTML_TOKEN_START_TAG &&
1544 (token->tag.type == HTML_TAG_AREA ||
1545 token->tag.type == HTML_TAG_BR ||
1546 token->tag.type == HTML_TAG_EMBED ||
1547 token->tag.type == HTML_TAG_IMG ||
1548 token->tag.type == HTML_TAG_KEYGEN ||
1549 token->tag.type == HTML_TAG_WBR)) {
1550 if (html->active_formatting_count > 0)
1551 html_reconstruct_active_formatting(html);
1552
1553 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1554 html_pop_current_element(html);
1555
1556 if (token->tag.self_closing)
1557 token->tag.self_closing_acked = true;
1558
1559 html->frameset_ok = false;
1560 return HTML_TOKEN_PROCESSED;
1561 }
1562
1563 if (token->type == HTML_TOKEN_START_TAG &&
1564 token->tag.type == HTML_TAG_INPUT) {
1565 bool found_hidden;
1566
1567 if (html->active_formatting_count > 0)
1568 html_reconstruct_active_formatting(html);
1569
1570 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1571 html_pop_current_element(html);
1572
1573 if (token->tag.self_closing)
1574 token->tag.self_closing_acked = true;
1575
1576 for (n = 0, found_hidden = false; n < token->tag.attrs_count; n++) {
1577 if (strcasecmp(token->tag.attrs[n].name, "type") == 0 &&
1578 strcasecmp(token->tag.attrs[n].val, "hidden") == 0) {
1579 found_hidden = true;
1580 break;
1581 }
1582 }
1583 if (!found_hidden)
1584 html->frameset_ok = false;
1585 return HTML_TOKEN_PROCESSED;
1586 }
1587
1588 if (token->type == HTML_TOKEN_START_TAG &&
1589 (token->tag.type == HTML_TAG_PARAM ||
1590 token->tag.type == HTML_TAG_SOURCE ||
1591 token->tag.type == HTML_TAG_TRACK)) {
1592 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1593 html_pop_current_element(html);
1594
1595 if (token->tag.self_closing)
1596 token->tag.self_closing_acked = true;
1597
1598 return HTML_TOKEN_PROCESSED;
1599 }
1600
1601 if (token->type == HTML_TOKEN_START_TAG &&
1602 token->tag.type == HTML_TAG_HR) {
1603 if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
1604 HTML_SCOPE_BUTTON))
1605 html_close_p(html);
1606
1607 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1608 html_pop_current_element(html);
1609
1610 if (token->tag.self_closing)
1611 token->tag.self_closing_acked = true;
1612
1613 html->frameset_ok = false;
1614 return HTML_TOKEN_PROCESSED;
1615 }
1616
1617 if (token->type == HTML_TOKEN_START_TAG &&
1618 token->tag.type == HTML_TAG_IMAGE) {
1619 /* parse error */
1620 html_parse_error(html);
1621
1622 /* "Don't ask." */
1623 token->tag.name_len = strlcpy(token->tag.name, "img",
1624 sizeof(token->tag.name));
1625 token->tag.type = HTML_TAG_IMG;
1626
1627 return HTML_TOKEN_REPROCESS;
1628 }
1629
1630 if (token->type == HTML_TOKEN_START_TAG &&
1631 token->tag.type == HTML_TAG_TEXTAREA) {
1632 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1633
1634 html->skip_newline_char_token = true;
1635 html->state = HTML_STATE_RCDATA;
1636 html->original_mode = html->mode;
1637 html->frameset_ok = false;
1638 html->mode = HTML_MODE_TEXT;
1639 return HTML_TOKEN_PROCESSED;
1640 }
1641
1642 if (token->type == HTML_TOKEN_START_TAG &&
1643 token->tag.type == HTML_TAG_XMP) {
1644 if (html_has_element_with_tag_open_in_scope(html, HTML_TAG_P,
1645 HTML_SCOPE_BUTTON))
1646 html_close_p(html);
1647
1648 if (html->active_formatting_count > 0)
1649 html_reconstruct_active_formatting(html);
1650 html->frameset_ok = false;
1651
1652 /* "raw text element parsing algorithm" */
1653 html->state = HTML_STATE_RAWTEXT;
1654 html->original_mode = html->mode;
1655 html->mode = HTML_MODE_TEXT;
1656 return HTML_TOKEN_PROCESSED;
1657 }
1658
1659 if (token->type == HTML_TOKEN_START_TAG &&
1660 token->tag.type == HTML_TAG_IFRAME) {
1661 html->frameset_ok = false;
1662
1663 /* "raw text element parsing algorithm" */
1664 html->state = HTML_STATE_RAWTEXT;
1665 html->original_mode = html->mode;
1666 html->mode = HTML_MODE_TEXT;
1667 return HTML_TOKEN_PROCESSED;
1668 }
1669
1670 if (token->type == HTML_TOKEN_START_TAG &&
1671 (token->tag.type == HTML_TAG_NOEMBED ||
1672 (token->tag.type == HTML_TAG_NOSCRIPT && html->scripting))) {
1673 /* "raw text element parsing algorithm" */
1674 html->state = HTML_STATE_RAWTEXT;
1675 html->original_mode = html->mode;
1676 html->mode = HTML_MODE_TEXT;
1677 return HTML_TOKEN_PROCESSED;
1678 }
1679
1680 if (token->type == HTML_TOKEN_START_TAG &&
1681 token->tag.type == HTML_TAG_SELECT) {
1682 if (html->active_formatting_count > 0)
1683 html_reconstruct_active_formatting(html);
1684 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1685 html->frameset_ok = false;
1686
1687 if (html->mode == HTML_MODE_IN_TABLE ||
1688 html->mode == HTML_MODE_IN_CAPTION ||
1689 html->mode == HTML_MODE_IN_TABLE_BODY ||
1690 html->mode == HTML_MODE_IN_ROW ||
1691 html->mode == HTML_MODE_IN_CELL)
1692 html->mode = HTML_MODE_IN_SELECT_IN_TABLE;
1693 else
1694 html->mode = HTML_MODE_IN_SELECT;
1695
1696 return HTML_TOKEN_PROCESSED;
1697 }
1698
1699 if (token->type == HTML_TOKEN_START_TAG &&
1700 (token->tag.type == HTML_TAG_OPTGROUP ||
1701 token->tag.type == HTML_TAG_OPTION)) {
1702 if (token->tag.type == HTML_TAG_OPTION)
1703 html_pop_current_element(html);
1704
1705 if (html->active_formatting_count > 0)
1706 html_reconstruct_active_formatting(html);
1707 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1708 return HTML_TOKEN_PROCESSED;
1709 }
1710
1711 if (token->type == HTML_TOKEN_START_TAG &&
1712 (token->tag.type == HTML_TAG_RB ||
1713 token->tag.type == HTML_TAG_RTC)) {
1714 if (html_has_tag_open(html, HTML_TAG_RUBY)) {
1715 html_generate_implied_end_tags(html, "rtc", false);
1716
1717 if (token->tag.type == HTML_TAG_RUBY) {
1718 /* parse error */
1719 html_parse_error(html);
1720 }
1721 }
1722
1723 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1724 return HTML_TOKEN_PROCESSED;
1725 }
1726
1727 if (token->type == HTML_TOKEN_START_TAG &&
1728 token->tag.type == HTML_TAG_MATH) {
1729 if (html->active_formatting_count > 0)
1730 html_reconstruct_active_formatting(html);
1731
1732 /* TODO: "adjust MathML attributes" */
1733
1734 element = html_append_element_for_token(html, token,
1735 HTML_NAMESPACE_MATHML);
1736
1737 if (token->tag.self_closing) {
1738 html_pop_current_element(html);
1739 token->tag.self_closing_acked = true;
1740 }
1741
1742 return HTML_TOKEN_PROCESSED;
1743 }
1744
1745 if (token->type == HTML_TOKEN_START_TAG &&
1746 token->tag.type == HTML_TAG_SVG) {
1747 if (html->active_formatting_count > 0)
1748 html_reconstruct_active_formatting(html);
1749
1750 /* TODO: "adjust SVG attributes" */
1751
1752 element = html_append_element_for_token(html, token,
1753 HTML_NAMESPACE_SVG);
1754
1755 if (token->tag.self_closing) {
1756 html_pop_current_element(html);
1757 token->tag.self_closing_acked = true;
1758 }
1759
1760 return HTML_TOKEN_PROCESSED;
1761 }
1762
1763 if (token->type == HTML_TOKEN_START_TAG &&
1764 (token->tag.type == HTML_TAG_CAPTION ||
1765 token->tag.type == HTML_TAG_COL ||
1766 token->tag.type == HTML_TAG_COLGROUP ||
1767 token->tag.type == HTML_TAG_FRAME ||
1768 token->tag.type == HTML_TAG_HEAD ||
1769 token->tag.type == HTML_TAG_TBODY ||
1770 token->tag.type == HTML_TAG_TD ||
1771 token->tag.type == HTML_TAG_TFOOT ||
1772 token->tag.type == HTML_TAG_TH ||
1773 token->tag.type == HTML_TAG_THEAD ||
1774 token->tag.type == HTML_TAG_TR)) {
1775 /* parse error, ignore */
1776 html_parse_error(html);
1777 return HTML_TOKEN_PROCESSED;
1778 }
1779
1780 if (token->type == HTML_TOKEN_START_TAG) {
1781 /* any other tag */
1782 if (html->active_formatting_count > 0)
1783 html_reconstruct_active_formatting(html);
1784 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
1785 return HTML_TOKEN_PROCESSED;
1786 }
1787
1788 if (token->type == HTML_TOKEN_END_TAG) {
1789 any_other_end_tag:
1790 /*
1791 * 1. Initialize node to be the current node (the bottommost node of
1792 * the stack).
1793 */
1794 /* 2. Loop: */
1795 for (n = html->open_count - 1; n >= 0; n--) {
1796 node = html->open[n];
1797
1798 /*
1799 * 2. If node is an HTML element with the same tag name as the
1800 * token, then:
1801 */
1802 if (strcmp(node->name, token->tag.name) == 0) {
1803 /*
1804 * 1. Generate implied end tags, except for HTML elements with
1805 * the same tag name as the token.
1806 */
1807 html_generate_implied_end_tags(html, token->tag.name, false);
1808
1809 /*
1810 * 2. If node is not the current node, then this is a parse
1811 * error.
1812 */
1813 if (node != html->current_node) {
1814 html_parse_error(html);
1815 }
1816
1817 /*
1818 * 3. Pop all the nodes from the current node up to node,
1819 * including node, then stop these steps.
1820 */
1821 html_pop_nodes_until_past_element(html, node);
1822 break;
1823 }
1824
1825 /*
1826 * 3. Otherwise, if node is in the special category, then this
1827 * is a parse error; ignore the token, and return.
1828 */
1829 if (node->type && html_is_element_special(html, node)) {
1830 html_parse_error(html);
1831 /* ignore */
1832 return HTML_TOKEN_PROCESSED;
1833 }
1834
1835 /*
1836 * 4. Set node to the previous entry in the stack of open
1837 * elements.
1838 */
1839 /* 5. Return to the step labeled loop. */
1840 }
1841
1842 return HTML_TOKEN_PROCESSED;
1843 }
1844
1845 panic("we shouldn't get to default case in 'in body' parser");
1846 return HTML_TOKEN_PROCESSED;
1847 }
1848
1849 html_token_act
1850 html_process_token_text(struct html_page *html, html_token *token)
1851 {
1852 /*
1853 * https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incdata
1854 */
1855
1856 if (token->type == HTML_TOKEN_CHARACTER) {
1857 if (html->current_node &&
1858 html->current_node->type == HTML_TAG_STYLE && !html->styling)
1859 return HTML_TOKEN_PROCESSED;
1860
1861 html_insert_character(html, token->ch.c);
1862 return HTML_TOKEN_PROCESSED;
1863 }
1864
1865 if (token->type == HTML_TOKEN_EOF) {
1866 /* parse error */
1867 html_parse_error(html);
1868 if (token->tag.type == HTML_TAG_SCRIPT) {
1869 /* TODO: "set its already started to true" */
1870 }
1871
1872 html_pop_current_element(html);
1873
1874 html->mode = html->original_mode;
1875 html->original_mode = HTML_MODE_NONE;
1876 return HTML_TOKEN_REPROCESS;
1877 }
1878
1879 if (token->type == HTML_TOKEN_END_TAG &&
1880 token->tag.type == HTML_TAG_SCRIPT) {
1881 html_pop_current_element(html);
1882
1883 html->mode = html->original_mode;
1884 html->original_mode = HTML_MODE_NONE;
1885
1886 /* TODO: some more stuff related to scripting engine */
1887 return HTML_TOKEN_PROCESSED;
1888 }
1889
1890 if (token->type == HTML_TOKEN_END_TAG) {
1891 html_pop_current_element(html);
1892
1893 html->mode = html->original_mode;
1894 html->original_mode = HTML_MODE_NONE;
1895 return HTML_TOKEN_REPROCESS;
1896 }
1897
1898 return HTML_TOKEN_PROCESSED;
1899 }
1900
1901 html_token_act
1902 html_process_token_in_table(struct html_page *html, html_token *token)
1903 {
1904 HTML_DEBUG(("in_table: TODO"));
1905 /* TODO */
1906 return HTML_TOKEN_PROCESSED;
1907 }
1908
1909 html_token_act
1910 html_process_token_in_table_text(struct html_page *html, html_token *token)
1911 {
1912 HTML_DEBUG(("in_table_text: TODO"));
1913 /* TODO */
1914 return HTML_TOKEN_PROCESSED;
1915 }
1916
1917 html_token_act
1918 html_process_token_in_caption(struct html_page *html, html_token *token)
1919 {
1920 HTML_DEBUG(("in_caption: TODO"));
1921 /* TODO */
1922 return HTML_TOKEN_PROCESSED;
1923 }
1924
1925 html_token_act
1926 html_process_token_in_column_group(struct html_page *html, html_token *token)
1927 {
1928 HTML_DEBUG(("in_column_group: TODO"));
1929 /* TODO */
1930 return HTML_TOKEN_PROCESSED;
1931 }
1932
1933 html_token_act
1934 html_process_token_in_table_body(struct html_page *html, html_token *token)
1935 {
1936 HTML_DEBUG(("in_table_body: TODO"));
1937 /* TODO */
1938 return HTML_TOKEN_PROCESSED;
1939 }
1940
1941 html_token_act
1942 html_process_token_in_row(struct html_page *html, html_token *token)
1943 {
1944 HTML_DEBUG(("in_row: TODO"));
1945 /* TODO */
1946 return HTML_TOKEN_PROCESSED;
1947 }
1948
1949 html_token_act
1950 html_process_token_in_cell(struct html_page *html, html_token *token)
1951 {
1952 HTML_DEBUG(("in_cell: TODO"));
1953 /* TODO */
1954 return HTML_TOKEN_PROCESSED;
1955 }
1956
1957 html_token_act
1958 html_process_token_in_select(struct html_page *html, html_token *token)
1959 {
1960 HTML_DEBUG(("in_select: TODO"));
1961 /* TODO */
1962 return HTML_TOKEN_PROCESSED;
1963 }
1964
1965 html_token_act
1966 html_process_token_in_select_in_table(struct html_page *html,
1967 html_token *token)
1968 {
1969 HTML_DEBUG(("in_select_in_table: TODO"));
1970 /* TODO */
1971 return HTML_TOKEN_PROCESSED;
1972 }
1973
1974 html_token_act
1975 html_process_token_in_template(struct html_page *html, html_token *token)
1976 {
1977 HTML_DEBUG(("in_template: TODO"));
1978 /* TODO */
1979 return HTML_TOKEN_PROCESSED;
1980 }
1981
1982 html_token_act
1983 html_process_token_after_body(struct html_page *html, html_token *token)
1984 {
1985 if (token->type == HTML_TOKEN_CHARACTER &&
1986 (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
1987 token->ch.c == '\r' || token->ch.c == ' ')) {
1988 /* process as "in body" */
1989 html_process_token_in_body(html, token);
1990 return HTML_TOKEN_PROCESSED;
1991 }
1992
1993 if (token->type == HTML_TOKEN_COMMENT) {
1994 html_append_comment(html, &token->comment);
1995 return HTML_TOKEN_PROCESSED;
1996 }
1997
1998 if (token->type == HTML_TOKEN_DOCTYPE) {
1999 html_parse_error(html);
2000 /* ignore */
2001 return HTML_TOKEN_PROCESSED;
2002 }
2003
2004 if (token->type == HTML_TOKEN_START_TAG &&
2005 token->tag.type == HTML_TAG_HTML) {
2006 /* process as "in body" */
2007 html_process_token_in_body(html, token);
2008 return HTML_TOKEN_PROCESSED;
2009 }
2010
2011 if (token->type == HTML_TOKEN_END_TAG &&
2012 token->tag.type == HTML_TAG_HTML) {
2013 html->mode = HTML_MODE_AFTER_AFTER_BODY;
2014 return HTML_TOKEN_PROCESSED;
2015 }
2016
2017 if (token->type == HTML_TOKEN_EOF) {
2018 html_stop_parsing(html);
2019 return HTML_TOKEN_PROCESSED;
2020 }
2021
2022 html_parse_error(html);
2023 html->mode = HTML_MODE_IN_BODY;
2024 return HTML_TOKEN_REPROCESS;
2025 }
2026
2027 html_token_act
2028 html_process_token_in_frameset(struct html_page *html, html_token *token)
2029 {
2030 if (token->type == HTML_TOKEN_CHARACTER &&
2031 (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
2032 token->ch.c == '\r' || token->ch.c == ' ')) {
2033 html_insert_character(html, token->ch.c);
2034 return HTML_TOKEN_PROCESSED;
2035 }
2036
2037 if (token->type == HTML_TOKEN_COMMENT) {
2038 html_append_comment(html, &token->comment);
2039 return HTML_TOKEN_PROCESSED;
2040 }
2041
2042 if (token->type == HTML_TOKEN_DOCTYPE) {
2043 html_parse_error(html);
2044 /* ignore */
2045 return HTML_TOKEN_PROCESSED;
2046 }
2047
2048 if (token->type == HTML_TOKEN_START_TAG &&
2049 token->tag.type == HTML_TAG_HTML) {
2050 /* process as "in body" */
2051 html_process_token_in_body(html, token);
2052 return HTML_TOKEN_PROCESSED;
2053 }
2054
2055 if (token->type == HTML_TOKEN_START_TAG &&
2056 token->tag.type == HTML_TAG_FRAMESET) {
2057 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
2058 return HTML_TOKEN_PROCESSED;
2059 }
2060
2061 if (token->type == HTML_TOKEN_END_TAG &&
2062 token->tag.type == HTML_TAG_FRAMESET) {
2063 if (html->current_node->type == HTML_TAG_HTML) {
2064 html_parse_error(html);
2065 /* ignore */
2066 return HTML_TOKEN_PROCESSED;
2067 }
2068
2069 html_pop_current_element(html);
2070 if (html->current_node->type != HTML_TAG_FRAMESET)
2071 html->mode = HTML_MODE_AFTER_FRAMESET;
2072
2073 return HTML_TOKEN_PROCESSED;
2074 }
2075
2076 if (token->type == HTML_TOKEN_START_TAG &&
2077 token->tag.type == HTML_TAG_FRAME) {
2078 html_append_element_for_token(html, token, HTML_NAMESPACE_HTML);
2079 html_pop_current_element(html);
2080 if (token->tag.self_closing)
2081 token->tag.self_closing_acked = true;
2082 return HTML_TOKEN_PROCESSED;
2083 }
2084
2085 if (token->type == HTML_TOKEN_START_TAG &&
2086 token->tag.type == HTML_TAG_NOFRAMES) {
2087 /* process as "in head" */
2088 html_process_token_in_head(html, token);
2089 return HTML_TOKEN_PROCESSED;
2090 }
2091
2092 if (token->type == HTML_TOKEN_EOF) {
2093 if (html->current_node->type != HTML_TAG_HTML)
2094 html_parse_error(html);
2095 html_stop_parsing(html);
2096 return HTML_TOKEN_PROCESSED;
2097 }
2098
2099 html_parse_error(html);
2100 /* ignore */
2101 return HTML_TOKEN_PROCESSED;
2102 }
2103
2104 html_token_act
2105 html_process_token_after_frameset(struct html_page *html, html_token *token)
2106 {
2107 if (token->type == HTML_TOKEN_CHARACTER &&
2108 (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
2109 token->ch.c == '\r' || token->ch.c == ' ')) {
2110 html_insert_character(html, token->ch.c);
2111 return HTML_TOKEN_PROCESSED;
2112 }
2113
2114 if (token->type == HTML_TOKEN_COMMENT) {
2115 html_append_comment(html, &token->comment);
2116 return HTML_TOKEN_PROCESSED;
2117 }
2118
2119 if (token->type == HTML_TOKEN_DOCTYPE) {
2120 html_parse_error(html);
2121 /* ignore */
2122 return HTML_TOKEN_PROCESSED;
2123 }
2124
2125 if (token->type == HTML_TOKEN_START_TAG &&
2126 token->tag.type == HTML_TAG_HTML) {
2127 /* process as "in body" */
2128 html_process_token_in_body(html, token);
2129 return HTML_TOKEN_PROCESSED;
2130 }
2131
2132 if (token->type == HTML_TOKEN_END_TAG &&
2133 token->tag.type == HTML_TAG_HTML) {
2134 html->mode = HTML_MODE_AFTER_AFTER_FRAMESET;
2135 return HTML_TOKEN_PROCESSED;
2136 }
2137
2138 if (token->type == HTML_TOKEN_START_TAG &&
2139 token->tag.type == HTML_TAG_NOFRAMES) {
2140 /* process as "in head" */
2141 html_process_token_in_head(html, token);
2142 return HTML_TOKEN_PROCESSED;
2143 }
2144
2145 if (token->type == HTML_TOKEN_EOF) {
2146 html_stop_parsing(html);
2147 return HTML_TOKEN_PROCESSED;
2148 }
2149
2150 html_parse_error(html);
2151 /* ignore */
2152 return HTML_TOKEN_PROCESSED;
2153 }
2154
2155 html_token_act
2156 html_process_token_after_after_body(struct html_page *html, html_token *token)
2157 {
2158 if (token->type == HTML_TOKEN_COMMENT) {
2159 /* doc says "as the last child of the Document object */
2160 html_append_comment(html, &token->comment);
2161 return HTML_TOKEN_PROCESSED;
2162 }
2163
2164 if (token->type == HTML_TOKEN_DOCTYPE ||
2165 (token->type == HTML_TOKEN_CHARACTER &&
2166 (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
2167 token->ch.c == '\r' || token->ch.c == ' ')) ||
2168 (token->type == HTML_TOKEN_START_TAG &&
2169 token->tag.type == HTML_TAG_HTML)) {
2170 /* process as "in body" */
2171 html_process_token_in_body(html, token);
2172 return HTML_TOKEN_PROCESSED;
2173 }
2174
2175 if (token->type == HTML_TOKEN_EOF) {
2176 html_stop_parsing(html);
2177 return HTML_TOKEN_PROCESSED;
2178 }
2179
2180 html_parse_error(html);
2181 html->mode = HTML_MODE_IN_BODY;
2182 return HTML_TOKEN_REPROCESS;
2183 }
2184
2185 html_token_act
2186 html_process_token_after_after_frameset(struct html_page *html,
2187 html_token *token)
2188 {
2189 if (token->type == HTML_TOKEN_COMMENT) {
2190 /* doc says "as the last child of the Document object */
2191 html_append_comment(html, &token->comment);
2192 return HTML_TOKEN_PROCESSED;
2193 }
2194
2195 if (token->type == HTML_TOKEN_DOCTYPE ||
2196 (token->type == HTML_TOKEN_CHARACTER &&
2197 (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
2198 token->ch.c == '\r' || token->ch.c == ' ')) ||
2199 (token->type == HTML_TOKEN_START_TAG &&
2200 token->tag.type == HTML_TAG_HTML)) {
2201 /* process as "in body" */
2202 html_process_token_in_body(html, token);
2203 return HTML_TOKEN_PROCESSED;
2204 }
2205
2206 if (token->type == HTML_TOKEN_EOF) {
2207 html_stop_parsing(html);
2208 return HTML_TOKEN_PROCESSED;
2209 }
2210
2211 if (token->type == HTML_TOKEN_START_TAG &&
2212 token->tag.type == HTML_TAG_NOFRAMES) {
2213 /* process as "in head" */
2214 html_process_token_in_head(html, token);
2215 return HTML_TOKEN_PROCESSED;
2216 }
2217
2218 html_parse_error(html);
2219 /* ignore */
2220 return HTML_TOKEN_PROCESSED;
2221 }
2222
2223 html_token_act
2224 html_process_token_in_foreign_content(struct html_page *html,
2225 html_token *token)
2226 {
2227 struct html_element *node;
2228 short n;
2229
2230 if (token->type == HTML_TOKEN_CHARACTER && token->ch.c == '\0') {
2231 html_parse_error(html);
2232 html_insert_character(html, HTML_REPLACEMENT_CHARACTER);
2233 return HTML_TOKEN_PROCESSED;
2234 }
2235
2236 if (token->type == HTML_TOKEN_CHARACTER &&
2237 (token->ch.c == '\t' || token->ch.c == '\n' || token->ch.c == '\f' ||
2238 token->ch.c == '\r' || token->ch.c == ' ')) {
2239 html_insert_character(html, token->ch.c);
2240 return HTML_TOKEN_PROCESSED;
2241 }
2242
2243 if (token->type == HTML_TOKEN_CHARACTER) {
2244 html_insert_character(html, token->ch.c);
2245 html->frameset_ok = false;
2246 return HTML_TOKEN_PROCESSED;
2247 }
2248
2249 if (token->type == HTML_TOKEN_COMMENT) {
2250 html_append_comment(html, &token->comment);
2251 return HTML_TOKEN_PROCESSED;
2252 }
2253
2254 if (token->type == HTML_TOKEN_DOCTYPE) {
2255 html_parse_error(html);
2256 /* ignore */
2257 return HTML_TOKEN_PROCESSED;
2258 }
2259
2260 if ((token->type == HTML_TOKEN_START_TAG &&
2261 (token->tag.type == HTML_TAG_B ||
2262 token->tag.type == HTML_TAG_BIG ||
2263 token->tag.type == HTML_TAG_BLOCKQUOTE ||
2264 token->tag.type == HTML_TAG_BODY ||
2265 token->tag.type == HTML_TAG_BR ||
2266 token->tag.type == HTML_TAG_CENTER ||
2267 token->tag.type == HTML_TAG_CODE ||
2268 token->tag.type == HTML_TAG_DD ||
2269 token->tag.type == HTML_TAG_DIV ||
2270 token->tag.type == HTML_TAG_DL ||
2271 token->tag.type == HTML_TAG_DT ||
2272 token->tag.type == HTML_TAG_EM ||
2273 token->tag.type == HTML_TAG_EMBED ||
2274 token->tag.type == HTML_TAG_H1 ||
2275 token->tag.type == HTML_TAG_H2 ||
2276 token->tag.type == HTML_TAG_H3 ||
2277 token->tag.type == HTML_TAG_H4 ||
2278 token->tag.type == HTML_TAG_H5 ||
2279 token->tag.type == HTML_TAG_H6 ||
2280 token->tag.type == HTML_TAG_HEAD ||
2281 token->tag.type == HTML_TAG_HR ||
2282 token->tag.type == HTML_TAG_I ||
2283 token->tag.type == HTML_TAG_IMG ||
2284 token->tag.type == HTML_TAG_LI ||
2285 token->tag.type == HTML_TAG_LISTING ||
2286 token->tag.type == HTML_TAG_MENU ||
2287 token->tag.type == HTML_TAG_META ||
2288 token->tag.type == HTML_TAG_NOBR ||
2289 token->tag.type == HTML_TAG_OL ||
2290 token->tag.type == HTML_TAG_P ||
2291 token->tag.type == HTML_TAG_PRE ||
2292 token->tag.type == HTML_TAG_RUBY ||
2293 token->tag.type == HTML_TAG_S ||
2294 token->tag.type == HTML_TAG_SMALL ||
2295 token->tag.type == HTML_TAG_SPAN ||
2296 token->tag.type == HTML_TAG_STRONG ||
2297 token->tag.type == HTML_TAG_STRIKE ||
2298 token->tag.type == HTML_TAG_SUB ||
2299 token->tag.type == HTML_TAG_SUP ||
2300 token->tag.type == HTML_TAG_TABLE ||
2301 token->tag.type == HTML_TAG_TT ||
2302 token->tag.type == HTML_TAG_U ||
2303 token->tag.type == HTML_TAG_UL ||
2304 token->tag.type == HTML_TAG_VAR)) ||
2305 (token->type == HTML_TOKEN_END_TAG &&
2306 (token->tag.type == HTML_TAG_BR ||
2307 token->tag.type == HTML_TAG_P))) {
2308 html_parse_error(html);
2309
2310 /* TODO: check mathml */
2311
2312 while (html->current_node->ns != HTML_NAMESPACE_HTML)
2313 html_pop_current_element(html);
2314
2315 /*
2316 * Reprocess the token according to the rules given in the section
2317 * corresponding to the current insertion mode in HTML content.
2318 */
2319 return HTML_TOKEN_REPROCESS;
2320 }
2321
2322 if (token->type == HTML_TOKEN_START_TAG) {
2323 /* TODO: check mathml */
2324
2325 if (html->current_node->ns == HTML_NAMESPACE_SVG) {
2326 /* TODO: check svg tag name according to a list */
2327
2328 /* TODO: "adjust SVG attributes" */
2329 }
2330
2331 /* TODO: "adjust foreign attributes" */
2332
2333 /*
2334 * Insert a foreign element for the token, with adjusted current node's
2335 * namespace and false.
2336 */
2337 html_append_element_for_token(html, token, html->current_node->ns);
2338
2339 if (token->tag.self_closing) {
2340 if (token->tag.type == HTML_TAG_SCRIPT &&
2341 html->current_node->ns == HTML_NAMESPACE_SVG) {
2342 token->tag.self_closing_acked = true;
2343 goto svg_script;
2344 } else {
2345 html_pop_current_element(html);
2346 token->tag.self_closing_acked = true;
2347 }
2348 }
2349
2350 return HTML_TOKEN_PROCESSED;
2351 }
2352
2353 /*
2354 * An end tag whose tag name is "script", if the current node is an SVG
2355 * script element
2356 */
2357 if (token->type == HTML_TOKEN_END_TAG &&
2358 token->tag.type == HTML_TAG_SCRIPT &&
2359 html->current_node->type == HTML_TAG_SCRIPT &&
2360 html->current_node->ns == HTML_NAMESPACE_SVG) {
2361 svg_script:
2362 html_pop_current_element(html);
2363
2364 /* TODO: other things */
2365
2366 return HTML_TOKEN_PROCESSED;
2367 }
2368
2369 if (token->type == HTML_TOKEN_END_TAG) {
2370 /*
2371 * 1. Initialize node to be the current node (the bottommost node of
2372 * the stack).
2373 */
2374 node = html->current_node;
2375
2376 /*
2377 * 2. If node's tag name, converted to ASCII lowercase, is not the same
2378 * as the tag name of the token, then this is a parse error.
2379 */
2380 if (strcasecmp(token->tag.name, node->name) != 0)
2381 html_parse_error(html);
2382
2383 /*
2384 * 3. Loop: If node is the topmost element in the stack of open
2385 * elements, then return. (fragment case)
2386 */
2387 loop:
2388 if (node == html->open[0])
2389 return HTML_TOKEN_PROCESSED;
2390
2391 /*
2392 * 4. If node's tag name, converted to ASCII lowercase, is the same as
2393 * the tag name of the token, pop elements from the stack of open
2394 * elements until node has been popped from the stack, and then return.
2395 */
2396 if (strcasecmp(token->tag.name, node->name) == 0) {
2397 html_pop_nodes_until_past_tag(html, token->tag.type);
2398 return HTML_TOKEN_PROCESSED;
2399 }
2400
2401 /* 5. Set node to the previous entry in the stack of open elements. */
2402 for (n = 1; n < html->open_count; n++) {
2403 if (html->open[n] == node) {
2404 node = html->open[n - 1];
2405 break;
2406 }
2407 }
2408
2409 /*
2410 * 6. If node is not an element in the HTML namespace, return to the
2411 * step labeled loop.
2412 */
2413 if (node->ns != HTML_NAMESPACE_HTML)
2414 goto loop;
2415
2416 /*
2417 * 7. Otherwise, process the token according to the rules given in the
2418 * section corresponding to the current insertion mode in HTML content.
2419 */
2420 return HTML_TOKEN_REPROCESS;
2421 }
2422
2423 return HTML_TOKEN_PROCESSED;
2424 }
2425
2426 void
2427 html_stop_parsing(struct html_page *html)
2428 {
2429 while (html->current_node)
2430 html_pop_current_element(html);
2431 }
2432
2433 /*
2434 * helpers
2435 */
2436
2437 bool
2438 html_has_tag_open(struct html_page *html, html_tag_type tag)
2439 {
2440 short n;
2441
2442 for (n = 0; n < html->open_count; n++) {
2443 if (html->open[n]->type == tag)
2444 return true;
2445 }
2446
2447 return false;
2448 }
2449
2450 bool
2451 html_is_element_open(struct html_page *html, struct html_element *el)
2452 {
2453 short n;
2454
2455 for (n = 0; n < html->open_count; n++)
2456 if (html->open[n] == el)
2457 return true;
2458
2459 return false;
2460 }
2461
2462 bool
2463 html_has_element_in_scope(struct html_page *html, struct html_element *element,
2464 html_scope scope)
2465 {
2466 return html_has_element_or_one_with_tag_open_in_scope(html, element, 0,
2467 scope);
2468 }
2469
2470 bool
2471 html_has_element_with_tag_open_in_scope(struct html_page *html,
2472 html_tag_type tag, html_scope scope)
2473 {
2474 return html_has_element_or_one_with_tag_open_in_scope(html, NULL, tag,
2475 scope);
2476 }
2477
2478 bool
2479 html_has_element_or_one_with_tag_open_in_scope(struct html_page *html,
2480 struct html_element *element, html_tag_type tag, html_scope scope)
2481 {
2482 struct html_element *oelement;
2483 short n;
2484
2485 for (n = html->open_count - 1; n >= 0; n--) {
2486 oelement = html->open[n];
2487
2488 if (element) {
2489 if (oelement == element)
2490 return true;
2491 } else {
2492 if (oelement->type == tag)
2493 return true;
2494 }
2495
2496 if (scope == HTML_SCOPE_DEFAULT || scope == HTML_SCOPE_LIST_ITEM ||
2497 scope == HTML_SCOPE_BUTTON) {
2498 if (oelement->type == HTML_TAG_APPLET ||
2499 oelement->type == HTML_TAG_CAPTION ||
2500 oelement->type == HTML_TAG_HTML ||
2501 oelement->type == HTML_TAG_TABLE ||
2502 oelement->type == HTML_TAG_TD ||
2503 oelement->type == HTML_TAG_TH ||
2504 oelement->type == HTML_TAG_MARQUEE ||
2505 oelement->type == HTML_TAG_OBJECT ||
2506 oelement->type == HTML_TAG_TEMPLATE) {
2507 /* TODO: MathML and SVG tags */
2508 return false;
2509 }
2510 }
2511
2512 if (scope == HTML_SCOPE_LIST_ITEM) {
2513 if (oelement->ns == HTML_NAMESPACE_HTML &&
2514 (oelement->type == HTML_TAG_OL || oelement->type == HTML_TAG_UL))
2515 return false;
2516 }
2517
2518 if (scope == HTML_SCOPE_BUTTON) {
2519 if (oelement->ns == HTML_NAMESPACE_HTML &&
2520 oelement->type == HTML_TAG_BUTTON)
2521 return false;
2522 }
2523
2524 if (scope == HTML_SCOPE_TABLE) {
2525 if (oelement->ns == HTML_NAMESPACE_HTML &&
2526 (oelement->type == HTML_TAG_HTML ||
2527 oelement->type == HTML_TAG_TABLE ||
2528 oelement->type == HTML_TAG_TEMPLATE))
2529 return false;
2530 }
2531
2532 if (scope == HTML_SCOPE_SELECT) {
2533 /* all but these two */
2534 if (oelement->ns == HTML_NAMESPACE_HTML &&
2535 (oelement->type != HTML_TAG_OPTGROUP &&
2536 oelement->type != HTML_TAG_OPTION))
2537 return false;
2538 }
2539 }
2540
2541 return false;
2542 }
2543
2544 bool
2545 html_element_serializes_as_void(struct html_page *html,
2546 struct html_element *element)
2547 {
2548 /* https://html.spec.whatwg.org/multipage/syntax.html#elements-2 */
2549 switch (element->type) {
2550 case HTML_TAG_AREA:
2551 case HTML_TAG_BASE:
2552 case HTML_TAG_BR:
2553 case HTML_TAG_COL:
2554 case HTML_TAG_EMBED:
2555 case HTML_TAG_HR:
2556 case HTML_TAG_IMG:
2557 case HTML_TAG_INPUT:
2558 case HTML_TAG_LINK:
2559 case HTML_TAG_META:
2560 case HTML_TAG_SOURCE:
2561 case HTML_TAG_TRACK:
2562 case HTML_TAG_WBR:
2563 return true;
2564 /*
2565 * https://html.spec.whatwg.org/multipage/parsing.html#serialising-html-fragments
2566 */
2567 case HTML_TAG_BASEFONT:
2568 case HTML_TAG_BGSOUND:
2569 case HTML_TAG_FRAME:
2570 case HTML_TAG_KEYGEN:
2571 case HTML_TAG_PARAM:
2572 return true;
2573 default:
2574 return false;
2575 }
2576 }
2577
2578 bool
2579 html_is_element_special(struct html_page *html, struct html_element *el)
2580 {
2581 /* https://html.spec.whatwg.org/multipage/parsing.html#special */
2582
2583 switch (el->type) {
2584 case HTML_TAG_ADDRESS:
2585 case HTML_TAG_APPLET:
2586 case HTML_TAG_AREA:
2587 case HTML_TAG_ARTICLE:
2588 case HTML_TAG_ASIDE:
2589 case HTML_TAG_BASE:
2590 case HTML_TAG_BASEFONT:
2591 case HTML_TAG_BGSOUND:
2592 case HTML_TAG_BLOCKQUOTE:
2593 case HTML_TAG_BODY:
2594 case HTML_TAG_BR:
2595 case HTML_TAG_BUTTON:
2596 case HTML_TAG_CAPTION:
2597 case HTML_TAG_CENTER:
2598 case HTML_TAG_COL:
2599 case HTML_TAG_COLGROUP:
2600 case HTML_TAG_DD:
2601 case HTML_TAG_DETAILS:
2602 case HTML_TAG_DIR:
2603 case HTML_TAG_DIV:
2604 case HTML_TAG_DL:
2605 case HTML_TAG_DT:
2606 case HTML_TAG_EMBED:
2607 case HTML_TAG_FIELDSET:
2608 case HTML_TAG_FIGCAPTION:
2609 case HTML_TAG_FIGURE:
2610 case HTML_TAG_FOOTER:
2611 case HTML_TAG_FORM:
2612 case HTML_TAG_FRAME:
2613 case HTML_TAG_FRAMESET:
2614 case HTML_TAG_H1:
2615 case HTML_TAG_H2:
2616 case HTML_TAG_H3:
2617 case HTML_TAG_H4:
2618 case HTML_TAG_H5:
2619 case HTML_TAG_H6:
2620 case HTML_TAG_HEAD:
2621 case HTML_TAG_HEADER:
2622 case HTML_TAG_HGROUP:
2623 case HTML_TAG_HR:
2624 case HTML_TAG_HTML:
2625 case HTML_TAG_IFRAME:
2626 case HTML_TAG_IMG:
2627 case HTML_TAG_INPUT:
2628 case HTML_TAG_KEYGEN:
2629 case HTML_TAG_LI:
2630 case HTML_TAG_LINK:
2631 case HTML_TAG_LISTING:
2632 case HTML_TAG_MAIN:
2633 case HTML_TAG_MARQUEE:
2634 case HTML_TAG_MENU:
2635 case HTML_TAG_META:
2636 case HTML_TAG_NAV:
2637 case HTML_TAG_NOEMBED:
2638 case HTML_TAG_NOFRAMES:
2639 case HTML_TAG_NOSCRIPT:
2640 case HTML_TAG_OBJECT:
2641 case HTML_TAG_OL:
2642 case HTML_TAG_P:
2643 case HTML_TAG_PARAM:
2644 case HTML_TAG_PLAINTEXT:
2645 case HTML_TAG_PRE:
2646 case HTML_TAG_SCRIPT:
2647 case HTML_TAG_SEARCH:
2648 case HTML_TAG_SECTION:
2649 case HTML_TAG_SELECT:
2650 case HTML_TAG_SOURCE:
2651 case HTML_TAG_STYLE:
2652 case HTML_TAG_SUMMARY:
2653 case HTML_TAG_TABLE:
2654 case HTML_TAG_TBODY:
2655 case HTML_TAG_TD:
2656 case HTML_TAG_TEMPLATE:
2657 case HTML_TAG_TEXTAREA:
2658 case HTML_TAG_TFOOT:
2659 case HTML_TAG_TH:
2660 case HTML_TAG_THEAD:
2661 case HTML_TAG_TITLE:
2662 case HTML_TAG_TR:
2663 case HTML_TAG_TRACK:
2664 case HTML_TAG_UL:
2665 case HTML_TAG_WBR:
2666 /* TODO: MathML and SVG */
2667 return true;
2668 default:
2669 return false;
2670 }
2671 }
2672
2673 bool
2674 html_is_element_formatting(struct html_page *html, struct html_element *el)
2675 {
2676 switch (el->type) {
2677 case HTML_TAG_A:
2678 case HTML_TAG_B:
2679 case HTML_TAG_BIG:
2680 case HTML_TAG_CODE:
2681 case HTML_TAG_EM:
2682 case HTML_TAG_FONT:
2683 case HTML_TAG_I:
2684 case HTML_TAG_NOBR:
2685 case HTML_TAG_S:
2686 case HTML_TAG_SMALL:
2687 case HTML_TAG_STRIKE:
2688 case HTML_TAG_STRONG:
2689 case HTML_TAG_TT:
2690 case HTML_TAG_U:
2691 return true;
2692 default:
2693 return false;
2694 }
2695 }
2696
2697 char *
2698 html_escape_string(struct html_page *html, char *str, size_t *len,
2699 bool attribute_mode)
2700 {
2701 size_t len_escaped;
2702 short append;
2703 short n;
2704
2705 for (append = 0, len_escaped = 0; append <= 1; append++) {
2706 /*
2707 * https://html.spec.whatwg.org/multipage/parsing.html#escapingString
2708 */
2709
2710 if (append) {
2711 if (html->escaped_buf == NULL ||
2712 html->escaped_size < len_escaped + 1) {
2713 html->escaped_size = len_escaped + 1;
2714 HTML_DEBUG((": reallocing escaped to %ld",
2715 html->escaped_size));
2716 if (html->escaped_buf)
2717 xfree(&html->escaped_buf);
2718 html->escaped_buf = xmalloc(html->escaped_size);
2719 }
2720
2721 if (html->escaped_buf == NULL)
2722 panic("escaped_buf is null");
2723
2724 len_escaped = 0;
2725 }
2726
2727 for (n = 0; n < *len; n++) {
2728 switch ((unsigned char)str[n]) {
2729 case '&':
2730 if (append) {
2731 html->escaped_buf[len_escaped++] = '&';
2732 html->escaped_buf[len_escaped++] = 'a';
2733 html->escaped_buf[len_escaped++] = 'm';
2734 html->escaped_buf[len_escaped++] = 'p';
2735 html->escaped_buf[len_escaped++] = ';';
2736 } else
2737 len_escaped += 5;
2738 break;
2739 case 0xa0:
2740 if (append) {
2741 html->escaped_buf[len_escaped++] = '&';
2742 html->escaped_buf[len_escaped++] = 'n';
2743 html->escaped_buf[len_escaped++] = 'b';
2744 html->escaped_buf[len_escaped++] = 's';
2745 html->escaped_buf[len_escaped++] = 'p';
2746 html->escaped_buf[len_escaped++] = ';';
2747 } else
2748 len_escaped += 6;
2749 break;
2750 case '"':
2751 if (attribute_mode) {
2752 if (append) {
2753 html->escaped_buf[len_escaped++] = '&';
2754 html->escaped_buf[len_escaped++] = 'q';
2755 html->escaped_buf[len_escaped++] = 'u';
2756 html->escaped_buf[len_escaped++] = 'o';
2757 html->escaped_buf[len_escaped++] = 't';
2758 html->escaped_buf[len_escaped++] = ';';
2759 } else
2760 len_escaped += 6;
2761 break;
2762 }
2763 /* fallthrough */
2764 case '<':
2765 if (!attribute_mode) {
2766 if (append) {
2767 html->escaped_buf[len_escaped++] = '&';
2768 html->escaped_buf[len_escaped++] = 'l';
2769 html->escaped_buf[len_escaped++] = 't';
2770 html->escaped_buf[len_escaped++] = ';';
2771 } else
2772 len_escaped += 4;
2773 break;
2774 }
2775 /* fallthrough */
2776 case '>':
2777 if (!attribute_mode) {
2778 if (append) {
2779 html->escaped_buf[len_escaped++] = '&';
2780 html->escaped_buf[len_escaped++] = 'g';
2781 html->escaped_buf[len_escaped++] = 't';
2782 html->escaped_buf[len_escaped++] = ';';
2783 } else
2784 len_escaped += 4;
2785 break;
2786 }
2787 /* fallthrough */
2788 default:
2789 if (append)
2790 html->escaped_buf[len_escaped++] = str[n];
2791 else
2792 len_escaped++;
2793 }
2794 }
2795 }
2796
2797 html->escaped_buf[len_escaped] = '\0';
2798 HTML_DEBUG((": escaped '%s' to [%ld] '%s'", str, len_escaped,
2799 html->escaped_buf));
2800 *len = len_escaped;
2801 return html->escaped_buf;
2802 }
2803
2804 void
2805 html_pop_current_element(struct html_page *html)
2806 {
2807 short n;
2808
2809 if (html->open_count <= 0)
2810 panic("bogus open count %d", html->open_count);
2811
2812 HTML_DEBUG((": rendering current <%s>", html->current_node->name));
2813
2814 html_render_current_node(html, true);
2815 html_deref_element(html, html->current_node);
2816
2817 HTML_DEBUG((": popping current <%s>", html->current_node->name));
2818
2819 html->open_count--;
2820 if (html->open_count)
2821 html->current_node = html->open[html->open_count - 1];
2822 else
2823 html->current_node = NULL;
2824
2825 HTML_DEBUG((": still open: "));
2826 for (n = 0; n <= html->open_count - 1; n++)
2827 HTML_DEBUG(("<%s>", html->open[n]->name));
2828 }
2829
2830 void
2831 html_pop_nodes_until_past_tag(struct html_page *html, html_tag_type stop_after)
2832 {
2833 short n;
2834 bool done;
2835
2836 HTML_DEBUG((": popping until past <%s>", html_tag_names[stop_after]));
2837
2838 for (n = html->open_count - 1, done = false; n >= 0; n--) {
2839 if (html->open[n]->type == stop_after)
2840 done = true;
2841
2842 html_pop_current_element(html);
2843
2844 if (done)
2845 return;
2846 }
2847
2848 /* closed a tag that was never open? */
2849 HTML_DEBUG(("popped tags all the way to root looking for %s",
2850 html_tag_names[stop_after]));
2851 }
2852
2853 void
2854 html_pop_nodes_until_past_element(struct html_page *html,
2855 struct html_element *element)
2856 {
2857 short n;
2858 bool done;
2859
2860 for (n = html->open_count - 1, done = false; n >= 0; n--) {
2861 if (html->open[n] == element)
2862 done = true;
2863
2864 html_pop_current_element(html);
2865
2866 if (done)
2867 return;
2868 }
2869 }
2870
2871 void
2872 html_generate_implied_end_tags(struct html_page *html, char *except,
2873 bool thoroughly)
2874 {
2875 struct html_element *element;
2876
2877 HTML_DEBUG((": html_generate_implied_end_tags"));
2878 if (except)
2879 HTML_DEBUG((" except <%s>", except));
2880
2881 while (html->current_node) {
2882 element = html->current_node;
2883
2884 if (except != NULL && strcmp(element->name, except) == 0)
2885 return;
2886
2887 if (element->type == HTML_TAG_DD ||
2888 element->type == HTML_TAG_DT ||
2889 element->type == HTML_TAG_LI ||
2890 element->type == HTML_TAG_OPTGROUP ||
2891 element->type == HTML_TAG_OPTION ||
2892 element->type == HTML_TAG_P ||
2893 element->type == HTML_TAG_RB ||
2894 element->type == HTML_TAG_RP ||
2895 element->type == HTML_TAG_RT ||
2896 element->type == HTML_TAG_RTC) {
2897 html_pop_current_element(html);
2898 continue;
2899 }
2900
2901 if (thoroughly &&
2902 (element->type == HTML_TAG_CAPTION ||
2903 element->type == HTML_TAG_COLGROUP ||
2904 element->type == HTML_TAG_TBODY ||
2905 element->type == HTML_TAG_TD ||
2906 element->type == HTML_TAG_TFOOT ||
2907 element->type == HTML_TAG_TH ||
2908 element->type == HTML_TAG_THEAD ||
2909 element->type == HTML_TAG_TR)) {
2910 html_pop_current_element(html);
2911 continue;
2912 }
2913
2914 return;
2915 }
2916 }
2917
2918 bool
2919 html_remove_active_formatting_element(struct html_page *html,
2920 struct html_element *element)
2921 {
2922 short n;
2923
2924 for (n = 0; n < html->active_formatting_count; n++) {
2925 if (html->active_formatting[n].element == element) {
2926 /* shift out */
2927 for (; n < html->active_formatting_count - 1; n++) {
2928 html->active_formatting[n] = html->active_formatting[n + 1];
2929 }
2930 html->active_formatting_count--;
2931 html_deref_element(html, element);
2932 return true;
2933 }
2934 }
2935
2936 return false;
2937 }
2938
2939 void
2940 html_close_p(struct html_page *html)
2941 {
2942 html_generate_implied_end_tags(html, "p", false);
2943
2944 if (html->current_node->type != HTML_TAG_P) {
2945 /* parse error */
2946 html_parse_error(html);
2947 }
2948
2949 html_pop_nodes_until_past_tag(html, HTML_TAG_P);
2950 }
2951
2952 bool
2953 html_remove_open_element(struct html_page *html, struct html_element *element)
2954 {
2955 short n;
2956
2957 for (n = 0; n < html->open_count; n++) {
2958 if (html->open[n] == element) {
2959 for (; n < html->open_count - 1; n++)
2960 html->open[n] = html->open[n + 1];
2961 html->open_count--;
2962 html_deref_element(html, element);
2963 return true;
2964 }
2965 }
2966
2967 return false;
2968 }
2969
2970 bool
2971 html_is_tag_in_active_formatting(struct html_page *html, html_tag_type tag)
2972 {
2973 short n;
2974
2975 for (n = 0; n < html->active_formatting_count - 1; n++) {
2976 if (html->active_formatting[n].element &&
2977 html->active_formatting[n].element->type == tag)
2978 return true;
2979 }
2980
2981 return false;
2982 }
2983
2984 bool
2985 html_is_element_in_active_formatting(struct html_page *html,
2986 struct html_element *element)
2987 {
2988 short n;
2989
2990 for (n = 0; n < html->active_formatting_count; n++) {
2991 if (html->active_formatting[n].element == element)
2992 return true;
2993 }
2994
2995 return false;
2996 }
2997
2998 void
2999 html_reconstruct_active_formatting(struct html_page *html)
3000 {
3001 struct html_formatting *entry;
3002 struct html_element *new_element;
3003 short n, entry_n;
3004 html_token token;
3005
3006 HTML_DEBUG((": reconstructing AF"));
3007
3008 /*
3009 * 1. If there are no entries in the list of active formatting elements,
3010 * then there is nothing to reconstruct; stop this algorithm.
3011 */
3012 if (html->active_formatting_count == 0)
3013 return;
3014
3015 /*
3016 * 2. If the last (most recently added) entry in the list of active
3017 * formatting elements is a marker, or if it is an element that is in the
3018 * stack of open elements, then there is nothing to reconstruct; stop this
3019 * algorithm.
3020 */
3021 if (html->active_formatting[html->active_formatting_count - 1].marker)
3022 return;
3023 if (html_is_element_open(html,
3024 html->active_formatting[html->active_formatting_count - 1].element))
3025 return;
3026
3027 /*
3028 * 3. Let entry be the last (most recently added) element in the list of
3029 * active formatting elements.
3030 */
3031 entry_n = -1;
3032 for (n = html->active_formatting_count - 1; n >= 0; n--) {
3033 if (html->active_formatting[n].marker)
3034 continue;
3035 entry = &html->active_formatting[n];
3036 entry_n = n;
3037 break;
3038 }
3039 if (entry_n == -1)
3040 panic("html_reconstruct_active_formatting: no last element");
3041
3042 /*
3043 * 4. Rewind: If there are no entries before entry in the list of active
3044 * formatting elements, then jump to the step labeled create.
3045 */
3046 rewind:
3047 if (entry_n == 0)
3048 goto create;
3049
3050 /*
3051 * 5; Let entry be the entry one earlier than entry in the list of active
3052 * formatting elements.
3053 */
3054 entry = &html->active_formatting[--entry_n];
3055
3056 /*
3057 * 6. If entry is neither a marker nor an element that is also in the stack
3058 * of open elements, go to the step labeled rewind.
3059 */
3060 if (!(entry->marker || html_is_element_open(html, entry->element)))
3061 goto rewind;
3062
3063 advance:
3064 /*
3065 * 7. Advance: Let entry be the element one later than entry in the list of
3066 * active formatting elements.
3067 */
3068 entry = &html->active_formatting[++entry_n];
3069
3070 create:
3071 /*
3072 * 8. Create: Insert an HTML element for the token for which the element
3073 * entry was created, to obtain new element.
3074 */
3075 memset(&token, 0, sizeof(html_token));
3076 token.type = entry->token;
3077 token.tag.type = entry->element->type;
3078 memcpy(&token.tag.name, entry->element->name, sizeof(token.tag.name));
3079 token.tag.name_len = entry->element->name_len;
3080 memcpy(&token.tag.attrs, entry->element->attrs, sizeof(token.tag.attrs));
3081 token.tag.attrs_count = entry->element->attrs_count;
3082 new_element = html_append_element_for_token(html, &token,
3083 HTML_NAMESPACE_HTML);
3084
3085 /*
3086 * 9. Replace the entry for entry in the list with an entry for new element.
3087 */
3088 html_deref_element(html, entry->element);
3089 entry->element = new_element;
3090 new_element->refs++;
3091
3092 HTML_DEBUG((": AF created new <%s>", new_element->name));
3093
3094 /*
3095 * 10. If the entry for new element in the list of active formatting
3096 * elements is not the last entry in the list, return to the step labeled
3097 * advance.
3098 */
3099 if (entry_n + 1 != html->active_formatting_count)
3100 goto advance;
3101 }
3102
3103 void
3104 html_push_active_formatting_element(struct html_page *html,
3105 struct html_element *element, html_token_type token_type)
3106 {
3107 /*
3108 * https://html.spec.whatwg.org/multipage/parsing.html#the-list-of-active-formatting-elements
3109 */
3110 short last_marker = 0;
3111 short found = 0, n, j;
3112 struct html_element *found_matches[3];
3113
3114 /* find last marker, if any */
3115 for (n = html->active_formatting_count - 1; n >= 0; n--) {
3116 if (html->active_formatting[n].marker) {
3117 last_marker = n;
3118 break;
3119 }
3120 }
3121
3122 /*
3123 * "This is the Noah's Ark clause. But with three per family instead of
3124 * two."
3125 *
3126 * 1. If there are already three elements in the list of active formatting
3127 * elements after the last marker, if any, or anywhere in the list if there
3128 * are no markers, that have the same tag name, namespace, and attributes
3129 * as element, then remove the earliest such element from the list of
3130 * active formatting elements.
3131 */
3132 for (n = last_marker + 1; n < html->active_formatting_count - 1; n++) {
3133 if (html->active_formatting[n].marker)
3134 panic("shouldn't have a marker after last marker in active "
3135 "formatting list");
3136
3137 if (html->active_formatting[n].element->type != element->type)
3138 continue;
3139 if (html->active_formatting[n].element->ns != element->ns)
3140 continue;
3141
3142 /* TODO: also compare attribute names and values */
3143
3144 found_matches[found++] = html->active_formatting[n].element;
3145
3146 if (found < 3)
3147 continue;
3148
3149 /* remove found_matches[0] from the list */
3150 for (n = 0; n < html->active_formatting_count - 1; n++) {
3151 if (html->active_formatting[n].element != found_matches[0])
3152 continue;
3153
3154 HTML_DEBUG(("push_active_formatting_element shifting out tag "
3155 "%s\r", found_matches[0]->name));
3156
3157 /* skip this one, move everything else down */
3158 for (j = n; j < html->active_formatting_count - 2; j++) {
3159 html->active_formatting[j].token =
3160 html->active_formatting[j + 1].token;
3161 html->active_formatting[j].marker =
3162 html->active_formatting[j + 1].marker;
3163 }
3164
3165 html->active_formatting_count--;
3166 html_deref_element(html, found_matches[0]);
3167 break;
3168 }
3169 }
3170
3171 /* 2. Add element to the list of active formatting elements. */
3172 html->active_formatting_count++;
3173 html->active_formatting[html->active_formatting_count - 1].marker = false;
3174 html->active_formatting[html->active_formatting_count - 1].token =
3175 token_type;
3176 html->active_formatting[html->active_formatting_count - 1].element = element;
3177 element->refs++;
3178 }
3179
3180 void
3181 html_push_active_formatting_marker(struct html_page *html,
3182 html_token_type token_type)
3183 {
3184 if (html->active_formatting_count >= nitems(html->active_formatting))
3185 panic("active formatting overflow");
3186
3187 html->active_formatting[html->active_formatting_count - 1].token =
3188 token_type;
3189 html->active_formatting[html->active_formatting_count - 1].element = NULL;
3190 html->active_formatting[html->active_formatting_count - 1].marker = true;
3191 html->active_formatting_count++;
3192 }
3193
3194 bool
3195 html_run_adoption_agency(struct html_page *html, html_token *token)
3196 {
3197 /*
3198 * https://html.spec.whatwg.org/multipage/parsing.html#adoption-agency-algorithm
3199 */
3200 char *subject;
3201 short olc, ilc, n;
3202 struct html_element *formatting_element, *before_fe, *after_fe,
3203 *furthest_block, *common_ancestor, *node, *last_node, *before_node,
3204 *element;
3205 html_token ttoken;
3206 bool found;
3207
3208 HTML_DEBUG((": AAA for <%s>: AF tags", token->tag.name));
3209 for (n = 0; n < html->active_formatting_count; n++) {
3210 HTML_DEBUG((" <%s>", html->active_formatting[n].element->name));
3211 }
3212 HTML_DEBUG((": open nodes "));
3213 for (n = 0; n < html->open_count; n++) {
3214 HTML_DEBUG(("<%s>", html->open[n]->name));
3215 }
3216
3217 /* 1. Let subject be token's tag name. */
3218 subject = token->tag.name;
3219
3220 /*
3221 * 2. If the current node is an HTML element whose tag name is subject, and
3222 * the current node is not in the list of active formatting elements, then
3223 * pop the current node off the stack of open elements and return.
3224 */
3225 if (strcmp(html->current_node->name, subject) == 0 &&
3226 !html_is_element_in_active_formatting(html, html->current_node)) {
3227 html_pop_current_element(html);
3228 return true;
3229 }
3230
3231 /* 3. Let outerLoopCounter be 0. */
3232 olc = 0;
3233
3234 /* 4. While true: */
3235 for (;;) {
3236 /* 1. If outerLoopCounter is greater than or equal to 8, then return. */
3237 if (olc >= 8)
3238 return true;
3239
3240 /* 2. Increment outerLoopCounter by 1. */
3241 olc++;
3242
3243 /*
3244 * 3. Let formattingElement be the last element in the list of active
3245 * formatting elements that:
3246 *
3247 * - is between the end of the list and the last marker in the list, if
3248 * any, or the start of the list otherwise, and
3249 * - has the tag name /subject/.
3250 */
3251 formatting_element = NULL;
3252 for (n = html->active_formatting_count - 1; n >= 0; n--) {
3253 if (html->active_formatting[n].marker || n == 0) {
3254 if (html->active_formatting[n].marker)
3255 n++;
3256 for (; n < html->active_formatting_count; n++) {
3257 if (strcmp(html->active_formatting[n].element->name,
3258 subject) == 0) {
3259 formatting_element = html->active_formatting[n].element;
3260 break;
3261 }
3262 }
3263 break;
3264 }
3265 }
3266
3267 /*
3268 * If there is no such element, then return and instead act as
3269 * described in the "any other end tag" entry above.
3270 * (we'll return false to indicate that)
3271 */
3272 if (formatting_element == NULL)
3273 return false;
3274
3275 /*
3276 * 4. If formattingElement is not in the stack of open elements, then
3277 * this is a parse error; remove the element from the list, and return.
3278 */
3279 found = false;
3280 for (n = 0; n < html->open_count; n++) {
3281 if (html->open[n] == formatting_element) {
3282 found = true;
3283 break;
3284 }
3285 }
3286
3287 if (!found) {
3288 html_parse_error(html);
3289 html_remove_active_formatting_element(html, formatting_element);
3290 return true;
3291 }
3292
3293 /*
3294 * 5. If formattingElement is in the stack of open elements, but the
3295 * element is not in scope, then this is a parse error; return.
3296 */
3297 if (!html_has_element_in_scope(html, formatting_element,
3298 HTML_SCOPE_DEFAULT)) {
3299 html_parse_error(html);
3300 return true;
3301 }
3302
3303 /*
3304 * 6. If formattingElement is not the current node, this is a parse
3305 * error. (But do not return.)
3306 */
3307 if (formatting_element != html->current_node)
3308 html_parse_error(html);
3309
3310 /*
3311 * 7. Let furthestBlock be the topmost node in the stack of open
3312 * elements that is lower in the stack than formattingElement, and is
3313 * an element in the special category. There might not be one.
3314 */
3315 furthest_block = NULL;
3316 for (n = 0; n < html->active_formatting_count; n++) {
3317 if (html->active_formatting[n].element != formatting_element)
3318 continue;
3319
3320 for (n = n + 1; n < html->active_formatting_count; n++) {
3321 if (html_is_element_special(html,
3322 html->active_formatting[n].element)) {
3323 furthest_block = html->active_formatting[n].element;
3324 break;
3325 }
3326 }
3327 }
3328
3329 /*
3330 * 8. If there is no furthestBlock, then the UA must first pop all the
3331 * nodes from the bottom of the stack of open elements, from the
3332 * current node up to and including formattingElement, then remove
3333 * formattingElement from the list of active formatting elements, and
3334 * finally return.
3335 */
3336 if (furthest_block == NULL) {
3337 while (html->current_node != formatting_element)
3338 html_pop_current_element(html);
3339 if (html->current_node == formatting_element)
3340 html_pop_current_element(html);
3341
3342 html_remove_active_formatting_element(html, formatting_element);
3343 return true;
3344 }
3345
3346 /*
3347 * 9. Let commonAncestor be the element immediately above
3348 * formattingElement in the stack of open elements.
3349 */
3350 for (n = 0; n < html->active_formatting_count - 1; n++) {
3351 if (html->active_formatting[n + 1].element == formatting_element) {
3352 common_ancestor = html->active_formatting[n].element;
3353 break;
3354 }
3355 }
3356
3357 /*
3358 * 10. Let a bookmark note the position of formattingElement in the
3359 * list of active formatting elements relative to the elements on
3360 * either side of it in the list.
3361 */
3362 for (n = 0; n < html->active_formatting_count; n++) {
3363 if (html->active_formatting[n].element == formatting_element) {
3364 before_fe = html->active_formatting[n - 1].element;
3365 after_fe = html->active_formatting[n + 1].element;
3366 break;
3367 }
3368 }
3369
3370 /* 11. Let node and lastNode be furthestBlock. */
3371 node = furthest_block;
3372 last_node = furthest_block;
3373
3374 before_node = NULL;
3375 for (n = 1; n < html->open_count; n++) {
3376 if (html->open[n] == node) {
3377 before_node = html->open[n - 1];
3378 break;
3379 }
3380 }
3381
3382 /* 12. Let innerLoopCounter be 0. */
3383 ilc = 0;
3384
3385 /* 13. While true: */
3386 for (;;) {
3387 /* 1. Increment innerLoopCounter by 1. */
3388 ilc++;
3389
3390 /*
3391 * 2. Let /node/ be the element immediately above /node/ in the
3392 * stack of open elements, or if node is no longer in the stack of
3393 * open elements (e.g. because it got removed by this algorithm),
3394 * the element that was immediately above node in the stack of open
3395 * elements before node was removed.
3396 */
3397 node = before_node;
3398
3399 /* 3. If node is formattingElement, then break. */
3400 if (node == formatting_element)
3401 break;
3402
3403 /*
3404 * 4. If innerLoopCounter is greater than 3 and node is in the list
3405 * of active formatting elements, then remove node from the list of
3406 * active formatting elements.
3407 */
3408 if (ilc > 3)
3409 html_remove_active_formatting_element(html, node);
3410
3411 /*
3412 * 5. If node is not in the list of active formatting elements,
3413 * then remove node from the stack of open elements and continue.
3414 */
3415 found = false;
3416 for (n = 0; n < html->active_formatting_count; n++) {
3417 if (html->active_formatting[n].element == node) {
3418 found = true;
3419 break;
3420 }
3421 }
3422 if (!found) {
3423 before_node = NULL;
3424 for (n = 1; n < html->open_count; n++) {
3425 if (html->open[n] == node) {
3426 before_node = html->open[n - 1];
3427 break;
3428 }
3429 }
3430
3431 html_remove_open_element(html, node);
3432 continue;
3433 }
3434
3435 /*
3436 * 6. Create an element for the token for which the element node
3437 * was created, in the HTML namespace, with commonAncestor as the
3438 * intended parent; replace the entry for node in the list of
3439 * active formatting elements with an entry for the new element,
3440 * replace the entry for node in the stack of open elements with an
3441 * entry for the new element, and let node be the new element.
3442 */
3443 memset(&ttoken, 0, sizeof(html_token));
3444 ttoken.type = HTML_TOKEN_START_TAG;
3445 ttoken.tag.type = node->type;
3446 element = html_create_element_for_token(html, &ttoken);
3447
3448 for (n = 0; n < html->active_formatting_count; n++) {
3449 if (html->active_formatting[n].element == node) {
3450 html_deref_element(html, node);
3451 html->active_formatting[n].element = element;
3452 element->refs++;
3453 break;
3454 }
3455 }
3456
3457 for (n = 0; n < html->open_count; n++) {
3458 if (html->open[n] == node) {
3459 html_deref_element(html, node);
3460 html->open[n] = element;
3461 element->refs++;
3462 break;
3463 }
3464 }
3465
3466 node = element;
3467 before_node = NULL;
3468 for (n = 1; n < html->open_count; n++) {
3469 if (html->open[n] == node) {
3470 before_node = html->open[n - 1];
3471 break;
3472 }
3473 }
3474
3475 /*
3476 * 7. If /last node/ is furthestBlock, then move the aforementioned
3477 * bookmark to be immediately after the new node in the list of
3478 * active formatting elements.
3479 */
3480 if (last_node == furthest_block) {
3481 for (n = 0; n < html->active_formatting_count; n++) {
3482 if (html->active_formatting[n].element != element)
3483 continue;
3484
3485 before_fe = html->active_formatting[n - 1].element;
3486 after_fe = html->active_formatting[n + 1].element;
3487 }
3488 }
3489
3490 /* 8. Append lastNode to node. */
3491 /* TODO */
3492
3493 /* 9. Set lastNode to node. */
3494 last_node = node;
3495 }
3496
3497 /*
3498 * 14. Insert whatever lastNode ended up being in the previous step at
3499 * the appropriate place for inserting a node, but using commonAncestor
3500 * as the override target.
3501 */
3502 /* TODO */
3503
3504 /*
3505 * 15. Create an element for the token for which formattingElement was
3506 * created, in the HTML namespace, with furthestBlock as the intended
3507 * parent.
3508 */
3509 /* TODO */
3510
3511 /*
3512 * 16. Take all of the child nodes of furthestBlock and append them to
3513 * the element created in the last step.
3514 */
3515 /* TODO */
3516
3517 /* 17. Append that new element to furthestBlock. */
3518 /* TODO */
3519
3520 /*
3521 * 18. Remove formattingElement from the list of active formatting
3522 * elements, and insert the new element into the list of active
3523 * formatting elements at the position of the aforementioned bookmark.
3524 */
3525 /* TODO */
3526
3527 /*
3528 * 19. Remove formattingElement from the stack of open elements, and
3529 * insert the new element into the stack of open elements immediately
3530 * below the position of furthestBlock in that stack.
3531 */
3532 /* TODO */
3533 }
3534 }
3535
3536 /*
3537 * emitters
3538 */
3539
3540 static html_token emittok = { 0 };
3541
3542 void
3543 html_emit_char_token(struct html_page *html, short cc)
3544 {
3545 emittok.type = HTML_TOKEN_CHARACTER;
3546 emittok.ch.c = cc;
3547 html_process_token(html, &emittok);
3548 }
3549
3550 void
3551 html_emit_eof_token(struct html_page *html)
3552 {
3553 emittok.type = HTML_TOKEN_EOF;
3554 html_process_token(html, &emittok);
3555 }
3556
3557 void
3558 html_emit_comment(struct html_page *html, struct html_comment *comment)
3559 {
3560 size_t len;
3561
3562 emittok.type = HTML_TOKEN_COMMENT;
3563
3564 len = comment->len;
3565 if (len >= sizeof(emittok.comment.data))
3566 len = sizeof(emittok.comment.data) - 1;
3567 emittok.comment.len = len;
3568
3569 memcpy(emittok.comment.data, comment->data, len);
3570 emittok.comment.data[len] = '\0';
3571
3572 html_process_token(html, &emittok);
3573 }
3574
3575 #endif /* HTML_ENABLE */