AmendHub

Download:

jcs / wikipedia / amendments / 44

*: Fix lots of bugs, add progress in fetch dialog

Check the working buffer more carefully, and handle large buffers of
unstyled text.

jcs made amendment 44 7 months ago
--- browser.c Wed Apr 5 12:10:03 2023 +++ browser.c Wed Aug 30 15:44:45 2023 @@ -37,7 +37,7 @@ void browser_mouse_down(struct focusable *focusable, E bool browser_handle_menu(struct focusable *focusable, short menu, short item); void browser_atexit(struct focusable *focusable); -bool browser_avoid_te_overflow(struct browser *browser, TEHandle te, +bool browser_will_te_overflow(struct browser *browser, TEHandle te, short line_height); bool browser_debug_enabled(struct browser *browser); void browser_live_search(struct browser *browser); @@ -68,8 +68,7 @@ browser_idle(struct focusable *focusable, EventRecord HLock(browser->input_te); te = *(browser->input_te); HLock(te->hText); - (*(te->hText))[te->teLength] = '\0'; - input = xstrdup(*(te->hText)); + input = xstrndup(*(te->hText), te->teLength); HUnlock(te->hText); HUnlock(browser->input_te); if (input == NULL) { @@ -78,6 +77,10 @@ browser_idle(struct focusable *focusable, EventRecord } SetCursor(*(GetCursor(watchCursor))); + + if (browser->wpr) + wikipedia_request_free(&browser->wpr); + browser->wpr = wikipedia_fetch_article(browser, input); xfree(&input); browser->state = BROWSER_STATE_ARTICLE_PROCESS; @@ -234,10 +237,8 @@ browser_close(struct focusable *focusable) { struct browser *browser = (struct browser *)focusable->cookie; - if (browser->wpr) { - wikipedia_request_abort(browser->wpr); - browser->wpr = NULL; - } + if (browser->wpr) + wikipedia_request_free(&browser->wpr); TEDispose(browser->te); DisposeWindow(browser->win); @@ -253,7 +254,7 @@ browser_atexit(struct focusable *focusable) struct browser *browser = (struct browser *)focusable->cookie; if (browser->wpr) { - wikipedia_request_abort(browser->wpr); + wikipedia_request_free(&browser->wpr); browser->wpr = NULL; } } @@ -295,7 +296,7 @@ browser_update(struct focusable *focusable, EventRecor Str255 buf; Rect r; short what = -1; - + if (event != NULL) what = event->what; @@ -407,7 +408,8 @@ browser_mouse_down(struct focusable *focusable, EventR for (n = 0; 
n < browser->links_count; n++) { link = &browser->links[n]; if ((link->pos <= off) && (off < link->pos + link->len)) { - TESetText(link->link, strlen(link->link), browser->input_te); + TESetText(link->link, strlen(link->link), + browser->input_te); HLock(browser->input_te); InvalRect(&(*(browser->input_te))->viewRect); HUnlock(browser->input_te); @@ -481,7 +483,7 @@ browser_live_search(struct browser *browser) Rect bounds = { 0 }; Rect data_bounds = { 0, 0, 0, 1 }; /* tlbr */ char *input, **results, k; - size_t nresults, n; + size_t nresults, n, len; Point cell_size = { 0, 0 }; Cell cell = { 0, 0 }; Rect r; @@ -498,14 +500,14 @@ browser_live_search(struct browser *browser) SetCursor(*(GetCursor(watchCursor))); HLock(te->hText); - (*(te->hText))[te->teLength] = '\0'; - input = xstrdup(*(te->hText)); + input = xstrndup(*(te->hText), te->teLength); HUnlock(te->hText); HUnlock(browser->input_te); if (input == NULL) { warn("Out of memory!"); return; } + nresults = wikipedia_fetch_search_results(browser, input, &results); xfree(&input); @@ -528,11 +530,9 @@ browser_live_search(struct browser *browser) } for (n = 0; n < nresults; n++) { - size_t len; len = strlen(results[n]); LAddRow(1, cell.v, browser->search_results); - LSetCell(results[n], len, cell, - browser->search_results); + LSetCell(results[n], len, cell, browser->search_results); cell.v++; xfree(&results[n]); } @@ -670,7 +670,8 @@ browser_debug_print(struct browser *browser, const cha was_len = (*(browser->debug_te))->teLength; HUnlock(browser->debug_te); - browser_avoid_te_overflow(browser, browser->debug_te, line_height); + if (browser_will_te_overflow(browser, browser->debug_te, line_height)) + return 0; while (len) { if (*str == '\n') @@ -702,7 +703,7 @@ browser_debug_print(struct browser *browser, const cha } bool -browser_avoid_te_overflow(struct browser *browser, TEHandle te, +browser_will_te_overflow(struct browser *browser, TEHandle te, short line_height) { RgnHandle savergn; @@ -712,49 +713,25 @@ 
browser_avoid_te_overflow(struct browser *browser, TEH /* too many lines */ if ((*te)->nLines >= (nitems((*te)->lineStarts) - 10)) - goto te_overflow; + return true; /* too many characters */ if ((*te)->teLength >= (SHRT_MAX - 500)) - goto te_overflow; + return true; /* rect of all lines is too tall */ if ((*te)->nLines * line_height >= (SHRT_MAX - 100)) - goto te_overflow; + return true; HUnlock(te); return false; - -te_overflow: - savergn = NewRgn(); - GetClip(savergn); - /* create an empty clip region so all TE updates are hidden */ - ClipRect(&zerorect); - - /* select some lines at the start, delete them */ - TESetSelect(0, (*te)->lineStarts[5], te); - TEDelete(te); - - /* scroll up, causing a repaint */ - TEPinScroll(0, INT_MAX, te); - - /* then scroll back down to what it looked like before we did anything */ - TEPinScroll(0, -INT_MAX, te); - - /* resume normal drawing */ - SetClip(savergn); - DisposeRgn(savergn); - - HUnlock(te); - - return true; } #define BROWSER_SCRAP_ELEMENTS 20 static Handle scrp_rec_h = NULL; -size_t +bool browser_print(struct browser *browser, const char *str, size_t len, unsigned long style) { @@ -767,6 +744,9 @@ browser_print(struct browser *browser, const char *str static unsigned long last_style = 0; struct browser_link *link = NULL; + if (browser_will_te_overflow(browser, browser->te, line_height)) + return false; + if (scrp_rec_h == NULL) { scrp_rec_h = xNewHandle(sizeof(short) + (sizeof(ScrpSTElement) * BROWSER_SCRAP_ELEMENTS)); @@ -827,7 +807,7 @@ browser_print(struct browser *browser, const char *str link->link = xstrndup(str, n); if (link->link == NULL) { warn("Out of memory"); - return 0; + return false; } break; } @@ -836,7 +816,7 @@ browser_print(struct browser *browser, const char *str link->link = xstrndup(str, n); if (link->link == NULL) { warn("Out of memory"); - return 0; + return false; } str += n + 1; len -= n + 1; @@ -848,8 +828,6 @@ browser_print(struct browser *browser, const char *str } HUnlock(scrp_rec_h); - - 
browser_avoid_te_overflow(browser, browser->te, line_height); HLock(browser->te); was_len = (*(browser->te))->teLength; @@ -888,7 +866,7 @@ browser_print(struct browser *browser, const char *str last_style = style; - return len; + return true; } void @@ -905,8 +883,15 @@ browser_clear(struct browser *browser) browser->links_size = 0; TEPinScroll(0, -SHRT_MAX, browser->debug_te); - TESetText("", 0, browser->te); TESetText("", 0, browser->debug_te); + HLock(browser->debug_te); + InvalRect(&(*(browser->debug_te))->viewRect); + HUnlock(browser->debug_te); + + HLock(browser->te); + TESetText("", 0, browser->te); + InvalRect(&(*(browser->te))->viewRect); + HUnlock(browser->te); UpdateScrollbarForTE(browser->win, browser->te_scroller, browser->te, true); --- browser.h Tue Sep 6 10:49:31 2022 +++ browser.h Tue Aug 29 09:55:47 2023 @@ -61,7 +61,7 @@ struct browser { }; struct browser *browser_init(void); -size_t browser_print(struct browser *browser, const char *str, size_t len, +bool browser_print(struct browser *browser, const char *str, size_t len, unsigned long style); void browser_clear(struct browser *browser); void browser_draw_line(struct browser *browser); --- http.c Wed Apr 5 12:16:49 2023 +++ http.c Tue Aug 29 23:05:16 2023 @@ -193,7 +193,7 @@ http_get(const char *surl) } long2ip(req->host_ip, (char *)&ip_s); - + err = _TCPActiveOpen(&req->tcp_iopb, req->tcp_stream, req->host_ip, req->url->port, &local_ip, &local_port, nil, nil, false); if (err) { @@ -201,14 +201,6 @@ http_get(const char *surl) req->url->host, ip_s, req->url->port, err); goto error; } - - err = _TCPStatus(&req->tcp_iopb, req->tcp_stream, &req->tcp_status_pb, - nil, nil, false); - if (err) { - warn("Failed TCPStatus on connection to %s (%s) port %d: %d", - req->url->host, ip_s, req->url->port, err); - goto error; - } alen = 256 + strlen(req->url->host) + strlen(req->url->path); req->message = xmalloc(alen); @@ -235,7 +227,7 @@ http_get(const char *surl) warn("TCPSend to %s (%s) failed: %d", 
req->url->host, ip_s, err); goto error; } - + return req; error: @@ -274,17 +266,13 @@ bool http_req_skip_header(struct http_request *req) { size_t len, n; + bool last_nl = false; for (;;) { - if (req->chunk_len > 3) { - /* - * Leave last 3 bytes of previous read in case \r\n\r\n happens - * across reads. - */ - memmove(req->chunk, req->chunk + req->chunk_len - 3, - req->chunk_len - 3); - req->chunk_len = 3; - } + if (req->chunk_len == sizeof(req->chunk)) + /* this sure is a long header... */ + req->chunk_len = 0; + len = http_req_read(req, req->chunk + req->chunk_len, sizeof(req->chunk) - req->chunk_len); if (len < 0) @@ -293,15 +281,26 @@ http_req_skip_header(struct http_request *req) continue; req->chunk_len += len; - for (n = 3; n < req->chunk_len; n++) { - if (req->chunk[n - 3] != '\r' || req->chunk[n - 2] != '\n' || - req->chunk[n - 1] != '\r' || req->chunk[n] != '\n') + for (n = 1; n < req->chunk_len; n++) { + if (req->chunk[n - 1] != '\r' || req->chunk[n] != '\n') { + last_nl = false; continue; - + } + + /* newline, shift chunk back */ + if (strncmp(req->chunk, "Content-Length: ", 16) == 0) { + if (sscanf(req->chunk, "Content-Length: %ld", &len) == 1) + req->content_len = len; + } req->chunk_len -= n + 1; memmove(req->chunk, req->chunk + n + 1, req->chunk_len); req->chunk_off = 0; - return true; + + if (last_nl) + return true; + + last_nl = true; + n = 0; /* start at 1 on next iteration */ } } @@ -356,4 +355,6 @@ http_req_free(void *reqptr) xfree(&req->tcp_buf); xfree(&req->url); xfree(&req); + + *addr = 0L; } --- http.h Wed Sep 7 15:27:42 2022 +++ http.h Tue Aug 29 13:20:16 2023 @@ -40,6 +40,8 @@ struct http_request { char *message; + size_t content_len; + char chunk[2048]; ssize_t chunk_len; ssize_t chunk_off; --- main.c Wed Apr 5 11:15:56 2023 +++ main.c Tue Aug 29 09:52:21 2023 @@ -67,7 +67,7 @@ main(void) browser_init(); while (!quitting) { - WaitNextEvent(everyEvent, &event, 5L, 0L); + WaitNextEvent(everyEvent, &event, 0L, 0L); switch (event.what) { 
case nullEvent: @@ -121,15 +121,12 @@ main(void) break; case updateEvt: event_win = (WindowPtr)event.message; - GetPort(&old_port); SetPort(event_win); BeginUpdate(event_win); - focusable = focusable_find(event_win); if (focusable && focusable->update) focusable->update(focusable, &event); - EndUpdate(event_win); SetPort(old_port); break; --- wikipedia.c Wed Apr 5 11:32:30 2023 +++ wikipedia.c Wed Aug 30 15:50:54 2023 @@ -60,7 +60,7 @@ wikipedia_fetch_article(struct browser *browser, char } snprintf(url, sizeof(url), "http://%s/w/api.php?action=query&" - "prop=revisions&rvslots=*&rvprop=content&" + "prop=revisions&rvslots=*&rvprop=size|content&" "format=xml&titles=%s", WIKIPEDIA_HOST, nencoded); xfree(&nencoded); wpr->http_request = http_get(url); @@ -70,8 +70,10 @@ wikipedia_fetch_article(struct browser *browser, char xfree(&wpr); return NULL; } + http_req_skip_header(wpr->http_request); - wpr->state = WP_STATE_XML_INIT; + wpr->read_len = wpr->http_request->chunk_len; + wpr->normalized_title = xstrdup(name); if (wpr->normalized_title == NULL) { progress(NULL); @@ -80,6 +82,7 @@ wikipedia_fetch_article(struct browser *browser, char xfree(&wpr); return NULL; } + wpr->state = WP_STATE_XML_INIT; browser_debug_print(wpr->browser, wpr->http_request->chunk, wpr->http_request->chunk_len); @@ -107,7 +110,7 @@ wikipedia_fetch_search_results(struct browser *browser } xstate = 0; char *buf; size_t buf_size; - size_t buf_len; + size_t buf_idx; uquery = macroman_to_utf8_string((unsigned char *)query, strlen(query)); @@ -118,17 +121,21 @@ wikipedia_fetch_search_results(struct browser *browser if (qencoded == NULL) return 0; - snprintf(url, sizeof(url), "http://%s/w/api.php?action=opensearch&" - "format=xml&namespace=0&limit=10&redirects=return&search=%s", - WIKIPEDIA_HOST, qencoded); + len = snprintf(url, sizeof(url), "http://%s/w/api.php?" 
+ "action=opensearch&format=xml&namespace=0&limit=10&" + "redirects=return&search=%s", WIKIPEDIA_HOST, qencoded); xfree(&qencoded); + if (len > sizeof(url)) + return 0; + req = http_get(url); if (req == NULL) return 0; + http_req_skip_header(req); buf_size = 256; - buf_len = 0; + buf_idx = 0; buf = xmalloc(buf_size); if (buf == NULL) { warn("Out of memory!"); @@ -157,7 +164,7 @@ wikipedia_fetch_search_results(struct browser *browser break; } rets = trets; - nstr = xstrndup(buf, buf_len); + nstr = xstrndup(buf, buf_idx); if (nstr == NULL) { warn("Out of memory!"); break; @@ -166,18 +173,17 @@ wikipedia_fetch_search_results(struct browser *browser } buf[0] = '\0'; - buf_len = 0; + buf_idx = 0; xstate = XML_IN_TAG; } else if (c == '>') { - if (xstate == XML_IN_TAG && - strncmp(buf, "Text xml:", 9) == 0) + if (xstate == XML_IN_TAG && strncmp(buf, "Text xml:", 9) == 0) xstate = XML_IN_TEXT; else xstate = XML_DEFAULT; buf[0] = '\0'; - buf_len = 0; - } else if (buf_len < buf_size) { + buf_idx = 0; + } else if (buf_idx < buf_size) { if ((unsigned char)c >= UTF8_RANGE_START && (unsigned char)c <= UTF8_RANGE_END) { if (utf8[0] == 0) @@ -200,7 +206,9 @@ wikipedia_fetch_search_results(struct browser *browser } if (c) - buf[buf_len++] = c; + buf[buf_idx++] = c; + } else { + panic("out of buf space"); } } @@ -231,6 +239,7 @@ wikipedia_request_process(struct wikipedia_request *wp { struct http_request *req = wpr->http_request; size_t len, n; + short pct; char c, *last; enum xml_state { XML_DEFAULT, @@ -239,23 +248,33 @@ wikipedia_request_process(struct wikipedia_request *wp utf8_char utf8 = { 0 }; get_char: - if (req->chunk_len == 0 || (req->chunk_off + 1 > req->chunk_len)) { + if (req->chunk_len == 0 || (req->chunk_off >= req->chunk_len)) { req->chunk_len = http_req_read(req, req->chunk, sizeof(req->chunk)); req->chunk_off = 0; - - if (req->chunk_len < 1 || (req->chunk_off + 1 > req->chunk_len)) { + wpr->read_len += req->chunk_len; + + if (req->chunk_len < 1 || (req->chunk_off 
>= req->chunk_len)) { wpr->state = WP_STATE_DONE; goto done_parsing; } + if (req->content_len > 0) { + pct = (wpr->read_len * 100) / req->content_len; + if (pct > 100) + pct = 100; + + progress("Fetching article \"%s\" (%d%%)...", + wpr->normalized_title, pct); + } + browser_debug_print(wpr->browser, req->chunk, req->chunk_len); } switch (wpr->state) { case WP_STATE_XML_INIT: wpr->buf_size = 1024; - wpr->buf_len = 0; + wpr->buf_idx = 0; wpr->buf = xmalloc(wpr->buf_size); if (wpr->buf == NULL) { warn("Out of memory!"); @@ -270,20 +289,24 @@ get_char: if (c == '<') { wpr->buf[0] = '\0'; - wpr->buf_len = 0; + wpr->buf_idx = 0; } else if (c == '>') { - wpr->buf[wpr->buf_len] = '\0'; + wpr->buf[wpr->buf_idx] = '\0'; if (xstate == XML_DEFAULT) { if (strcmp(wpr->buf, "normalized") == 0) { xstate = XML_IN_NORMALIZED; } else if (strncmp(wpr->buf, "slot ", 5) == 0) { wpr->state = WP_STATE_WIKITEXT_INIT; + } else if (!req->content_len && + strncmp(wpr->buf, "rev size=", 9) == 0) { + if (sscanf(wpr->buf, "rev size=\"%ld\"", &len) == 1) + req->content_len = len; } } else if (xstate == XML_IN_NORMALIZED) { char from_normalized[255], to_normalized[255]; size_t count; - if (sscanf(wpr->buf, "n from=\"%[^\"]\" to=\"%[^\"]\"%n", + if (sscanf(wpr->buf, "n from=\"%254[^\"]\" to=\"%254[^\"]\"%n", &from_normalized, &to_normalized, &count) == 2 && count > 10) { if (wpr->normalized_title != NULL) @@ -296,15 +319,17 @@ get_char: } else xstate = XML_DEFAULT; } - } else if (wpr->buf_len < wpr->buf_size) { - wpr->buf[wpr->buf_len++] = c; + } else { + if (wpr->buf_idx >= wpr->buf_size) + panic("ran out of buf space parsing xml"); + wpr->buf[wpr->buf_idx++] = c; } goto get_char; case WP_STATE_WIKITEXT_INIT: wpr->article_len = 0; - wpr->buf_len = 0; + wpr->buf_idx = 0; wpr->buf[0] = '\0'; wpr->curlys = 0; @@ -319,7 +344,7 @@ get_char: case WP_STATE_WIKITEXT_PARSE: { c = req->chunk[req->chunk_off]; - last = wpr->buf + wpr->buf_len - 1; + last = wpr->buf + wpr->buf_idx - 1; if (c == '<' || c == 
'\0') { wpr->state = WP_STATE_DONE; @@ -330,22 +355,26 @@ get_char: if (c == ';') { /* XML entity decode */ - if (last[-3] == '&' && last[-2] == 'a' && last[-1] == 'm' && + if (wpr->buf_idx >= 4 && + last[-3] == '&' && last[-2] == 'a' && last[-1] == 'm' && last[0] == 'p') { c = '&'; - wpr->buf_len -= 4; - } else if (last[-4] == '&' && last[-3] == 'n' && - last[-2] == 'b' && last[-1] == 's' && last[0] == 'p') { + wpr->buf_idx -= 4; + } else if (wpr->buf_idx >= 5 && + last[-4] == '&' && last[-3] == 'n' && last[-2] == 'b' && + last[-1] == 's' && last[0] == 'p') { c = ' '; - wpr->buf_len -= 5; - } else if (last[-2] == '&' && last[-1] == 'l' && last[0] == 't') { + wpr->buf_idx -= 5; + } else if (wpr->buf_idx >= 2 && + last[-2] == '&' && last[-1] == 'l' && last[0] == 't') { c = '<'; - wpr->buf_len -= 3; - } else if (last[-2] == '&' && last[-1] == 'g' && last[0] == 't') { + wpr->buf_idx -= 3; + } else if (wpr->buf_idx >= 2 && + last[-2] == '&' && last[-1] == 'g' && last[0] == 't') { c = '>'; - wpr->buf_len -= 3; + wpr->buf_idx -= 3; } - last = wpr->buf + wpr->buf_len - 1; + last = wpr->buf + wpr->buf_idx - 1; } else if (c == '\n') { c = '\r'; } else if ((unsigned char)c >= UTF8_RANGE_START && @@ -372,87 +401,99 @@ get_char: /* check for style changes */ - if (last[0] == '{' && (c == '{' || c == '|')) { + if (wpr->buf_idx >= 1 && + last[0] == '{' && (c == '{' || c == '|')) { wpr->curlys++; - wpr->buf_len--; + wpr->buf_idx--; wpr->style |= STYLE_TEMPLATE; c = 0; - } else if ((last[0] == '}' || last[0] == '|') && c == '}') { + } else if (wpr->buf_idx >= 1 && + (last[0] == '}' || last[0] == '|') && c == '}') { if (wpr->curlys) wpr->curlys--; - wpr->buf_len--; + wpr->buf_idx--; if (wpr->curlys == 0) wpr->style &= ~(STYLE_TEMPLATE); c = 0; - } else if (last[0] == '[' && c == '[') { + } else if (wpr->buf_idx >= 1 && + last[0] == '[' && c == '[') { if (wpr->brackets) wpr->brackets++; - wpr->buf_len--; + wpr->buf_idx--; wpr->style |= STYLE_LINK; c = 0; - } else if (last[0] == ']' 
&& c == ']') { + } else if (wpr->buf_idx >= 1 && + last[0] == ']' && c == ']') { if (wpr->brackets) wpr->brackets--; - wpr->buf_len--; + wpr->buf_idx--; if (wpr->brackets == 0) wpr->style &= ~(STYLE_LINK); c = 0; - } else if (last[-1] == '\'' && last[0] == '\'' && c == '\'') { + } else if (wpr->buf_idx >= 2 && + last[-1] == '\'' && last[0] == '\'' && c == '\'') { if (wpr->style & STYLE_BOLD) wpr->style &= ~(STYLE_BOLD); else wpr->style |= STYLE_BOLD; - wpr->buf_len -= 2; + wpr->buf_idx -= 2; c = 0; - } else if (last[-1] == '\'' && last[0] == '\'' && c != '\'') { + } else if (wpr->buf_idx >= 2 && + last[-1] == '\'' && last[0] == '\'' && c != '\'') { if (wpr->style & STYLE_ITALIC) wpr->style &= ~(STYLE_ITALIC); else wpr->style |= STYLE_ITALIC; - wpr->buf_len -= 2; + wpr->buf_idx -= 2; /* keep c */ - } else if (last[-3] == '=' && last[-2] == '=' && last[-1] == '=' && + } else if (wpr->buf_idx >= 4 && + last[-3] == '=' && last[-2] == '=' && last[-1] == '=' && last[0] == '=' && c == '=') { if (wpr->style & STYLE_H5) wpr->style &= ~(STYLE_H5); else wpr->style |= STYLE_H5; - wpr->buf_len -= 4; + wpr->buf_idx -= 4; c = 0; - } else if (last[-3] == '=' && last[-2] == '=' && last[-1] == '=' && + } else if (wpr->buf_idx >= 4 && + last[-3] == '=' && last[-2] == '=' && last[-1] == '=' && last[0] == '=' && c != '=') { if (wpr->style & STYLE_H4) wpr->style &= ~(STYLE_H4); else wpr->style |= STYLE_H4; - wpr->buf_len -= 4; + wpr->buf_idx -= 4; /* keep c */ - } else if (last[-2] == '=' && last[-1] == '=' && last[0] == '=' && + } else if (wpr->buf_idx >= 3 && + last[-2] == '=' && last[-1] == '=' && last[0] == '=' && c != '=') { if (wpr->style & STYLE_H3) wpr->style &= ~(STYLE_H3); else wpr->style |= STYLE_H3; - wpr->buf_len -= 3; + wpr->buf_idx -= 3; /* keep c */ - } else if (last[-1] == '=' && last[0] == '=' && c != '=') { + } else if (wpr->buf_idx >= 2 && + last[-1] == '=' && last[0] == '=' && c != '=') { if (wpr->style & STYLE_H2) wpr->style &= ~(STYLE_H2); else wpr->style |= 
STYLE_H2; - wpr->buf_len -= 2; + wpr->buf_idx -= 2; /* keep c */ - } else if (last[-2] == '<' && last[-1] == 'r' && last[0] == 'e' && + } else if (wpr->buf_idx >= 3 && + last[-2] == '<' && last[-1] == 'r' && last[0] == 'e' && c == 'f') { /* <ref */ wpr->refs++; wpr->style |= STYLE_REF; - wpr->buf_len -= 3; + wpr->buf_idx -= 3; c = 0; } else if ((wpr->style & STYLE_REF) && - ((last[-4] == '<' && last[-3] == '/' && last[-2] == 'r' && - last[-1] == 'e' && last[0] == 'f' && c == '>') || - (last[0] == '/' && c == '>'))) { + ((wpr->buf_idx >= 5 && last[-4] == '<' && last[-3] == '/' && + last[-2] == 'r' && last[-1] == 'e' && last[0] == 'f' && + c == '>') || + (wpr->buf_idx >= 1 && last[0] == '/' && c == '>'))) { /* </ref> or <ref /> */ if (wpr->refs) wpr->refs--; @@ -474,38 +515,38 @@ get_char: char *conv, *conv2; size_t len; - conv = xmalloc(wpr->buf_len); + conv = xmalloc(wpr->buf_idx); if (conv == NULL) { - warn("Failed allocating %ld", wpr->buf_len); + warn("Failed allocating %ld", wpr->buf_idx); break; } - conv2 = xmalloc(wpr->buf_len); + conv2 = xmalloc(wpr->buf_idx); if (conv2 == NULL) { - warn("Failed allocating %ld", wpr->buf_len); + warn("Failed allocating %ld", wpr->buf_idx); xfree(&conv); break; } - wpr->buf[wpr->buf_len] = '\0'; + wpr->buf[wpr->buf_idx] = '\0'; if (sscanf(wpr->buf, "convert|%[^|]|%[^|]|%n", conv, conv2, &len) == 2 && len >= 13) - wpr->buf_len = snprintf(wpr->buf, wpr->buf_size, + wpr->buf_idx = snprintf(wpr->buf, wpr->buf_size, "%s %s ", conv, conv2); else - wpr->buf_len = 0; + wpr->buf_idx = 0; xfree(&conv); xfree(&conv2); } else - wpr->buf_len = 0; + wpr->buf_idx = 0; } /* maybe we can do something with these later */ if (wpr->last_style & STYLE_REF) - wpr->buf_len = 0; + wpr->buf_idx = 0; /* we can't show inline images */ if ((wpr->last_style & STYLE_LINK) && strncmp(wpr->buf, "File:", 5) == 0) { - wpr->buf_len = 0; + wpr->buf_idx = 0; wpr->trim_whitespace = true; } @@ -515,13 +556,13 @@ get_char: if ((wpr->style & STYLE_LINK) && 
wpr->article_len == 0 && strncmp(wpr->buf, "#REDIRECT ", 10) == 0) { - wpr->buf_len = 0; + wpr->buf_idx = 0; wpr->redirect = true; } else if (wpr->redirect && !(wpr->style & STYLE_LINK) && (wpr->last_style & STYLE_LINK)) { if (wpr->normalized_title) xfree(&wpr->normalized_title); - wpr->buf[wpr->buf_len] = '\0'; + wpr->buf[wpr->buf_idx] = '\0'; wpr->normalized_title = xstrdup(wpr->buf); if (wpr->normalized_title == NULL) { warn("Out of memory!"); @@ -531,14 +572,17 @@ get_char: goto done_parsing; } - if (wpr->buf_len) { + if (wpr->buf_idx) { if (wpr->article_len == 0) wikipedia_request_present(wpr); - browser_print(wpr->browser, wpr->buf, wpr->buf_len, - wpr->last_style); - wpr->article_len += wpr->buf_len; - wpr->buf_len = 0; + if (!browser_print(wpr->browser, wpr->buf, wpr->buf_idx, + wpr->last_style)) { + wpr->state = WP_STATE_DONE; + goto done_parsing; + } + wpr->article_len += wpr->buf_idx; + wpr->buf_idx = 0; } wpr->last_style = wpr->style; } @@ -553,8 +597,18 @@ get_char: } /* and finally, add the new character */ - if (c != 0) - wpr->buf[wpr->buf_len++] = c; + if (c != 0) { + if (wpr->buf_idx >= wpr->buf_size) { + if (!browser_print(wpr->browser, wpr->buf, wpr->buf_idx, + wpr->style)) { + wpr->state = WP_STATE_DONE; + goto done_parsing; + } + wpr->article_len += wpr->buf_idx; + wpr->buf_idx = 0; + } + wpr->buf[wpr->buf_idx++] = c; + } req->chunk_off++; goto get_char; @@ -570,8 +624,17 @@ done_parsing: } void -wikipedia_request_abort(struct wikipedia_request *wpr) +wikipedia_request_free(struct wikipedia_request **wprptr) { + struct wikipedia_request *wpr = (struct wikipedia_request *)*wprptr; + + if (wpr == NULL) { + *wprptr = NULL; + return; + } + if (wpr->http_request != NULL) http_req_free(&wpr->http_request); + + *wprptr = NULL; } --- wikipedia.h Wed Sep 7 16:47:24 2022 +++ wikipedia.h Wed Aug 30 14:19:31 2023 @@ -57,11 +57,12 @@ struct wikipedia_request { struct browser *browser; struct http_request *http_request; char *normalized_title; + size_t 
read_len; size_t article_len; char *buf; size_t buf_size; - size_t buf_len; + size_t buf_idx; short refs, curlys, brackets; unsigned long style, last_style; bool trim_whitespace, redirect; @@ -75,6 +76,6 @@ struct wikipedia_request * wikipedia_read_cached_artic char *name); void wikipedia_request_present(struct wikipedia_request *wpr); void wikipedia_request_process(struct wikipedia_request *wpr); -void wikipedia_request_abort(struct wikipedia_request *wpr); +void wikipedia_request_free(struct wikipedia_request **wprptr); #endif