jcs
/wikipedia
/amendments
/44
*: Fix lots of bugs, add progress in fetch dialog
More carefully check working buffer, handle large buffers of unstyled
text.
jcs made amendment 44 about 1 year ago
--- browser.c Wed Apr 5 12:10:03 2023
+++ browser.c Wed Aug 30 15:44:45 2023
@@ -37,7 +37,7 @@ void browser_mouse_down(struct focusable *focusable, E
bool browser_handle_menu(struct focusable *focusable, short menu,
short item);
void browser_atexit(struct focusable *focusable);
-bool browser_avoid_te_overflow(struct browser *browser, TEHandle te,
+bool browser_will_te_overflow(struct browser *browser, TEHandle te,
short line_height);
bool browser_debug_enabled(struct browser *browser);
void browser_live_search(struct browser *browser);
@@ -68,8 +68,7 @@ browser_idle(struct focusable *focusable, EventRecord
HLock(browser->input_te);
te = *(browser->input_te);
HLock(te->hText);
- (*(te->hText))[te->teLength] = '\0';
- input = xstrdup(*(te->hText));
+ input = xstrndup(*(te->hText), te->teLength);
HUnlock(te->hText);
HUnlock(browser->input_te);
if (input == NULL) {
@@ -78,6 +77,10 @@ browser_idle(struct focusable *focusable, EventRecord
}
SetCursor(*(GetCursor(watchCursor)));
+
+ if (browser->wpr)
+ wikipedia_request_free(&browser->wpr);
+
browser->wpr = wikipedia_fetch_article(browser, input);
xfree(&input);
browser->state = BROWSER_STATE_ARTICLE_PROCESS;
@@ -234,10 +237,8 @@ browser_close(struct focusable *focusable)
{
struct browser *browser = (struct browser *)focusable->cookie;
- if (browser->wpr) {
- wikipedia_request_abort(browser->wpr);
- browser->wpr = NULL;
- }
+ if (browser->wpr)
+ wikipedia_request_free(&browser->wpr);
TEDispose(browser->te);
DisposeWindow(browser->win);
@@ -253,7 +254,7 @@ browser_atexit(struct focusable *focusable)
struct browser *browser = (struct browser *)focusable->cookie;
if (browser->wpr) {
- wikipedia_request_abort(browser->wpr);
+ wikipedia_request_free(&browser->wpr);
browser->wpr = NULL;
}
}
@@ -295,7 +296,7 @@ browser_update(struct focusable *focusable, EventRecor
Str255 buf;
Rect r;
short what = -1;
-
+
if (event != NULL)
what = event->what;
@@ -407,7 +408,8 @@ browser_mouse_down(struct focusable *focusable, EventR
for (n = 0; n < browser->links_count; n++) {
link = &browser->links[n];
if ((link->pos <= off) && (off < link->pos + link->len)) {
- TESetText(link->link, strlen(link->link), browser->input_te);
+ TESetText(link->link, strlen(link->link),
+ browser->input_te);
HLock(browser->input_te);
InvalRect(&(*(browser->input_te))->viewRect);
HUnlock(browser->input_te);
@@ -481,7 +483,7 @@ browser_live_search(struct browser *browser)
Rect bounds = { 0 };
Rect data_bounds = { 0, 0, 0, 1 }; /* tlbr */
char *input, **results, k;
- size_t nresults, n;
+ size_t nresults, n, len;
Point cell_size = { 0, 0 };
Cell cell = { 0, 0 };
Rect r;
@@ -498,14 +500,14 @@ browser_live_search(struct browser *browser)
SetCursor(*(GetCursor(watchCursor)));
HLock(te->hText);
- (*(te->hText))[te->teLength] = '\0';
- input = xstrdup(*(te->hText));
+ input = xstrndup(*(te->hText), te->teLength);
HUnlock(te->hText);
HUnlock(browser->input_te);
if (input == NULL) {
warn("Out of memory!");
return;
}
+
nresults = wikipedia_fetch_search_results(browser, input, &results);
xfree(&input);
@@ -528,11 +530,9 @@ browser_live_search(struct browser *browser)
}
for (n = 0; n < nresults; n++) {
- size_t len;
len = strlen(results[n]);
LAddRow(1, cell.v, browser->search_results);
- LSetCell(results[n], len, cell,
- browser->search_results);
+ LSetCell(results[n], len, cell, browser->search_results);
cell.v++;
xfree(&results[n]);
}
@@ -670,7 +670,8 @@ browser_debug_print(struct browser *browser, const cha
was_len = (*(browser->debug_te))->teLength;
HUnlock(browser->debug_te);
- browser_avoid_te_overflow(browser, browser->debug_te, line_height);
+ if (browser_will_te_overflow(browser, browser->debug_te, line_height))
+ return 0;
while (len) {
if (*str == '\n')
@@ -702,7 +703,7 @@ browser_debug_print(struct browser *browser, const cha
}
bool
-browser_avoid_te_overflow(struct browser *browser, TEHandle te,
+browser_will_te_overflow(struct browser *browser, TEHandle te,
short line_height)
{
RgnHandle savergn;
@@ -712,49 +713,25 @@ browser_avoid_te_overflow(struct browser *browser, TEH
/* too many lines */
if ((*te)->nLines >= (nitems((*te)->lineStarts) - 10))
- goto te_overflow;
+ return true;
/* too many characters */
if ((*te)->teLength >= (SHRT_MAX - 500))
- goto te_overflow;
+ return true;
/* rect of all lines is too tall */
if ((*te)->nLines * line_height >= (SHRT_MAX - 100))
- goto te_overflow;
+ return true;
HUnlock(te);
return false;
-
-te_overflow:
- savergn = NewRgn();
- GetClip(savergn);
- /* create an empty clip region so all TE updates are hidden */
- ClipRect(&zerorect);
-
- /* select some lines at the start, delete them */
- TESetSelect(0, (*te)->lineStarts[5], te);
- TEDelete(te);
-
- /* scroll up, causing a repaint */
- TEPinScroll(0, INT_MAX, te);
-
- /* then scroll back down to what it looked like before we did anything */
- TEPinScroll(0, -INT_MAX, te);
-
- /* resume normal drawing */
- SetClip(savergn);
- DisposeRgn(savergn);
-
- HUnlock(te);
-
- return true;
}
#define BROWSER_SCRAP_ELEMENTS 20
static Handle scrp_rec_h = NULL;
-size_t
+bool
browser_print(struct browser *browser, const char *str, size_t len,
unsigned long style)
{
@@ -767,6 +744,9 @@ browser_print(struct browser *browser, const char *str
static unsigned long last_style = 0;
struct browser_link *link = NULL;
+ if (browser_will_te_overflow(browser, browser->te, line_height))
+ return false;
+
if (scrp_rec_h == NULL) {
scrp_rec_h = xNewHandle(sizeof(short) +
(sizeof(ScrpSTElement) * BROWSER_SCRAP_ELEMENTS));
@@ -827,7 +807,7 @@ browser_print(struct browser *browser, const char *str
link->link = xstrndup(str, n);
if (link->link == NULL) {
warn("Out of memory");
- return 0;
+ return false;
}
break;
}
@@ -836,7 +816,7 @@ browser_print(struct browser *browser, const char *str
link->link = xstrndup(str, n);
if (link->link == NULL) {
warn("Out of memory");
- return 0;
+ return false;
}
str += n + 1;
len -= n + 1;
@@ -848,8 +828,6 @@ browser_print(struct browser *browser, const char *str
}
HUnlock(scrp_rec_h);
-
- browser_avoid_te_overflow(browser, browser->te, line_height);
HLock(browser->te);
was_len = (*(browser->te))->teLength;
@@ -888,7 +866,7 @@ browser_print(struct browser *browser, const char *str
last_style = style;
- return len;
+ return true;
}
void
@@ -905,8 +883,15 @@ browser_clear(struct browser *browser)
browser->links_size = 0;
TEPinScroll(0, -SHRT_MAX, browser->debug_te);
- TESetText("", 0, browser->te);
TESetText("", 0, browser->debug_te);
+ HLock(browser->debug_te);
+ InvalRect(&(*(browser->debug_te))->viewRect);
+ HUnlock(browser->debug_te);
+
+ HLock(browser->te);
+ TESetText("", 0, browser->te);
+ InvalRect(&(*(browser->te))->viewRect);
+ HUnlock(browser->te);
UpdateScrollbarForTE(browser->win, browser->te_scroller,
browser->te, true);
--- browser.h Tue Sep 6 10:49:31 2022
+++ browser.h Tue Aug 29 09:55:47 2023
@@ -61,7 +61,7 @@ struct browser {
};
struct browser *browser_init(void);
-size_t browser_print(struct browser *browser, const char *str, size_t len,
+bool browser_print(struct browser *browser, const char *str, size_t len,
unsigned long style);
void browser_clear(struct browser *browser);
void browser_draw_line(struct browser *browser);
--- http.c Wed Apr 5 12:16:49 2023
+++ http.c Tue Aug 29 23:05:16 2023
@@ -193,7 +193,7 @@ http_get(const char *surl)
}
long2ip(req->host_ip, (char *)&ip_s);
-
+
err = _TCPActiveOpen(&req->tcp_iopb, req->tcp_stream, req->host_ip,
req->url->port, &local_ip, &local_port, nil, nil, false);
if (err) {
@@ -201,14 +201,6 @@ http_get(const char *surl)
req->url->host, ip_s, req->url->port, err);
goto error;
}
-
- err = _TCPStatus(&req->tcp_iopb, req->tcp_stream, &req->tcp_status_pb,
- nil, nil, false);
- if (err) {
- warn("Failed TCPStatus on connection to %s (%s) port %d: %d",
- req->url->host, ip_s, req->url->port, err);
- goto error;
- }
alen = 256 + strlen(req->url->host) + strlen(req->url->path);
req->message = xmalloc(alen);
@@ -235,7 +227,7 @@ http_get(const char *surl)
warn("TCPSend to %s (%s) failed: %d", req->url->host, ip_s, err);
goto error;
}
-
+
return req;
error:
@@ -274,17 +266,13 @@ bool
http_req_skip_header(struct http_request *req)
{
size_t len, n;
+ bool last_nl = false;
for (;;) {
- if (req->chunk_len > 3) {
- /*
- * Leave last 3 bytes of previous read in case \r\n\r\n happens
- * across reads.
- */
- memmove(req->chunk, req->chunk + req->chunk_len - 3,
- req->chunk_len - 3);
- req->chunk_len = 3;
- }
+ if (req->chunk_len == sizeof(req->chunk))
+ /* this sure is a long header... */
+ req->chunk_len = 0;
+
len = http_req_read(req, req->chunk + req->chunk_len,
sizeof(req->chunk) - req->chunk_len);
if (len < 0)
@@ -293,15 +281,26 @@ http_req_skip_header(struct http_request *req)
continue;
req->chunk_len += len;
- for (n = 3; n < req->chunk_len; n++) {
- if (req->chunk[n - 3] != '\r' || req->chunk[n - 2] != '\n' ||
- req->chunk[n - 1] != '\r' || req->chunk[n] != '\n')
+ for (n = 1; n < req->chunk_len; n++) {
+ if (req->chunk[n - 1] != '\r' || req->chunk[n] != '\n') {
+ last_nl = false;
continue;
-
+ }
+
+ /* newline, shift chunk back */
+ if (strncmp(req->chunk, "Content-Length: ", 16) == 0) {
+ if (sscanf(req->chunk, "Content-Length: %ld", &len) == 1)
+ req->content_len = len;
+ }
req->chunk_len -= n + 1;
memmove(req->chunk, req->chunk + n + 1, req->chunk_len);
req->chunk_off = 0;
- return true;
+
+ if (last_nl)
+ return true;
+
+ last_nl = true;
+ n = 0; /* start at 1 on next iteration */
}
}
@@ -356,4 +355,6 @@ http_req_free(void *reqptr)
xfree(&req->tcp_buf);
xfree(&req->url);
xfree(&req);
+
+ *addr = 0L;
}
--- http.h Wed Sep 7 15:27:42 2022
+++ http.h Tue Aug 29 13:20:16 2023
@@ -40,6 +40,8 @@ struct http_request {
char *message;
+ size_t content_len;
+
char chunk[2048];
ssize_t chunk_len;
ssize_t chunk_off;
--- main.c Wed Apr 5 11:15:56 2023
+++ main.c Tue Aug 29 09:52:21 2023
@@ -67,7 +67,7 @@ main(void)
browser_init();
while (!quitting) {
- WaitNextEvent(everyEvent, &event, 5L, 0L);
+ WaitNextEvent(everyEvent, &event, 0L, 0L);
switch (event.what) {
case nullEvent:
@@ -121,15 +121,12 @@ main(void)
break;
case updateEvt:
event_win = (WindowPtr)event.message;
-
GetPort(&old_port);
SetPort(event_win);
BeginUpdate(event_win);
-
focusable = focusable_find(event_win);
if (focusable && focusable->update)
focusable->update(focusable, &event);
-
EndUpdate(event_win);
SetPort(old_port);
break;
--- wikipedia.c Wed Apr 5 11:32:30 2023
+++ wikipedia.c Wed Aug 30 15:50:54 2023
@@ -60,7 +60,7 @@ wikipedia_fetch_article(struct browser *browser, char
}
snprintf(url, sizeof(url), "http://%s/w/api.php?action=query&"
- "prop=revisions&rvslots=*&rvprop=content&"
+ "prop=revisions&rvslots=*&rvprop=size|content&"
"format=xml&titles=%s", WIKIPEDIA_HOST, nencoded);
xfree(&nencoded);
wpr->http_request = http_get(url);
@@ -70,8 +70,10 @@ wikipedia_fetch_article(struct browser *browser, char
xfree(&wpr);
return NULL;
}
+
http_req_skip_header(wpr->http_request);
- wpr->state = WP_STATE_XML_INIT;
+ wpr->read_len = wpr->http_request->chunk_len;
+
wpr->normalized_title = xstrdup(name);
if (wpr->normalized_title == NULL) {
progress(NULL);
@@ -80,6 +82,7 @@ wikipedia_fetch_article(struct browser *browser, char
xfree(&wpr);
return NULL;
}
+ wpr->state = WP_STATE_XML_INIT;
browser_debug_print(wpr->browser, wpr->http_request->chunk,
wpr->http_request->chunk_len);
@@ -107,7 +110,7 @@ wikipedia_fetch_search_results(struct browser *browser
} xstate = 0;
char *buf;
size_t buf_size;
- size_t buf_len;
+ size_t buf_idx;
uquery = macroman_to_utf8_string((unsigned char *)query,
strlen(query));
@@ -118,17 +121,21 @@ wikipedia_fetch_search_results(struct browser *browser
if (qencoded == NULL)
return 0;
- snprintf(url, sizeof(url), "http://%s/w/api.php?action=opensearch&"
- "format=xml&namespace=0&limit=10&redirects=return&search=%s",
- WIKIPEDIA_HOST, qencoded);
+ len = snprintf(url, sizeof(url), "http://%s/w/api.php?"
+ "action=opensearch&format=xml&namespace=0&limit=10&"
+ "redirects=return&search=%s", WIKIPEDIA_HOST, qencoded);
xfree(&qencoded);
+ if (len > sizeof(url))
+ return 0;
+
req = http_get(url);
if (req == NULL)
return 0;
+
http_req_skip_header(req);
buf_size = 256;
- buf_len = 0;
+ buf_idx = 0;
buf = xmalloc(buf_size);
if (buf == NULL) {
warn("Out of memory!");
@@ -157,7 +164,7 @@ wikipedia_fetch_search_results(struct browser *browser
break;
}
rets = trets;
- nstr = xstrndup(buf, buf_len);
+ nstr = xstrndup(buf, buf_idx);
if (nstr == NULL) {
warn("Out of memory!");
break;
@@ -166,18 +173,17 @@ wikipedia_fetch_search_results(struct browser *browser
}
buf[0] = '\0';
- buf_len = 0;
+ buf_idx = 0;
xstate = XML_IN_TAG;
} else if (c == '>') {
- if (xstate == XML_IN_TAG &&
- strncmp(buf, "Text xml:", 9) == 0)
+ if (xstate == XML_IN_TAG && strncmp(buf, "Text xml:", 9) == 0)
xstate = XML_IN_TEXT;
else
xstate = XML_DEFAULT;
buf[0] = '\0';
- buf_len = 0;
- } else if (buf_len < buf_size) {
+ buf_idx = 0;
+ } else if (buf_idx < buf_size) {
if ((unsigned char)c >= UTF8_RANGE_START &&
(unsigned char)c <= UTF8_RANGE_END) {
if (utf8[0] == 0)
@@ -200,7 +206,9 @@ wikipedia_fetch_search_results(struct browser *browser
}
if (c)
- buf[buf_len++] = c;
+ buf[buf_idx++] = c;
+ } else {
+ panic("out of buf space");
}
}
@@ -231,6 +239,7 @@ wikipedia_request_process(struct wikipedia_request *wp
{
struct http_request *req = wpr->http_request;
size_t len, n;
+ short pct;
char c, *last;
enum xml_state {
XML_DEFAULT,
@@ -239,23 +248,33 @@ wikipedia_request_process(struct wikipedia_request *wp
utf8_char utf8 = { 0 };
get_char:
- if (req->chunk_len == 0 || (req->chunk_off + 1 > req->chunk_len)) {
+ if (req->chunk_len == 0 || (req->chunk_off >= req->chunk_len)) {
req->chunk_len = http_req_read(req, req->chunk,
sizeof(req->chunk));
req->chunk_off = 0;
-
- if (req->chunk_len < 1 || (req->chunk_off + 1 > req->chunk_len)) {
+ wpr->read_len += req->chunk_len;
+
+ if (req->chunk_len < 1 || (req->chunk_off >= req->chunk_len)) {
wpr->state = WP_STATE_DONE;
goto done_parsing;
}
+ if (req->content_len > 0) {
+ pct = (wpr->read_len * 100) / req->content_len;
+ if (pct > 100)
+ pct = 100;
+
+ progress("Fetching article \"%s\" (%d%%)...",
+ wpr->normalized_title, pct);
+ }
+
browser_debug_print(wpr->browser, req->chunk, req->chunk_len);
}
switch (wpr->state) {
case WP_STATE_XML_INIT:
wpr->buf_size = 1024;
- wpr->buf_len = 0;
+ wpr->buf_idx = 0;
wpr->buf = xmalloc(wpr->buf_size);
if (wpr->buf == NULL) {
warn("Out of memory!");
@@ -270,20 +289,24 @@ get_char:
if (c == '<') {
wpr->buf[0] = '\0';
- wpr->buf_len = 0;
+ wpr->buf_idx = 0;
} else if (c == '>') {
- wpr->buf[wpr->buf_len] = '\0';
+ wpr->buf[wpr->buf_idx] = '\0';
if (xstate == XML_DEFAULT) {
if (strcmp(wpr->buf, "normalized") == 0) {
xstate = XML_IN_NORMALIZED;
} else if (strncmp(wpr->buf, "slot ", 5) == 0) {
wpr->state = WP_STATE_WIKITEXT_INIT;
+ } else if (!req->content_len &&
+ strncmp(wpr->buf, "rev size=", 9) == 0) {
+ if (sscanf(wpr->buf, "rev size=\"%ld\"", &len) == 1)
+ req->content_len = len;
}
} else if (xstate == XML_IN_NORMALIZED) {
char from_normalized[255], to_normalized[255];
size_t count;
- if (sscanf(wpr->buf, "n from=\"%[^\"]\" to=\"%[^\"]\"%n",
+ if (sscanf(wpr->buf, "n from=\"%254[^\"]\" to=\"%254[^\"]\"%n",
&from_normalized, &to_normalized, &count) == 2 &&
count > 10) {
if (wpr->normalized_title != NULL)
@@ -296,15 +319,17 @@ get_char:
} else
xstate = XML_DEFAULT;
}
- } else if (wpr->buf_len < wpr->buf_size) {
- wpr->buf[wpr->buf_len++] = c;
+ } else {
+ if (wpr->buf_idx >= wpr->buf_size)
+ panic("ran out of buf space parsing xml");
+ wpr->buf[wpr->buf_idx++] = c;
}
goto get_char;
case WP_STATE_WIKITEXT_INIT:
wpr->article_len = 0;
- wpr->buf_len = 0;
+ wpr->buf_idx = 0;
wpr->buf[0] = '\0';
wpr->curlys = 0;
@@ -319,7 +344,7 @@ get_char:
case WP_STATE_WIKITEXT_PARSE: {
c = req->chunk[req->chunk_off];
- last = wpr->buf + wpr->buf_len - 1;
+ last = wpr->buf + wpr->buf_idx - 1;
if (c == '<' || c == '\0') {
wpr->state = WP_STATE_DONE;
@@ -330,22 +355,26 @@ get_char:
if (c == ';') {
/* XML entity decode */
- if (last[-3] == '&' && last[-2] == 'a' && last[-1] == 'm' &&
+ if (wpr->buf_idx >= 4 &&
+ last[-3] == '&' && last[-2] == 'a' && last[-1] == 'm' &&
last[0] == 'p') {
c = '&';
- wpr->buf_len -= 4;
- } else if (last[-4] == '&' && last[-3] == 'n' &&
- last[-2] == 'b' && last[-1] == 's' && last[0] == 'p') {
+ wpr->buf_idx -= 4;
+ } else if (wpr->buf_idx >= 5 &&
+ last[-4] == '&' && last[-3] == 'n' && last[-2] == 'b' &&
+ last[-1] == 's' && last[0] == 'p') {
c = ' ';
- wpr->buf_len -= 5;
- } else if (last[-2] == '&' && last[-1] == 'l' && last[0] == 't') {
+ wpr->buf_idx -= 5;
+ } else if (wpr->buf_idx >= 2 &&
+ last[-2] == '&' && last[-1] == 'l' && last[0] == 't') {
c = '<';
- wpr->buf_len -= 3;
- } else if (last[-2] == '&' && last[-1] == 'g' && last[0] == 't') {
+ wpr->buf_idx -= 3;
+ } else if (wpr->buf_idx >= 2 &&
+ last[-2] == '&' && last[-1] == 'g' && last[0] == 't') {
c = '>';
- wpr->buf_len -= 3;
+ wpr->buf_idx -= 3;
}
- last = wpr->buf + wpr->buf_len - 1;
+ last = wpr->buf + wpr->buf_idx - 1;
} else if (c == '\n') {
c = '\r';
} else if ((unsigned char)c >= UTF8_RANGE_START &&
@@ -372,87 +401,99 @@ get_char:
/* check for style changes */
- if (last[0] == '{' && (c == '{' || c == '|')) {
+ if (wpr->buf_idx >= 1 &&
+ last[0] == '{' && (c == '{' || c == '|')) {
wpr->curlys++;
- wpr->buf_len--;
+ wpr->buf_idx--;
wpr->style |= STYLE_TEMPLATE;
c = 0;
- } else if ((last[0] == '}' || last[0] == '|') && c == '}') {
+ } else if (wpr->buf_idx >= 1 &&
+ (last[0] == '}' || last[0] == '|') && c == '}') {
if (wpr->curlys)
wpr->curlys--;
- wpr->buf_len--;
+ wpr->buf_idx--;
if (wpr->curlys == 0)
wpr->style &= ~(STYLE_TEMPLATE);
c = 0;
- } else if (last[0] == '[' && c == '[') {
+ } else if (wpr->buf_idx >= 1 &&
+ last[0] == '[' && c == '[') {
if (wpr->brackets)
wpr->brackets++;
- wpr->buf_len--;
+ wpr->buf_idx--;
wpr->style |= STYLE_LINK;
c = 0;
- } else if (last[0] == ']' && c == ']') {
+ } else if (wpr->buf_idx >= 1 &&
+ last[0] == ']' && c == ']') {
if (wpr->brackets)
wpr->brackets--;
- wpr->buf_len--;
+ wpr->buf_idx--;
if (wpr->brackets == 0)
wpr->style &= ~(STYLE_LINK);
c = 0;
- } else if (last[-1] == '\'' && last[0] == '\'' && c == '\'') {
+ } else if (wpr->buf_idx >= 2 &&
+ last[-1] == '\'' && last[0] == '\'' && c == '\'') {
if (wpr->style & STYLE_BOLD)
wpr->style &= ~(STYLE_BOLD);
else
wpr->style |= STYLE_BOLD;
- wpr->buf_len -= 2;
+ wpr->buf_idx -= 2;
c = 0;
- } else if (last[-1] == '\'' && last[0] == '\'' && c != '\'') {
+ } else if (wpr->buf_idx >= 2 &&
+ last[-1] == '\'' && last[0] == '\'' && c != '\'') {
if (wpr->style & STYLE_ITALIC)
wpr->style &= ~(STYLE_ITALIC);
else
wpr->style |= STYLE_ITALIC;
- wpr->buf_len -= 2;
+ wpr->buf_idx -= 2;
/* keep c */
- } else if (last[-3] == '=' && last[-2] == '=' && last[-1] == '=' &&
+ } else if (wpr->buf_idx >= 4 &&
+ last[-3] == '=' && last[-2] == '=' && last[-1] == '=' &&
last[0] == '=' && c == '=') {
if (wpr->style & STYLE_H5)
wpr->style &= ~(STYLE_H5);
else
wpr->style |= STYLE_H5;
- wpr->buf_len -= 4;
+ wpr->buf_idx -= 4;
c = 0;
- } else if (last[-3] == '=' && last[-2] == '=' && last[-1] == '=' &&
+ } else if (wpr->buf_idx >= 4 &&
+ last[-3] == '=' && last[-2] == '=' && last[-1] == '=' &&
last[0] == '=' && c != '=') {
if (wpr->style & STYLE_H4)
wpr->style &= ~(STYLE_H4);
else
wpr->style |= STYLE_H4;
- wpr->buf_len -= 4;
+ wpr->buf_idx -= 4;
/* keep c */
- } else if (last[-2] == '=' && last[-1] == '=' && last[0] == '=' &&
+ } else if (wpr->buf_idx >= 3 &&
+ last[-2] == '=' && last[-1] == '=' && last[0] == '=' &&
c != '=') {
if (wpr->style & STYLE_H3)
wpr->style &= ~(STYLE_H3);
else
wpr->style |= STYLE_H3;
- wpr->buf_len -= 3;
+ wpr->buf_idx -= 3;
/* keep c */
- } else if (last[-1] == '=' && last[0] == '=' && c != '=') {
+ } else if (wpr->buf_idx >= 2 &&
+ last[-1] == '=' && last[0] == '=' && c != '=') {
if (wpr->style & STYLE_H2)
wpr->style &= ~(STYLE_H2);
else
wpr->style |= STYLE_H2;
- wpr->buf_len -= 2;
+ wpr->buf_idx -= 2;
/* keep c */
- } else if (last[-2] == '<' && last[-1] == 'r' && last[0] == 'e' &&
+ } else if (wpr->buf_idx >= 3 &&
+ last[-2] == '<' && last[-1] == 'r' && last[0] == 'e' &&
c == 'f') {
/* <ref */
wpr->refs++;
wpr->style |= STYLE_REF;
- wpr->buf_len -= 3;
+ wpr->buf_idx -= 3;
c = 0;
} else if ((wpr->style & STYLE_REF) &&
- ((last[-4] == '<' && last[-3] == '/' && last[-2] == 'r' &&
- last[-1] == 'e' && last[0] == 'f' && c == '>') ||
- (last[0] == '/' && c == '>'))) {
+ ((wpr->buf_idx >= 5 && last[-4] == '<' && last[-3] == '/' &&
+ last[-2] == 'r' && last[-1] == 'e' && last[0] == 'f' &&
+ c == '>') ||
+ (wpr->buf_idx >= 1 && last[0] == '/' && c == '>'))) {
/* </ref> or <ref /> */
if (wpr->refs)
wpr->refs--;
@@ -474,38 +515,38 @@ get_char:
char *conv, *conv2;
size_t len;
- conv = xmalloc(wpr->buf_len);
+ conv = xmalloc(wpr->buf_idx);
if (conv == NULL) {
- warn("Failed allocating %ld", wpr->buf_len);
+ warn("Failed allocating %ld", wpr->buf_idx);
break;
}
- conv2 = xmalloc(wpr->buf_len);
+ conv2 = xmalloc(wpr->buf_idx);
if (conv2 == NULL) {
- warn("Failed allocating %ld", wpr->buf_len);
+ warn("Failed allocating %ld", wpr->buf_idx);
xfree(&conv);
break;
}
- wpr->buf[wpr->buf_len] = '\0';
+ wpr->buf[wpr->buf_idx] = '\0';
if (sscanf(wpr->buf, "convert|%[^|]|%[^|]|%n", conv,
conv2, &len) == 2 && len >= 13)
- wpr->buf_len = snprintf(wpr->buf, wpr->buf_size,
+ wpr->buf_idx = snprintf(wpr->buf, wpr->buf_size,
"%s %s ", conv, conv2);
else
- wpr->buf_len = 0;
+ wpr->buf_idx = 0;
xfree(&conv);
xfree(&conv2);
} else
- wpr->buf_len = 0;
+ wpr->buf_idx = 0;
}
/* maybe we can do something with these later */
if (wpr->last_style & STYLE_REF)
- wpr->buf_len = 0;
+ wpr->buf_idx = 0;
/* we can't show inline images */
if ((wpr->last_style & STYLE_LINK) &&
strncmp(wpr->buf, "File:", 5) == 0) {
- wpr->buf_len = 0;
+ wpr->buf_idx = 0;
wpr->trim_whitespace = true;
}
@@ -515,13 +556,13 @@ get_char:
if ((wpr->style & STYLE_LINK) && wpr->article_len == 0 &&
strncmp(wpr->buf, "#REDIRECT ", 10) == 0) {
- wpr->buf_len = 0;
+ wpr->buf_idx = 0;
wpr->redirect = true;
} else if (wpr->redirect &&
!(wpr->style & STYLE_LINK) && (wpr->last_style & STYLE_LINK)) {
if (wpr->normalized_title)
xfree(&wpr->normalized_title);
- wpr->buf[wpr->buf_len] = '\0';
+ wpr->buf[wpr->buf_idx] = '\0';
wpr->normalized_title = xstrdup(wpr->buf);
if (wpr->normalized_title == NULL) {
warn("Out of memory!");
@@ -531,14 +572,17 @@ get_char:
goto done_parsing;
}
- if (wpr->buf_len) {
+ if (wpr->buf_idx) {
if (wpr->article_len == 0)
wikipedia_request_present(wpr);
- browser_print(wpr->browser, wpr->buf, wpr->buf_len,
- wpr->last_style);
- wpr->article_len += wpr->buf_len;
- wpr->buf_len = 0;
+ if (!browser_print(wpr->browser, wpr->buf, wpr->buf_idx,
+ wpr->last_style)) {
+ wpr->state = WP_STATE_DONE;
+ goto done_parsing;
+ }
+ wpr->article_len += wpr->buf_idx;
+ wpr->buf_idx = 0;
}
wpr->last_style = wpr->style;
}
@@ -553,8 +597,18 @@ get_char:
}
/* and finally, add the new character */
- if (c != 0)
- wpr->buf[wpr->buf_len++] = c;
+ if (c != 0) {
+ if (wpr->buf_idx >= wpr->buf_size) {
+ if (!browser_print(wpr->browser, wpr->buf, wpr->buf_idx,
+ wpr->style)) {
+ wpr->state = WP_STATE_DONE;
+ goto done_parsing;
+ }
+ wpr->article_len += wpr->buf_idx;
+ wpr->buf_idx = 0;
+ }
+ wpr->buf[wpr->buf_idx++] = c;
+ }
req->chunk_off++;
goto get_char;
@@ -570,8 +624,17 @@ done_parsing:
}
void
-wikipedia_request_abort(struct wikipedia_request *wpr)
+wikipedia_request_free(struct wikipedia_request **wprptr)
{
+ struct wikipedia_request *wpr = (struct wikipedia_request *)*wprptr;
+
+ if (wpr == NULL) {
+ *wprptr = NULL;
+ return;
+ }
+
if (wpr->http_request != NULL)
http_req_free(&wpr->http_request);
+
+ *wprptr = NULL;
}
--- wikipedia.h Wed Sep 7 16:47:24 2022
+++ wikipedia.h Wed Aug 30 14:19:31 2023
@@ -57,11 +57,12 @@ struct wikipedia_request {
struct browser *browser;
struct http_request *http_request;
char *normalized_title;
+ size_t read_len;
size_t article_len;
char *buf;
size_t buf_size;
- size_t buf_len;
+ size_t buf_idx;
short refs, curlys, brackets;
unsigned long style, last_style;
bool trim_whitespace, redirect;
@@ -75,6 +76,6 @@ struct wikipedia_request * wikipedia_read_cached_artic
char *name);
void wikipedia_request_present(struct wikipedia_request *wpr);
void wikipedia_request_process(struct wikipedia_request *wpr);
-void wikipedia_request_abort(struct wikipedia_request *wpr);
+void wikipedia_request_free(struct wikipedia_request **wprptr);
#endif