AmendHub

jcs

/

wikipedia

/

amendments

/

32

browser+wikipedia: Support UTF8, article redirections, "View Source" mode


jcs made amendment 32 4 months ago
--- browser.c Tue Sep 6 10:05:04 2022 +++ browser.c Wed Sep 7 13:41:26 2022 @@ -24,7 +24,9 @@ #include "http.h" #include "util.h" -#define PADDING 10 +#define PADDING 10 +#define BROWSER_FONT_SIZE 10 +#define BROWSER_FONT geneva bool browser_close(struct focusable *focusable); void browser_idle(struct focusable *focusable, EventRecord *event); @@ -35,6 +37,9 @@ void browser_mouse_down(struct focusable *focusable, E bool browser_handle_menu(struct focusable *focusable, short menu, short item); void browser_atexit(struct focusable *focusable); +bool browser_avoid_te_overflow(struct browser *browser, TEHandle te, + short line_height); +bool browser_debug_enabled(struct browser *browser); void browser_live_search(struct browser *browser); void browser_hide_search_results(struct browser *browser); @@ -68,14 +73,6 @@ browser_idle(struct focusable *focusable, EventRecord HUnlock(te->hText); HUnlock(browser->input_te); - HLock(browser->te); - te = *(browser->te); - if (te->teLength > 0) { - TESetText("", 0, browser->te); - InvalRect(&te->viewRect); - } - HUnlock(browser->te); - SetCursor(*(GetCursor(watchCursor))); browser->wpr = wikipedia_fetch_article(browser, input); xfree(&input); @@ -90,14 +87,24 @@ browser_idle(struct focusable *focusable, EventRecord wikipedia_request_process(browser->wpr); - if (browser->wpr->state == WP_STATE_DONE) { + if (browser->wpr->state == WP_STATE_DONE) browser->state = BROWSER_STATE_ARTICLE_DONE; - break; + else if (browser->wpr->state == WP_STATE_HAVE_REDIRECT) { + progress("Following redirect to %s...", + browser->wpr->normalized_title); + TESetText(browser->wpr->normalized_title, + strlen(browser->wpr->normalized_title), browser->input_te); + HLock(browser->input_te); + InvalRect(&(*(browser->input_te))->viewRect); + HUnlock(browser->input_te); + browser->state = BROWSER_STATE_ARTICLE_GET; + xfree(&browser->wpr); } + break; case BROWSER_STATE_ARTICLE_DONE: - UpdateScrollbarForTE(browser->te_scroller, browser->te, false); - UpdtControl(browser->win, browser->win->visRgn); + UpdateScrollbarForTE(browser->win, browser->te_scroller, + browser->te, false); progress(NULL); SetCursor(&arrow); browser->state = BROWSER_STATE_IDLE; @@ -124,8 +131,8 @@ browser_init(void) /* main window */ width = screenBits.bounds.right - screenBits.bounds.left - PADDING; - if (width > 500) - width = 500; + if (width > 540) + width = 540; height = screenBits.bounds.bottom - screenBits.bounds.top - PADDING - (GetMBarHeight() * 2); if (height > 340) @@ -153,18 +160,34 @@ browser_init(void) TEAutoView(true, browser->input_te); TEActivate(browser->input_te); + /* main article TE bounds */ + browser->te_bounds.top = (*(browser->input_te))->viewRect.bottom + + PADDING; + browser->te_bounds.left = PADDING; + browser->te_bounds.right = browser->win->portRect.right - + SCROLLBAR_WIDTH - PADDING; + browser->te_bounds.bottom = browser->win->portRect.bottom - PADDING; + + /* debug TE, off-screen until enabled */ + bounds = browser->te_bounds; + bounds.top += browser->win->portRect.bottom; + bounds.bottom += browser->win->portRect.bottom; + te_bounds = bounds; + InsetRect(&te_bounds, 2, 2); + browser->debug_te = TENew(&te_bounds, &bounds); + TEAutoView(false, browser->debug_te); + (*(browser->debug_te))->caretHook = NullCaretHook; + TEActivate(browser->debug_te); + /* main article TE */ - bounds.top = (*(browser->input_te))->viewRect.bottom + PADDING; - bounds.left = PADDING; - bounds.right = browser->win->portRect.right - SCROLLBAR_WIDTH - PADDING; - bounds.bottom = browser->win->portRect.bottom - PADDING; + bounds = browser->te_bounds; te_bounds = bounds; InsetRect(&te_bounds, 2, 2); browser->te = TEStylNew(&te_bounds, &bounds); TEAutoView(false, browser->te); (*(browser->te))->caretHook = NullCaretHook; TEActivate(browser->te); - + /* scrollbar for diff text */ bounds.right = browser->win->portRect.right - PADDING; bounds.left = bounds.right - SCROLLBAR_WIDTH; @@ -174,7 +197,8 @@ browser_init(void) 1, 1, 1, scrollBarProc, 0L); browser_update_menu(browser); - UpdateScrollbarForTE(browser->te_scroller, browser->te, true); + UpdateScrollbarForTE(browser->win, browser->te_scroller, + browser->te, true); focusable = xmalloczero(sizeof(struct focusable), "focusable"); focusable->cookie = browser; @@ -241,10 +265,13 @@ browser_update_menu(struct browser *browser) DisableItem(edit_menu, EDIT_MENU_PASTE_ID); - if (te->nLines == 0) + if (te->nLines == 0) { DisableItem(edit_menu, EDIT_MENU_SELECT_ALL_ID); - else + DisableItem(edit_menu, VIEW_MENU_DEBUG_ID); + } else { EnableItem(edit_menu, EDIT_MENU_SELECT_ALL_ID); + EnableItem(edit_menu, VIEW_MENU_DEBUG_ID); + } } void @@ -344,28 +371,39 @@ browser_mouse_down(struct focusable *focusable, EventR } } - HLock(browser->te); - r = (*(browser->te))->viewRect; - HUnlock(browser->te); - if (PtInRect(p, &r)) { - TEClick(p, ((event->modifiers & shiftKey) != 0), browser->te); - - off = TEGetOffset(p, browser->te); - for (n = 0; n < browser->links_count; n++) { - struct browser_link *link = &browser->links[n]; + if (browser_debug_enabled(browser)) { + HLock(browser->debug_te); + r = (*(browser->debug_te))->viewRect; + HUnlock(browser->debug_te); + if (PtInRect(p, &r)) { + TEClick(p, ((event->modifiers & shiftKey) != 0), browser->debug_te); + browser_update_menu(browser); + return; + } + } else { + HLock(browser->te); + r = (*(browser->te))->viewRect; + HUnlock(browser->te); + if (PtInRect(p, &r)) { + TEClick(p, ((event->modifiers & shiftKey) != 0), browser->te); - if ((link->pos <= off) && (off < link->pos + link->len)) { - TESetText(link->link, strlen(link->link), browser->input_te); - HLock(browser->input_te); - InvalRect(&(*(browser->input_te))->viewRect); - HUnlock(browser->input_te); - browser->state = BROWSER_STATE_ARTICLE_GET; - break; + off = TEGetOffset(p, browser->te); + for (n = 0; n < browser->links_count; n++) { + struct browser_link *link = &browser->links[n]; + + if ((link->pos <= off) && (off < link->pos + link->len)) { + TESetText(link->link, strlen(link->link), browser->input_te); + HLock(browser->input_te); + InvalRect(&(*(browser->input_te))->viewRect); + HUnlock(browser->input_te); + browser->state = BROWSER_STATE_ARTICLE_GET; + break; + } } + + browser_update_menu(browser); + return; } - - browser_update_menu(browser); - return; } switch (part = FindControl(p, browser->win, &control)) { @@ -375,9 +413,12 @@ browser_mouse_down(struct focusable *focusable, EventR case inDownButton: case inPageUp: case inPageDown: - if (control == browser->te_scroller) - SetTrackControlTE(browser->te); - else + if (control == browser->te_scroller) { + if (browser_debug_enabled(browser)) + SetTrackControlTE(browser->debug_te); + else + SetTrackControlTE(browser->te); + } else break; TrackControl(control, p, TrackMouseDownInControl); break; @@ -388,9 +429,14 @@ browser_mouse_down(struct focusable *focusable, EventR adj = val - GetCtlValue(control); if (adj != 0) { val -= adj; - if (control == browser->te_scroller) - TEScroll(0, adj * TEGetHeight(0, 0, browser->te), - browser->te); + if (control == browser->te_scroller) { + if (browser_debug_enabled(browser)) + TEScroll(0, adj * TEGetHeight(0, 0, + browser->debug_te), browser->debug_te); + else + TEScroll(0, adj * TEGetHeight(0, 0, + browser->te), browser->te); + } SetCtlValue(control, val); } break; @@ -463,8 +509,10 @@ browser_live_search(struct browser *browser) } for (n = 0; n < nresults; n++) { + size_t len; + len = strlen(results[n]); LAddRow(1, cell.v, browser->search_results); - LSetCell(results[n], strlen(results[n]), cell, + LSetCell(results[n], len, cell, browser->search_results); cell.v++; xfree(&results[n]); @@ -506,21 +554,179 @@ browser_handle_menu(struct focusable *focusable, short case EDIT_MENU_ID: switch (item) { case EDIT_MENU_COPY_ID: - TECopy(browser->te); + if (browser_debug_enabled(browser)) + TECopy(browser->debug_te); + else + TECopy(browser->te); return true; case EDIT_MENU_SELECT_ALL_ID: - TESetSelect(0, 1024 * 32, browser->te); + if (browser_debug_enabled(browser)) + TESetSelect(0, 1024 * 32, browser->debug_te); + else + TESetSelect(0, 1024 * 32, browser->te); return true; } break; + case VIEW_MENU_ID: + switch (item) { + case VIEW_MENU_DEBUG_ID: { + Rect bounds, te_bounds; + + bounds = browser->te_bounds; + te_bounds = bounds; + InsetRect(&te_bounds, 2, 2); + + HLock(browser->debug_te); + HLock(browser->te); + + SetCtlValue(browser->te_scroller, GetCtlMin(browser->te_scroller)); + + if (browser_debug_enabled(browser)) { + /* disable debugging */ + SetItemMark(view_menu, VIEW_MENU_DEBUG_ID, noMark); + + (*(browser->debug_te))->destRect = (*(browser->te))->destRect; + (*(browser->debug_te))->viewRect = (*(browser->te))->viewRect; + (*(browser->te))->destRect = te_bounds; + (*(browser->te))->viewRect = bounds; + + EraseRect(&(*(browser->te))->destRect); + TEUpdate(&(*(browser->te))->destRect, browser->te); + UpdateScrollbarForTE(browser->win, browser->te_scroller, + browser->te, false); + } else { + /* enable debugging */ + SetItemMark(view_menu, VIEW_MENU_DEBUG_ID, checkMark); + + (*(browser->te))->destRect = (*(browser->debug_te))->destRect; + (*(browser->te))->viewRect = (*(browser->debug_te))->viewRect; + (*(browser->debug_te))->destRect = te_bounds; + (*(browser->debug_te))->viewRect = bounds; + + EraseRect(&(*(browser->debug_te))->destRect); + TEUpdate(&(*(browser->debug_te))->destRect, browser->debug_te); + UpdateScrollbarForTE(browser->win, browser->te_scroller, + browser->debug_te, false); + } + + HUnlock(browser->debug_te); + HLock(browser->te); + break; + } + } + break; } return false; } +bool +browser_debug_enabled(struct browser *browser) +{ + short mark; + + GetItemMark(view_menu, VIEW_MENU_DEBUG_ID, &mark); + + return (mark != noMark); +} + +size_t +browser_debug_print(struct browser *browser, const char *str, + size_t len) +{ + char tstr[1024]; + short line_height; + short was_len; + size_t n = 0; + + line_height = BROWSER_FONT_SIZE + 3; + + HLock(browser->debug_te); + was_len = (*(browser->debug_te))->teLength; + HUnlock(browser->debug_te); + + browser_avoid_te_overflow(browser, browser->debug_te, line_height); + + while (len) { + if (*str == '\n') + tstr[n++] = '\r'; + else + tstr[n++] = *str; + + str++; + len--; + + if (n == sizeof(tstr) || len == 0) { + TESetSelect(SHRT_MAX, SHRT_MAX, browser->debug_te); + TEInsert(tstr, n, browser->debug_te); + if (len == 0) + break; + n = 0; + } + } + + if (was_len == 0) { + SetCtlValue(browser->te_scroller, GetCtlMin(browser->te_scroller)); + UpdateScrollbarForTE(browser->win, browser->te_scroller, + browser->debug_te, false); + } + + HUnlock(browser->debug_te); + + return len; +} + +bool +browser_avoid_te_overflow(struct browser *browser, TEHandle te, + short line_height) +{ + RgnHandle savergn; + Rect zerorect = { 0, 0, 0, 0 }; + + HLock(te); + + /* too many lines */ + if ((*te)->nLines >= (nitems((*te)->lineStarts) - 10)) + goto te_overflow; + + /* too many characters */ + if ((*te)->teLength >= (SHRT_MAX - 500)) + goto te_overflow; + + /* rect of all lines is too tall */ + if ((*te)->nLines * line_height >= (SHRT_MAX - 100)) + goto te_overflow; + + HUnlock(te); + + return false; + +te_overflow: + savergn = NewRgn(); + GetClip(savergn); + /* create an empty clip region so all TE updates are hidden */ + ClipRect(&zerorect); + + /* select some lines at the start, delete them */ + TESetSelect(0, (*te)->lineStarts[5], te); + TEDelete(te); + + /* scroll up, causing a repaint */ + TEPinScroll(0, INT_MAX, te); + + /* then scroll back down to what it looked like before we did anything */ + TEPinScroll(0, -INT_MAX, te); + + /* resume normal drawing */ + SetClip(savergn); + DisposeRgn(savergn); + + HUnlock(te); + + return true; +} + #define BROWSER_SCRAP_ELEMENTS 20 -#define BROWSER_FONT_SIZE 10 -#define BROWSER_FONT geneva static Handle scrp_rec_h = NULL; size_t @@ -610,46 +816,9 @@ browser_print(struct browser *browser, const char *str HUnlock(scrp_rec_h); - HLock(browser->te); - - /* check for TE overflow */ - - /* too many lines */ - if ((*(browser->te))->nLines >= - (nitems((*(browser->te))->lineStarts) - 10)) - goto te_overflow; - - /* too many characters */ - if ((*(browser->te))->teLength >= (SHRT_MAX - 500)) - goto te_overflow; - - /* rect of all lines is too tall */ - if ((*(browser->te))->nLines * line_height >= (SHRT_MAX - 100)) - goto te_overflow; - - goto no_overflow; - -te_overflow: - savergn = NewRgn(); - GetClip(savergn); - /* create an empty clip region so all TE updates are hidden */ - ClipRect(&zerorect); + browser_avoid_te_overflow(browser, browser->te, line_height); - /* select some lines at the start, delete them */ - TESetSelect(0, (*(browser->te))->lineStarts[5], browser->te); - TEDelete(browser->te); - - /* scroll up, causing a repaint */ - TEPinScroll(0, INT_MAX, browser->te); - - /* then scroll back down to what it looked like before we did anything */ - TEPinScroll(0, -INT_MAX, browser->te); - - /* resume normal drawing */ - SetClip(savergn); - DisposeRgn(savergn); - -no_overflow: + HLock(browser->te); was_len = (*(browser->te))->teLength; if (style & STYLE_LINK) @@ -659,6 +828,16 @@ no_overflow: if ((last_style & STYLE_ITALIC) && !(style & STYLE_ITALIC)) TEStylInsert(" ", 1, scrp_rec_h, browser->te); + if (style & (STYLE_H1 | STYLE_H2 | STYLE_H3 | STYLE_H4 | STYLE_H5)) { + while (len && (str[0] == ' ' || str[0] == '\r')) { + str++; + len--; + } + while (len && (str[len - 1] == ' ' || str[len - 1] == '\r')) { + len--; + } + } + TEStylInsert(str, len, scrp_rec_h, browser->te); if (style & (STYLE_H1 | STYLE_H2)) @@ -668,8 +847,8 @@ no_overflow: if (was_len == 0) { SetCtlValue(browser->te_scroller, GetCtlMin(browser->te_scroller)); - UpdateScrollbarForTE(browser->te_scroller, browser->te, false); - UpdtControl(browser->win, browser->win->visRgn); + UpdateScrollbarForTE(browser->win, browser->te_scroller, + browser->te, false); } HUnlock(browser->te); @@ -688,6 +867,14 @@ browser_clear(struct browser *browser) xfree(&browser->links[n].link); xfree(&browser->links); + browser->links_count = 0; + browser->links_size = 0; + + TEPinScroll(0, -SHRT_MAX, browser->debug_te); + TESetText("", 0, browser->te); + TESetText("", 0, browser->debug_te); + + SetCtlValue(browser->te_scroller, GetCtlMin(browser->te_scroller)); } void --- browser.h Mon Sep 5 23:34:36 2022 +++ browser.h Tue Sep 6 10:49:31 2022 @@ -49,7 +49,9 @@ struct browser { WindowPtr win; TEHandle input_te; unsigned long last_input_for_search; + Rect te_bounds; TEHandle te; + TEHandle debug_te; ControlHandle te_scroller; ListHandle search_results; struct wikipedia_request *wpr; @@ -63,5 +65,8 @@ size_t browser_print(struct browser *browser, const ch unsigned long style); void browser_clear(struct browser *browser); void browser_draw_line(struct browser *browser); +bool browser_debug_enabled(struct browser *browser); +size_t browser_debug_print(struct browser *browser, const char *str, + size_t len); #endif --- http.c Thu Sep 1 09:33:32 2022 +++ http.c Wed Sep 7 15:27:25 2022 @@ -92,7 +92,7 @@ cleanup: } char * -url_encode(char *str) +url_encode(unsigned char *str) { char *ret = NULL; size_t len, n; --- http.h Wed Aug 31 16:19:58 2022 +++ http.h Wed Sep 7 15:27:42 2022 @@ -41,12 +41,12 @@ struct http_request { char *message; char chunk[2048]; - size_t chunk_len; - size_t chunk_off; + ssize_t chunk_len; + ssize_t chunk_off; }; struct url * url_parse(const char *str); -char * url_encode(char *str); +char * url_encode(unsigned char *str); struct http_request * http_get(const char *url); ssize_t http_req_read(struct http_request *req, char *data, size_t len); --- main.c Tue Sep 6 19:08:52 2022 +++ main.c Wed Sep 7 11:07:26 2022 @@ -214,4 +214,4 @@ menu_defaults(void) DisableItem(edit_menu, EDIT_MENU_COPY_ID); DisableItem(edit_menu, EDIT_MENU_PASTE_ID); DisableItem(edit_menu, EDIT_MENU_SELECT_ALL_ID); -} +} --- wikipedia.c Mon Sep 5 23:27:57 2022 +++ wikipedia.c Wed Sep 7 15:42:01 2022 @@ -21,6 +21,7 @@ #include "wikipedia.h" #include "http.h" #include "pdjson.h" +#include "utf8.h" #include "util.h" /* en.wikipedia.org doesn't support non-TLS :( */ @@ -32,27 +33,30 @@ wikipedia_fetch_article(struct browser *browser, char static char url[256]; struct wikipedia_request *wpr; short state; - char *c; + char *nencoded; + unsigned char *uname; - /* "Macintosh Plus" -> "Macintosh_Plus" */ - for (c = name; *c != '\0'; c++) { - if (*c == ' ') - *c = '_'; - } + progress("Fetching article \"%s\"...", name); wpr = xmalloczero(sizeof(struct wikipedia_request), "fetch_article wpr"); wpr->browser = browser; - - progress("Contacting Wikipedia..."); + uname = macroman_to_utf8_string((unsigned char *)name, strlen(name)); + nencoded = url_encode(uname); + xfree(&uname); + snprintf(url, sizeof(url), "http://%s/w/api.php?action=query&" "prop=revisions&rvslots=*&rvprop=content&" - "format=xml&titles=%s", WIKIPEDIA_HOST, name); + "format=xml&titles=%s", WIKIPEDIA_HOST, nencoded); + xfree(&nencoded); wpr->http_request = http_get(url); http_req_skip_header(wpr->http_request); wpr->state = WP_STATE_XML_INIT; wpr->normalized_title = xstrdup(name, "normalized_title"); + + browser_debug_print(wpr->browser, wpr->http_request->chunk, + wpr->http_request->chunk_len); return wpr; } @@ -65,12 +69,17 @@ wikipedia_fetch_search_results(struct browser *browser json_stream json; struct http_request *req; char *qencoded; + char **rets = NULL; + char *str = NULL, *nstr = NULL, c; + unsigned char *uquery; enum json_type type; short strings = 0; - char **rets = NULL; - size_t nrets = 0; + size_t nrets = 0, len, n, npos; + utf8_char utf8 = { 0 }; - qencoded = url_encode(query); + uquery = macroman_to_utf8_string((unsigned char *)query, strlen(query)); + qencoded = url_encode(uquery); + xfree(&uquery); snprintf(url, sizeof(url), "http://%s/w/api.php?action=opensearch&" "format=json&formatversion=2&namespace=0&limit=10&" @@ -97,8 +106,39 @@ wikipedia_fetch_search_results(struct browser *browser nrets++; rets = xreallocarray(rets, sizeof(Ptr), nrets); - rets[nrets - 1] = xstrdup(json_get_string(&json, NULL), - "search result"); + + str = (char *)json_get_string(&json, NULL); + len = strlen(str); + nstr = xmalloc(len + 1, "search result"); + + for (n = 0, npos = 0; n < len; n++) { + c = str[n]; + + if ((unsigned char)c >= UTF8_RANGE_START && + (unsigned char)c <= UTF8_RANGE_END) { + if (utf8[0] == 0) + utf8[0] = c; + else if (utf8[1] == 0) + utf8[1] = c; + else if (utf8[2] == 0) + utf8[2] = c; + else if (utf8[3] == 0) + utf8[3] = c; + else { + /* bogus */ + utf8[0] = 0; + c = 0; + } + + c = utf8_to_macroman(&utf8); + if (c) + memset(&utf8, 0, sizeof(utf8)); + } + if (c) + nstr[npos++] = c; + } + nstr[npos] = '\0'; + rets[nrets - 1] = nstr; } else if (type == JSON_ARRAY_END) { break; } @@ -113,6 +153,20 @@ wikipedia_fetch_search_results(struct browser *browser } void +wikipedia_request_present(struct wikipedia_request *wpr) +{ + char title[255]; + + snprintf(title, sizeof(title), "%s: %s", PROGRAM_NAME, + wpr->normalized_title); + SetWTitle(wpr->browser->win, CtoPstr(title)); + + browser_clear(wpr->browser); + browser_print(wpr->browser, wpr->normalized_title, + strlen(wpr->normalized_title), STYLE_H1); +} + +void wikipedia_request_process(struct wikipedia_request *wpr) { struct http_request *req = wpr->http_request; @@ -122,8 +176,7 @@ wikipedia_request_process(struct wikipedia_request *wp XML_DEFAULT, XML_IN_NORMALIZED } xstate = 0; - bool dump = false; - unsigned char utf8[4] = { 0 }; + utf8_char utf8 = { 0 }; get_char: if (req->chunk_len == 0 || (req->chunk_off + 1 > req->chunk_len)) { @@ -131,13 +184,16 @@ get_char: sizeof(req->chunk)); req->chunk_off = 0; - if (req->chunk_len == 0 || (req->chunk_off + 1 > req->chunk_len)) + if (req->chunk_len < 1 || (req->chunk_off + 1 > req->chunk_len)) { + wpr->state = WP_STATE_DONE; goto done_parsing; + } + + browser_debug_print(wpr->browser, req->chunk, req->chunk_len); } switch (wpr->state) { case WP_STATE_XML_INIT: - progress("Fetching and parsing response..."); wpr->buf_size = 1024; wpr->buf_len = 0; wpr->buf = xmalloc(wpr->buf_size, "wpr buf"); @@ -179,14 +235,6 @@ get_char: goto get_char; case WP_STATE_WIKITEXT_INIT: - snprintf(wpr->buf, wpr->buf_size, "%s: %s", PROGRAM_NAME, - wpr->normalized_title); - SetWTitle(wpr->browser->win, CtoPstr(wpr->buf)); - - browser_clear(wpr->browser); - browser_print(wpr->browser, wpr->normalized_title, - strlen(wpr->normalized_title), STYLE_H1); - wpr->article_len = 0; wpr->buf_len = 0; wpr->buf[0] = '\0'; @@ -205,8 +253,13 @@ get_char: c = req->chunk[req->chunk_off]; last = wpr->buf + wpr->buf_len - 1; - if (c == '<' || c == '\0') + if (strncmp(wpr->buf, "prayer", 6) == 0) + wpr->curlys = 0; + + if (c == '<' || c == '\0') { + wpr->state = WP_STATE_DONE; goto done_parsing; + } /* character conversions */ @@ -230,7 +283,8 @@ get_char: last = wpr->buf + wpr->buf_len - 1; } else if (c == '\n') { c = '\r'; - } else if ((unsigned char)c >= 0x80 && (unsigned char)c < 0xf5) { + } else if ((unsigned char)c >= UTF8_RANGE_START && + (unsigned char)c <= UTF8_RANGE_END) { /* utf-8 */ if (utf8[0] == 0) utf8[0] = c; @@ -246,48 +300,9 @@ get_char: c = 0; } + c = utf8_to_macroman(&utf8); if (c) - c = 0; - else - c = '?'; - - if (utf8[0] >= 0xc2 && utf8[0] <= 0xdf && utf8[1] != 0) { - /* 2 byte */ - if (utf8[0] == 0xc3 && utf8[1] == 0x97) - c = 'x'; - else if (utf8[0] == 0xc3 && utf8[1] == 0xa9) - /* e accent */ - c = 'é'; // 0x8e - else if (utf8[0] == 0xc3 && utf8[1] == 0xb6) - /* o umlat */ - c = 'ö'; // 0x9a - else - c = '?'; - utf8[0] = utf8[1] = utf8[2] = 0; - } else if (utf8[0] >= 0xe0 && utf8[0] <= 0xef && utf8[2] != 0) { - /* 3-byte */ - if ((utf8[0] == 0xe2 && utf8[1] == 0x80 && utf8[2] == 0x9c) || - (utf8[0] == 0xe2 && utf8[1] == 0x80 && utf8[2] == 0x9d)) - /* smart quote */ - c = '"'; - else if (utf8[0] == 0xe2 && utf8[1] == 0x80 && utf8[2] == 0x93) - /* n-dash */ - c = '–'; // 0xd0 - else if (utf8[0] == 0xe2 && utf8[1] == 0x80 && utf8[2] == 0x94) - /* m-dash */ - c = '—'; // 0xd1 - else if ((utf8[0] == 0xe2 && utf8[1] == 0x80 && utf8[2] == 0x98) || - (utf8[0] == 0xe2 && utf8[1] == 0x80 && utf8[2] == 0x99)) - /* apos */ - c = '\''; - else - c = '?'; - utf8[0] = utf8[1] = utf8[2] = 0; - } else if (utf8[0] >= 0xf0 && utf8[0] <= 0xf4 && utf8[3] != 0) { - /* 4-byte */ - c = '?'; - utf8[0] = utf8[1] = utf8[2] = 0; - } + memset(&utf8, 0, sizeof(utf8)); } /* check for style changes */ @@ -409,12 +424,37 @@ get_char: /* maybe we can do something with these later */ if (wpr->last_style & STYLE_REF) wpr->buf_len = 0; - + + /* we can't show inline images */ + if ((wpr->last_style & STYLE_LINK) && + strncmp(wpr->buf, "File:", 5) == 0) { + wpr->buf_len = 0; + wpr->trim_whitespace = true; + } + if (wpr->last_style & (STYLE_TEMPLATE | STYLE_H1 | STYLE_H2 | STYLE_H3 | STYLE_H4 | STYLE_H5)) wpr->trim_whitespace = true; - + + if ((wpr->style & STYLE_LINK) && wpr->article_len == 0 && + strncmp(wpr->buf, "#REDIRECT ", 10) == 0) { + wpr->buf_len = 0; + wpr->redirect = true; + } else if (wpr->redirect && + !(wpr->style & STYLE_LINK) && + (wpr->last_style & STYLE_LINK)) { + if (wpr->normalized_title) + xfree(&wpr->normalized_title); + wpr->buf[wpr->buf_len] = '\0'; + wpr->normalized_title = xstrdup(wpr->buf, "title"); + wpr->state = WP_STATE_HAVE_REDIRECT; + goto done_parsing; + } + if (wpr->buf_len) { + if (wpr->article_len == 0) + wikipedia_request_present(wpr); + browser_print(wpr->browser, wpr->buf, wpr->buf_len, wpr->last_style); wpr->article_len += wpr->buf_len; @@ -423,7 +463,7 @@ get_char: wpr->last_style = wpr->style; } - /* and finally, add the new character */ + /* remove whitespace */ if (c != 0 && wpr->trim_whitespace) { if (c == '\r' || c == '\t' || c == ' ') /* trim whitespace after these */ @@ -432,6 +472,7 @@ get_char: wpr->trim_whitespace = false; } + /* and finally, add the new character */ if (c != 0) wpr->buf[wpr->buf_len++] = c; @@ -441,8 +482,6 @@ get_char: } done_parsing: - wpr->state = WP_STATE_DONE; - if (wpr->buf != NULL) xfree(&wpr->buf); --- wikipedia.h Mon Sep 5 22:35:12 2022 +++ wikipedia.h Tue Sep 6 23:06:18 2022 @@ -37,8 +37,11 @@ #define EDIT_MENU_PASTE_ID 3 #define EDIT_MENU_SELECT_ALL_ID 4 -extern MenuHandle file_menu, edit_menu; +#define VIEW_MENU_ID 131 +#define VIEW_MENU_DEBUG_ID 1 +extern MenuHandle file_menu, edit_menu, view_menu; + void menu_defaults(void); enum { @@ -46,6 +49,7 @@ enum { WP_STATE_XML_PARSE, WP_STATE_WIKITEXT_INIT, WP_STATE_WIKITEXT_PARSE, + WP_STATE_HAVE_REDIRECT, WP_STATE_DONE }; @@ -61,7 +65,7 @@ struct wikipedia_request { size_t buf_len; short refs, curlys, brackets; unsigned long style, last_style; - bool trim_whitespace; + bool trim_whitespace, redirect; }; struct wikipedia_request * wikipedia_fetch_article(struct browser *, @@ -70,6 +74,7 @@ size_t wikipedia_fetch_search_results(struct browser * char ***results); struct wikipedia_request * wikipedia_read_cached_article(struct browser *browser, char *name); +void wikipedia_request_present(struct wikipedia_request *wpr); void wikipedia_request_process(struct wikipedia_request *wpr); void wikipedia_request_abort(struct wikipedia_request *wpr);