jcs
/wikipedia
/amendments
/32
browser+wikipedia: Support UTF8, article redirections, "View Source" mode
jcs made amendment 32 over 2 years ago
--- browser.c Tue Sep 6 10:05:04 2022
+++ browser.c Wed Sep 7 13:41:26 2022
@@ -24,7 +24,9 @@
#include "http.h"
#include "util.h"
-#define PADDING 10
+#define PADDING 10
+#define BROWSER_FONT_SIZE 10
+#define BROWSER_FONT geneva
bool browser_close(struct focusable *focusable);
void browser_idle(struct focusable *focusable, EventRecord *event);
@@ -35,6 +37,9 @@ void browser_mouse_down(struct focusable *focusable, E
bool browser_handle_menu(struct focusable *focusable, short menu,
short item);
void browser_atexit(struct focusable *focusable);
+bool browser_avoid_te_overflow(struct browser *browser, TEHandle te,
+ short line_height);
+bool browser_debug_enabled(struct browser *browser);
void browser_live_search(struct browser *browser);
void browser_hide_search_results(struct browser *browser);
@@ -68,14 +73,6 @@ browser_idle(struct focusable *focusable, EventRecord
HUnlock(te->hText);
HUnlock(browser->input_te);
- HLock(browser->te);
- te = *(browser->te);
- if (te->teLength > 0) {
- TESetText("", 0, browser->te);
- InvalRect(&te->viewRect);
- }
- HUnlock(browser->te);
-
SetCursor(*(GetCursor(watchCursor)));
browser->wpr = wikipedia_fetch_article(browser, input);
xfree(&input);
@@ -90,14 +87,24 @@ browser_idle(struct focusable *focusable, EventRecord
wikipedia_request_process(browser->wpr);
- if (browser->wpr->state == WP_STATE_DONE) {
+ if (browser->wpr->state == WP_STATE_DONE)
browser->state = BROWSER_STATE_ARTICLE_DONE;
- break;
+ else if (browser->wpr->state == WP_STATE_HAVE_REDIRECT) {
+ progress("Following redirect to %s...",
+ browser->wpr->normalized_title);
+ TESetText(browser->wpr->normalized_title,
+ strlen(browser->wpr->normalized_title), browser->input_te);
+ HLock(browser->input_te);
+ InvalRect(&(*(browser->input_te))->viewRect);
+ HUnlock(browser->input_te);
+ browser->state = BROWSER_STATE_ARTICLE_GET;
+ xfree(&browser->wpr);
}
+
break;
case BROWSER_STATE_ARTICLE_DONE:
- UpdateScrollbarForTE(browser->te_scroller, browser->te, false);
- UpdtControl(browser->win, browser->win->visRgn);
+ UpdateScrollbarForTE(browser->win, browser->te_scroller,
+ browser->te, false);
progress(NULL);
SetCursor(&arrow);
browser->state = BROWSER_STATE_IDLE;
@@ -124,8 +131,8 @@ browser_init(void)
/* main window */
width = screenBits.bounds.right - screenBits.bounds.left - PADDING;
- if (width > 500)
- width = 500;
+ if (width > 540)
+ width = 540;
height = screenBits.bounds.bottom - screenBits.bounds.top -
PADDING - (GetMBarHeight() * 2);
if (height > 340)
@@ -153,18 +160,34 @@ browser_init(void)
TEAutoView(true, browser->input_te);
TEActivate(browser->input_te);
+ /* main article TE bounds */
+ browser->te_bounds.top = (*(browser->input_te))->viewRect.bottom +
+ PADDING;
+ browser->te_bounds.left = PADDING;
+ browser->te_bounds.right = browser->win->portRect.right -
+ SCROLLBAR_WIDTH - PADDING;
+ browser->te_bounds.bottom = browser->win->portRect.bottom - PADDING;
+
+ /* debug TE, off-screen until enabled */
+ bounds = browser->te_bounds;
+ bounds.top += browser->win->portRect.bottom;
+ bounds.bottom += browser->win->portRect.bottom;
+ te_bounds = bounds;
+ InsetRect(&te_bounds, 2, 2);
+ browser->debug_te = TENew(&te_bounds, &bounds);
+ TEAutoView(false, browser->debug_te);
+ (*(browser->debug_te))->caretHook = NullCaretHook;
+ TEActivate(browser->debug_te);
+
/* main article TE */
- bounds.top = (*(browser->input_te))->viewRect.bottom + PADDING;
- bounds.left = PADDING;
- bounds.right = browser->win->portRect.right - SCROLLBAR_WIDTH - PADDING;
- bounds.bottom = browser->win->portRect.bottom - PADDING;
+ bounds = browser->te_bounds;
te_bounds = bounds;
InsetRect(&te_bounds, 2, 2);
browser->te = TEStylNew(&te_bounds, &bounds);
TEAutoView(false, browser->te);
(*(browser->te))->caretHook = NullCaretHook;
TEActivate(browser->te);
-
+
/* scrollbar for diff text */
bounds.right = browser->win->portRect.right - PADDING;
bounds.left = bounds.right - SCROLLBAR_WIDTH;
@@ -174,7 +197,8 @@ browser_init(void)
1, 1, 1, scrollBarProc, 0L);
browser_update_menu(browser);
- UpdateScrollbarForTE(browser->te_scroller, browser->te, true);
+ UpdateScrollbarForTE(browser->win, browser->te_scroller,
+ browser->te, true);
focusable = xmalloczero(sizeof(struct focusable), "focusable");
focusable->cookie = browser;
@@ -241,10 +265,13 @@ browser_update_menu(struct browser *browser)
DisableItem(edit_menu, EDIT_MENU_PASTE_ID);
- if (te->nLines == 0)
+ /*
+ * Select All belongs to the Edit menu, but the debug item is item
+ * VIEW_MENU_DEBUG_ID of the View menu, so it has to be toggled
+ * through view_menu; using edit_menu would hit an unrelated Edit item.
+ */
+ if (te->nLines == 0) {
DisableItem(edit_menu, EDIT_MENU_SELECT_ALL_ID);
- else
+ DisableItem(view_menu, VIEW_MENU_DEBUG_ID);
+ } else {
EnableItem(edit_menu, EDIT_MENU_SELECT_ALL_ID);
+ EnableItem(view_menu, VIEW_MENU_DEBUG_ID);
+ }
}
void
@@ -344,28 +371,39 @@ browser_mouse_down(struct focusable *focusable, EventR
}
}
- HLock(browser->te);
- r = (*(browser->te))->viewRect;
- HUnlock(browser->te);
- if (PtInRect(p, &r)) {
- TEClick(p, ((event->modifiers & shiftKey) != 0), browser->te);
-
- off = TEGetOffset(p, browser->te);
- for (n = 0; n < browser->links_count; n++) {
- struct browser_link *link = &browser->links[n];
+ if (browser_debug_enabled(browser)) {
+ HLock(browser->debug_te);
+ r = (*(browser->debug_te))->viewRect;
+ HUnlock(browser->debug_te);
+ if (PtInRect(p, &r)) {
+ TEClick(p, ((event->modifiers & shiftKey) != 0), browser->debug_te);
+ browser_update_menu(browser);
+ return;
+ }
+ } else {
+ HLock(browser->te);
+ r = (*(browser->te))->viewRect;
+ HUnlock(browser->te);
+ if (PtInRect(p, &r)) {
+ TEClick(p, ((event->modifiers & shiftKey) != 0), browser->te);
- if ((link->pos <= off) && (off < link->pos + link->len)) {
- TESetText(link->link, strlen(link->link), browser->input_te);
- HLock(browser->input_te);
- InvalRect(&(*(browser->input_te))->viewRect);
- HUnlock(browser->input_te);
- browser->state = BROWSER_STATE_ARTICLE_GET;
- break;
+ off = TEGetOffset(p, browser->te);
+ for (n = 0; n < browser->links_count; n++) {
+ struct browser_link *link = &browser->links[n];
+
+ if ((link->pos <= off) && (off < link->pos + link->len)) {
+ TESetText(link->link, strlen(link->link), browser->input_te);
+ HLock(browser->input_te);
+ InvalRect(&(*(browser->input_te))->viewRect);
+ HUnlock(browser->input_te);
+ browser->state = BROWSER_STATE_ARTICLE_GET;
+ break;
+ }
}
+
+ browser_update_menu(browser);
+ return;
}
-
- browser_update_menu(browser);
- return;
}
switch (part = FindControl(p, browser->win, &control)) {
@@ -375,9 +413,12 @@ browser_mouse_down(struct focusable *focusable, EventR
case inDownButton:
case inPageUp:
case inPageDown:
- if (control == browser->te_scroller)
- SetTrackControlTE(browser->te);
- else
+ if (control == browser->te_scroller) {
+ if (browser_debug_enabled(browser))
+ SetTrackControlTE(browser->debug_te);
+ else
+ SetTrackControlTE(browser->te);
+ } else
break;
TrackControl(control, p, TrackMouseDownInControl);
break;
@@ -388,9 +429,14 @@ browser_mouse_down(struct focusable *focusable, EventR
adj = val - GetCtlValue(control);
if (adj != 0) {
val -= adj;
- if (control == browser->te_scroller)
- TEScroll(0, adj * TEGetHeight(0, 0, browser->te),
- browser->te);
+ if (control == browser->te_scroller) {
+ if (browser_debug_enabled(browser))
+ TEScroll(0, adj * TEGetHeight(0, 0,
+ browser->debug_te), browser->debug_te);
+ else
+ TEScroll(0, adj * TEGetHeight(0, 0,
+ browser->te), browser->te);
+ }
SetCtlValue(control, val);
}
break;
@@ -463,8 +509,10 @@ browser_live_search(struct browser *browser)
}
for (n = 0; n < nresults; n++) {
+ size_t len;
+ len = strlen(results[n]);
LAddRow(1, cell.v, browser->search_results);
- LSetCell(results[n], strlen(results[n]), cell,
+ LSetCell(results[n], len, cell,
browser->search_results);
cell.v++;
xfree(&results[n]);
@@ -506,21 +554,179 @@ browser_handle_menu(struct focusable *focusable, short
case EDIT_MENU_ID:
switch (item) {
case EDIT_MENU_COPY_ID:
- TECopy(browser->te);
+ if (browser_debug_enabled(browser))
+ TECopy(browser->debug_te);
+ else
+ TECopy(browser->te);
return true;
case EDIT_MENU_SELECT_ALL_ID:
- TESetSelect(0, 1024 * 32, browser->te);
+ if (browser_debug_enabled(browser))
+ TESetSelect(0, 1024 * 32, browser->debug_te);
+ else
+ TESetSelect(0, 1024 * 32, browser->te);
return true;
}
break;
+    case VIEW_MENU_ID:
+        switch (item) {
+        case VIEW_MENU_DEBUG_ID: {
+            /*
+             * Toggle between the article TE and the debug TE.  Both
+             * share one on-screen area (browser->te_bounds): the TE
+             * being shown gets that area as its viewRect (and the
+             * area inset by 2 as its destRect), while the TE being
+             * hidden inherits the rects the other one held until now.
+             */
+            Rect bounds, te_bounds;
+
+            bounds = browser->te_bounds;
+            te_bounds = bounds;
+            InsetRect(&te_bounds, 2, 2);
+
+            /* both handles are dereferenced repeatedly below */
+            HLock(browser->debug_te);
+            HLock(browser->te);
+
+            /* the newly shown TE starts with the scrollbar at its minimum */
+            SetCtlValue(browser->te_scroller, GetCtlMin(browser->te_scroller));
+
+            if (browser_debug_enabled(browser)) {
+                /* disable debugging */
+                SetItemMark(view_menu, VIEW_MENU_DEBUG_ID, noMark);
+
+                (*(browser->debug_te))->destRect = (*(browser->te))->destRect;
+                (*(browser->debug_te))->viewRect = (*(browser->te))->viewRect;
+                (*(browser->te))->destRect = te_bounds;
+                (*(browser->te))->viewRect = bounds;
+
+                EraseRect(&(*(browser->te))->destRect);
+                TEUpdate(&(*(browser->te))->destRect, browser->te);
+                UpdateScrollbarForTE(browser->win, browser->te_scroller,
+                    browser->te, false);
+            } else {
+                /* enable debugging */
+                SetItemMark(view_menu, VIEW_MENU_DEBUG_ID, checkMark);
+
+                (*(browser->te))->destRect = (*(browser->debug_te))->destRect;
+                (*(browser->te))->viewRect = (*(browser->debug_te))->viewRect;
+                (*(browser->debug_te))->destRect = te_bounds;
+                (*(browser->debug_te))->viewRect = bounds;
+
+                EraseRect(&(*(browser->debug_te))->destRect);
+                TEUpdate(&(*(browser->debug_te))->destRect, browser->debug_te);
+                UpdateScrollbarForTE(browser->win, browser->te_scroller,
+                    browser->debug_te, false);
+            }
+
+            /* release both locks taken above */
+            HUnlock(browser->debug_te);
+            HUnlock(browser->te);
+            /*
+             * NOTE(review): this falls through to the function's final
+             * "return false", whereas the Edit menu items return true;
+             * confirm that is intended.
+             */
+            break;
+        }
+        }
+        break;
}
return false;
}
+/*
+ * Report whether the debug view is switched on.  The state is read back
+ * from the mark on the View menu's debug item rather than from the
+ * browser itself, so the browser argument is not consulted.
+ */
+bool
+browser_debug_enabled(struct browser *browser)
+{
+    short item_mark;
+
+    GetItemMark(view_menu, VIEW_MENU_DEBUG_ID, &item_mark);
+    if (item_mark == noMark)
+        return false;
+
+    return true;
+}
+
+/*
+ * Append len bytes of str to the end of the debug TE, turning each '\n'
+ * into '\r' on the way.  The text is staged through a fixed-size buffer
+ * and inserted one bufferful at a time.  If the TE was empty beforehand,
+ * the scrollbar is reset to its minimum and refreshed from the debug TE.
+ *
+ * Returns the number of bytes appended (len as passed in).
+ */
+size_t
+browser_debug_print(struct browser *browser, const char *str,
+    size_t len)
+{
+    char tstr[1024];
+    short line_height;
+    short was_len;
+    size_t n = 0;
+    size_t total = len;    /* len is consumed by the loop below */
+
+    line_height = BROWSER_FONT_SIZE + 3;
+
+    /* the lock is only needed while peeking at the TE record */
+    HLock(browser->debug_te);
+    was_len = (*(browser->debug_te))->teLength;
+    HUnlock(browser->debug_te);
+
+    /* trim old text first if the TE is close to its limits */
+    browser_avoid_te_overflow(browser, browser->debug_te, line_height);
+
+    while (len) {
+        if (*str == '\n')
+            tstr[n++] = '\r';
+        else
+            tstr[n++] = *str;
+
+        str++;
+        len--;
+
+        /* flush when the staging buffer is full or input is exhausted */
+        if (n == sizeof(tstr) || len == 0) {
+            /* move the insertion point to the very end of the text */
+            TESetSelect(SHRT_MAX, SHRT_MAX, browser->debug_te);
+            TEInsert(tstr, n, browser->debug_te);
+            n = 0;
+        }
+    }
+
+    if (was_len == 0) {
+        SetCtlValue(browser->te_scroller, GetCtlMin(browser->te_scroller));
+        UpdateScrollbarForTE(browser->win, browser->te_scroller,
+            browser->debug_te, false);
+    }
+
+    return total;
+}
+
+/*
+ * Keep a TextEdit record from growing past its limits.  If te is close
+ * to any of them (number of lines, number of characters, or total pixel
+ * height at line_height per line), the text before lineStarts[5] is
+ * deleted with drawing clipped away so the trim itself is not visible.
+ *
+ * Returns true if text was trimmed, false if te was left untouched.
+ * The browser argument is not used.
+ */
+bool
+browser_avoid_te_overflow(struct browser *browser, TEHandle te,
+    short line_height)
+{
+    RgnHandle savergn;
+    Rect zerorect = { 0, 0, 0, 0 };
+
+    HLock(te);
+
+    /* too many lines */
+    if ((*te)->nLines >= (nitems((*te)->lineStarts) - 10))
+        goto te_overflow;
+
+    /* too many characters */
+    if ((*te)->teLength >= (SHRT_MAX - 500))
+        goto te_overflow;
+
+    /*
+     * rect of all lines is too tall; multiply as long so the product
+     * cannot wrap before the comparison if int is only 16 bits wide
+     */
+    if ((long)(*te)->nLines * line_height >= (SHRT_MAX - 100))
+        goto te_overflow;
+
+    HUnlock(te);
+
+    return false;
+
+te_overflow:
+    savergn = NewRgn();
+    GetClip(savergn);
+    /* create an empty clip region so all TE updates are hidden */
+    ClipRect(&zerorect);
+
+    /* select some lines at the start, delete them */
+    TESetSelect(0, (*te)->lineStarts[5], te);
+    TEDelete(te);
+
+    /* scroll up, causing a repaint */
+    TEPinScroll(0, INT_MAX, te);
+
+    /* then scroll back down to what it looked like before we did anything */
+    TEPinScroll(0, -INT_MAX, te);
+
+    /* resume normal drawing */
+    SetClip(savergn);
+    DisposeRgn(savergn);
+
+    HUnlock(te);
+
+    return true;
+}
+
#define BROWSER_SCRAP_ELEMENTS 20
-#define BROWSER_FONT_SIZE 10
-#define BROWSER_FONT geneva
static Handle scrp_rec_h = NULL;
size_t
@@ -610,46 +816,9 @@ browser_print(struct browser *browser, const char *str
HUnlock(scrp_rec_h);
- HLock(browser->te);
-
- /* check for TE overflow */
-
- /* too many lines */
- if ((*(browser->te))->nLines >=
- (nitems((*(browser->te))->lineStarts) - 10))
- goto te_overflow;
-
- /* too many characters */
- if ((*(browser->te))->teLength >= (SHRT_MAX - 500))
- goto te_overflow;
-
- /* rect of all lines is too tall */
- if ((*(browser->te))->nLines * line_height >= (SHRT_MAX - 100))
- goto te_overflow;
-
- goto no_overflow;
-
-te_overflow:
- savergn = NewRgn();
- GetClip(savergn);
- /* create an empty clip region so all TE updates are hidden */
- ClipRect(&zerorect);
+ browser_avoid_te_overflow(browser, browser->te, line_height);
- /* select some lines at the start, delete them */
- TESetSelect(0, (*(browser->te))->lineStarts[5], browser->te);
- TEDelete(browser->te);
-
- /* scroll up, causing a repaint */
- TEPinScroll(0, INT_MAX, browser->te);
-
- /* then scroll back down to what it looked like before we did anything */
- TEPinScroll(0, -INT_MAX, browser->te);
-
- /* resume normal drawing */
- SetClip(savergn);
- DisposeRgn(savergn);
-
-no_overflow:
+ HLock(browser->te);
was_len = (*(browser->te))->teLength;
if (style & STYLE_LINK)
@@ -659,6 +828,16 @@ no_overflow:
if ((last_style & STYLE_ITALIC) && !(style & STYLE_ITALIC))
TEStylInsert(" ", 1, scrp_rec_h, browser->te);
+ if (style & (STYLE_H1 | STYLE_H2 | STYLE_H3 | STYLE_H4 | STYLE_H5)) {
+ while (len && (str[0] == ' ' || str[0] == '\r')) {
+ str++;
+ len--;
+ }
+ while (len && (str[len - 1] == ' ' || str[len - 1] == '\r')) {
+ len--;
+ }
+ }
+
TEStylInsert(str, len, scrp_rec_h, browser->te);
if (style & (STYLE_H1 | STYLE_H2))
@@ -668,8 +847,8 @@ no_overflow:
if (was_len == 0) {
SetCtlValue(browser->te_scroller, GetCtlMin(browser->te_scroller));
- UpdateScrollbarForTE(browser->te_scroller, browser->te, false);
- UpdtControl(browser->win, browser->win->visRgn);
+ UpdateScrollbarForTE(browser->win, browser->te_scroller,
+ browser->te, false);
}
HUnlock(browser->te);
@@ -688,6 +867,14 @@ browser_clear(struct browser *browser)
xfree(&browser->links[n].link);
xfree(&browser->links);
+ browser->links_count = 0;
+ browser->links_size = 0;
+
+ TEPinScroll(0, -SHRT_MAX, browser->debug_te);
+ TESetText("", 0, browser->te);
+ TESetText("", 0, browser->debug_te);
+
+ SetCtlValue(browser->te_scroller, GetCtlMin(browser->te_scroller));
}
void
--- browser.h Mon Sep 5 23:34:36 2022
+++ browser.h Tue Sep 6 10:49:31 2022
@@ -49,7 +49,9 @@ struct browser {
WindowPtr win;
TEHandle input_te;
unsigned long last_input_for_search;
+ Rect te_bounds;
TEHandle te;
+ TEHandle debug_te;
ControlHandle te_scroller;
ListHandle search_results;
struct wikipedia_request *wpr;
@@ -63,5 +65,8 @@ size_t browser_print(struct browser *browser, const ch
unsigned long style);
void browser_clear(struct browser *browser);
void browser_draw_line(struct browser *browser);
+bool browser_debug_enabled(struct browser *browser);
+size_t browser_debug_print(struct browser *browser, const char *str,
+ size_t len);
#endif
--- http.c Thu Sep 1 09:33:32 2022
+++ http.c Wed Sep 7 15:27:25 2022
@@ -92,7 +92,7 @@ cleanup:
}
char *
-url_encode(char *str)
+url_encode(unsigned char *str)
{
char *ret = NULL;
size_t len, n;
--- http.h Wed Aug 31 16:19:58 2022
+++ http.h Wed Sep 7 15:27:42 2022
@@ -41,12 +41,12 @@ struct http_request {
char *message;
char chunk[2048];
- size_t chunk_len;
- size_t chunk_off;
+ ssize_t chunk_len;
+ ssize_t chunk_off;
};
struct url * url_parse(const char *str);
-char * url_encode(char *str);
+char * url_encode(unsigned char *str);
struct http_request * http_get(const char *url);
ssize_t http_req_read(struct http_request *req, char *data, size_t len);
--- main.c Tue Sep 6 19:08:52 2022
+++ main.c Wed Sep 7 11:07:26 2022
@@ -214,4 +214,4 @@ menu_defaults(void)
DisableItem(edit_menu, EDIT_MENU_COPY_ID);
DisableItem(edit_menu, EDIT_MENU_PASTE_ID);
DisableItem(edit_menu, EDIT_MENU_SELECT_ALL_ID);
-}
+}
--- wikipedia.c Mon Sep 5 23:27:57 2022
+++ wikipedia.c Wed Sep 7 15:42:01 2022
@@ -21,6 +21,7 @@
#include "wikipedia.h"
#include "http.h"
#include "pdjson.h"
+#include "utf8.h"
#include "util.h"
/* en.wikipedia.org doesn't support non-TLS :( */
@@ -32,27 +33,30 @@ wikipedia_fetch_article(struct browser *browser, char
static char url[256];
struct wikipedia_request *wpr;
short state;
- char *c;
+ char *nencoded;
+ unsigned char *uname;
- /* "Macintosh Plus" -> "Macintosh_Plus" */
- for (c = name; *c != '\0'; c++) {
- if (*c == ' ')
- *c = '_';
- }
+ progress("Fetching article \"%s\"...", name);
wpr = xmalloczero(sizeof(struct wikipedia_request),
"fetch_article wpr");
wpr->browser = browser;
-
- progress("Contacting Wikipedia...");
+ uname = macroman_to_utf8_string((unsigned char *)name, strlen(name));
+ nencoded = url_encode(uname);
+ xfree(&uname);
+
snprintf(url, sizeof(url), "http://%s/w/api.php?action=query&"
"prop=revisions&rvslots=*&rvprop=content&"
- "format=xml&titles=%s", WIKIPEDIA_HOST, name);
+ "format=xml&titles=%s", WIKIPEDIA_HOST, nencoded);
+ xfree(&nencoded);
wpr->http_request = http_get(url);
http_req_skip_header(wpr->http_request);
wpr->state = WP_STATE_XML_INIT;
wpr->normalized_title = xstrdup(name, "normalized_title");
+
+ browser_debug_print(wpr->browser, wpr->http_request->chunk,
+ wpr->http_request->chunk_len);
return wpr;
}
@@ -65,12 +69,17 @@ wikipedia_fetch_search_results(struct browser *browser
json_stream json;
struct http_request *req;
char *qencoded;
+ char **rets = NULL;
+ char *str = NULL, *nstr = NULL, c;
+ unsigned char *uquery;
enum json_type type;
short strings = 0;
- char **rets = NULL;
- size_t nrets = 0;
+ size_t nrets = 0, len, n, npos;
+ utf8_char utf8 = { 0 };
- qencoded = url_encode(query);
+ uquery = macroman_to_utf8_string((unsigned char *)query, strlen(query));
+ qencoded = url_encode(uquery);
+ xfree(&uquery);
snprintf(url, sizeof(url), "http://%s/w/api.php?action=opensearch&"
"format=json&formatversion=2&namespace=0&limit=10&"
@@ -97,8 +106,39 @@ wikipedia_fetch_search_results(struct browser *browser
nrets++;
rets = xreallocarray(rets, sizeof(Ptr), nrets);
- rets[nrets - 1] = xstrdup(json_get_string(&json, NULL),
- "search result");
+
+ str = (char *)json_get_string(&json, NULL);
+ len = strlen(str);
+ nstr = xmalloc(len + 1, "search result");
+
+ for (n = 0, npos = 0; n < len; n++) {
+ c = str[n];
+
+ if ((unsigned char)c >= UTF8_RANGE_START &&
+ (unsigned char)c <= UTF8_RANGE_END) {
+ if (utf8[0] == 0)
+ utf8[0] = c;
+ else if (utf8[1] == 0)
+ utf8[1] = c;
+ else if (utf8[2] == 0)
+ utf8[2] = c;
+ else if (utf8[3] == 0)
+ utf8[3] = c;
+ else {
+ /* bogus */
+ utf8[0] = 0;
+ c = 0;
+ }
+
+ c = utf8_to_macroman(&utf8);
+ if (c)
+ memset(&utf8, 0, sizeof(utf8));
+ }
+ if (c)
+ nstr[npos++] = c;
+ }
+ nstr[npos] = '\0';
+ rets[nrets - 1] = nstr;
} else if (type == JSON_ARRAY_END) {
break;
}
@@ -113,6 +153,20 @@ wikipedia_fetch_search_results(struct browser *browser
}
+/*
+ * Start presenting an article: set the window title to
+ * "<PROGRAM_NAME>: <normalized title>", clear the browser, and print
+ * the normalized title as an H1 heading.
+ */
void
+wikipedia_request_present(struct wikipedia_request *wpr)
+{
+    struct browser *browser = wpr->browser;
+    char win_title[255];
+
+    snprintf(win_title, sizeof(win_title), "%s: %s", PROGRAM_NAME,
+        wpr->normalized_title);
+    /* SetWTitle wants a Pascal string; convert the buffer in the call */
+    SetWTitle(browser->win, CtoPstr(win_title));
+
+    browser_clear(browser);
+    browser_print(browser, wpr->normalized_title,
+        strlen(wpr->normalized_title), STYLE_H1);
+}
+
+void
wikipedia_request_process(struct wikipedia_request *wpr)
{
struct http_request *req = wpr->http_request;
@@ -122,8 +176,7 @@ wikipedia_request_process(struct wikipedia_request *wp
XML_DEFAULT,
XML_IN_NORMALIZED
} xstate = 0;
- bool dump = false;
- unsigned char utf8[4] = { 0 };
+ utf8_char utf8 = { 0 };
get_char:
if (req->chunk_len == 0 || (req->chunk_off + 1 > req->chunk_len)) {
@@ -131,13 +184,16 @@ get_char:
sizeof(req->chunk));
req->chunk_off = 0;
- if (req->chunk_len == 0 || (req->chunk_off + 1 > req->chunk_len))
+ if (req->chunk_len < 1 || (req->chunk_off + 1 > req->chunk_len)) {
+ wpr->state = WP_STATE_DONE;
goto done_parsing;
+ }
+
+ browser_debug_print(wpr->browser, req->chunk, req->chunk_len);
}
switch (wpr->state) {
case WP_STATE_XML_INIT:
- progress("Fetching and parsing response...");
wpr->buf_size = 1024;
wpr->buf_len = 0;
wpr->buf = xmalloc(wpr->buf_size, "wpr buf");
@@ -179,14 +235,6 @@ get_char:
goto get_char;
case WP_STATE_WIKITEXT_INIT:
- snprintf(wpr->buf, wpr->buf_size, "%s: %s", PROGRAM_NAME,
- wpr->normalized_title);
- SetWTitle(wpr->browser->win, CtoPstr(wpr->buf));
-
- browser_clear(wpr->browser);
- browser_print(wpr->browser, wpr->normalized_title,
- strlen(wpr->normalized_title), STYLE_H1);
-
wpr->article_len = 0;
wpr->buf_len = 0;
wpr->buf[0] = '\0';
@@ -205,8 +253,13 @@ get_char:
c = req->chunk[req->chunk_off];
last = wpr->buf + wpr->buf_len - 1;
- if (c == '<' || c == '\0')
+ if (strncmp(wpr->buf, "prayer", 6) == 0)
+ wpr->curlys = 0;
+
+ if (c == '<' || c == '\0') {
+ wpr->state = WP_STATE_DONE;
goto done_parsing;
+ }
/* character conversions */
@@ -230,7 +283,8 @@ get_char:
last = wpr->buf + wpr->buf_len - 1;
} else if (c == '\n') {
c = '\r';
- } else if ((unsigned char)c >= 0x80 && (unsigned char)c < 0xf5) {
+ } else if ((unsigned char)c >= UTF8_RANGE_START &&
+ (unsigned char)c <= UTF8_RANGE_END) {
/* utf-8 */
if (utf8[0] == 0)
utf8[0] = c;
@@ -246,48 +300,9 @@ get_char:
c = 0;
}
+ c = utf8_to_macroman(&utf8);
if (c)
- c = 0;
- else
- c = '?';
-
- if (utf8[0] >= 0xc2 && utf8[0] <= 0xdf && utf8[1] != 0) {
- /* 2 byte */
- if (utf8[0] == 0xc3 && utf8[1] == 0x97)
- c = 'x';
- else if (utf8[0] == 0xc3 && utf8[1] == 0xa9)
- /* e accent */
- c = 'é'; // 0x8e
- else if (utf8[0] == 0xc3 && utf8[1] == 0xb6)
- /* o umlat */
- c = 'ö'; // 0x9a
- else
- c = '?';
- utf8[0] = utf8[1] = utf8[2] = 0;
- } else if (utf8[0] >= 0xe0 && utf8[0] <= 0xef && utf8[2] != 0) {
- /* 3-byte */
- if ((utf8[0] == 0xe2 && utf8[1] == 0x80 && utf8[2] == 0x9c) ||
- (utf8[0] == 0xe2 && utf8[1] == 0x80 && utf8[2] == 0x9d))
- /* smart quote */
- c = '"';
- else if (utf8[0] == 0xe2 && utf8[1] == 0x80 && utf8[2] == 0x93)
- /* n-dash */
- c = '–'; // 0xd0
- else if (utf8[0] == 0xe2 && utf8[1] == 0x80 && utf8[2] == 0x94)
- /* m-dash */
- c = '—'; // 0xd1
- else if ((utf8[0] == 0xe2 && utf8[1] == 0x80 && utf8[2] == 0x98) ||
- (utf8[0] == 0xe2 && utf8[1] == 0x80 && utf8[2] == 0x99))
- /* apos */
- c = '\'';
- else
- c = '?';
- utf8[0] = utf8[1] = utf8[2] = 0;
- } else if (utf8[0] >= 0xf0 && utf8[0] <= 0xf4 && utf8[3] != 0) {
- /* 4-byte */
- c = '?';
- utf8[0] = utf8[1] = utf8[2] = 0;
- }
+ memset(&utf8, 0, sizeof(utf8));
}
/* check for style changes */
@@ -409,12 +424,37 @@ get_char:
/* maybe we can do something with these later */
if (wpr->last_style & STYLE_REF)
wpr->buf_len = 0;
-
+
+ /* we can't show inline images */
+ if ((wpr->last_style & STYLE_LINK) &&
+ strncmp(wpr->buf, "File:", 5) == 0) {
+ wpr->buf_len = 0;
+ wpr->trim_whitespace = true;
+ }
+
if (wpr->last_style & (STYLE_TEMPLATE |
STYLE_H1 | STYLE_H2 | STYLE_H3 | STYLE_H4 | STYLE_H5))
wpr->trim_whitespace = true;
-
+
+ if ((wpr->style & STYLE_LINK) && wpr->article_len == 0 &&
+ strncmp(wpr->buf, "#REDIRECT ", 10) == 0) {
+ wpr->buf_len = 0;
+ wpr->redirect = true;
+ } else if (wpr->redirect &&
+ !(wpr->style & STYLE_LINK) &&
+ (wpr->last_style & STYLE_LINK)) {
+ if (wpr->normalized_title)
+ xfree(&wpr->normalized_title);
+ wpr->buf[wpr->buf_len] = '\0';
+ wpr->normalized_title = xstrdup(wpr->buf, "title");
+ wpr->state = WP_STATE_HAVE_REDIRECT;
+ goto done_parsing;
+ }
+
if (wpr->buf_len) {
+ if (wpr->article_len == 0)
+ wikipedia_request_present(wpr);
+
browser_print(wpr->browser, wpr->buf, wpr->buf_len,
wpr->last_style);
wpr->article_len += wpr->buf_len;
@@ -423,7 +463,7 @@ get_char:
wpr->last_style = wpr->style;
}
- /* and finally, add the new character */
+ /* remove whitespace */
if (c != 0 && wpr->trim_whitespace) {
if (c == '\r' || c == '\t' || c == ' ')
/* trim whitespace after these */
@@ -432,6 +472,7 @@ get_char:
wpr->trim_whitespace = false;
}
+ /* and finally, add the new character */
if (c != 0)
wpr->buf[wpr->buf_len++] = c;
@@ -441,8 +482,6 @@ get_char:
}
done_parsing:
- wpr->state = WP_STATE_DONE;
-
if (wpr->buf != NULL)
xfree(&wpr->buf);
--- wikipedia.h Mon Sep 5 22:35:12 2022
+++ wikipedia.h Tue Sep 6 23:06:18 2022
@@ -37,8 +37,11 @@
#define EDIT_MENU_PASTE_ID 3
#define EDIT_MENU_SELECT_ALL_ID 4
-extern MenuHandle file_menu, edit_menu;
+#define VIEW_MENU_ID 131
+#define VIEW_MENU_DEBUG_ID 1
+extern MenuHandle file_menu, edit_menu, view_menu;
+
void menu_defaults(void);
enum {
@@ -46,6 +49,7 @@ enum {
WP_STATE_XML_PARSE,
WP_STATE_WIKITEXT_INIT,
WP_STATE_WIKITEXT_PARSE,
+ WP_STATE_HAVE_REDIRECT,
WP_STATE_DONE
};
@@ -61,7 +65,7 @@ struct wikipedia_request {
size_t buf_len;
short refs, curlys, brackets;
unsigned long style, last_style;
- bool trim_whitespace;
+ bool trim_whitespace, redirect;
};
struct wikipedia_request * wikipedia_fetch_article(struct browser *,
@@ -70,6 +74,7 @@ size_t wikipedia_fetch_search_results(struct browser *
char ***results);
struct wikipedia_request * wikipedia_read_cached_article(struct browser *browser,
char *name);
+void wikipedia_request_present(struct wikipedia_request *wpr);
void wikipedia_request_process(struct wikipedia_request *wpr);
void wikipedia_request_abort(struct wikipedia_request *wpr);