jcs
/wikipedia
/amendments
/14
wikipedia: Slightly faster WikiText parsing
jcs made amendment 14 over 2 years ago
--- wikipedia.c Thu Aug 25 14:13:25 2022
+++ wikipedia.c Thu Aug 25 15:59:08 2022
@@ -223,6 +223,10 @@ wikipedia_request_process(struct wikipedia_request *wp
if (type == JSON_ERROR)
wpr->state = WP_STATE_DONE;
else {
+ char title[100];
+ snprintf(title, sizeof(title), "%s: %s", PROGRAM_NAME,
+ wpr->normalized_title);
+ SetWTitle(wpr->browser->win, CtoPstr(title));
wpr->state = WP_STATE_PARSE_WIKITEXT;
progress("Formatting article...");
}
@@ -295,7 +299,7 @@ wikipedia_request_process(struct wikipedia_request *wp
wpr->article[n] = str[n];
}
}
-
+
next_context:
if (context_type == JSON_OBJECT && tmp_depth > 0) {
if (tmp_depth % 2 == 0)
@@ -326,66 +330,77 @@ next_context:
c++;
} else if (bracket_level > 0) {
c++;
- } else if (strncmp(c, "<ref", 4) == 0) {
+ } else if (c[0] == '<' && c[1] == 'r' && c[2] == 'e' &&
+ c[3] == 'f') {
+ /* <ref */
in_ref = true;
c += 4;
- } else if (strncmp(c, "</ref>", 6) == 0) {
+ } else if (c[0] == '<' && c[1] == '/' && c[2] == 'r' &&
+ c[3] == 'e' && c[4] == 'f' && c[5] == '>') {
+ /* </ref> */
in_ref = false;
c += 6;
} else if (in_ref) {
c++;
- } else if (strncmp(c, "'''", 3) == 0) {
- if (style & STYLE_BOLD)
- style &= ~(STYLE_BOLD);
- else
- style |= STYLE_BOLD;
- c += 3;
- } else if (strncmp(c, "''", 2) == 0) {
- if (style & STYLE_ITALIC)
- style &= ~(STYLE_ITALIC);
- else
- style |= STYLE_ITALIC;
- c += 2;
- } else if (strncmp(c, "=====", 5) == 0) {
- if (style & STYLE_H5)
- style &= ~(STYLE_H5);
- else
- style |= STYLE_H5;
- c += 5;
- } else if (strncmp(c, "====", 4) == 0) {
- if (style & STYLE_H4)
- style &= ~(STYLE_H4);
- else
- style |= STYLE_H4;
- c += 4;
- } else if (strncmp(c, "===", 3) == 0) {
- if (style & STYLE_H3)
- style &= ~(STYLE_H3);
- else
- style |= STYLE_H3;
- c += 3;
- } else if (strncmp(c, "==", 2) == 0) {
- if (style & STYLE_H2)
- style &= ~(STYLE_H2);
- else
- style |= STYLE_H2;
- c += 2;
- } else if (*c == '=') {
- if (style & STYLE_H1)
- style &= ~(STYLE_H1);
- else
- style |= STYLE_H1;
- c += 1;
- } else if (strncmp(c, "[[", 2) == 0) {
+ } else if (c[0] == '\'' && c[1] == '\'') {
+ /* ''' or '' */
+ if (c[2] == '\'') {
+ if (style & STYLE_BOLD)
+ style &= ~(STYLE_BOLD);
+ else
+ style |= STYLE_BOLD;
+ c += 3;
+ } else {
+ if (style & STYLE_ITALIC)
+ style &= ~(STYLE_ITALIC);
+ else
+ style |= STYLE_ITALIC;
+ c += 2;
+ }
+ } else if (c[0] == '=' && c[1] == '=') {
+ /* == or === or ==== or ===== */
+ if (c[2] == '=') {
+ if (c[3] == '=') {
+ if (c[4] == '=') {
+ if (style & STYLE_H5)
+ style &= ~(STYLE_H5);
+ else
+ style |= STYLE_H5;
+ c += 5;
+ } else {
+ if (style & STYLE_H4)
+ style &= ~(STYLE_H4);
+ else
+ style |= STYLE_H4;
+ c += 4;
+ }
+ } else {
+ if (style & STYLE_H3)
+ style &= ~(STYLE_H3);
+ else
+ style |= STYLE_H3;
+ c += 3;
+ }
+ } else {
+ if (style & STYLE_H2)
+ style &= ~(STYLE_H2);
+ else
+ style |= STYLE_H2;
+ c += 2;
+ }
+
+ while (*c == ' ')
+ c++;
+ } else if (c[0] == '[' && c[1] == '[') {
style |= STYLE_LINK;
c += 2;
- } else if (strncmp(c, "]]", 2) == 0) {
+ } else if (c[0] == ']' && c[1] == ']') {
style &= ~(STYLE_LINK);
c += 2;
- } else if (*c == '\r' && !printed) {
+ } else if (c[0] == '\r' && !printed) {
/* skip leading newlines */
c++;
- } else if (strncmp(c, "&nbsp;", 6) == 0) {
+ } else if (c[0] == '&' && strncmp(c, "&nbsp;", 6) == 0) {
buf[buflen++] = ' ';
c += 6;
} else {
@@ -403,11 +418,9 @@ next_context:
}
}
- UpdateScrollbarForTE(wpr->browser->te_scroller,
- wpr->browser->te, true);
- SetWTitle(wpr->browser->win, CtoPstr(wpr->normalized_title));
wpr->state = WP_STATE_DONE;
- progress(NULL);
+ wpr->browser->state = BROWSER_STATE_SEARCH_DONE;
+
break;
}
}