AmendHub

Download:

jcs / wikipedia / amendments / 14

wikipedia: Slightly faster WikiText parsing


jcs made amendment 14 about 1 year ago
--- wikipedia.c Thu Aug 25 14:13:25 2022 +++ wikipedia.c Thu Aug 25 15:59:08 2022 @@ -223,6 +223,10 @@ wikipedia_request_process(struct wikipedia_request *wp if (type == JSON_ERROR) wpr->state = WP_STATE_DONE; else { + char title[100]; + snprintf(title, sizeof(title), "%s: %s", PROGRAM_NAME, + wpr->normalized_title); + SetWTitle(wpr->browser->win, CtoPstr(title)); wpr->state = WP_STATE_PARSE_WIKITEXT; progress("Formatting article..."); } @@ -295,7 +299,7 @@ wikipedia_request_process(struct wikipedia_request *wp wpr->article[n] = str[n]; } } - + next_context: if (context_type == JSON_OBJECT && tmp_depth > 0) { if (tmp_depth % 2 == 0) @@ -326,66 +330,77 @@ next_context: c++; } else if (bracket_level > 0) { c++; - } else if (strncmp(c, "<ref", 4) == 0) { + } else if (c[0] == '<' && c[1] == 'r' && c[2] == 'e' && + c[3] == 'f') { + /* <ref */ in_ref = true; c += 4; - } else if (strncmp(c, "</ref>", 6) == 0) { + } else if (c[0] == '<' && c[1] == '/' && c[2] == 'r' && + c[3] == 'e' && c[4] == 'f' && c[5] == '>') { + /* </ref> */ in_ref = false; c += 6; } else if (in_ref) { c++; - } else if (strncmp(c, "'''", 3) == 0) { - if (style & STYLE_BOLD) - style &= ~(STYLE_BOLD); - else - style |= STYLE_BOLD; - c += 3; - } else if (strncmp(c, "''", 2) == 0) { - if (style & STYLE_ITALIC) - style &= ~(STYLE_ITALIC); - else - style |= STYLE_ITALIC; - c += 2; - } else if (strncmp(c, "=====", 5) == 0) { - if (style & STYLE_H5) - style &= ~(STYLE_H5); - else - style |= STYLE_H5; - c += 5; - } else if (strncmp(c, "====", 4) == 0) { - if (style & STYLE_H4) - style &= ~(STYLE_H4); - else - style |= STYLE_H4; - c += 4; - } else if (strncmp(c, "===", 3) == 0) { - if (style & STYLE_H3) - style &= ~(STYLE_H3); - else - style |= STYLE_H3; - c += 3; - } else if (strncmp(c, "==", 2) == 0) { - if (style & STYLE_H2) - style &= ~(STYLE_H2); - else - style |= STYLE_H2; - c += 2; - } else if (*c == '=') { - if (style & STYLE_H1) - style &= ~(STYLE_H1); - else - style |= STYLE_H1; - c += 1; - } else if 
(strncmp(c, "[[", 2) == 0) { + } else if (c[0] == '\'' && c[1] == '\'') { + /* ''' or '' */ + if (c[2] == '\'') { + if (style & STYLE_BOLD) + style &= ~(STYLE_BOLD); + else + style |= STYLE_BOLD; + c += 3; + } else { + if (style & STYLE_ITALIC) + style &= ~(STYLE_ITALIC); + else + style |= STYLE_ITALIC; + c += 2; + } + } else if (c[0] == '=' && c[1] == '=') { + /* == or === or ==== or ===== */ + if (c[2] == '=') { + if (c[3] == '=') { + if (c[4] == '=') { + if (style & STYLE_H5) + style &= ~(STYLE_H5); + else + style |= STYLE_H5; + c += 5; + } else { + if (style & STYLE_H4) + style &= ~(STYLE_H4); + else + style |= STYLE_H4; + c += 4; + } + } else { + if (style & STYLE_H3) + style &= ~(STYLE_H3); + else + style |= STYLE_H3; + c += 3; + } + } else { + if (style & STYLE_H2) + style &= ~(STYLE_H2); + else + style |= STYLE_H2; + c += 2; + } + + while (*c == ' ') + c++; + } else if (c[0] == '[' && c[1] == '[') { style |= STYLE_LINK; c += 2; - } else if (strncmp(c, "]]", 2) == 0) { + } else if (c[0] == ']' && c[1] == ']') { style &= ~(STYLE_LINK); c += 2; - } else if (*c == '\r' && !printed) { + } else if (c[0] == '\r' && !printed) { /* skip leading newlines */ c++; - } else if (strncmp(c, "&nbsp;", 6) == 0) { + } else if (c[0] == '&' && strncmp(c, "&nbsp;", 6) == 0) { buf[buflen++] = ' '; c += 6; } else { @@ -403,11 +418,9 @@ next_context: } } - UpdateScrollbarForTE(wpr->browser->te_scroller, - wpr->browser->te, true); - SetWTitle(wpr->browser->win, CtoPstr(wpr->normalized_title)); wpr->state = WP_STATE_DONE; - progress(NULL); + wpr->browser->state = BROWSER_STATE_SEARCH_DONE; + break; } }