wikipedia: Use XML API for searching too, get rid of pdjson
jcs made amendment 33 over 2 years ago
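
For reference, the opensearch endpoint with format=xml is assumed to return a document roughly like the following (an illustrative sketch, not part of the patch; titles and attributes will vary):

    <SearchSuggestion version="2.0">
      <Query xml:space="preserve">banana</Query>
      <Section>
        <Item>
          <Text xml:space="preserve">Banana</Text>
          <Url xml:space="preserve">http://en.wikipedia.org/wiki/Banana</Url>
        </Item>
      </Section>
    </SearchSuggestion>

The replacement parser below scans this stream by hand: it watches for tags whose name buffer starts with "Text xml:" (the <Text xml:space="preserve"> elements) and saves the text that follows each one, converted from UTF-8 to MacRoman, as one search result.
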
--- wikipedia.c Wed Sep 7 15:42:01 2022
+++ wikipedia.c Wed Sep 7 16:45:33 2022
@@ -20,7 +20,6 @@
#include "wikipedia.h"
#include "http.h"
-#include "pdjson.h"
#include "utf8.h"
#include "util.h"
@@ -66,86 +65,99 @@ wikipedia_fetch_search_results(struct browser *browser
char ***results)
{
static char url[256];
- json_stream json;
struct http_request *req;
char *qencoded;
char **rets = NULL;
char *str = NULL, *nstr = NULL, c;
unsigned char *uquery;
- enum json_type type;
short strings = 0;
size_t nrets = 0, len, n, npos;
utf8_char utf8 = { 0 };
+ enum xml_state {
+ XML_DEFAULT,
+ XML_IN_TAG,
+ XML_IN_TEXT
+ } xstate = XML_DEFAULT;
+ char *buf;
+ size_t buf_size;
+ size_t buf_len;
uquery = macroman_to_utf8_string((unsigned char *)query, strlen(query));
qencoded = url_encode(uquery);
xfree(&uquery);
snprintf(url, sizeof(url), "http://%s/w/api.php?action=opensearch&"
- "format=json&formatversion=2&namespace=0&limit=10&"
- "search=%s", WIKIPEDIA_HOST, qencoded);
+ "format=xml&namespace=0&limit=10&redirects=return&search=%s",
+ WIKIPEDIA_HOST, qencoded);
xfree(&qencoded);
req = http_get(url);
http_req_skip_header(req);
-
- json_open_user(&json, http_req_chunk_read, http_req_chunk_peek, req);
-
+
+ buf_size = 256;
+ buf_len = 0;
+ buf = xmalloc(buf_size, "xml buf");
+
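+ /* walk the XML response one character at a time, refilling the request's read buffer as needed */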
for (;;) {
- type = json_next(&json);
+ if (req->chunk_len == 0 || (req->chunk_off + 1 > req->chunk_len)) {
+ req->chunk_len = http_req_read(req, req->chunk,
+ sizeof(req->chunk));
+ req->chunk_off = 0;
+
+ if (req->chunk_len < 1 || (req->chunk_off + 1 > req->chunk_len))
+ break;
+ }
- if (type == JSON_ERROR || type == JSON_DONE ||
- type == JSON_ARRAY_END)
- break;
+ c = req->chunk[req->chunk_off++];
+
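+ /* a new tag ends any pending text run; if it was the body of a <Text ...> element, keep it as one search result */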
+ if (c == '<') {
+ if (xstate == XML_IN_TEXT) {
+ nrets++;
+ rets = xreallocarray(rets, sizeof(Ptr), nrets);
+ nstr = xstrndup(buf, buf_len, "search result");
+ rets[nrets - 1] = nstr;
+ }
- if (type == JSON_STRING) {
- strings++;
-
- /* skip first, it'll be our query */
- if (strings == 1)
- continue;
-
- nrets++;
- rets = xreallocarray(rets, sizeof(Ptr), nrets);
-
- str = (char *)json_get_string(&json, NULL);
- len = strlen(str);
- nstr = xmalloc(len + 1, "search result");
-
- for (n = 0, npos = 0; n < len; n++) {
- c = str[n];
+ buf[0] = '\0';
+ buf_len = 0;
+ xstate = XML_IN_TAG;
+ } else if (c == '>') {
+ if (xstate == XML_IN_TAG &&
+ strncmp(buf, "Text xml:", 9) == 0)
+ xstate = XML_IN_TEXT;
+ else
+ xstate = XML_DEFAULT;
- if ((unsigned char)c >= UTF8_RANGE_START &&
- (unsigned char)c <= UTF8_RANGE_END) {
- if (utf8[0] == 0)
- utf8[0] = c;
- else if (utf8[1] == 0)
- utf8[1] = c;
- else if (utf8[2] == 0)
- utf8[2] = c;
- else if (utf8[3] == 0)
- utf8[3] = c;
- else {
- /* bogus */
- utf8[0] = 0;
- c = 0;
- }
-
- c = utf8_to_macroman(&utf8);
- if (c)
- memset(&utf8, 0, sizeof(utf8));
+ buf[0] = '\0';
+ buf_len = 0;
+ } else if (buf_len < buf_size) {
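+ /* accumulate UTF-8 byte sequences and convert each to a single MacRoman character before storing it */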
+ if ((unsigned char)c >= UTF8_RANGE_START &&
+ (unsigned char)c <= UTF8_RANGE_END) {
+ if (utf8[0] == 0)
+ utf8[0] = c;
+ else if (utf8[1] == 0)
+ utf8[1] = c;
+ else if (utf8[2] == 0)
+ utf8[2] = c;
+ else if (utf8[3] == 0)
+ utf8[3] = c;
+ else {
+ /* bogus */
+ utf8[0] = 0;
+ c = 0;
}
+
+ c = utf8_to_macroman(&utf8);
if (c)
- nstr[npos++] = c;
+ memset(&utf8, 0, sizeof(utf8));
}
- nstr[npos] = '\0';
- rets[nrets - 1] = nstr;
- } else if (type == JSON_ARRAY_END) {
- break;
+
+ if (c)
+ buf[buf_len++] = c;
}
}
- json_close(&json);
http_req_free(&req);
+ xfree(&buf);
*results = rets;
--- wikipedia.h Tue Sep 6 23:06:18 2022
+++ wikipedia.h Wed Sep 7 16:47:24 2022
@@ -19,7 +19,6 @@
#include "browser.h"
#include "http.h"
-#include "pdjson.h"
#define PROGRAM_NAME "Wikipedia"