Download
jcs
/wikipedia
/wikipedia.c
(View History)
jcs wikipedia: Try resizing buf if we run out of room, don't just dump it | Latest amendment: 55 on 2023-10-31 |
1 | /* |
2 | * Copyright (c) 2021-2022 joshua stein <jcs@jcs.org> |
3 | * |
4 | * Permission to use, copy, modify, and distribute this software for any |
5 | * purpose with or without fee is hereby granted, provided that the above |
6 | * copyright notice and this permission notice appear in all copies. |
7 | * |
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
15 | */ |
16 | |
17 | #include <stdarg.h> |
18 | #include <stdio.h> |
19 | #include <string.h> |
20 | |
21 | #include "wikipedia.h" |
22 | #include "http.h" |
23 | #include "utf8.h" |
24 | #include "util.h" |
25 | |
26 | struct wikipedia_request * |
27 | wikipedia_fetch_article(struct browser *browser, char *name) |
28 | { |
29 | static char url[256]; |
30 | struct wikipedia_request *wpr; |
31 | short state; |
32 | char *nencoded, *hostname; |
33 | unsigned char *uname; |
34 | |
35 | progress("Fetching \"%s\"...", name); |
36 | |
37 | wpr = xmalloczero(sizeof(struct wikipedia_request)); |
38 | if (wpr == NULL) { |
39 | progress(NULL); |
40 | warn("Out of memory!"); |
41 | return NULL; |
42 | } |
43 | wpr->browser = browser; |
44 | |
45 | uname = macroman_to_utf8_string((unsigned char *)name, strlen(name)); |
46 | if (uname == NULL) { |
47 | progress(NULL); |
48 | xfree(&wpr); |
49 | return NULL; |
50 | } |
51 | nencoded = url_encode(uname); |
52 | xfree(&uname); |
53 | if (nencoded == NULL) { |
54 | progress(NULL); |
55 | xfree(&wpr); |
56 | return NULL; |
57 | } |
58 | |
59 | hostname = xGetStringAsChar(STR_HOSTNAME_ID); |
60 | if (hostname == NULL) { |
61 | progress(NULL); |
62 | warn("No Wikipedia hostname set, check Settings"); |
63 | xfree(&wpr); |
64 | return NULL; |
65 | } |
66 | |
67 | snprintf(url, sizeof(url), "http://%s/w/api.php?action=query&" |
68 | "prop=revisions&rvslots=*&rvprop=size|content&" |
69 | "format=xml&titles=%s", hostname, nencoded); |
70 | xfree(&nencoded); |
71 | xfree(&hostname); |
72 | wpr->http_request = http_get(url); |
73 | if (wpr->http_request == NULL) { |
74 | progress(NULL); |
75 | xfree(&nencoded); |
76 | xfree(&wpr); |
77 | return NULL; |
78 | } |
79 | |
80 | http_req_skip_header(wpr->http_request); |
81 | wpr->read_len = wpr->http_request->chunk_len; |
82 | |
83 | wpr->normalized_title = xstrdup(name); |
84 | if (wpr->normalized_title == NULL) { |
85 | progress(NULL); |
86 | warn("Out of memory!"); |
87 | xfree(&nencoded); |
88 | xfree(&wpr); |
89 | return NULL; |
90 | } |
91 | wpr->state = WP_STATE_XML_INIT; |
92 | |
93 | browser_debug_print(wpr->browser, wpr->http_request->chunk, |
94 | wpr->http_request->chunk_len); |
95 | |
96 | return wpr; |
97 | } |
98 | |
99 | size_t |
100 | wikipedia_fetch_search_results(struct browser *browser, char *query, |
101 | char ***results) /* a triple-star program! */ |
102 | { |
103 | static char url[256]; |
104 | struct http_request *req; |
105 | char *qencoded, *hostname; |
106 | char **rets = NULL, **trets = NULL; |
107 | char *str = NULL, *nstr = NULL, c; |
108 | unsigned char *uquery; |
109 | short strings = 0; |
110 | size_t nrets = 0, len, n, npos; |
111 | utf8_char utf8 = { 0 }; |
112 | enum xml_state { |
113 | XML_DEFAULT, |
114 | XML_IN_TAG, |
115 | XML_IN_TEXT |
116 | } xstate = XML_DEFAULT; |
117 | char *buf, *obuf; |
118 | size_t buf_size; |
119 | size_t buf_idx; |
120 | |
121 | uquery = macroman_to_utf8_string((unsigned char *)query, |
122 | strlen(query)); |
123 | if (uquery == NULL) |
124 | return 0; |
125 | qencoded = url_encode(uquery); |
126 | xfree(&uquery); |
127 | if (qencoded == NULL) |
128 | return 0; |
129 | |
130 | hostname = xGetStringAsChar(STR_HOSTNAME_ID); |
131 | if (hostname == NULL) { |
132 | warn("No Wikipedia hostname set, check Settings"); |
133 | return 0; |
134 | } |
135 | |
136 | len = snprintf(url, sizeof(url), "http://%s/w/api.php?" |
137 | "action=opensearch&format=xml&namespace=0&limit=10&" |
138 | "redirects=return&search=%s", hostname, qencoded); |
139 | xfree(&qencoded); |
140 | xfree(&hostname); |
141 | if (len > sizeof(url)) |
142 | return 0; |
143 | |
144 | req = http_get(url); |
145 | if (req == NULL) |
146 | return 0; |
147 | |
148 | http_req_skip_header(req); |
149 | |
150 | buf_size = 256; |
151 | buf_idx = 0; |
152 | buf = xmalloc(buf_size); |
153 | if (buf == NULL) { |
154 | warn("Out of memory!"); |
155 | http_req_free(&req); |
156 | return 0; |
157 | } |
158 | |
159 | for (;;) { |
160 | if (req->chunk_len == 0 || (req->chunk_off + 1 > req->chunk_len)) { |
161 | req->chunk_len = http_req_read(req, req->chunk, |
162 | sizeof(req->chunk)); |
163 | req->chunk_off = 0; |
164 | |
165 | if (req->chunk_len < 1 || (req->chunk_off + 1 > req->chunk_len)) |
166 | break; |
167 | } |
168 | |
169 | c = req->chunk[req->chunk_off++]; |
170 | |
171 | if (c == '<') { |
172 | if (xstate == XML_IN_TEXT) { |
173 | nrets++; |
174 | trets = xreallocarray(rets, sizeof(Ptr), nrets); |
175 | if (trets == NULL) { |
176 | warn("Out of memory!"); |
177 | break; |
178 | } |
179 | rets = trets; |
180 | nstr = xstrndup(buf, buf_idx); |
181 | if (nstr == NULL) { |
182 | warn("Out of memory!"); |
183 | break; |
184 | } |
185 | rets[nrets - 1] = nstr; |
186 | } |
187 | |
188 | buf[0] = '\0'; |
189 | buf_idx = 0; |
190 | xstate = XML_IN_TAG; |
191 | } else if (c == '>') { |
192 | if (xstate == XML_IN_TAG && strncmp(buf, "Text xml:", 9) == 0) |
193 | xstate = XML_IN_TEXT; |
194 | else |
195 | xstate = XML_DEFAULT; |
196 | |
197 | buf[0] = '\0'; |
198 | buf_idx = 0; |
199 | } else if (xstate == XML_IN_TAG || xstate == XML_IN_TEXT) { |
200 | if (xstate == XML_IN_TAG && buf_idx == 9 && |
201 | strncmp(buf, "Text xml:", 9) != 0) { |
202 | /* not an interesting tag, don't bother saving it */ |
203 | xstate = XML_DEFAULT; |
204 | buf[0] = '\0'; |
205 | buf_idx = 0; |
206 | continue; |
207 | } |
208 | |
209 | if ((unsigned char)c >= UTF8_RANGE_START && |
210 | (unsigned char)c <= UTF8_RANGE_END) { |
211 | if (utf8[0] == 0) |
212 | utf8[0] = c; |
213 | else if (utf8[1] == 0) |
214 | utf8[1] = c; |
215 | else if (utf8[2] == 0) |
216 | utf8[2] = c; |
217 | else if (utf8[3] == 0) |
218 | utf8[3] = c; |
219 | else { |
220 | /* bogus */ |
221 | utf8[0] = 0; |
222 | c = 0; |
223 | } |
224 | |
225 | c = utf8_to_macroman(&utf8); |
226 | if (c) |
227 | memset(&utf8, 0, sizeof(utf8)); |
228 | } |
229 | |
230 | if (c) { |
231 | if (buf_idx >= buf_size) { |
232 | buf_size *= 2; |
233 | obuf = buf; |
234 | buf = xrealloc(buf, buf_size); |
235 | if (buf == NULL) { |
236 | warn("Out of text buffer space, failed resizing " |
237 | "to %d bytes", buf_size); |
238 | xfree(&obuf); |
239 | http_req_free(&req); |
240 | return 0; |
241 | } |
242 | } |
243 | |
244 | buf[buf_idx++] = c; |
245 | } |
246 | } |
247 | } |
248 | |
249 | http_req_free(&req); |
250 | xfree(&buf); |
251 | |
252 | *results = rets; |
253 | |
254 | return nrets; |
255 | } |
256 | |
257 | void |
258 | wikipedia_request_present(struct wikipedia_request *wpr) |
259 | { |
260 | char title[255]; |
261 | |
262 | snprintf(title, sizeof(title), "%s: %s", PROGRAM_NAME, |
263 | wpr->normalized_title); |
264 | SetWTitle(wpr->browser->win, CtoPstr(title)); |
265 | |
266 | browser_clear(wpr->browser); |
267 | browser_print(wpr->browser, wpr->normalized_title, |
268 | strlen(wpr->normalized_title), STYLE_H1); |
269 | } |
270 | |
271 | void |
272 | wikipedia_request_process(struct wikipedia_request *wpr) |
273 | { |
274 | struct http_request *req = wpr->http_request; |
275 | size_t len, n; |
276 | short pct; |
277 | char c, *last, *tbuf; |
278 | enum xml_state { |
279 | XML_DEFAULT, |
280 | XML_IN_NORMALIZED |
281 | } xstate = 0; |
282 | utf8_char utf8 = { 0 }; |
283 | |
284 | get_char: |
285 | if (req->chunk_len == 0 || (req->chunk_off >= req->chunk_len)) { |
286 | req->chunk_len = http_req_read(req, req->chunk, |
287 | sizeof(req->chunk)); |
288 | req->chunk_off = 0; |
289 | wpr->read_len += req->chunk_len; |
290 | |
291 | if (req->chunk_len < 1 || (req->chunk_off >= req->chunk_len)) { |
292 | wpr->state = WP_STATE_DONE; |
293 | goto done_parsing; |
294 | } |
295 | |
296 | if (req->content_len > 0) { |
297 | pct = (wpr->read_len * 100) / req->content_len; |
298 | if (pct > 100) |
299 | pct = 100; |
300 | |
301 | progress("Fetching \"%s\" (%d%%)...", |
302 | wpr->normalized_title, pct); |
303 | } |
304 | |
305 | browser_debug_print(wpr->browser, req->chunk, req->chunk_len); |
306 | } |
307 | |
308 | switch (wpr->state) { |
309 | case WP_STATE_XML_INIT: |
310 | wpr->buf_size = 1024; |
311 | wpr->buf_idx = 0; |
312 | wpr->buf = xmalloc(wpr->buf_size); |
313 | if (wpr->buf == NULL) { |
314 | warn("Out of memory!"); |
315 | wpr->state = WP_STATE_DONE; |
316 | break; |
317 | } |
318 | wpr->state = WP_STATE_XML_PARSE; |
319 | goto get_char; |
320 | |
321 | case WP_STATE_XML_PARSE: |
322 | c = req->chunk[req->chunk_off++]; |
323 | |
324 | if (c == '<') { |
325 | wpr->buf[0] = '\0'; |
326 | wpr->buf_idx = 0; |
327 | } else if (c == '>') { |
328 | wpr->buf[wpr->buf_idx] = '\0'; |
329 | if (xstate == XML_DEFAULT) { |
330 | if (strcmp(wpr->buf, "normalized") == 0) { |
331 | xstate = XML_IN_NORMALIZED; |
332 | } else if (strncmp(wpr->buf, "slot ", 5) == 0) { |
333 | wpr->state = WP_STATE_WIKITEXT_INIT; |
334 | } else if (!req->content_len && |
335 | strncmp(wpr->buf, "rev size=", 9) == 0) { |
336 | if (sscanf(wpr->buf, "rev size=\"%ld\"", &len) == 1) |
337 | req->content_len = len; |
338 | } |
339 | } else if (xstate == XML_IN_NORMALIZED) { |
340 | char from_normalized[255], to_normalized[255]; |
341 | size_t count; |
342 | |
343 | if (sscanf(wpr->buf, "n from=\"%254[^\"]\" to=\"%254[^\"]\"%n", |
344 | &from_normalized, &to_normalized, &count) == 2 && |
345 | count > 10) { |
346 | if (wpr->normalized_title != NULL) |
347 | xfree(&wpr->normalized_title); |
348 | wpr->normalized_title = xstrdup(to_normalized); |
349 | if (wpr->normalized_title == NULL) { |
350 | warn("Out of memory!"); |
351 | goto done_parsing; |
352 | } |
353 | } else |
354 | xstate = XML_DEFAULT; |
355 | } |
356 | } else { |
357 | if (wpr->buf_idx >= wpr->buf_size) |
358 | panic("ran out of buf space parsing xml"); |
359 | wpr->buf[wpr->buf_idx++] = c; |
360 | } |
361 | |
362 | goto get_char; |
363 | |
364 | case WP_STATE_WIKITEXT_INIT: |
365 | wpr->article_len = 0; |
366 | wpr->buf_idx = 0; |
367 | wpr->buf[0] = '\0'; |
368 | |
369 | wpr->curlys = 0; |
370 | wpr->brackets = 0; |
371 | wpr->refs = 0; |
372 | wpr->style = 0; |
373 | wpr->last_style = 0; |
374 | wpr->trim_whitespace = true; |
375 | |
376 | wpr->state = WP_STATE_WIKITEXT_PARSE; |
377 | /* FALLTHROUGH */ |
378 | |
379 | case WP_STATE_WIKITEXT_PARSE: { |
380 | c = req->chunk[req->chunk_off]; |
381 | last = wpr->buf + wpr->buf_idx - 1; |
382 | |
383 | if (c == '<' || c == '\0') { |
384 | wpr->state = WP_STATE_DONE; |
385 | goto done_parsing; |
386 | } |
387 | |
388 | /* character conversions */ |
389 | |
390 | if (c == ';') { |
391 | /* XML entity decode */ |
392 | if (wpr->buf_idx >= 4 && |
393 | last[-3] == '&' && last[-2] == 'a' && last[-1] == 'm' && |
394 | last[0] == 'p') { |
395 | c = '&'; |
396 | wpr->buf_idx -= 4; |
397 | } else if (wpr->buf_idx >= 5 && |
398 | last[-4] == '&' && last[-3] == 'n' && last[-2] == 'b' && |
399 | last[-1] == 's' && last[0] == 'p') { |
400 | c = ' '; |
401 | wpr->buf_idx -= 5; |
402 | } else if (wpr->buf_idx >= 2 && |
403 | last[-2] == '&' && last[-1] == 'l' && last[0] == 't') { |
404 | c = '<'; |
405 | wpr->buf_idx -= 3; |
406 | } else if (wpr->buf_idx >= 2 && |
407 | last[-2] == '&' && last[-1] == 'g' && last[0] == 't') { |
408 | c = '>'; |
409 | wpr->buf_idx -= 3; |
410 | } |
411 | last = wpr->buf + wpr->buf_idx - 1; |
412 | } else if (c == '\n') { |
413 | c = '\r'; |
414 | } else if ((unsigned char)c >= UTF8_RANGE_START && |
415 | (unsigned char)c <= UTF8_RANGE_END) { |
416 | /* utf-8 */ |
417 | if (utf8[0] == 0) |
418 | utf8[0] = c; |
419 | else if (utf8[1] == 0) |
420 | utf8[1] = c; |
421 | else if (utf8[2] == 0) |
422 | utf8[2] = c; |
423 | else if (utf8[3] == 0) |
424 | utf8[3] = c; |
425 | else { |
426 | /* bogus */ |
427 | utf8[0] = 0; |
428 | c = 0; |
429 | } |
430 | |
431 | c = utf8_to_macroman(&utf8); |
432 | if (c) |
433 | memset(&utf8, 0, sizeof(utf8)); |
434 | } |
435 | |
436 | /* check for style changes */ |
437 | |
438 | if (wpr->buf_idx >= 1 && |
439 | last[0] == '{' && (c == '{' || c == '|')) { |
440 | wpr->curlys++; |
441 | wpr->buf_idx--; |
442 | wpr->style |= STYLE_TEMPLATE; |
443 | c = 0; |
444 | } else if (wpr->buf_idx >= 1 && |
445 | (last[0] == '}' || last[0] == '|') && c == '}') { |
446 | if (wpr->curlys) |
447 | wpr->curlys--; |
448 | wpr->buf_idx--; |
449 | if (wpr->curlys == 0) |
450 | wpr->style &= ~(STYLE_TEMPLATE); |
451 | c = 0; |
452 | } else if (wpr->buf_idx >= 1 && |
453 | last[0] == '[' && c == '[') { |
454 | if (wpr->brackets) |
455 | wpr->brackets++; |
456 | wpr->buf_idx--; |
457 | wpr->style |= STYLE_LINK; |
458 | c = 0; |
459 | } else if (wpr->buf_idx >= 1 && |
460 | last[0] == ']' && c == ']') { |
461 | if (wpr->brackets) |
462 | wpr->brackets--; |
463 | wpr->buf_idx--; |
464 | if (wpr->brackets == 0) |
465 | wpr->style &= ~(STYLE_LINK); |
466 | c = 0; |
467 | } else if (wpr->buf_idx >= 2 && |
468 | last[-1] == '\'' && last[0] == '\'' && c == '\'') { |
469 | if (wpr->style & STYLE_BOLD) |
470 | wpr->style &= ~(STYLE_BOLD); |
471 | else |
472 | wpr->style |= STYLE_BOLD; |
473 | wpr->buf_idx -= 2; |
474 | c = 0; |
475 | } else if (wpr->buf_idx >= 2 && |
476 | last[-1] == '\'' && last[0] == '\'' && c != '\'') { |
477 | if (wpr->style & STYLE_ITALIC) |
478 | wpr->style &= ~(STYLE_ITALIC); |
479 | else |
480 | wpr->style |= STYLE_ITALIC; |
481 | wpr->buf_idx -= 2; |
482 | /* keep c */ |
483 | } else if (wpr->buf_idx >= 4 && |
484 | last[-3] == '=' && last[-2] == '=' && last[-1] == '=' && |
485 | last[0] == '=' && c == '=') { |
486 | if (wpr->style & STYLE_H5) |
487 | wpr->style &= ~(STYLE_H5); |
488 | else |
489 | wpr->style |= STYLE_H5; |
490 | wpr->buf_idx -= 4; |
491 | c = 0; |
492 | } else if (wpr->buf_idx >= 4 && |
493 | last[-3] == '=' && last[-2] == '=' && last[-1] == '=' && |
494 | last[0] == '=' && c != '=') { |
495 | if (wpr->style & STYLE_H4) |
496 | wpr->style &= ~(STYLE_H4); |
497 | else |
498 | wpr->style |= STYLE_H4; |
499 | wpr->buf_idx -= 4; |
500 | /* keep c */ |
501 | } else if (wpr->buf_idx >= 3 && |
502 | last[-2] == '=' && last[-1] == '=' && last[0] == '=' && |
503 | c != '=') { |
504 | if (wpr->style & STYLE_H3) |
505 | wpr->style &= ~(STYLE_H3); |
506 | else |
507 | wpr->style |= STYLE_H3; |
508 | wpr->buf_idx -= 3; |
509 | /* keep c */ |
510 | } else if (wpr->buf_idx >= 2 && |
511 | last[-1] == '=' && last[0] == '=' && c != '=') { |
512 | if (wpr->style & STYLE_H2) |
513 | wpr->style &= ~(STYLE_H2); |
514 | else |
515 | wpr->style |= STYLE_H2; |
516 | wpr->buf_idx -= 2; |
517 | /* keep c */ |
518 | } else if (wpr->buf_idx >= 3 && |
519 | last[-2] == '<' && last[-1] == 'r' && last[0] == 'e' && |
520 | c == 'f') { |
521 | /* <ref */ |
522 | wpr->refs++; |
523 | wpr->style |= STYLE_REF; |
524 | wpr->buf_idx -= 3; |
525 | c = 0; |
526 | } else if ((wpr->style & STYLE_REF) && |
527 | ((wpr->buf_idx >= 5 && last[-4] == '<' && last[-3] == '/' && |
528 | last[-2] == 'r' && last[-1] == 'e' && last[0] == 'f' && |
529 | c == '>') || |
530 | (wpr->buf_idx >= 1 && last[0] == '/' && c == '>'))) { |
531 | /* </ref> or <ref /> */ |
532 | if (wpr->refs) |
533 | wpr->refs--; |
534 | if (wpr->refs == 0) |
535 | wpr->style &= ~(STYLE_REF); |
536 | c = 0; |
537 | } |
538 | |
539 | /* |
540 | * If our style changed as of this character, dump the buffer in |
541 | * the previous style and clear the buffer. |
542 | */ |
543 | |
544 | if (wpr->style != wpr->last_style) { |
545 | if (wpr->last_style & STYLE_TEMPLATE) { |
546 | if (strncmp(wpr->buf, "convert|", 8) == 0) { |
547 | /* convert|5.1|lb|... */ |
548 | /* convert|9|in|cm|adj=on */ |
549 | char *conv, *conv2; |
550 | size_t len; |
551 | |
552 | conv = xmalloc(wpr->buf_idx); |
553 | if (conv == NULL) { |
554 | warn("Failed allocating %ld", wpr->buf_idx); |
555 | break; |
556 | } |
557 | conv2 = xmalloc(wpr->buf_idx); |
558 | if (conv2 == NULL) { |
559 | warn("Failed allocating %ld", wpr->buf_idx); |
560 | xfree(&conv); |
561 | break; |
562 | } |
563 | wpr->buf[wpr->buf_idx] = '\0'; |
564 | if (sscanf(wpr->buf, "convert|%[^|]|%[^|]|%n", conv, |
565 | conv2, &len) == 2 && len >= 13) |
566 | wpr->buf_idx = snprintf(wpr->buf, wpr->buf_size, |
567 | "%s %s ", conv, conv2); |
568 | else |
569 | wpr->buf_idx = 0; |
570 | xfree(&conv); |
571 | xfree(&conv2); |
572 | } else |
573 | wpr->buf_idx = 0; |
574 | } |
575 | |
576 | /* maybe we can do something with these later */ |
577 | if (wpr->last_style & STYLE_REF) |
578 | wpr->buf_idx = 0; |
579 | |
580 | /* we can't show inline images */ |
581 | if ((wpr->last_style & STYLE_LINK) && |
582 | strncmp(wpr->buf, "File:", 5) == 0) { |
583 | wpr->buf_idx = 0; |
584 | wpr->trim_whitespace = true; |
585 | } |
586 | |
587 | if (wpr->last_style & (STYLE_TEMPLATE | |
588 | STYLE_H1 | STYLE_H2 | STYLE_H3 | STYLE_H4 | STYLE_H5)) |
589 | wpr->trim_whitespace = true; |
590 | |
591 | if ((wpr->style & STYLE_LINK) && wpr->article_len == 0 && |
592 | strncmp(wpr->buf, "#REDIRECT ", 10) == 0) { |
593 | wpr->buf_idx = 0; |
594 | wpr->redirect = true; |
595 | } else if (wpr->redirect && |
596 | !(wpr->style & STYLE_LINK) && (wpr->last_style & STYLE_LINK)) { |
597 | if (wpr->normalized_title) |
598 | xfree(&wpr->normalized_title); |
599 | wpr->buf[wpr->buf_idx] = '\0'; |
600 | wpr->normalized_title = xstrdup(wpr->buf); |
601 | if (wpr->normalized_title == NULL) { |
602 | warn("Out of memory!"); |
603 | wpr->state = WP_STATE_DONE; |
604 | } else |
605 | wpr->state = WP_STATE_HAVE_REDIRECT; |
606 | goto done_parsing; |
607 | } |
608 | |
609 | if (wpr->buf_idx) { |
610 | if (wpr->article_len == 0) |
611 | wikipedia_request_present(wpr); |
612 | |
613 | if (!browser_print(wpr->browser, wpr->buf, wpr->buf_idx, |
614 | wpr->last_style)) { |
615 | wpr->state = WP_STATE_DONE; |
616 | goto done_parsing; |
617 | } |
618 | wpr->article_len += wpr->buf_idx; |
619 | wpr->buf_idx = 0; |
620 | } |
621 | wpr->last_style = wpr->style; |
622 | } |
623 | |
624 | /* remove whitespace */ |
625 | if (c != 0 && wpr->trim_whitespace) { |
626 | if (c == '\r' || c == '\t' || c == ' ') |
627 | /* trim whitespace after these */ |
628 | c = 0; |
629 | else |
630 | wpr->trim_whitespace = false; |
631 | } |
632 | |
633 | /* and finally, add the new character */ |
634 | if (c != 0) { |
635 | if (wpr->buf_idx >= wpr->buf_size) { |
636 | tbuf = wpr->buf; |
637 | wpr->buf = xrealloc(wpr->buf, wpr->buf_size * 2); |
638 | if (wpr->buf == NULL) { |
639 | wpr->buf = tbuf; |
640 | warn("Failed resizing parse buffer to %ld bytes, " |
641 | "not enough memory", wpr->buf_size * 2); |
642 | wpr->state = WP_STATE_DONE; |
643 | goto done_parsing; |
644 | } |
645 | wpr->buf_size *= 2; |
646 | } |
647 | wpr->buf[wpr->buf_idx++] = c; |
648 | } |
649 | |
650 | req->chunk_off++; |
651 | goto get_char; |
652 | } |
653 | } |
654 | |
655 | done_parsing: |
656 | if (wpr->buf != NULL) |
657 | xfree(&wpr->buf); |
658 | |
659 | if (wpr->http_request != NULL) |
660 | http_req_free(&wpr->http_request); |
661 | } |
662 | |
663 | void |
664 | wikipedia_request_free(struct wikipedia_request **wprptr) |
665 | { |
666 | struct wikipedia_request *wpr = (struct wikipedia_request *)*wprptr; |
667 | |
668 | if (wpr == NULL) { |
669 | *wprptr = NULL; |
670 | return; |
671 | } |
672 | |
673 | if (wpr->http_request != NULL) |
674 | http_req_free(&wpr->http_request); |
675 | |
676 | *wprptr = NULL; |
677 | } |