Download
jcs
/wikipedia
/pdjson.c
(View History)
jcs pdjson: Remove, no longer needed | Latest amendment: 34 on 2022-09-07 |
1 | /* |
2 | * This is free and unencumbered software released into the public domain. |
3 | * |
4 | * Anyone is free to copy, modify, publish, use, compile, sell, or |
5 | * distribute this software, either in source code form or as a compiled |
6 | * binary, for any purpose, commercial or non-commercial, and by any |
7 | * means. |
8 | * |
9 | * In jurisdictions that recognize copyright laws, the author or authors |
10 | * of this software dedicate any and all copyright interest in the |
11 | * software to the public domain. We make this dedication for the benefit |
12 | * of the public at large and to the detriment of our heirs and |
13 | * successors. We intend this dedication to be an overt act of |
14 | * relinquishment in perpetuity of all present and future rights to this |
15 | * software under copyright law. |
16 | * |
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
19 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
20 | * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
21 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
22 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
23 | * OTHER DEALINGS IN THE SOFTWARE. |
24 | * |
25 | * For more information, please refer to <http://unlicense.org/> |
26 | */ |
27 | |
28 | #include <stdarg.h> |
29 | #include <stdlib.h> |
30 | #include <string.h> |
31 | #include <ctype.h> |
32 | #include "pdjson.h" |
33 | |
/*
 * Bits kept in json_stream.flags.
 *   JSON_FLAG_ERROR     - set by json_error(); while set, json_next()
 *                         returns JSON_ERROR.  Cleared by json_reset().
 *   JSON_FLAG_STREAMING - set by init(), toggled by json_set_streaming().
 */
#define JSON_FLAG_ERROR (1u << 0)
#define JSON_FLAG_STREAMING (1u << 1)

/* Initial size of the string buffer and the amount pushchar() grows it by. */
#define JSON_MALLOC_CHUNK 1024
38 | |
/*
 * Printable names of the token types.  Index 0 holds NULL and a trailing
 * NULL terminates the table.
 * NOTE(review): presumably indexed by enum json_type from pdjson.h; confirm
 * that the order here matches the enum.
 */
const char *json_typename[] = {
    NULL,
    "ERROR",
    "DONE",
    "OBJECT",
    "OBJECT_END",
    "ARRAY",
    "ARRAY_END",
    "STRING",
    "NUMBER",
    "TRUE",
    "FALSE",
    "NULL",
    NULL
};
54 | |
55 | void json_error(json_stream *json, char *format, ...); |
56 | |
57 | void |
58 | json_error(json_stream *json, char *format, ...) |
59 | { |
60 | va_list ap; |
61 | |
62 | if (!(json->flags & JSON_FLAG_ERROR)) { |
63 | json->flags |= JSON_FLAG_ERROR; |
64 | va_start(ap, format); |
65 | vsnprintf(json->errmsg, sizeof(json->errmsg), format, ap); |
66 | va_end(ap); |
67 | } |
68 | } |
69 | |
/*
 * Number of entries by which push() enlarges the nesting stack whenever it
 * runs out of room.  Can be overridden at compile time.
 * See also PDJSON_STACK_MAX, the optional nesting limit checked in push().
 */
#ifndef PDJSON_STACK_INC
# define PDJSON_STACK_INC 4
#endif
74 | |
/* One entry of the nesting stack: an open container and its progress. */
struct json_stack {
    /* Container kind given to push(): JSON_OBJECT or JSON_ARRAY. */
    enum json_type type;
    /* Zeroed by push(); incremented by json_next() as elements are read. */
    long count;
};
79 | |
80 | static enum json_type |
81 | push(json_stream *json, enum json_type type) |
82 | { |
83 | json->stack_top++; |
84 | |
85 | #ifdef PDJSON_STACK_MAX |
86 | if (json->stack_top > PDJSON_STACK_MAX) { |
87 | json_error(json, "%s", "maximum depth of nesting reached"); |
88 | return JSON_ERROR; |
89 | } |
90 | #endif |
91 | |
92 | if (json->stack_top >= json->stack_size) { |
93 | struct json_stack *stack; |
94 | size_t size; |
95 | |
96 | size = (json->stack_size + PDJSON_STACK_INC) * sizeof(*json->stack); |
97 | stack = (struct json_stack *)json->alloc.realloc(json->stack, size); |
98 | if (stack == NULL) { |
99 | json_error(json, "%s", "out of memory"); |
100 | return JSON_ERROR; |
101 | } |
102 | |
103 | json->stack_size += PDJSON_STACK_INC; |
104 | json->stack = stack; |
105 | } |
106 | |
107 | json->stack[json->stack_top].type = type; |
108 | json->stack[json->stack_top].count = 0; |
109 | |
110 | return type; |
111 | } |
112 | |
113 | static enum json_type |
114 | pop(json_stream *json, int c, enum json_type expected) |
115 | { |
116 | if (json->stack == NULL || |
117 | json->stack[json->stack_top].type != expected) { |
118 | json_error(json, "unexpected byte '%c'", c); |
119 | return JSON_ERROR; |
120 | } |
121 | |
122 | json->stack_top--; |
123 | return (expected == JSON_ARRAY ? JSON_ARRAY_END : JSON_OBJECT_END); |
124 | } |
125 | |
126 | static short |
127 | buffer_peek(struct json_source *source) |
128 | { |
129 | if (source->position < source->source.buffer.length) |
130 | return source->source.buffer.buffer[source->position]; |
131 | |
132 | return EOF; |
133 | } |
134 | |
135 | static short |
136 | buffer_get(struct json_source *source) |
137 | { |
138 | short c = source->peek(source); |
139 | source->position++; |
140 | return c; |
141 | } |
142 | |
143 | static short |
144 | stream_get(struct json_source *source) |
145 | { |
146 | source->position++; |
147 | return fgetc(source->source.stream.stream); |
148 | } |
149 | |
150 | static short |
151 | stream_peek(struct json_source *source) |
152 | { |
153 | int c = fgetc(source->source.stream.stream); |
154 | ungetc(c, source->source.stream.stream); |
155 | return c; |
156 | } |
157 | |
158 | static void |
159 | init(json_stream *json) |
160 | { |
161 | json->lineno = 1; |
162 | json->flags = JSON_FLAG_STREAMING; |
163 | json->errmsg[0] = '\0'; |
164 | json->ntokens = 0; |
165 | json->next = (enum json_type)0; |
166 | |
167 | json->stack = NULL; |
168 | json->stack_top = -1; |
169 | json->stack_size = 0; |
170 | |
171 | json->data.string = NULL; |
172 | json->data.string_size = 0; |
173 | json->data.string_fill = 0; |
174 | json->source.position = 0; |
175 | |
176 | json->alloc.malloc = malloc; |
177 | json->alloc.realloc = realloc; |
178 | json->alloc.free = free; |
179 | } |
180 | |
181 | static enum json_type |
182 | is_match(json_stream *json, const char *pattern, enum json_type type) |
183 | { |
184 | short c; |
185 | const char *p; |
186 | |
187 | for (p = pattern; *p; p++) { |
188 | if (*p != (c = json->source.get(&json->source))) { |
189 | json_error(json, "expected '%c' instead of byte '%c'", *p, c); |
190 | return JSON_ERROR; |
191 | } |
192 | } |
193 | return type; |
194 | } |
195 | |
196 | static short |
197 | pushchar(json_stream *json, int c) |
198 | { |
199 | if (json->data.string_fill == json->data.string_size) { |
200 | size_t size; |
201 | char *buffer; |
202 | |
203 | size = json->data.string_size + JSON_MALLOC_CHUNK; |
204 | buffer = (char *)json->alloc.realloc(json->data.string, size); |
205 | if (buffer == NULL) { |
206 | json_error(json, "%s", "out of memory"); |
207 | return -1; |
208 | } |
209 | |
210 | json->data.string_size = size; |
211 | json->data.string = buffer; |
212 | } |
213 | json->data.string[json->data.string_fill++] = c; |
214 | return 0; |
215 | } |
216 | |
217 | static short |
218 | init_string(json_stream *json) |
219 | { |
220 | json->data.string_fill = 0; |
221 | if (json->data.string == NULL) { |
222 | json->data.string_size = JSON_MALLOC_CHUNK; |
223 | json->data.string = json->alloc.malloc(json->data.string_size); |
224 | if (json->data.string == NULL) { |
225 | json_error(json, "%s", "out of memory"); |
226 | return -1; |
227 | } |
228 | } |
229 | json->data.string[0] = '\0'; |
230 | return 0; |
231 | } |
232 | |
233 | static short |
234 | encode_utf8(json_stream *json, unsigned long c) |
235 | { |
236 | if (c < 0x80UL) { |
237 | return pushchar(json, c); |
238 | } |
239 | |
240 | if (c < 0x0800UL) { |
241 | return !((pushchar(json, (c >> 6 & 0x1F) | 0xC0) == 0) && |
242 | (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0)); |
243 | } |
244 | |
245 | if (c < 0x010000UL) { |
246 | if (c >= 0xd800 && c <= 0xdfff) { |
247 | json_error(json, "invalid codepoint %06lx", c); |
248 | return -1; |
249 | } |
250 | return !((pushchar(json, (c >> 12 & 0x0F) | 0xE0) == 0) && |
251 | (pushchar(json, (c >> 6 & 0x3F) | 0x80) == 0) && |
252 | (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0)); |
253 | } |
254 | |
255 | if (c < 0x110000UL) { |
256 | return !((pushchar(json, (c >> 18 & 0x07) | 0xF0) == 0) && |
257 | (pushchar(json, (c >> 12 & 0x3F) | 0x80) == 0) && |
258 | (pushchar(json, (c >> 6 & 0x3F) | 0x80) == 0) && |
259 | (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0)); |
260 | } |
261 | |
262 | json_error(json, "unable to encode %06lx as UTF-8", c); |
263 | return -1; |
264 | } |
265 | |
/*
 * Convert one hexadecimal digit character (0-9, a-f, A-F) to its value
 * 0..15.  Returns -1 for any other input.
 */
static short
hexchar(int c)
{
    static const char lower[] = "0123456789abcdef";
    static const char upper[] = "0123456789ABCDEF";
    short value;

    /* Only the 16 digit positions are compared, never the terminator. */
    for (value = 0; value < 16; value++) {
        if (c == lower[value] || c == upper[value])
            return value;
    }

    return -1;
}
296 | |
297 | static long |
298 | read_unicode_cp(json_stream *json) |
299 | { |
300 | long cp = 0; |
301 | int shift = 12; |
302 | size_t i; |
303 | short c, hc; |
304 | |
305 | for (i = 0; i < 4; i++) { |
306 | c = json->source.get(&json->source); |
307 | |
308 | if (c == EOF) { |
309 | json_error(json, "%s", "unterminated string literal in Unicode"); |
310 | return -1; |
311 | } |
312 | |
313 | if ((hc = hexchar(c)) == -1) { |
314 | json_error(json, "invalid escape Unicode byte '%c'", c); |
315 | return -1; |
316 | } |
317 | |
318 | cp += hc * (1 << shift); |
319 | shift -= 4; |
320 | } |
321 | |
322 | return cp; |
323 | } |
324 | |
325 | static short |
326 | read_unicode(json_stream *json) |
327 | { |
328 | long cp, h, l; |
329 | short c; |
330 | |
331 | if ((cp = read_unicode_cp(json)) == -1) { |
332 | return -1; |
333 | } |
334 | |
335 | if (cp >= 0xd800 && cp <= 0xdbff) { |
336 | /* |
337 | * This is the high portion of a surrogate pair; we need to read |
338 | * the lower portion to get the codepoint |
339 | */ |
340 | h = cp; |
341 | |
342 | c = json->source.get(&json->source); |
343 | if (c == EOF) { |
344 | json_error(json, "%s", "unterminated string literal in Unicode"); |
345 | return -1; |
346 | } |
347 | if (c != '\\') { |
348 | json_error(json, "invalid continuation for surrogate pair '%c', " |
349 | "expected '\\'", c); |
350 | return -1; |
351 | } |
352 | |
353 | c = json->source.get(&json->source); |
354 | if (c == EOF) { |
355 | json_error(json, "%s", "unterminated string literal in Unicode"); |
356 | return -1; |
357 | } |
358 | if (c != 'u') { |
359 | json_error(json, "invalid continuation for surrogate pair '%c', " |
360 | "expected 'u'", c); |
361 | return -1; |
362 | } |
363 | |
364 | if ((l = read_unicode_cp(json)) == -1) { |
365 | return -1; |
366 | } |
367 | if (l < 0xdc00 || l > 0xdfff) { |
368 | json_error(json, "surrogate pair continuation \\u%04lx out " |
369 | "of range (dc00-dfff)", l); |
370 | return -1; |
371 | } |
372 | |
373 | cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000); |
374 | } else if (cp >= 0xdc00 && cp <= 0xdfff) { |
375 | json_error(json, "dangling surrogate \\u%04lx", cp); |
376 | return -1; |
377 | } |
378 | |
379 | return encode_utf8(json, cp); |
380 | } |
381 | |
382 | static short |
383 | read_escaped(json_stream *json) |
384 | { |
385 | int c; |
386 | |
387 | c = json->source.get(&json->source); |
388 | if (c == EOF) { |
389 | json_error(json, "%s", "unterminated string literal in escape"); |
390 | return -1; |
391 | } |
392 | |
393 | if (c == 'u') { |
394 | if (read_unicode(json) != 0) |
395 | return -1; |
396 | } else { |
397 | switch (c) { |
398 | case '\\': |
399 | case 'b': |
400 | case 'f': |
401 | case 'n': |
402 | case 'r': |
403 | case 't': |
404 | case '/': |
405 | case '"': |
406 | { |
407 | const char *codes = "\\bfnrt/\""; |
408 | const char *p = strchr(codes, c); |
409 | if (pushchar(json, "\\\b\f\n\r\t/\""[p - codes]) != 0) |
410 | return -1; |
411 | } |
412 | break; |
413 | default: |
414 | json_error(json, "invalid escaped byte '%c'", c); |
415 | return -1; |
416 | } |
417 | } |
418 | return 0; |
419 | } |
420 | |
/*
 * Report whether a byte may not appear literally inside a JSON string:
 * control characters below 0x20, the double quote (0x22) and the backslash
 * (0x5c).  Negative values are never reported as needing escaping.
 */
static bool
char_needs_escaping(short c)
{
    if (c < 0)
        return false;

    return c < 0x20 || c == 0x22 || c == 0x5c;
}
430 | |
/*
 * Given the first byte of a UTF-8 sequence, return how many bytes the
 * sequence occupies (1-4), or 0 if the byte cannot start a sequence:
 *   00-7F  single byte
 *   80-BF  continuation byte, not a valid start
 *   C0-C1  would be an overlong encoding of an ASCII byte
 *   C2-DF  two bytes
 *   E0-EF  three bytes
 *   F0-F4  four bytes
 *   F5-FF  restricted or invalid
 */
static short
utf8_seq_length(char byte)
{
    unsigned char lead = (unsigned char) byte;

    if (lead < 0x80)
        return 1;
    if (lead < 0xC2)
        return 0;
    if (lead <= 0xDF)
        return 2;
    if (lead <= 0xEF)
        return 3;
    if (lead <= 0xF4)
        return 4;

    return 0;
}
465 | |
/*
 * Check that `bytes[0..length-1]` is a well-formed UTF-8 sequence of the
 * given length.
 *
 * Returns false for a NULL pointer or a length outside 1..4.  Otherwise:
 *   - the third and fourth bytes, when present, must be 80-BF;
 *   - the second byte, when present, must be 80-BF, narrowed for certain
 *     lead bytes: E0 -> A0-BF and F0 -> 90-BF (rejects overlong forms),
 *     ED -> 80-9F (rejects surrogates), F4 -> 80-8F (rejects values above
 *     10FFFF);
 *   - the lead byte must not be 80-C1 and must not exceed F4.
 *
 * The function does not check that `length` agrees with the lead byte;
 * callers obtain the length from utf8_seq_length().
 *
 * The arguments are validated before any pointer is derived from `bytes`,
 * so a NULL argument is never used in pointer arithmetic.
 */
static bool
is_legal_utf8(const unsigned char *bytes, int length)
{
    unsigned char lead;
    unsigned char lo = 0x80;
    unsigned char hi = 0xBF;
    int i;

    if (bytes == NULL || length < 1 || length > 4)
        return false;

    lead = bytes[0];

    /* Third and fourth bytes are plain continuation bytes. */
    for (i = length - 1; i >= 2; i--) {
        if (bytes[i] < 0x80 || bytes[i] > 0xBF)
            return false;
    }

    if (length >= 2) {
        /* The allowed range of the second byte depends on the lead byte. */
        switch (lead) {
        case 0xE0: lo = 0xA0; break;
        case 0xED: hi = 0x9F; break;
        case 0xF0: lo = 0x90; break;
        case 0xF4: hi = 0x8F; break;
        default: break;
        }
        if (bytes[1] < lo || bytes[1] > hi)
            return false;
    }

    if (lead >= 0x80 && lead < 0xC2)
        return false;

    return lead <= 0xF4;
}
519 | |
520 | static short |
521 | read_utf8(json_stream* json, short next_char) |
522 | { |
523 | short count; |
524 | char buffer[4]; |
525 | short i; |
526 | |
527 | count = utf8_seq_length(next_char); |
528 | if (!count) { |
529 | json_error(json, "%s", "invalid UTF-8 character"); |
530 | return -1; |
531 | } |
532 | |
533 | buffer[0] = next_char; |
534 | for (i = 1; i < count; ++i) { |
535 | buffer[i] = json->source.get(&json->source);; |
536 | } |
537 | |
538 | if (!is_legal_utf8((unsigned char*) buffer, count)) { |
539 | json_error(json, "%s", "invalid UTF-8 text"); |
540 | return -1; |
541 | } |
542 | |
543 | for (i = 0; i < count; ++i) { |
544 | if (pushchar(json, buffer[i]) != 0) |
545 | return -1; |
546 | } |
547 | |
548 | return 0; |
549 | } |
550 | |
551 | static enum json_type |
552 | read_string(json_stream *json) |
553 | { |
554 | short c; |
555 | |
556 | if (init_string(json) != 0) |
557 | return JSON_ERROR; |
558 | |
559 | for (;;) { |
560 | c = json->source.get(&json->source); |
561 | if (c == EOF) { |
562 | json_error(json, "%s", "unterminated string literal"); |
563 | return JSON_ERROR; |
564 | } |
565 | if (c == '"') { |
566 | if (pushchar(json, '\0') == 0) |
567 | return JSON_STRING; |
568 | return JSON_ERROR; |
569 | } |
570 | if (c == '\\') { |
571 | if (read_escaped(json) != 0) |
572 | return JSON_ERROR; |
573 | } else if ((unsigned) c >= 0x80) { |
574 | if (read_utf8(json, c) != 0) |
575 | return JSON_ERROR; |
576 | } else { |
577 | if (char_needs_escaping(c)) { |
578 | json_error(json, "%s", |
579 | "unescaped control character in string"); |
580 | return JSON_ERROR; |
581 | } |
582 | |
583 | if (pushchar(json, c) != 0) |
584 | return JSON_ERROR; |
585 | } |
586 | } |
587 | |
588 | return JSON_ERROR; |
589 | } |
590 | |
/* True when c is one of the decimal digit characters '0' through '9'. */
static bool
is_digit(int c)
{
    if (c < '0' || c > '9')
        return false;

    return true;
}
596 | |
597 | static short |
598 | read_digits(json_stream *json) |
599 | { |
600 | short c; |
601 | unsigned nread = 0; |
602 | |
603 | while (is_digit(c = json->source.peek(&json->source))) { |
604 | if (pushchar(json, json->source.get(&json->source)) != 0) |
605 | return -1; |
606 | |
607 | nread++; |
608 | } |
609 | |
610 | if (nread == 0) { |
611 | json_error(json, "expected digit instead of byte '%c'", c); |
612 | return -1; |
613 | } |
614 | |
615 | return 0; |
616 | } |
617 | |
618 | static enum json_type |
619 | read_number(json_stream *json, short c) |
620 | { |
621 | if (pushchar(json, c) != 0) |
622 | return JSON_ERROR; |
623 | |
624 | if (c == '-') { |
625 | c = json->source.get(&json->source); |
626 | if (is_digit(c)) { |
627 | return read_number(json, c); |
628 | } |
629 | json_error(json, "unexpected byte '%c' in number", c); |
630 | return JSON_ERROR; |
631 | } else if (c >= '0' && c <= '9') { |
632 | c = json->source.peek(&json->source); |
633 | if (is_digit(c)) { |
634 | if (read_digits(json) != 0) |
635 | return JSON_ERROR; |
636 | } |
637 | } |
638 | |
639 | /* Up to decimal or exponent has been read. */ |
640 | c = json->source.peek(&json->source); |
641 | if (strchr(".eE", c) == NULL) { |
642 | if (pushchar(json, '\0') != 0) |
643 | return JSON_ERROR; |
644 | else |
645 | return JSON_NUMBER; |
646 | } |
647 | if (c == '.') { |
648 | json->source.get(&json->source); // consume . |
649 | if (pushchar(json, c) != 0) |
650 | return JSON_ERROR; |
651 | if (read_digits(json) != 0) |
652 | return JSON_ERROR; |
653 | } |
654 | /* Check for exponent. */ |
655 | c = json->source.peek(&json->source); |
656 | if (c == 'e' || c == 'E') { |
657 | json->source.get(&json->source); // consume e/E |
658 | if (pushchar(json, c) != 0) |
659 | return JSON_ERROR; |
660 | c = json->source.peek(&json->source); |
661 | if (c == '+' || c == '-') { |
662 | json->source.get(&json->source); // consume |
663 | if (pushchar(json, c) != 0) |
664 | return JSON_ERROR; |
665 | if (read_digits(json) != 0) |
666 | return JSON_ERROR; |
667 | } else if (is_digit(c)) { |
668 | if (read_digits(json) != 0) |
669 | return JSON_ERROR; |
670 | } else { |
671 | json_error(json, "unexpected byte '%c' in number", c); |
672 | return JSON_ERROR; |
673 | } |
674 | } |
675 | |
676 | if (pushchar(json, '\0') != 0) |
677 | return JSON_ERROR; |
678 | |
679 | return JSON_NUMBER; |
680 | } |
681 | |
/*
 * True for the four JSON whitespace bytes: horizontal tab (0x09), line feed
 * (0x0a), carriage return (0x0d) and space (0x20).  Nothing else counts.
 */
bool
json_isspace(short c)
{
    return c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d;
}
695 | |
696 | /* Returns the next non-whitespace character in the stream. */ |
697 | static short |
698 | next(json_stream *json) |
699 | { |
700 | short c; |
701 | |
702 | while (json_isspace(c = json->source.get(&json->source))) { |
703 | if (c == '\n') |
704 | json->lineno++; |
705 | } |
706 | |
707 | return c; |
708 | } |
709 | |
710 | static enum json_type |
711 | read_value(json_stream *json, short c) |
712 | { |
713 | json->ntokens++; |
714 | |
715 | switch (c) { |
716 | case EOF: |
717 | json_error(json, "%s", "unexpected end of text"); |
718 | return JSON_ERROR; |
719 | case '{': |
720 | return push(json, JSON_OBJECT); |
721 | case '[': |
722 | return push(json, JSON_ARRAY); |
723 | case '"': |
724 | return read_string(json); |
725 | case 'n': |
726 | return is_match(json, "ull", JSON_NULL); |
727 | case 'f': |
728 | return is_match(json, "alse", JSON_FALSE); |
729 | case 't': |
730 | return is_match(json, "rue", JSON_TRUE); |
731 | case '0': |
732 | case '1': |
733 | case '2': |
734 | case '3': |
735 | case '4': |
736 | case '5': |
737 | case '6': |
738 | case '7': |
739 | case '8': |
740 | case '9': |
741 | case '-': |
742 | if (init_string(json) != 0) |
743 | return JSON_ERROR; |
744 | return read_number(json, c); |
745 | default: |
746 | json_error(json, "unexpected byte '%c' in value", c); |
747 | return JSON_ERROR; |
748 | } |
749 | } |
750 | |
751 | enum json_type |
752 | json_peek(json_stream *json) |
753 | { |
754 | enum json_type next; |
755 | if (json->next) |
756 | next = json->next; |
757 | else |
758 | next = json->next = json_next(json); |
759 | return next; |
760 | } |
761 | |
/*
 * Advance the parser by one token and return its type.
 *
 * Order of checks:
 *   1. a stream already flagged with an error keeps returning JSON_ERROR;
 *   2. a token cached by json_peek() is handed out and the cache cleared;
 *   3. if at least one token has been read and no container is open, the
 *      current top-level value is complete and JSON_DONE is returned (in
 *      non-streaming mode only after checking that nothing but whitespace
 *      remains);
 *   4. otherwise the next non-whitespace byte is read and interpreted
 *      according to the innermost open container, if any.
 *
 * Inside an object, the per-level count distinguishes the states: 0 means
 * no member yet, an odd count means a member name was just read (a ':' and
 * a value must follow), a non-zero even count means a member value was just
 * read (a ',' or '}' must follow).
 */
enum json_type
json_next(json_stream *json)
{
    short c;
    enum json_type value;

    if (json->flags & JSON_FLAG_ERROR)
        return JSON_ERROR;
    if (json->next != 0) {
        /* Hand out the token that json_peek() looked at earlier. */
        enum json_type next = json->next;
        json->next = (enum json_type)0;
        return next;
    }
    if (json->ntokens > 0 && json->stack_top == (size_t)-1) {
        /*
         * In the streaming mode leave any trailing whitespaces in the
         * stream. This allows the user to validate any desired
         * separation between values (such as newlines) using
         * json_source_get/peek() with any remaining whitespaces ignored
         * as leading when we parse the next value.
         */
        if (!(json->flags & JSON_FLAG_STREAMING)) {
            short c;

            /* Consume trailing whitespace; stop at the first other byte. */
            do {
                c = json->source.peek(&json->source);
                if (json_isspace(c)) {
                    c = json->source.get(&json->source);
                }
            } while (json_isspace(c));

            if (c != EOF) {
                json_error(json,
                    "expected end of text instead of byte '%c'", c);
                return JSON_ERROR;
            }
        }

        return JSON_DONE;
    }
    c = next(json);
    if (json->stack_top == (size_t)-1) {
        /* Top level: in streaming mode a clean end of input is not an error. */
        if (c == EOF && (json->flags & JSON_FLAG_STREAMING))
            return JSON_DONE;

        return read_value(json, c);
    }
    if (json->stack[json->stack_top].type == JSON_ARRAY) {
        if (json->stack[json->stack_top].count == 0) {
            /* First position: either an empty array or the first element. */
            if (c == ']') {
                return pop(json, c, JSON_ARRAY);
            }
            json->stack[json->stack_top].count++;
            return read_value(json, c);
        } else if (c == ',') {
            json->stack[json->stack_top].count++;
            return read_value(json, next(json));
        } else if (c == ']') {
            return pop(json, c, JSON_ARRAY);
        } else {
            json_error(json, "unexpected byte '%c'", c);
            return JSON_ERROR;
        }
    } else if (json->stack[json->stack_top].type == JSON_OBJECT) {
        if (json->stack[json->stack_top].count == 0) {
            if (c == '}') {
                return pop(json, c, JSON_OBJECT);
            }

            /* No member name/value pairs yet. */
            value = read_value(json, c);
            if (value != JSON_STRING) {
                /* Keep the more specific error if read_value() set one. */
                if (value != JSON_ERROR)
                    json_error(json, "%s", "expected member name or '}'");
                return JSON_ERROR;
            } else {
                json->stack[json->stack_top].count++;
                return value;
            }
        } else if ((json->stack[json->stack_top].count % 2) == 0) {
            /* Expecting comma followed by member name. */
            if (c != ',' && c != '}') {
                json_error(json,
                    "%s", "expected ',' or '}' after member value");
                return JSON_ERROR;
            } else if (c == '}') {
                return pop(json, c, JSON_OBJECT);
            } else {
                enum json_type value = read_value(json, next(json));
                if (value != JSON_STRING) {
                    if (value != JSON_ERROR)
                        json_error(json, "%s", "expected member name");
                    return JSON_ERROR;
                } else {
                    json->stack[json->stack_top].count++;
                    return value;
                }
            }
        } else if ((json->stack[json->stack_top].count % 2) == 1) {
            /* Expecting colon followed by value. */
            if (c != ':') {
                json_error(json, "%s", "expected ':' after member name");
                return JSON_ERROR;
            } else {
                json->stack[json->stack_top].count++;
                return read_value(json, next(json));
            }
        }
    }
    /* The top of the stack is neither an array nor an object. */
    json_error(json, "%s", "invalid parser state");
    return JSON_ERROR;
}
874 | |
875 | void |
876 | json_reset(json_stream *json) |
877 | { |
878 | json->stack_top = -1; |
879 | json->ntokens = 0; |
880 | json->flags &= ~JSON_FLAG_ERROR; |
881 | json->errmsg[0] = '\0'; |
882 | } |
883 | |
884 | enum json_type |
885 | json_skip(json_stream *json) |
886 | { |
887 | enum json_type type, skip; |
888 | size_t cnt_arr = 0; |
889 | size_t cnt_obj = 0; |
890 | |
891 | type = json_next(json); |
892 | |
893 | for (skip = type; ; skip = json_next(json)) { |
894 | if (skip == JSON_ERROR || skip == JSON_DONE) |
895 | return skip; |
896 | |
897 | if (skip == JSON_ARRAY) { |
898 | ++cnt_arr; |
899 | } else if (skip == JSON_ARRAY_END && cnt_arr > 0) { |
900 | --cnt_arr; |
901 | } else if (skip == JSON_OBJECT) { |
902 | ++cnt_obj; |
903 | } else if (skip == JSON_OBJECT_END && cnt_obj > 0) { |
904 | --cnt_obj; |
905 | } |
906 | |
907 | if (!cnt_arr && !cnt_obj) |
908 | break; |
909 | } |
910 | |
911 | return type; |
912 | } |
913 | |
914 | enum json_type |
915 | json_skip_until(json_stream *json, enum json_type type) |
916 | { |
917 | for (;;) { |
918 | enum json_type skip = json_skip(json); |
919 | |
920 | if (skip == JSON_ERROR || skip == JSON_DONE) |
921 | return skip; |
922 | |
923 | if (skip == type) |
924 | break; |
925 | } |
926 | |
927 | return type; |
928 | } |
929 | |
930 | const char * |
931 | json_get_string(json_stream *json, size_t *length) |
932 | { |
933 | if (length != NULL) |
934 | *length = json->data.string_fill; |
935 | if (json->data.string == NULL) |
936 | return ""; |
937 | return json->data.string; |
938 | } |
939 | |
940 | double |
941 | json_get_number(json_stream *json) |
942 | { |
943 | char *p = json->data.string; |
944 | return p == NULL ? 0 : atoi(p); /* XXX: this should use strtod */ |
945 | } |
946 | |
947 | const char * |
948 | json_get_error(json_stream *json) |
949 | { |
950 | return (json->flags & JSON_FLAG_ERROR ? json->errmsg : NULL); |
951 | } |
952 | |
953 | size_t |
954 | json_get_lineno(json_stream *json) |
955 | { |
956 | return json->lineno; |
957 | } |
958 | |
959 | size_t |
960 | json_get_position(json_stream *json) |
961 | { |
962 | return json->source.position; |
963 | } |
964 | |
965 | size_t |
966 | json_get_depth(json_stream *json) |
967 | { |
968 | return json->stack_top + 1; |
969 | } |
970 | |
971 | /* |
972 | * Return the current parsing context, that is, JSON_OBJECT if we are |
973 | * inside an object, JSON_ARRAY if we are inside an array, and JSON_DONE |
974 | * if we are not yet/anymore in either. |
975 | * |
976 | * Additionally, for the first two cases, also return the number of parsing |
977 | * events that have already been observed at this level with |
978 | * json_next/peek(). In particular, inside an object, an odd number would |
979 | * indicate that the just observed JSON_STRING event is a member name. |
980 | */ |
981 | enum json_type |
982 | json_get_context(json_stream *json, size_t *count) |
983 | { |
984 | if (json->stack_top == (size_t)-1) |
985 | return JSON_DONE; |
986 | |
987 | if (count != NULL) |
988 | *count = json->stack[json->stack_top].count; |
989 | |
990 | return json->stack[json->stack_top].type; |
991 | } |
992 | |
993 | short |
994 | json_source_get(json_stream *json) |
995 | { |
996 | short c = json->source.get(&json->source); |
997 | if (c == '\n') |
998 | json->lineno++; |
999 | return c; |
1000 | } |
1001 | |
1002 | short |
1003 | json_source_peek(json_stream *json) |
1004 | { |
1005 | return json->source.peek(&json->source); |
1006 | } |
1007 | |
1008 | void |
1009 | json_open_buffer(json_stream *json, const void *buffer, size_t size) |
1010 | { |
1011 | init(json); |
1012 | json->source.get = buffer_get; |
1013 | json->source.peek = buffer_peek; |
1014 | json->source.source.buffer.buffer = (const char *)buffer; |
1015 | json->source.source.buffer.length = size; |
1016 | } |
1017 | |
1018 | void |
1019 | json_open_string(json_stream *json, const char *string) |
1020 | { |
1021 | json_open_buffer(json, string, strlen(string)); |
1022 | } |
1023 | |
1024 | void |
1025 | json_open_stream(json_stream *json, FILE * stream) |
1026 | { |
1027 | init(json); |
1028 | json->source.get = stream_get; |
1029 | json->source.peek = stream_peek; |
1030 | json->source.source.stream.stream = stream; |
1031 | } |
1032 | |
1033 | static short |
1034 | user_get(struct json_source *json) |
1035 | { |
1036 | return json->source.user.get(json->source.user.ptr); |
1037 | } |
1038 | |
1039 | static short |
1040 | user_peek(struct json_source *json) |
1041 | { |
1042 | return json->source.user.peek(json->source.user.ptr); |
1043 | } |
1044 | |
1045 | void |
1046 | json_open_user(json_stream *json, json_user_io get, json_user_io peek, |
1047 | void *user) |
1048 | { |
1049 | init(json); |
1050 | json->source.get = user_get; |
1051 | json->source.peek = user_peek; |
1052 | json->source.source.user.ptr = user; |
1053 | json->source.source.user.get = get; |
1054 | json->source.source.user.peek = peek; |
1055 | } |
1056 | |
/*
 * Replace the stream's allocator with a copy of *a.  init() installs
 * malloc/realloc/free by default.
 * NOTE(review): buffers already obtained from the previous allocator are
 * not migrated, so this presumably has to be called before parsing starts;
 * confirm against the API documentation.
 */
void
json_set_allocator(json_stream *json, json_allocator *a)
{
    json->alloc = *a;
}
1062 | |
1063 | void |
1064 | json_set_streaming(json_stream *json, bool streaming) |
1065 | { |
1066 | if (streaming) |
1067 | json->flags |= JSON_FLAG_STREAMING; |
1068 | else |
1069 | json->flags &= ~JSON_FLAG_STREAMING; |
1070 | } |
1071 | |
1072 | void |
1073 | json_close(json_stream *json) |
1074 | { |
1075 | json->alloc.free(json->stack); |
1076 | json->alloc.free(json->data.string); |
1077 | } |