/* * This is free and unencumbered software released into the public domain. * * Anyone is free to copy, modify, publish, use, compile, sell, or * distribute this software, either in source code form or as a compiled * binary, for any purpose, commercial or non-commercial, and by any * means. * * In jurisdictions that recognize copyright laws, the author or authors * of this software dedicate any and all copyright interest in the * software to the public domain. We make this dedication for the benefit * of the public at large and to the detriment of our heirs and * successors. We intend this dedication to be an overt act of * relinquishment in perpetuity of all present and future rights to this * software under copyright law. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * For more information, please refer to */ #include #include #include #include #include "pdjson.h" #define JSON_FLAG_ERROR (1u << 0) #define JSON_FLAG_STREAMING (1u << 1) #define JSON_MALLOC_CHUNK 1024 const char *json_typename[] = { NULL, "ERROR", "DONE", "OBJECT", "OBJECT_END", "ARRAY", "ARRAY_END", "STRING", "NUMBER", "TRUE", "FALSE", "NULL", NULL }; void json_error(json_stream *json, char *format, ...); void json_error(json_stream *json, char *format, ...) { va_list ap; if (!(json->flags & JSON_FLAG_ERROR)) { json->flags |= JSON_FLAG_ERROR; va_start(ap, format); vsnprintf(json->errmsg, sizeof(json->errmsg), format, ap); va_end(ap); } } /* See also PDJSON_STACK_MAX below. */ #ifndef PDJSON_STACK_INC # define PDJSON_STACK_INC 4 #endif struct json_stack { enum json_type type; long count; }; static enum json_type push(json_stream *json, enum json_type type) { json->stack_top++; #ifdef PDJSON_STACK_MAX if (json->stack_top > PDJSON_STACK_MAX) { json_error(json, "%s", "maximum depth of nesting reached"); return JSON_ERROR; } #endif if (json->stack_top >= json->stack_size) { struct json_stack *stack; size_t size; size = (json->stack_size + PDJSON_STACK_INC) * sizeof(*json->stack); stack = (struct json_stack *)json->alloc.realloc(json->stack, size); if (stack == NULL) { json_error(json, "%s", "out of memory"); return JSON_ERROR; } json->stack_size += PDJSON_STACK_INC; json->stack = stack; } json->stack[json->stack_top].type = type; json->stack[json->stack_top].count = 0; return type; } static enum json_type pop(json_stream *json, int c, enum json_type expected) { if (json->stack == NULL || json->stack[json->stack_top].type != expected) { json_error(json, "unexpected byte '%c'", c); return JSON_ERROR; } json->stack_top--; return (expected == JSON_ARRAY ? JSON_ARRAY_END : JSON_OBJECT_END); } static short buffer_peek(struct json_source *source) { if (source->position < source->source.buffer.length) return source->source.buffer.buffer[source->position]; return EOF; } static short buffer_get(struct json_source *source) { short c = source->peek(source); source->position++; return c; } static short stream_get(struct json_source *source) { source->position++; return fgetc(source->source.stream.stream); } static short stream_peek(struct json_source *source) { int c = fgetc(source->source.stream.stream); ungetc(c, source->source.stream.stream); return c; } static void init(json_stream *json) { json->lineno = 1; json->flags = JSON_FLAG_STREAMING; json->errmsg[0] = '\0'; json->ntokens = 0; json->next = (enum json_type)0; json->stack = NULL; json->stack_top = -1; json->stack_size = 0; json->data.string = NULL; json->data.string_size = 0; json->data.string_fill = 0; json->source.position = 0; json->alloc.malloc = malloc; json->alloc.realloc = realloc; json->alloc.free = free; } static enum json_type is_match(json_stream *json, const char *pattern, enum json_type type) { short c; const char *p; for (p = pattern; *p; p++) { if (*p != (c = json->source.get(&json->source))) { json_error(json, "expected '%c' instead of byte '%c'", *p, c); return JSON_ERROR; } } return type; } static short pushchar(json_stream *json, int c) { if (json->data.string_fill == json->data.string_size) { size_t size; char *buffer; size = json->data.string_size + JSON_MALLOC_CHUNK; buffer = (char *)json->alloc.realloc(json->data.string, size); if (buffer == NULL) { json_error(json, "%s", "out of memory"); return -1; } json->data.string_size = size; json->data.string = buffer; } json->data.string[json->data.string_fill++] = c; return 0; } static short init_string(json_stream *json) { json->data.string_fill = 0; if (json->data.string == NULL) { json->data.string_size = JSON_MALLOC_CHUNK; json->data.string = json->alloc.malloc(json->data.string_size); if (json->data.string == NULL) { json_error(json, "%s", "out of memory"); return -1; } } json->data.string[0] = '\0'; return 0; } static short encode_utf8(json_stream *json, unsigned long c) { if (c < 0x80UL) { return pushchar(json, c); } if (c < 0x0800UL) { return !((pushchar(json, (c >> 6 & 0x1F) | 0xC0) == 0) && (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0)); } if (c < 0x010000UL) { if (c >= 0xd800 && c <= 0xdfff) { json_error(json, "invalid codepoint %06lx", c); return -1; } return !((pushchar(json, (c >> 12 & 0x0F) | 0xE0) == 0) && (pushchar(json, (c >> 6 & 0x3F) | 0x80) == 0) && (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0)); } if (c < 0x110000UL) { return !((pushchar(json, (c >> 18 & 0x07) | 0xF0) == 0) && (pushchar(json, (c >> 12 & 0x3F) | 0x80) == 0) && (pushchar(json, (c >> 6 & 0x3F) | 0x80) == 0) && (pushchar(json, (c >> 0 & 0x3F) | 0x80) == 0)); } json_error(json, "unable to encode %06lx as UTF-8", c); return -1; } static short hexchar(int c) { switch (c) { case '0': return 0; case '1': return 1; case '2': return 2; case '3': return 3; case '4': return 4; case '5': return 5; case '6': return 6; case '7': return 7; case '8': return 8; case '9': return 9; case 'a': case 'A': return 10; case 'b': case 'B': return 11; case 'c': case 'C': return 12; case 'd': case 'D': return 13; case 'e': case 'E': return 14; case 'f': case 'F': return 15; default: return -1; } } static long read_unicode_cp(json_stream *json) { long cp = 0; int shift = 12; size_t i; short c, hc; for (i = 0; i < 4; i++) { c = json->source.get(&json->source); if (c == EOF) { json_error(json, "%s", "unterminated string literal in Unicode"); return -1; } if ((hc = hexchar(c)) == -1) { json_error(json, "invalid escape Unicode byte '%c'", c); return -1; } cp += hc * (1 << shift); shift -= 4; } return cp; } static short read_unicode(json_stream *json) { long cp, h, l; short c; if ((cp = read_unicode_cp(json)) == -1) { return -1; } if (cp >= 0xd800 && cp <= 0xdbff) { /* * This is the high portion of a surrogate pair; we need to read * the lower portion to get the codepoint */ h = cp; c = json->source.get(&json->source); if (c == EOF) { json_error(json, "%s", "unterminated string literal in Unicode"); return -1; } if (c != '\\') { json_error(json, "invalid continuation for surrogate pair '%c', " "expected '\\'", c); return -1; } c = json->source.get(&json->source); if (c == EOF) { json_error(json, "%s", "unterminated string literal in Unicode"); return -1; } if (c != 'u') { json_error(json, "invalid continuation for surrogate pair '%c', " "expected 'u'", c); return -1; } if ((l = read_unicode_cp(json)) == -1) { return -1; } if (l < 0xdc00 || l > 0xdfff) { json_error(json, "surrogate pair continuation \\u%04lx out " "of range (dc00-dfff)", l); return -1; } cp = ((h - 0xd800) * 0x400) + ((l - 0xdc00) + 0x10000); } else if (cp >= 0xdc00 && cp <= 0xdfff) { json_error(json, "dangling surrogate \\u%04lx", cp); return -1; } return encode_utf8(json, cp); } static short read_escaped(json_stream *json) { int c; c = json->source.get(&json->source); if (c == EOF) { json_error(json, "%s", "unterminated string literal in escape"); return -1; } if (c == 'u') { if (read_unicode(json) != 0) return -1; } else { switch (c) { case '\\': case 'b': case 'f': case 'n': case 'r': case 't': case '/': case '"': { const char *codes = "\\bfnrt/\""; const char *p = strchr(codes, c); if (pushchar(json, "\\\b\f\n\r\t/\""[p - codes]) != 0) return -1; } break; default: json_error(json, "invalid escaped byte '%c'", c); return -1; } } return 0; } static bool char_needs_escaping(short c) { if ((c >= 0) && (c < 0x20 || c == 0x22 || c == 0x5c)) { return true; } return false; } static short utf8_seq_length(char byte) { unsigned char u = (unsigned char) byte; if (u < 0x80) return 1; if (0x80 <= u && u <= 0xBF) { // second, third or fourth byte of a multi-byte // sequence, i.e. a "continuation byte" return 0; } if (u == 0xC0 || u == 0xC1) { // overlong encoding of an ASCII byte return 0; } if (0xC2 <= u && u <= 0xDF) { // 2-byte sequence return 2; } if (0xE0 <= u && u <= 0xEF) { // 3-byte sequence return 3; } if (0xF0 <= u && u <= 0xF4) { // 4-byte sequence return 4; } // u >= 0xF5 // Restricted (start of 4-, 5- or 6-byte sequence) or invalid UTF-8 return 0; } static bool is_legal_utf8(const unsigned char *bytes, int length) { unsigned char a; const unsigned char* srcptr = bytes + length; if (0 == bytes || 0 == length) return false; switch (length) { default: return false; // Everything else falls through when true. case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; /* FALLTHRU */ case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; /* FALLTHRU */ case 2: a = (*--srcptr); switch (*bytes) { case 0xE0: if (a < 0xA0 || a > 0xBF) return false; break; case 0xED: if (a < 0x80 || a > 0x9F) return false; break; case 0xF0: if (a < 0x90 || a > 0xBF) return false; break; case 0xF4: if (a < 0x80 || a > 0x8F) return false; break; default: if (a < 0x80 || a > 0xBF) return false; break; } /* FALLTHRU */ case 1: if (*bytes >= 0x80 && *bytes < 0xC2) return false; } return (*bytes <= 0xF4); } static short read_utf8(json_stream* json, short next_char) { short count; char buffer[4]; short i; count = utf8_seq_length(next_char); if (!count) { json_error(json, "%s", "invalid UTF-8 character"); return -1; } buffer[0] = next_char; for (i = 1; i < count; ++i) { buffer[i] = json->source.get(&json->source);; } if (!is_legal_utf8((unsigned char*) buffer, count)) { json_error(json, "%s", "invalid UTF-8 text"); return -1; } for (i = 0; i < count; ++i) { if (pushchar(json, buffer[i]) != 0) return -1; } return 0; } static enum json_type read_string(json_stream *json) { short c; if (init_string(json) != 0) return JSON_ERROR; for (;;) { c = json->source.get(&json->source); if (c == EOF) { json_error(json, "%s", "unterminated string literal"); return JSON_ERROR; } if (c == '"') { if (pushchar(json, '\0') == 0) return JSON_STRING; return JSON_ERROR; } if (c == '\\') { if (read_escaped(json) != 0) return JSON_ERROR; } else if ((unsigned) c >= 0x80) { if (read_utf8(json, c) != 0) return JSON_ERROR; } else { if (char_needs_escaping(c)) { json_error(json, "%s", "unescaped control character in string"); return JSON_ERROR; } if (pushchar(json, c) != 0) return JSON_ERROR; } } return JSON_ERROR; } static bool is_digit(int c) { return (c >= '0' && c <= '9'); } static short read_digits(json_stream *json) { short c; unsigned nread = 0; while (is_digit(c = json->source.peek(&json->source))) { if (pushchar(json, json->source.get(&json->source)) != 0) return -1; nread++; } if (nread == 0) { json_error(json, "expected digit instead of byte '%c'", c); return -1; } return 0; } static enum json_type read_number(json_stream *json, short c) { if (pushchar(json, c) != 0) return JSON_ERROR; if (c == '-') { c = json->source.get(&json->source); if (is_digit(c)) { return read_number(json, c); } json_error(json, "unexpected byte '%c' in number", c); return JSON_ERROR; } else if (c >= '0' && c <= '9') { c = json->source.peek(&json->source); if (is_digit(c)) { if (read_digits(json) != 0) return JSON_ERROR; } } /* Up to decimal or exponent has been read. */ c = json->source.peek(&json->source); if (strchr(".eE", c) == NULL) { if (pushchar(json, '\0') != 0) return JSON_ERROR; else return JSON_NUMBER; } if (c == '.') { json->source.get(&json->source); // consume . if (pushchar(json, c) != 0) return JSON_ERROR; if (read_digits(json) != 0) return JSON_ERROR; } /* Check for exponent. */ c = json->source.peek(&json->source); if (c == 'e' || c == 'E') { json->source.get(&json->source); // consume e/E if (pushchar(json, c) != 0) return JSON_ERROR; c = json->source.peek(&json->source); if (c == '+' || c == '-') { json->source.get(&json->source); // consume if (pushchar(json, c) != 0) return JSON_ERROR; if (read_digits(json) != 0) return JSON_ERROR; } else if (is_digit(c)) { if (read_digits(json) != 0) return JSON_ERROR; } else { json_error(json, "unexpected byte '%c' in number", c); return JSON_ERROR; } } if (pushchar(json, '\0') != 0) return JSON_ERROR; return JSON_NUMBER; } bool json_isspace(short c) { switch (c) { case 0x09: case 0x0a: case 0x0d: case 0x20: return true; } return false; } /* Returns the next non-whitespace character in the stream. */ static short next(json_stream *json) { short c; while (json_isspace(c = json->source.get(&json->source))) { if (c == '\n') json->lineno++; } return c; } static enum json_type read_value(json_stream *json, short c) { json->ntokens++; switch (c) { case EOF: json_error(json, "%s", "unexpected end of text"); return JSON_ERROR; case '{': return push(json, JSON_OBJECT); case '[': return push(json, JSON_ARRAY); case '"': return read_string(json); case 'n': return is_match(json, "ull", JSON_NULL); case 'f': return is_match(json, "alse", JSON_FALSE); case 't': return is_match(json, "rue", JSON_TRUE); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '-': if (init_string(json) != 0) return JSON_ERROR; return read_number(json, c); default: json_error(json, "unexpected byte '%c' in value", c); return JSON_ERROR; } } enum json_type json_peek(json_stream *json) { enum json_type next; if (json->next) next = json->next; else next = json->next = json_next(json); return next; } enum json_type json_next(json_stream *json) { short c; enum json_type value; if (json->flags & JSON_FLAG_ERROR) return JSON_ERROR; if (json->next != 0) { enum json_type next = json->next; json->next = (enum json_type)0; return next; } if (json->ntokens > 0 && json->stack_top == (size_t)-1) { /* * In the streaming mode leave any trailing whitespaces in the * stream. This allows the user to validate any desired * separation between values (such as newlines) using * json_source_get/peek() with any remaining whitespaces ignored * as leading when we parse the next value. */ if (!(json->flags & JSON_FLAG_STREAMING)) { short c; do { c = json->source.peek(&json->source); if (json_isspace(c)) { c = json->source.get(&json->source); } } while (json_isspace(c)); if (c != EOF) { json_error(json, "expected end of text instead of byte '%c'", c); return JSON_ERROR; } } return JSON_DONE; } c = next(json); if (json->stack_top == (size_t)-1) { if (c == EOF && (json->flags & JSON_FLAG_STREAMING)) return JSON_DONE; return read_value(json, c); } if (json->stack[json->stack_top].type == JSON_ARRAY) { if (json->stack[json->stack_top].count == 0) { if (c == ']') { return pop(json, c, JSON_ARRAY); } json->stack[json->stack_top].count++; return read_value(json, c); } else if (c == ',') { json->stack[json->stack_top].count++; return read_value(json, next(json)); } else if (c == ']') { return pop(json, c, JSON_ARRAY); } else { json_error(json, "unexpected byte '%c'", c); return JSON_ERROR; } } else if (json->stack[json->stack_top].type == JSON_OBJECT) { if (json->stack[json->stack_top].count == 0) { if (c == '}') { return pop(json, c, JSON_OBJECT); } /* No member name/value pairs yet. */ value = read_value(json, c); if (value != JSON_STRING) { if (value != JSON_ERROR) json_error(json, "%s", "expected member name or '}'"); return JSON_ERROR; } else { json->stack[json->stack_top].count++; return value; } } else if ((json->stack[json->stack_top].count % 2) == 0) { /* Expecting comma followed by member name. */ if (c != ',' && c != '}') { json_error(json, "%s", "expected ',' or '}' after member value"); return JSON_ERROR; } else if (c == '}') { return pop(json, c, JSON_OBJECT); } else { enum json_type value = read_value(json, next(json)); if (value != JSON_STRING) { if (value != JSON_ERROR) json_error(json, "%s", "expected member name"); return JSON_ERROR; } else { json->stack[json->stack_top].count++; return value; } } } else if ((json->stack[json->stack_top].count % 2) == 1) { /* Expecting colon followed by value. */ if (c != ':') { json_error(json, "%s", "expected ':' after member name"); return JSON_ERROR; } else { json->stack[json->stack_top].count++; return read_value(json, next(json)); } } } json_error(json, "%s", "invalid parser state"); return JSON_ERROR; } void json_reset(json_stream *json) { json->stack_top = -1; json->ntokens = 0; json->flags &= ~JSON_FLAG_ERROR; json->errmsg[0] = '\0'; } enum json_type json_skip(json_stream *json) { enum json_type type, skip; size_t cnt_arr = 0; size_t cnt_obj = 0; type = json_next(json); for (skip = type; ; skip = json_next(json)) { if (skip == JSON_ERROR || skip == JSON_DONE) return skip; if (skip == JSON_ARRAY) { ++cnt_arr; } else if (skip == JSON_ARRAY_END && cnt_arr > 0) { --cnt_arr; } else if (skip == JSON_OBJECT) { ++cnt_obj; } else if (skip == JSON_OBJECT_END && cnt_obj > 0) { --cnt_obj; } if (!cnt_arr && !cnt_obj) break; } return type; } enum json_type json_skip_until(json_stream *json, enum json_type type) { for (;;) { enum json_type skip = json_skip(json); if (skip == JSON_ERROR || skip == JSON_DONE) return skip; if (skip == type) break; } return type; } const char * json_get_string(json_stream *json, size_t *length) { if (length != NULL) *length = json->data.string_fill; if (json->data.string == NULL) return ""; return json->data.string; } double json_get_number(json_stream *json) { char *p = json->data.string; return p == NULL ? 0 : atoi(p); /* XXX: this should use strtod */ } const char * json_get_error(json_stream *json) { return (json->flags & JSON_FLAG_ERROR ? json->errmsg : NULL); } size_t json_get_lineno(json_stream *json) { return json->lineno; } size_t json_get_position(json_stream *json) { return json->source.position; } size_t json_get_depth(json_stream *json) { return json->stack_top + 1; } /* * Return the current parsing context, that is, JSON_OBJECT if we are * inside an object, JSON_ARRAY if we are inside an array, and JSON_DONE * if we are not yet/anymore in either. * * Additionally, for the first two cases, also return the number of parsing * events that have already been observed at this level with * json_next/peek(). In particular, inside an object, an odd number would * indicate that the just observed JSON_STRING event is a member name. */ enum json_type json_get_context(json_stream *json, size_t *count) { if (json->stack_top == (size_t)-1) return JSON_DONE; if (count != NULL) *count = json->stack[json->stack_top].count; return json->stack[json->stack_top].type; } short json_source_get(json_stream *json) { short c = json->source.get(&json->source); if (c == '\n') json->lineno++; return c; } short json_source_peek(json_stream *json) { return json->source.peek(&json->source); } void json_open_buffer(json_stream *json, const void *buffer, size_t size) { init(json); json->source.get = buffer_get; json->source.peek = buffer_peek; json->source.source.buffer.buffer = (const char *)buffer; json->source.source.buffer.length = size; } void json_open_string(json_stream *json, const char *string) { json_open_buffer(json, string, strlen(string)); } void json_open_stream(json_stream *json, FILE * stream) { init(json); json->source.get = stream_get; json->source.peek = stream_peek; json->source.source.stream.stream = stream; } static short user_get(struct json_source *json) { return json->source.user.get(json->source.user.ptr); } static short user_peek(struct json_source *json) { return json->source.user.peek(json->source.user.ptr); } void json_open_user(json_stream *json, json_user_io get, json_user_io peek, void *user) { init(json); json->source.get = user_get; json->source.peek = user_peek; json->source.source.user.ptr = user; json->source.source.user.get = get; json->source.source.user.peek = peek; } void json_set_allocator(json_stream *json, json_allocator *a) { json->alloc = *a; } void json_set_streaming(json_stream *json, bool streaming) { if (streaming) json->flags |= JSON_FLAG_STREAMING; else json->flags &= ~JSON_FLAG_STREAMING; } void json_close(json_stream *json) { json->alloc.free(json->stack); json->alloc.free(json->data.string); }