C语言实现极简递归下降JSON parser解析器

本文介绍了一款仅200行代码的极简JSON解析器,采用递归下降方式实现,支持基本的JSON结构解析,包括字符串、数字、布尔值、null、对象和数组。解析器限制了字符串为ASCII编码,数字为64位long类型。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

JSON字符串解析利用递归下降非常容易实现。本文实现了一个200多行的极简JSON解析器(parser),基本功能都具备,一些限制是:字符串字符只支持ASCII编码,数字解析只支持64位的long整型,进一步的功能也比较容易扩展。

JSON语法分析

根据 https://tools.ietf.org/html/rfc7159 ,摘录部分如下:

JSON-text = ws value ws
begin-array     = ws %x5B ws  ; [ left square bracket
begin-object    = ws %x7B ws  ; { left curly bracket
end-array       = ws %x5D ws  ; ] right square bracket
end-object      = ws %x7D ws  ; } right curly bracket
name-separator  = ws %x3A ws  ; : colon
value-separator = ws %x2C ws  ; , comma

ws = *(%x20 /      ; Space
       %x09 /      ; Horizontal tab
       %x0A /      ; Line feed or New line
       %x0D )      ; Carriage return
       
value = false / null / true / object / array / number / string
        false = %x66.61.6c.73.65   ; false
        null  = %x6e.75.6c.6c      ; null
        true  = %x74.72.75.65      ; true

object = begin-object [ member *( value-separator member ) ]
         end-object
         member = string name-separator value

可以先自己构建一些测试JSON字符串在 http://json.parser.online.fr/ 解析,然后再测试自己的程序。

数据结构设计

JSON是可嵌套的结构,解析后的JSON存储自然也是一个嵌套的结构,解析程序的主体算法也是递归下降,数据结构设计如下:

json root:1个object
object:0或多个<key, value>, 可以用链表管理
array: 0或多个value,可以用链表管理
key:string
value:string | number | true | false | null | array | object

一个嵌套的数据结构设计完毕后,就可以手写递归下降的JSON解析器了。

JSON解析器

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

// Largest value of a signed 64-bit integer (2^63 - 1). parse_number() compares
// against it so that an over-long digit sequence is rejected instead of overflowing.
#define X64_LONG_MAX 9223372036854775807L

// Tag stored in json_value_t.type that says which kind of JSON value is held.
typedef enum json_type_e {
    JSON_NUM,     // integer number, payload in val.num
    JSON_STRING,  // string, payload in val.str
    JSON_TRUE,    // literal true, no payload
    JSON_FALSE,   // literal false, no payload
    JSON_NULL,    // literal null, no payload
    JSON_OBJ,     // object, member list in val.obj
    JSON_ARRAY,   // array, element list in val.array
} json_type_t;

// Forward typedefs: a value points at object/array nodes, and those nodes
// embed a value, so the names must exist before the struct bodies.
typedef struct json_value_s json_value_t;
typedef struct json_obj_s json_obj_t;
typedef struct json_array_s json_array_t;

// One parsed JSON value: a type tag plus the payload that belongs to that tag.
// JSON_TRUE, JSON_FALSE and JSON_NULL carry no payload.
struct json_value_s {
    json_type_t type;
    union {
        json_obj_t *obj;      // JSON_OBJ: first member node of the object
        json_array_t *array;  // JSON_ARRAY: first element node of the array
        long num;             // JSON_NUM: the integer value
        char *str;            // JSON_STRING: NUL-terminated copy allocated by parse_string()
    } val;
};

// Singly linked list node holding one key/value member of a JSON object.
struct json_obj_s {
    char *key;           // member name, allocated by parse_string()
    json_value_t value;  // member value, stored inline in the node
    json_obj_t *next;    // next member of the same object, NULL after the last one
};

// Singly linked list node holding one element of a JSON array.
struct json_array_s {
    json_value_t value;  // the element, stored inline in the node
    json_array_t *next;  // next element of the same array, NULL after the last one
};

// Parser state shared by all parse_* functions.
typedef struct json_ctx_s {
    const char *cur;  // current read position in the JSON text; advanced as input is consumed
} json_ctx_t;

// Prototypes for the recursive parsers. parse_value() is called by
// parse_array() and parse_obj() before its definition appears in the file.
// Both return 0 on success and -1 on failure.
int parse_obj(json_ctx_t *ctx, json_value_t *v);

int parse_value(json_ctx_t *ctx, json_value_t *v);

// Returns 1 when c is one of the four JSON whitespace characters
// (space, horizontal tab, line feed, carriage return), otherwise 0.
int is_whitespace(char c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            return 1;
        default:
            return 0;
    }
}

// Moves ctx->cur forward to the first character that is not JSON whitespace.
// The cursor is left untouched when it already points at such a character.
void skip_whitespace(json_ctx_t *ctx) {
    const char *p = ctx->cur;

    for (; is_whitespace(*p); p++) {
        // nothing to do: the loop header does the scanning
    }
    ctx->cur = p;
}

// Parses a double-quoted string starting at ctx->cur, which must point at the
// opening quote, and stores a freshly malloc'ed NUL-terminated copy in *out.
//
// Escape sequences are NOT decoded: the bytes between the quotes are copied
// verbatim. A backslash only protects the character after it, so an escaped
// quote (\") no longer terminates the string early.
//
// Returns 0 on success, with ctx->cur just past the closing quote; the caller
// owns *out and must free() it. Returns -1 when the input ends before the
// closing quote or when allocation fails; ctx->cur and *out are then unchanged.
int parse_string(json_ctx_t *ctx, char **out) {
    const char *begin = ctx->cur + 1;  // first character after the opening quote
    const char *end = begin;

    while (*end != '"') {
        if (*end == '\\')
            end++;  // step over the backslash so the escaped character is skipped below
        if (*end == '\0')
            return -1;  // unterminated string: never read past the end of the input
        end++;
    }

    size_t len = (size_t) (end - begin);
    char *copy = malloc(len + 1);
    if (copy == NULL)
        return -1;
    memcpy(copy, begin, len);
    copy[len] = '\0';

    *out = copy;
    ctx->cur = end + 1;  // consume the closing quote
    return 0;
}

// Parses a JSON integer at ctx->cur into v (type JSON_NUM, value in val.num).
//
// Accepted form: an optional '-', then either a single '0' or a digit sequence
// that does not start with '0' (the integer part of the RFC number grammar).
// Fractions and exponents are not supported; parsing stops in front of them.
// The value must fit in a 64-bit signed long, including the most negative value.
//
// Returns 0 on success, with ctx->cur just past the last digit. Returns -1 when
// there is no digit, when there is a leading zero, or on overflow; ctx->cur and
// v are then left unchanged.
int parse_number(json_ctx_t *ctx, json_value_t *v) {
    const char *p = ctx->cur;
    int negative = 0;

    if (*p == '-') {
        negative = 1;
        p++;
    }

    // At least one digit is required, otherwise "", "-" or "x" would parse as 0.
    // The cast keeps isdigit() defined for chars with a negative value.
    if (!isdigit((unsigned char) *p))
        return -1;

    // https://tools.ietf.org/html/rfc7159#page-6 : no leading zeros ("01", "00", "-01").
    if (p[0] == '0' && isdigit((unsigned char) p[1]))
        return -1;

    // Accumulate the magnitude unsigned so the most negative long is representable.
    const unsigned long limit = negative ? (unsigned long) X64_LONG_MAX + 1UL
                                         : (unsigned long) X64_LONG_MAX;
    unsigned long n = 0;
    for (; isdigit((unsigned char) *p); p++) {
        unsigned long digit = (unsigned long) (*p - '0');
        if (n > (limit - digit) / 10)  // n * 10 + digit would exceed limit
            return -1;
        n = n * 10 + digit;
    }

    v->type = JSON_NUM;
    if (!negative)
        v->val.num = (long) n;
    else if (n == limit)
        v->val.num = -X64_LONG_MAX - 1;  // magnitude itself does not fit in a long
    else
        v->val.num = -(long) n;

    ctx->cur = p;
    return 0;
}

// Parses a JSON array starting at ctx->cur, which must point at '['.
//
// On success v has type JSON_ARRAY and v->val.array is a linked list of the
// elements in document order (NULL for an empty array); ctx->cur is just past
// the closing ']'. Every node is malloc'ed and owned by the caller.
//
// Returns 0 on success and -1 on a syntax error or allocation failure. On
// failure the nodes parsed so far stay linked under v; this function does not
// free them.
int parse_array(json_ctx_t *ctx, json_value_t *v) {
    if (*ctx->cur != '[')
        return -1;
    ctx->cur++;

    // Initialise the result here so the function does not rely on its caller.
    v->type = JSON_ARRAY;
    v->val.array = NULL;

    skip_whitespace(ctx);
    if (*ctx->cur == ']') {
        ctx->cur++;  // consume ']' of an empty array so the caller resumes after it
        return 0;
    }

    json_array_t **tail = &v->val.array;  // where the next node gets linked
    while (1) {
        json_array_t *a = malloc(sizeof *a);
        if (a == NULL)
            return -1;
        a->next = NULL;
        a->value.type = JSON_NULL;  // well-defined tag even if parse_value() fails early
        *tail = a;                  // append, so elements keep their document order
        tail = &a->next;

        if (parse_value(ctx, &a->value) != 0)
            return -1;
        skip_whitespace(ctx);

        if (*ctx->cur == ']') {
            ctx->cur++;
            return 0;
        }
        if (*ctx->cur != ',')
            return -1;
        ctx->cur++;
        skip_whitespace(ctx);
    }
}

// Parses a JSON object starting at ctx->cur, which must point at '{'.
//
// On success v has type JSON_OBJ and v->val.obj is a linked list of the
// members in document order (NULL for an empty object); ctx->cur is just past
// the closing '}'. Nodes and keys are malloc'ed and owned by the caller.
//
// Returns 0 on success and -1 on a syntax error or allocation failure. On
// failure the members parsed so far stay linked under v; this function does
// not free them.
int parse_obj(json_ctx_t *ctx, json_value_t *v) {
    if (*ctx->cur != '{')
        return -1;
    ctx->cur++;

    // Initialise the result here so the function does not rely on its caller.
    v->type = JSON_OBJ;
    v->val.obj = NULL;

    skip_whitespace(ctx);
    if (*ctx->cur == '}') {
        ctx->cur++;  // consume '}' of an empty object so the caller resumes after it
        return 0;
    }

    json_obj_t **tail = &v->val.obj;  // where the next node gets linked
    while (1) {
        // key: parsed before the node exists, so no node with a bad key is ever linked
        char *key = NULL;
        if (*ctx->cur != '"' || parse_string(ctx, &key) != 0)
            return -1;

        json_obj_t *m = malloc(sizeof *m);
        if (m == NULL) {
            free(key);
            return -1;
        }
        m->key = key;
        m->value.type = JSON_NULL;  // well-defined tag even if parse_value() fails early
        m->next = NULL;
        *tail = m;                  // append, so members keep their document order
        tail = &m->next;
        skip_whitespace(ctx);

        // colon
        if (*ctx->cur != ':')
            return -1;
        ctx->cur++;
        skip_whitespace(ctx);

        // value
        if (parse_value(ctx, &m->value) != 0)
            return -1;

        // next element
        skip_whitespace(ctx);
        if (*ctx->cur == '}') {
            ctx->cur++;
            return 0;
        }
        if (*ctx->cur != ',')
            return -1;
        ctx->cur++;
        skip_whitespace(ctx);
    }
}

// Matches the literal `word` (e.g. "true") character by character at ctx->cur.
// On a full match sets v->type to `type` and returns 0 with the cursor just
// past the word. On the first mismatch returns -1; the cursor then stays at
// the mismatching character and v is not modified.
int parse_string_word(json_ctx_t *ctx, const char *word, json_value_t *v, json_type_t type) {
    for (const char *expected = word; *expected != '\0'; expected++) {
        if (*expected != *ctx->cur)
            return -1;
        ctx->cur++;
    }
    v->type = type;
    return 0;
}

// Parses one JSON value at ctx->cur into v, dispatching on the first character:
// '"' string, '{' object, '[' array, 't'/'f'/'n' the literals true/false/null,
// '-' or a digit a number. Any other character, including the end of the
// input, is a syntax error.
//
// Leading whitespace is not skipped here; callers do that first.
// Returns 0 on success and -1 on failure.
int parse_value(json_ctx_t *ctx, json_value_t *v) {
    const char c = *ctx->cur;

    switch (c) {
        case '"':
            v->type = JSON_STRING;
            v->val.str = NULL;  // defined value even when parse_string() fails
            return parse_string(ctx, &v->val.str);
        case '{':
            v->type = JSON_OBJ;
            v->val.obj = NULL;
            return parse_obj(ctx, v);
        case '[':
            v->type = JSON_ARRAY;
            v->val.array = NULL;
            return parse_array(ctx, v);
        case 't':
            return parse_string_word(ctx, "true", v, JSON_TRUE);
        case 'f':
            return parse_string_word(ctx, "false", v, JSON_FALSE);
        case 'n':
            return parse_string_word(ctx, "null", v, JSON_NULL);
        default:
            // Only '-' or a digit can start a number. Rejecting everything else here
            // keeps stray characters such as ',' or ']' from being read as a number.
            if (c == '-' || isdigit((unsigned char) c))
                return parse_number(ctx, v);
            return -1;
    }
}

// Parses a NUL-terminated JSON text whose top-level value must be an object.
//
// Returns a malloc'ed root value on success; the caller owns it. Returns NULL
// when json is NULL, when the text does not start with '{' (after optional
// whitespace), when allocation fails, or when parsing fails. A parse failure
// also prints "parse ret <code>" to stdout.
//
// Characters after the root object are not inspected.
json_value_t *parse(const char *json) {
    if (json == NULL)
        return NULL;

    json_ctx_t ctx;
    ctx.cur = json;
    skip_whitespace(&ctx);
    if (*ctx.cur != '{')  // the root must be an object
        return NULL;

    json_value_t *v = malloc(sizeof *v);
    if (v == NULL)
        return NULL;

    int ret = parse_value(&ctx, v);
    if (ret != 0) {
        printf("parse ret %d\n", ret);
        // Do not hand a half-built tree back as if it were a result.
        // TODO: free the nodes already linked under v; only the root is released here.
        free(v);
        return NULL;
    }
    return v;
}

// Recursively prints a parsed value to stdout, one line per scalar.
// Objects and arrays are wrapped in "-----start" / "-----end" marker lines and
// their members/elements are printed in list order. A value with an unknown
// type tag prints nothing.
void show(json_value_t *value) {
    json_obj_t *member;
    json_array_t *elem;

    switch (value->type) {
        case JSON_NUM:
            printf("[num]%ld\n", value->val.num);
            return;
        case JSON_STRING:
            printf("[str]%s\n", value->val.str);
            return;
        case JSON_TRUE:
            printf("true\n");
            return;
        case JSON_FALSE:
            printf("false\n");
            return;
        case JSON_NULL:
            printf("null\n");
            return;
        case JSON_OBJ:
            printf("[obj]-----start\n");
            for (member = value->val.obj; member != NULL; member = member->next) {
                printf("[key]%s:\t", member->key);
                printf("[value]");
                show(&member->value);
            }
            printf("[obj]-----end\n");
            return;
        case JSON_ARRAY:
            printf("[array]-----start\n");
            for (elem = value->val.array; elem != NULL; elem = elem->next) {
                printf("[value]");
                show(&elem->value);
            }
            printf("[array]-----end\n");
            return;
        default:
            return;
    }
}

// Demo driver: parses a fixed sample document and dumps it with show().
// Returns 0 when parse() yields a tree, -1 after printing "parse error!" otherwise.
int main() {
    const char *sample = "{\"hi\":[1,\"hi\",{\"hello\":22}],\n"
                         "\"isNull  \":null,\n"
                         "\"isTrue\":  true,\n"
                         "\"hello2\":-2,\n"
                         "\"arr2\":[\"hi\",3]}";
    json_value_t *root = parse(sample);

    if (root != NULL) {
        show(root);
        return 0;
    }
    printf("parse error!\n");
    return -1;
}

后续可以在此基础上进一步扩展,例如支持浮点数、字符串转义与Unicode字符,以及在解析失败时释放已分配的内存。

JSON++ Build Status Introduction JSON++ is a light-weight JSON parser, writer and reader written in C++. JSON++ can also convert JSON documents into lossless XML documents. Contributors http://github.com/hjiang http://github.com/elanthis http://github.com/r-lyeh If you've made substantial contribution, please add your link here. Why another JSON parser? Perhaps because web service clients are usually written in dynamic languages these days, none of the existing C++ JSON parsers fitted my needs very well, so I wrote one that I used in another project. My goals for JSON++ were: Efficient in both memory and speed. No third party dependencies. JSON++ only depends on the standard C++ library. Cross platform. Robust. Small and convenient API. Most of the time, you only need to call one function and two function templates. Easy to integrate. JSON++ only has one source file and one header file. Just compile the source file and link with your program. Able to construct documents dynamically. JSON writer: write documents in JSON format. Other contributors have sinced added more functionalities: XML writer: convert documents to JSONx format. See http://goo.gl/I3cxs for details. XML writer: convert documents to JXML format. See https://github.com/r-lyeh/JXML for details. XML writer: convert documents to JXMLex format. See https://github.com/r-lyeh/JXMLex for details. XML writer: convert documents to tagged XML format. See https://github.com/hjiang/jsonxx/issues/12 for details. Compiler version You need a modern C++ compiler. For older compilers, please try legacy branch. Configuration Strict/permissive parsing JSONxx can parse JSON documents both in strict or permissive mode. When jsonxx::Settings::Parser is set to Strict, JSONxx parser will accept: Fully conformant JSON documents only. 
When jsonxx::Settings::Parser is set to Permissive, JSONxx parser will accept: Fully conformant JSON documents Ending commas in arrays and objects: { "array": [0,1,2,], } Single quoted strings: ['hello', "world"] C++ style comments: { "width": 320, "height": 240 } //Picture details Default value is Permissive. When jsonxx::Settings::UnquotedKeys is set to Enabled, JSONxx parser will accept: Unquoted keys: {name: "world"} Default value is Disabled. Assertions JSONxx uses internally JSONXX_ASSERT(...) macro that works both in debug and release mode. Set jsonxx::Settings::Assertions value to Disabled to disable assertions. Default value is Enabled. Usage The following snippets are from one of the unit tests. They are quite self-descriptive. using namespace std; using namespace jsonxx; string teststr( "{" " \"foo\" : 1," " \"bar\" : false," " \"person\" : {\"name\" : \"GWB\", \"age\" : 60,}," " \"data\": [\"abcd\", 42]," "}" ); // Parse string or stream Object o; assert(o.parse(teststr)); // Validation. 
Checking for JSON types and values as well assert(1 == o.get<Number>("foo")); assert(o.has<Boolean>("bar")); assert(o.has<Object>("person")); assert(o.get<Object>("person").has<Number>("age")); assert(!o.get<Object>("person").has<Boolean>("old")); assert(o.get<Object>("person").get<Boolean>("old", false)); assert(o.has<Array>("data")); assert(o.get<Array>("data").get<Number>(1) == 42); assert(o.get<Array>("data").get<String>(0) == "abcd"); assert(o.get<Array>("data").get<String>(2, "hello") == "hello"); assert(!o.has<Number>("data")); cout << o.json() << endl; // JSON output cout << o.xml(JSONx) << endl; // JSON to XML conversion (JSONx subtype) cout << o.xml(JXML) << endl; // JSON to XML conversion (JXML subtype) cout << o.xml(JXMLex) << endl; // JSON to XML conversion (JXMLex subtype) // Generate JSON document dynamically using namespace std; using namespace jsonxx; Array a; a << 123; a << "hello world"; a << 3.1415; a << 99.95f; a << 'h'; a << Object("key", "value"); Object o; o << "key1" << "value"; o << "key2" << 123; o << "key3" << a; cout << o.json() << endl; To do Custom JSON comments (C style /**/) when permissive parsing is enabled.
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值