【编译原理】json解析器的全流程实现

本文详细介绍了如何设计和实现一个JSON解析器,包括JSON的数据结构、文法设计、程序实现以及测试。首先,讲解了JSON的六种数据类型和基本语法。接着,分析了JSON的词法设计,定义了10种词组类型,并给出了文法设计及文法符号分析。然后,通过DFA展示了文法分析过程,并构建了文法分析表。程序实现部分,使用C语言编写了JSON解析器,实现了从JSON字符串到C结构体的转换。最后,提供了测试代码和测试结果,验证了解析器的正确性。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

目录

一、json结构

1. 数据类型

2. 语法

二、文法

1. 词法设计

2. 语法设计

3. 文法符号分析

4. 文法DFA

5. 文法分析表

三、程序实现

1. 程序思路

2. 源码实现

3. 测试

测试代码

测试结果

四、结语


一、json结构

简单地介绍一下json的数据类型和语法

1. 数据类型

json每一个数据都是一个“对象”,其数据类型有6种:null、array、object、bool、number、string。

2. 语法

一个文件只能有一个对象,一个array可以存储多个对象,对象之间用逗号分隔,不限数据类型;一个object也可以存储多个对象,而且是按照key-value形式存储,每一个key都对应一个对象,同样也不限数据类型。

二、文法

1. 词法设计

json字符串可拆分成10种词组:

①数字:由负号或数字开头,其后由数字和至多一个小数点组成的字符串

②字符串:由引号开头、引号结尾的字符串

③布尔值:true、false

④空值:null

⑤上花括号:{ 

⑥下花括号:}

⑦上中括号:[

⑧下中括号:]

⑨冒号

⑩逗号

2. 语法设计

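设定文法 G = (V_T, V_N, S, P)

其中 V_T = { 数字, 字符串, 布尔值, 空值, {, }, [, ], 逗号, 冒号, $ } 是终结符集合,V_N = { S, A, B, C, D } 是非终结符集合,P 是产生式集合。

开始符是 S,产生式如下(原文此处为图片;以下根据源码中的 INFER、STATUTE_LEN 数组和文法分析表整理):

(1) S → A $
(2) A → B
(3) B → 数字
(4) B → 字符串
(5) B → 布尔值
(6) B → 空值
(7) B → { C }
(8) B → [ D ]
(9) B → { }
(10) B → [ ]
(11) C → 字符串 : B
(12) C → C , C
(13) D → B
(14) D → D , D

注:文法未经优化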

3. 文法符号分析

FIRST集合:

 FOLLOW集合:

4. 文法DFA

这个DFA是手动画的,所以可能会有那么一丢丢问题。

5. 文法分析表

根据DFA依次填表(已结合FOLLOW集合),得到以下分析表:

(sn表示移进,并进入状态n;r n表示根据第n条产生式进行规约)

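其中“数字”到“$”各列属于 ACTION 表,S、A、B、C、D 各列属于 GOTO 表,空白单元格表示出错:

| 状态 | 数字 | 字符串 | 布尔值 | 空值 | { | } | [ | ] | , | : | $ | S | A | B | C | D |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | s5 | s6 | s7 | s8 | s2 | | s9 | | | | | | 3 | 4 | | |
| 2 | | s12 | | | | s10 | | | | | | | | | 11 | |
| 3 | | | | | | | | | | | acc | | | | | |
| 4 | | | | | | | | | | | r2 | | | | | |
| 5 | | | | | | r3 | | r3 | r3 | | r3 | | | | | |
| 6 | | | | | | r4 | | r4 | r4 | | r4 | | | | | |
| 7 | | | | | | r5 | | r5 | r5 | | r5 | | | | | |
| 8 | | | | | | r6 | | r6 | r6 | | r6 | | | | | |
| 9 | s5 | s6 | s7 | s8 | s2 | | s9 | s18 | | | | | | 19 | | 20 |
| 10 | | | | | | r9 | | r9 | r9 | | r9 | | | | | |
| 11 | | | | | | s13 | | | s14 | | | | | | | |
| 12 | | | | | | | | | | s16 | | | | | | |
| 13 | | | | | | r7 | | r7 | r7 | | r7 | | | | | |
| 14 | | s12 | | | | | | | | | | | | | 15 | |
| 15 | | | | | | r12 | | | s14 | | | | | | | |
| 16 | s5 | s6 | s7 | s8 | s2 | | s9 | | | | | | | 17 | | |
| 17 | | | | | | r11 | | | r11 | | | | | | | |
| 18 | | | | | | r10 | | r10 | r10 | | r10 | | | | | |
| 19 | | | | | | | | r13 | r13 | | | | | | | |
| 20 | | | | | | | | s21 | s22 | | | | | | | |
| 21 | | | | | | r8 | | r8 | r8 | | r8 | | | | | |
| 22 | s5 | s6 | s7 | s8 | s2 | | s9 | | | | | | | 19 | | 23 |
| 23 | | | | | | | | r14 | s22 | | | | | | | |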

三、程序实现

1. 程序思路

①把文本拆分成若干个词组(名为token),形成一个token序列,并在末尾插入一个表示结束的token(即文法中的$)

②根据文法表,判断token序列的顺序是否符合设定的语法

③将合法的token序列转换为C的json数据结构

2. 源码实现

语言:C99标准及以上

注:代码未经优化

头文件:

#ifndef LIB_JSON_H
#define LIB_JSON_H

// JSON value type tags (stored in JsonObj.type)
#define Null 0
#define Array 1
#define Object 2
#define Bool 3
#define Number 4
#define String 5

// Parse error codes (stored in ParseResult.error_code)
#define SUCCESS 0 // parsed successfully
#define ERROR -1 // syntax error
#define UNKONW_CHARACTOR -2 // unrecognized character
#define LOST_QUOTATION -3 // missing quotation mark
#define MULTIPLE_POINTS -4 // more than one decimal point

// C builds get a char-based bool plus true/false macros; C++ builds skip this
#ifndef __cplusplus
typedef char bool;
#define true 1
#define false 0
#endif

// One node of a JSON value tree.
struct JsonObj
{
    char *name; // key of this node; NULL until the node is added to an object
    void *data; // payload: bool / double / char string, or an array of child node pointers
    int type; // value type tag (Null, Array, Object, Bool, Number, String)
    int length; // number of children of an array or object
    int capacity; // number of child slots allocated for an array or object
    int rindex; // read cursor of an array or object (used by FirstElement / NextElement)
};

typedef struct JsonObj * JsonObjPtr;

// Outcome of a Parse() call.
struct ParseResult
{
    int row; // row number where the error was found
    int col; // column number where the error was found
    int error_code; // error code (SUCCESS or one of the negative error codes)
    JsonObjPtr result; // root node of a successful parse; must be released manually with Free
};

#ifdef __cplusplus
extern "C" 
{
#endif

/*
 * Release a node (children of an array or object are released as well)
 */
extern void Free(JsonObjPtr obj);

/*
 * Create a JSON node. Once attached to a parent the memory is managed by the
 * parent; a node without a parent must be released manually.
 */
extern JsonObjPtr Create(int type);
extern JsonObjPtr CreateNull();
extern JsonObjPtr CreateBool();
extern JsonObjPtr CreateNumber();
extern JsonObjPtr CreateString();
extern JsonObjPtr CreateArray();
extern JsonObjPtr CreateObject();

/*
 * Return value convention for the bool functions below: 0 on failure, 1 on success
 */

/*
 * Test the type of a node
 */
extern bool IsNull(const JsonObjPtr obj);
extern bool IsBool(const JsonObjPtr obj);
extern bool IsNumber(const JsonObjPtr obj);
extern bool IsString(const JsonObjPtr obj);
extern bool IsArray(const JsonObjPtr obj);
extern bool IsObject(const JsonObjPtr obj);

/*
 * Set the value of a node (fails when the node has a different type)
 */
extern bool SetBool(JsonObjPtr obj, bool value);
extern bool SetNumber(JsonObjPtr obj, double value);
extern bool SetString(JsonObjPtr obj, const char *value);

/*
 * Get the value of a node
 */
extern bool GetBool(JsonObjPtr obj);
extern double GetNumber(JsonObjPtr obj);
extern const char *GetString(JsonObjPtr obj);

/*
 * Append an element at the end of an array, or a named element at the end of an object
 */
extern bool AppendArrayElement(JsonObjPtr obj, JsonObjPtr c);
extern bool AppendObjectElement(JsonObjPtr obj, const char *name, JsonObjPtr c);

/*
 * Remove the last element of an array
 */
extern bool RemoveArrayElement(JsonObjPtr obj);

/*
 * Remove the element called name from an object node
 */
extern bool RemoveObjectElement(JsonObjPtr obj, const char *name);

/*
 * Get an array element by index (NULL when obj is not an array or index is past the end)
 */
extern JsonObjPtr GetArrayElement(JsonObjPtr obj, int index);

/*
 * Get an object element by name (NULL when obj is not an object or no element matches)
 */
extern JsonObjPtr GetObjectElement(JsonObjPtr obj, const char *name);

/*
 * Start iterating an array or object: reset the read cursor and return the first element
 */
extern JsonObjPtr FirstElement(JsonObjPtr obj);

/*
 * Continue iterating an array or object: advance the read cursor and return the next element
 */
extern JsonObjPtr NextElement(JsonObjPtr obj);

/*
 * Parse a JSON string
 * @str  JSON text
 * @size  length of the text
 * @return  a ParseResult value
 */
extern struct ParseResult Parse(const char *str, int size);

// Get a message describing the parse error
extern const char * ErrorMsg(const struct ParseResult *pr);

// Get the JSON node of a successful parse; it must be released manually with Free()
extern JsonObjPtr GetJsonObjPtr(struct ParseResult *pr);

// Tell whether parsing succeeded
extern bool ParseSuccess(const struct ParseResult *pr);

// Get the position of the parse error
extern int ParseErrorRow(const struct ParseResult *pr);
extern int ParseErrorCol(const struct ParseResult *pr);

/*
 * Convert a JSON node to a string
 * @buff  memory that receives the resulting string
 * @newl  text used as a new line, may be NULL
 * @newc  text that starts an indented line, may be NULL
 * @aoc  text written after the colon of an object member, may be NULL
 * @aac  text written after the comma of an array element, may be NULL
 * @return  length of the resulting string
 */
extern int ToString(JsonObjPtr obj, char *buff, const char *newl, const char *newc, const char *aoc, const char *aac);

/*
 * Convert a JSON node to a formatted string
 * @return  ToString(obj, buff, "\n", "    ", " ", " ")
 */
extern int ToFormatString(JsonObjPtr obj, char *buff);

/*
 * Convert a JSON node to an unformatted string
 * @return  ToString(obj, buff, NULL, NULL, NULL, NULL)
 */
extern int ToUnformatString(JsonObjPtr obj, char *buff);

#ifdef __cplusplus
}
#endif

#endif // LIB_JSON_H

源文件:

#include "libjson.h"
#include <limits.h> // INT_MIN
#include <stdio.h>  // snprintf
#include <stdlib.h> // malloc  realloc  free  atof  gcvt 
#include <string.h> // memcpy  strlen  strcmp

#ifndef NULL
#define NULL 0
#endif

/*
 * Access the index-th child pointer stored in obj->data (usable as an lvalue).
 * Both arguments and the whole expansion are parenthesized so that compound
 * expressions such as `i & 1` or `cond ? a : b` expand with the intended
 * precedence. `index` is evaluated exactly once; no bounds check is done.
 */
#define IndexObj(obj, index) \
    (*(((JsonObjPtr *)(obj)->data) + (index)))

/*
 * Release every child node referenced by an array or object node.
 * Only the children are released; obj->data itself is left for the caller.
 */
static void FreeChildren(JsonObjPtr obj)
{
    JsonObjPtr *children = (JsonObjPtr *)obj->data;
    int pos;

    for(pos = 0; pos < obj->length; ++pos)
    {
        Free(children[pos]);
    }
}

/*
 * Release a node together with everything it owns.
 *
 * Arrays and objects release their children first, every type that carries a
 * payload releases obj->data, and the key string (obj->name) is released for
 * all types - it is heap-allocated by AppendObjectElement or handed over by
 * the parser, and was previously leaked.
 *
 * @obj  node to release; NULL is ignored
 */
void Free(JsonObjPtr obj)
{
    if(NULL == obj)
    {
        return;
    }

    switch(obj->type)
    {
    case Array:
    case Object: 
        FreeChildren(obj); 
        /* fallthrough: the child pointer table itself lives in obj->data */
    case Bool:
    case Number:
    case String: 
        free(obj->data);
        /* fallthrough: every type releases its name and the node */
    default: 
        free(obj->name);
        free(obj); 
        break;
    }
}

/*
 * Create a node of the requested type by dispatching to the matching
 * Create* function; any unrecognized type tag yields a Null node.
 */
JsonObjPtr Create(int type)
{
    JsonObjPtr created = NULL;

    if(type == Array)
    {
        created = CreateArray();
    }
    else if(type == Object)
    {
        created = CreateObject();
    }
    else if(type == Bool)
    {
        created = CreateBool();
    }
    else if(type == Number)
    {
        created = CreateNumber();
    }
    else if(type == String)
    {
        created = CreateString();
    }
    else
    {
        created = CreateNull();
    }

    return created;
}

/*
 * Create a Null node: no payload, no name, all counters zero.
 *
 * @return  the new node, or NULL when memory cannot be allocated
 */
JsonObjPtr CreateNull()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    if(NULL == ptr)
    {
        return NULL;
    }

    ptr->data = NULL;
    ptr->length = 0;
    ptr->rindex = 0;
    ptr->type = Null;
    ptr->capacity = 0;
    ptr->name = NULL;
    return ptr;
}

JsonObjPtr CreateBool()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    ptr->data = malloc(sizeof(bool));
    ptr->length = 0;
    ptr->rindex = 0;
    ptr->type = Bool;
    ptr->capacity = 0;
    ptr->name = NULL;
    return ptr;
}

/*
 * Create a Number node whose value starts out as 0 (the payload used to be
 * left uninitialized, so reading it before SetNumber was undefined).
 *
 * @return  the new node, or NULL when memory cannot be allocated
 */
JsonObjPtr CreateNumber()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    if(NULL == ptr)
    {
        return NULL;
    }

    ptr->data = malloc(sizeof(double));
    if(NULL == ptr->data)
    {
        free(ptr);
        return NULL;
    }

    *(double *)ptr->data = 0.0;
    ptr->length = 0;
    ptr->rindex = 0;
    ptr->type = Number;
    ptr->capacity = 0;
    ptr->name = NULL;
    return ptr;
}

/*
 * Create a String node holding the empty string. length and capacity are both
 * set to 1, i.e. they count the terminating NUL byte.
 *
 * @return  the new node, or NULL when memory cannot be allocated
 */
JsonObjPtr CreateString()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    if(NULL == ptr)
    {
        return NULL;
    }

    ptr->data = malloc(sizeof(char));
    if(NULL == ptr->data)
    {
        free(ptr);
        return NULL;
    }

    *(char *)ptr->data = '\0';
    ptr->length = 1;
    ptr->rindex = 0;
    ptr->type = String;
    ptr->capacity = 1;
    ptr->name = NULL;
    return ptr;
}

/*
 * Create an empty Array node with room for one child pointer.
 *
 * @return  the new node, or NULL when memory cannot be allocated
 */
JsonObjPtr CreateArray()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    if(NULL == ptr)
    {
        return NULL;
    }

    ptr->data = malloc(sizeof(JsonObjPtr));
    if(NULL == ptr->data)
    {
        free(ptr);
        return NULL;
    }

    ptr->length = 0;
    ptr->rindex = 0;
    ptr->type = Array;
    ptr->capacity = 1;
    ptr->name = NULL;
    return ptr;
}

/*
 * Create an empty Object node with room for one child pointer.
 *
 * @return  the new node, or NULL when memory cannot be allocated
 */
JsonObjPtr CreateObject()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    if(NULL == ptr)
    {
        return NULL;
    }

    ptr->data = malloc(sizeof(JsonObjPtr));
    if(NULL == ptr->data)
    {
        free(ptr);
        return NULL;
    }

    ptr->length = 0;
    ptr->rindex = 0;
    ptr->type = Object;
    ptr->capacity = 1;
    ptr->name = NULL;
    return ptr;
}

/*
 * Tell whether obj is a Null node. A NULL pointer is not a node of any type,
 * so it yields false instead of being dereferenced.
 */
bool IsNull(const JsonObjPtr obj)
{
    return NULL != obj && obj->type == Null;
}

/*
 * Tell whether obj is a Bool node. A NULL pointer is not a node of any type,
 * so it yields false instead of being dereferenced.
 */
bool IsBool(const JsonObjPtr obj)
{
    return NULL != obj && obj->type == Bool;
}

/*
 * Tell whether obj is a Number node. A NULL pointer is not a node of any type,
 * so it yields false instead of being dereferenced.
 */
bool IsNumber(const JsonObjPtr obj)
{
    return NULL != obj && obj->type == Number;
}

/*
 * Tell whether obj is a String node. A NULL pointer is not a node of any type,
 * so it yields false instead of being dereferenced.
 */
bool IsString(const JsonObjPtr obj)
{
    return NULL != obj && obj->type == String;
}

/*
 * Tell whether obj is an Array node. A NULL pointer is not a node of any type,
 * so it yields false instead of being dereferenced.
 */
bool IsArray(const JsonObjPtr obj)
{
    return NULL != obj && obj->type == Array;
}

/*
 * Tell whether obj is an Object node. A NULL pointer is not a node of any
 * type, so it yields false instead of being dereferenced.
 */
bool IsObject(const JsonObjPtr obj)
{
    return NULL != obj && obj->type == Object;
}

/*
 * Store value in a Bool node.
 *
 * @return  true when obj is a Bool node and was updated, false otherwise
 */
bool SetBool(JsonObjPtr obj, bool value)
{
    bool *slot = NULL;

    if(IsBool(obj))
    {
        slot = (bool *)obj->data;
        *slot = value;
        return true;
    }

    return false;
}

/*
 * Store value in a Number node.
 *
 * @return  true when obj is a Number node and was updated, false otherwise
 */
bool SetNumber(JsonObjPtr obj, double value)
{
    double *slot = NULL;

    if(IsNumber(obj))
    {
        slot = (double *)obj->data;
        *slot = value;
        return true;
    }

    return false;
}

/*
 * Replace the text of a String node with a private copy of value.
 *
 * The copy is made before the old buffer is released, so the node keeps its
 * previous text when allocation fails, and passing the node's own text
 * (e.g. the pointer returned by GetString) is safe.
 *
 * @obj  node to update; must be a String node
 * @value  NUL-terminated text to copy; must not be NULL
 * @return  true on success; false when value is NULL, obj is not a String
 *          node, or memory cannot be allocated
 */
bool SetString(JsonObjPtr obj, const char *value)
{
    size_t len;
    char *copy;

    if(NULL == value || !IsString(obj))
    {
        return false;
    }

    len = strlen(value);
    copy = (char *)malloc(len + 1);
    if(NULL == copy)
    {
        return false;
    }
    memcpy(copy, value, len + 1); // includes the terminating NUL

    free(obj->data);
    obj->data = copy;
    return true;
}

/*
 * Read the value of a Bool node.
 *
 * @return  the stored value; false when obj is NULL, is not a Bool node, or
 *          has no payload (previously such nodes were read as if they were
 *          Bool nodes)
 */
bool GetBool(JsonObjPtr obj)
{
    if(NULL == obj || !IsBool(obj) || NULL == obj->data)
    {
        return false;
    }
    return *((bool *)obj->data); 
}

/*
 * Read the value of a Number node.
 *
 * @return  the stored value; 0 when obj is NULL, is not a Number node, or has
 *          no payload (previously such nodes were read as if they were Number
 *          nodes)
 */
double GetNumber(JsonObjPtr obj)
{
    if(NULL == obj || !IsNumber(obj) || NULL == obj->data)
    {
        return 0.0;
    }
    return *((double *)obj->data); 
}

/*
 * Read the text of a String node.
 *
 * @return  pointer to the node's own NUL-terminated text (still owned by the
 *          node), or NULL when obj is NULL or is not a String node
 */
const char *GetString(JsonObjPtr obj)
{
    if(NULL == obj || !IsString(obj))
    {
        return NULL;
    }
    return (const char *)obj->data; 
}

/*
 * Append child c to the child table of an array/object node, doubling the
 * table when it is full.
 *
 * The realloc result is checked before it replaces obj->data, so the existing
 * table is never lost, and a capacity of 0 now grows to 1 instead of staying
 * at 0. When growing fails the node is left unchanged and c is NOT stored
 * (the function has no way to report this to its callers).
 */
static void AppendElement(JsonObjPtr obj, JsonObjPtr c)
{
    if(obj->length >= obj->capacity)
    {
        int new_cap = obj->capacity > 0 ? obj->capacity * 2 : 1;
        void *grown = realloc(obj->data, sizeof(JsonObjPtr) * (size_t)new_cap);
        if(NULL == grown)
        {
            return;
        }
        obj->data = grown;
        obj->capacity = new_cap;
    }

    IndexObj(obj, obj->length++) = c;
}

/*
 * Append c at the end of an array node.
 *
 * @return  true when obj is an array (the element is handed to
 *          AppendElement), false otherwise
 */
bool AppendArrayElement(JsonObjPtr obj, JsonObjPtr c)
{
    if(IsArray(obj))
    {
        AppendElement(obj, c);
        return true;
    }

    return false;
}

/*
 * Append c to an object node under the key name.
 *
 * The key is copied into a fresh buffer before the child's previous name is
 * released, so a failed allocation leaves c untouched and passing c's own
 * name as the key is safe.
 *
 * @obj  target node; must be an Object node
 * @name  NUL-terminated key; must not be NULL
 * @c  child node; must not be NULL
 * @return  true on success; false when an argument is invalid or memory
 *          cannot be allocated for the key
 */
bool AppendObjectElement(JsonObjPtr obj, const char *name, JsonObjPtr c)
{
    size_t nlen;
    char *key;

    if(NULL == name || NULL == c || !IsObject(obj))
    {
        return false;
    }

    nlen = strlen(name);
    key = (char *)malloc(nlen + 1);
    if(NULL == key)
    {
        return false;
    }
    memcpy(key, name, nlen + 1); // includes the terminating NUL

    free(c->name);
    c->name = key;

    AppendElement(obj, c);
    return true;
}

/*
 * Remove and release the last element of an array node.
 *
 * @return  false when obj is not an array or the array is empty, true otherwise
 */
bool RemoveArrayElement(JsonObjPtr obj)
{
    int last;

    if(!IsArray(obj) || obj->length < 1)
    {
        return false;
    }

    last = obj->length - 1;
    Free(IndexObj(obj, last));
    obj->length = last;
    return true;
}

/*
 * Remove the first element called name from an object node.
 *
 * The removed node is released with Free() - previously it was only unlinked
 * and therefore leaked, unlike RemoveArrayElement which releases the node it
 * removes. Elements after it move one slot towards the front.
 *
 * @obj  node to modify; must be an Object node
 * @name  key to look for; must not be NULL
 * @return  false when name is NULL or obj is not an Object node; true
 *          otherwise, including the case where no element has that key
 */
bool RemoveObjectElement(JsonObjPtr obj, const char *name)
{
    int i = 0;
    JsonObjPtr victim = NULL;

    if(NULL == name || !IsObject(obj))
    {
        return false;
    }

    while(i < obj->length)
    {
        JsonObjPtr ptr = IndexObj(obj, i);
        if(NULL != ptr && NULL != ptr->name && 0 == strcmp(name, ptr->name))
        {
            victim = ptr;
            break;
        }
        ++i;
    }

    if(NULL == victim)
    {
        return true; // nothing to remove
    }

    Free(victim);

    // close the gap left by the removed element
    --obj->length;
    while(i < obj->length)
    {
        IndexObj(obj, i) = IndexObj(obj, i + 1);
        ++i;
    }
    return true;
}

/*
 * Get the element at position index of an array node.
 *
 * @obj  node to read; must be an Array node
 * @index  zero-based position
 * @return  the element, or NULL when obj is not an array or index is outside
 *          [0, length) - negative indexes used to read before the table
 */
JsonObjPtr GetArrayElement(JsonObjPtr obj, int index)
{
    if(!IsArray(obj))
    {
        return NULL;
    }

    if(index < 0 || index >= obj->length)
    {
        return NULL;
    }

    return IndexObj(obj, index);
}

/*
 * Look up the first element of an object node whose name equals name.
 *
 * @return  the matching element, or NULL when obj is not an object or no
 *          element has that name
 */
JsonObjPtr GetObjectElement(JsonObjPtr obj, const char *name)
{
    JsonObjPtr match = NULL;
    int pos;

    if(IsObject(obj))
    {
        for(pos = 0; NULL == match && pos < obj->length; ++pos)
        {
            JsonObjPtr candidate = IndexObj(obj, pos);
            if(strcmp(name, candidate->name) == 0)
            {
                match = candidate;
            }
        }
    }

    return match;
}

/*
 * Start iterating an array or object node: reset its read cursor to 0 and
 * return the first element.
 *
 * @return  the first element, or NULL when obj is neither an array nor an
 *          object, or when it has no elements
 */
JsonObjPtr FirstElement(JsonObjPtr obj)
{
    if(IsArray(obj) || IsObject(obj))
    {
        obj->rindex = 0;
        if(obj->length > 0)
        {
            return IndexObj(obj, 0);
        }
    }

    return NULL;
}

/*
 * Continue iterating an array or object node: advance its read cursor by one
 * and return the element at the new position.
 *
 * @return  that element, or NULL when obj is neither an array nor an object,
 *          or when the cursor has moved past the last element
 */
JsonObjPtr NextElement(JsonObjPtr obj)
{
    int cursor;

    if(!(IsArray(obj) || IsObject(obj)))
    {
        return NULL;
    }

    cursor = obj->rindex + 1;
    obj->rindex = cursor;
    if(cursor < obj->length)
    {
        return IndexObj(obj, cursor);
    }
    return NULL;
}

/*
 * =========================================================
 * ================== Parser implementation ================
 * =========================================================
 */
// Token types; the values are also used as column indexes of the ACTION table
#define TTYPE_NUMBER 0 // number
#define TTYPE_STRING 1 // string
#define TTYPE_BOOL 2 // true, false
#define TTYPE_NULL 3 // null
#define TTYPE_UP_OBJECT 4 // {
#define TTYPE_DOWN_OBJECT 5 // }
#define TTYPE_UP_ARRAY 6 // [
#define TTYPE_DOWN_ARRAY 7 // ]
#define TTYPE_COMMA 8 // comma
#define TTYPE_COLON 9 // colon
#define TTYPE_END 10 // end marker

// Character classification helpers (is_digital evaluates its argument twice)
#define is_point(x) ((x) == '.')
#define is_digital(x) ((x) <= '9' && (x) >= '0')

// One lexical token produced by GetTokenList
struct Token
{
    int row; // row recorded for the token
    int col; // column recorded for the token
    int type; // one of the TTYPE_* values
    char *str; // heap copy of the token text for number/string/bool/null tokens, NULL otherwise
};

static int GetTokenList(const char *cur, const char *end, struct Token **out, int *err_code)
{
    const char *tmp = NULL;
    struct Token *v, *tmp_token = NULL;
    int count = 0, cap = 1; // 数量、容量
    int row = 1, col = 0;// 当前行列号
    bool point = false; // 是否遇到小数点

    while(cur != end)
    {
        if(count >= cap)
        {
            cap *= 2;
            *out = (struct Token *)realloc(*out, sizeof(struct Token) * cap);
        }

        v = &(*out)[count];
        ++col;
        v->row = row;
        v->col = col;

        switch(*cur)
        {
        case '{':
            v->type = TTYPE_UP_OBJECT;
            v->str = NULL;
            break;
        case '}':
            v->type = TTYPE_DOWN_OBJECT;
            v->str = NULL;
            break;
        case '[':
            v->type = TTYPE_UP_ARRAY;
            v->str = NULL;
            break;
        case ']':
            v->type = TTYPE_DOWN_ARRAY;
            v->str = NULL;
            break;
        case ':':
            v->type = TTYPE_COLON;
            v->str = NULL;
            break;
        case ',':
            v->type = TTYPE_COMMA;
            v->str = NULL;
            break;
        case '\\':
            break;
        case '-':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            tmp = cur;
            ++tmp;
            while(is_digital(*tmp) && tmp != end)
            { 
                if(is_point(*++tmp))
                {
                    ++tmp; 
                    if(point)
                    {
                        *err_code = MULTIPLE_POINTS;
                        return count;
                    }
                    point = true;
                }
            }

            point = false;
            v->type = TTYPE_NUMBER;
            v->str = malloc(tmp - cur + 1);
            memcpy(v->str, cur, tmp - cur);
            v->str[tmp - cur] = '\0';
            cur = --tmp;
            break;

        case '"':
            tmp = cur;
            ++tmp;
            while(tmp != end)
            { 
                if(*tmp == '\\')
                {
                    if(++tmp == end)
                    {
                        break;
                    }
                }
                else if(*tmp == '"')
                {
                    break;
                }
                ++tmp;
            }
            v->type = TTYPE_STRING;
            v->str = malloc(tmp - cur);
            memcpy(v->str, cur + 1, tmp - cur - 1);
            v->str[tmp - cur - 1] = '\0';
            cur = tmp;
            break;

        case 'n':
            if(cur + 3 >= end)
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            if(*(cur + 1) != 'u' || *(cur + 2) != 'l' || *(cur + 3) != 'l')
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            v->type = TTYPE_NULL;
            v->str = malloc(5);
            memcpy(v->str, cur, 4);
            v->str[4] = '\0';
            cur += 3;
            break;
        
        case 't':
            if(cur + 3 >= end)
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            if(*(cur + 1) != 'r' || *(cur + 2) != 'u' || *(cur + 3) != 'e')
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            v->type = TTYPE_BOOL;
            v->str = malloc(5);
            memcpy(v->str, cur, 4);
            v->str[4] = '\0';
            cur += 3;
            break;

        case 'f':
            if(cur + 4 >= end)
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            if(*(cur + 1) != 'a' || *(cur + 2) != 'l' || *(cur + 3) != 's' || *(cur + 4) != 'e')
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            v->type = TTYPE_BOOL;
            v->str = malloc(6);
            memcpy(v->str, cur, 5);
            v->str[5] = '\0';
            cur += 4;
            break;

        case ' ':
        case '\t':
            --count;
            break;
        case '\n':
        case '\r':
            ++row;
            col = 1;
            --count;
            break;
        default:
            *err_code = UNKONW_CHARACTOR;
            return count;
        }

        ++count;
        ++cur;
    }

    if(count > 0)
    {
        if(count >= cap)
        {
            cap *= 2;
            *out = (struct Token *)realloc(*out, sizeof(struct Token) * cap);
        }

        v = &(*out)[count++];
        tmp_token = &(*out)[count - 2];
        v->type = TTYPE_END;
        v->str = NULL;
        v->row = tmp_token->row;
        v->col = tmp_token->col + (NULL == tmp_token->str ? 0 : strlen(tmp_token->str));
    }

    *err_code = SUCCESS;
    return count;
}

#ifdef _DEBUG
#include <stdio.h> 
/* Debug helper: print the state stack as a comma-separated list. */
static void _PrintStack(int *st, int count)
{
    int pos;

    for(pos = 0; pos < count; ++pos)
    {
        if(pos > 0)
        {
            putchar(',');
        }
        printf("%d", st[pos]);
    }
}
/* Debug helper: print the symbol of a token type; unknown types print nothing. */
static void _PrintType(int type)
{
    static const char *const labels[] =
    {
        [TTYPE_NUMBER] = "number",
        [TTYPE_STRING] = "string",
        [TTYPE_BOOL] = "bool",
        [TTYPE_NULL] = "null",
        [TTYPE_UP_OBJECT] = "{",
        [TTYPE_DOWN_OBJECT] = "}",
        [TTYPE_UP_ARRAY] = "[",
        [TTYPE_DOWN_ARRAY] = "]",
        [TTYPE_COMMA] = ",",
        [TTYPE_COLON] = ":",
        [TTYPE_END] = "$",
    };
    const int label_count = (int)(sizeof(labels) / sizeof(labels[0]));

    if(type >= 0 && type < label_count && NULL != labels[type])
    {
        fputs(labels[type], stdout);
    }
}

/* Tracing is active: forward to the helpers above. */
#define PrintStack(st, count) _PrintStack(st, count)
#define Printf printf
#define PrintType(t) _PrintType(t)

#else

/* Tracing is disabled: the macros expand to nothing. */
#define PrintStack(st, count)
#define Printf(...)
#define PrintType(t)

#endif

/*
 * 语法分析,判断语法是否合法
 */
static void CheckSyntax(
    struct Token *t, int size, int *err_code, int *err_row, int *err_col)
{
    // S:0  A:1  B:2  C:3  D:4
    static const int INFER[14] = { 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 4 };
    // 规约式长度
    static const int STATUTE_LEN[14] = { 2, 1, 1, 1, 1, 1, 3, 3, 2, 2, 3, 3, 1, 3};
    static const int GOTO[23][5] =
    {
            //     S        A        B        C        D
    /* 1*/  {INT_MIN,       3,       4, INT_MIN, INT_MIN},
    /* 2*/  {INT_MIN, INT_MIN, INT_MIN,      11, INT_MIN},
    /* 3*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 4*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 5*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 6*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 7*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 8*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 9*/  {INT_MIN, INT_MIN,      19, INT_MIN,      20},
    /*10*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*11*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*12*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*13*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*14*/  {INT_MIN, INT_MIN, INT_MIN,      15, INT_MIN},
    /*15*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*16*/  {INT_MIN, INT_MIN,      17, INT_MIN, INT_MIN},
    /*17*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*18*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*19*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*20*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*21*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*22*/  {INT_MIN, INT_MIN,      19, INT_MIN,      23},
    /*23*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    };

    static const int ACTION[23][11] = 
    {
            //    数字   字符串    布尔值     空值        {        }        [        ]        ,        :        $
    /* 1*/  {       5,       6,       7,       8,       2, INT_MIN,       9, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 2*/  { INT_MIN,      12, INT_MIN, INT_MIN, INT_MIN,      10, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 3*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,       0},
    /* 4*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -2},
    /* 5*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -3, INT_MIN,      -3,      -3, INT_MIN,      -3},
    /* 6*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -4, INT_MIN,      -4,      -4, INT_MIN,      -4},
    /* 7*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -5, INT_MIN,      -5,      -5, INT_MIN,      -5},
    /* 8*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -6, INT_MIN,      -6,      -6, INT_MIN,      -6},
    /* 9*/  {       5,       6,       7,       8,       2, INT_MIN,       9,      18, INT_MIN, INT_MIN, INT_MIN},
    /*10*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -9, INT_MIN,      -9,      -9, INT_MIN,      -9},
    /*11*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      13, INT_MIN, INT_MIN,      14, INT_MIN, INT_MIN},
    /*12*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      16, INT_MIN},
    /*13*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -7, INT_MIN,      -7,      -7, INT_MIN,      -7},
    /*14*/  { INT_MIN,      12, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*15*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,     -12, INT_MIN, INT_MIN,      14, INT_MIN, INT_MIN},
    /*16*/  {       5,       6,       7,       8,       2, INT_MIN,       9, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*17*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,     -11, INT_MIN, INT_MIN,     -11, INT_MIN, INT_MIN},
    /*18*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,     -10, INT_MIN,     -10,     -10, INT_MIN,     -10},
    /*19*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,     -13,     -13, INT_MIN, INT_MIN},
    /*20*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      21,      22, INT_MIN, INT_MIN},
    /*21*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -8, INT_MIN,      -8,      -8, INT_MIN,      -8},
    /*22*/  {       5,       6,       7,       8,       2, INT_MIN,       9, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*23*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,     -14,      22, INT_MIN, INT_MIN},
    };

    struct Token *cur = NULL; // 当前token
    int index = 0; // Token序列访问索引

    int state_size = 16; // 状态栈空间大小
    int state_index = 0; // 状态栈元素数量
    int *st = malloc(sizeof(int) * state_size); // 状态栈

    int vi = 0; // 当前状态栈顶值
    int action_state = 0; // 当前action表值
    int goto_state = 0; // 当前goto表值
    int infer_state = 0; // 当前规约获得的非终结符

    st[state_index++] = 1;

    while(state_index > 0)
    {
        PrintStack(st, state_index);
        // 取栈顶元素
        vi = st[state_index - 1];
        cur = &t[index];
        
        Printf("  ");
        PrintType(cur->type);

        action_state = ACTION[vi - 1][cur->type];
        // 结束了
        if(action_state == 0)
        {
            Printf("  success\n");
            break;
        }
        // 非法语法
        else if(action_state == INT_MIN)
        {
            *err_code = ERROR;
            *err_row = cur->row;
            *err_col = cur->col;
            Printf("\n");
            return;
        }

        // 移进操作
        if(action_state > 0)
        {
            if(state_index >= state_size)
            {
                state_size *= 2;
                st = (int *)realloc(st, sizeof(int) * state_size);
            }
            st[state_index++] = action_state;
            ++index;
            Printf("  shift-%d", action_state);
        }
        // 规约操作
        else
        {
            state_index -= STATUTE_LEN[- action_state - 1];
            // 重新获取栈顶元素
            vi = st[state_index - 1];
            Printf("  reduce%d", action_state);

            // 获取goto表值
            infer_state = INFER[- action_state - 1];
            goto_state = GOTO[vi - 1][infer_state];
            if(goto_state == INT_MIN)
            {
                *err_code = ERROR;
                *err_row = cur->row;
                *err_col = cur->col;
                Printf("\n");
                return;
            }
            if(state_index >= state_size)
            {
                state_size *= 2;
                st = (int *)realloc(st, sizeof(int) * state_size);
            }
            st[state_index++] = goto_state;
            Printf("  goto-%d", goto_state);
        }

        Printf("\n");
    }

    if(index != size - 1)
    {
        Printf("\n");
        cur = &t[index];
        *err_code = ERROR;
        *err_row = cur->row;
        *err_col = cur->col;
    }

    free(st);
}

/*
 * Create the node that corresponds to a value token.
 *
 * For a string token the token's heap text is moved into the node (t->str is
 * set to NULL so that it is not released twice); the placeholder buffer that
 * CreateString allocates is released first - it used to be leaked.
 *
 * @t  token to convert
 * @return  the new node, or NULL when the token does not start a value or
 *          memory cannot be allocated
 */
static JsonObjPtr CreateNewObjWithToken(struct Token *t)
{
    JsonObjPtr result = NULL;
    switch (t->type)
    {
    case TTYPE_NUMBER:
        result = CreateNumber();
        if(NULL != result)
        {
            SetNumber(result, atof(t->str));
        }
        break;
    case TTYPE_STRING:
        result = CreateString();
        if(NULL != result)
        {
            free(result->data);
            result->data = t->str;
            t->str = NULL;
        }
        break;
    case TTYPE_BOOL:
        result = CreateBool();
        if(NULL != result)
        {
            SetBool(result, strcmp("true", t->str) == 0 ? true : false);
        }
        break;
    case TTYPE_NULL:
        result = CreateNull();
        break;
    case TTYPE_UP_OBJECT:
        result = CreateObject();
        break;
    case TTYPE_UP_ARRAY:
        result = CreateArray();
        break;
    default:
        break;
    }
    return result;
}

/*
 * 将token序列转化为Json对象
 */
static JsonObjPtr ChangeToJsonObject(struct Token *t)
{
    JsonObjPtr result = NULL; // 解析结果
    JsonObjPtr new_obj = NULL; // 新对象
    struct Token *cur = NULL; // 当前token
    JsonObjPtr top = NULL; // 栈顶结构
    int index = 0; // Token序列访问索引

    int st_cap = 4; // 栈大小
    int st_index = 0; // 栈元素数量
    JsonObjPtr *st = (JsonObjPtr *)malloc(sizeof(JsonObjPtr) * st_cap); // 栈

    struct Token *name_token = NULL; // 用于存储object时的name token

    result = CreateNewObjWithToken(&t[index++]); // 先创建根节点
    if(IsObject(result) || IsArray(result))
    {
        st[st_index++] = result;
    }

    while(st_index > 0)
    {
        cur = &t[index++];
        top = st[st_index - 1];

        switch (cur->type)
        {
        case TTYPE_NUMBER:
            new_obj = CreateNumber();
            SetNumber(new_obj, atof(cur->str));
            break;
        case TTYPE_STRING:
            if(NULL != name_token || !IsObject(top))
            {
                new_obj = CreateString();
                new_obj->data = cur->str;
                cur->str = NULL;
            }
            else 
            {
                name_token = cur;
                continue;
            }
            break;
        case TTYPE_BOOL:
            new_obj = CreateBool();
            SetBool(new_obj, strcmp("true", cur->str) == 0 ? true : false);
            break;
        case TTYPE_NULL:
            new_obj = CreateNull();
            break;
        case TTYPE_UP_OBJECT:
            new_obj = CreateObject();
            if(st_index >= st_cap)
            {
                st_cap *= 2;
                st = (JsonObjPtr *)realloc(st, sizeof(JsonObjPtr) * st_cap);
            }
            st[st_index++] = new_obj;
            break;
        case TTYPE_DOWN_OBJECT:
            --st_index;
            continue;
        case TTYPE_UP_ARRAY:
            new_obj = CreateArray();
            if(st_index >= st_cap)
            {
                st_cap *= 2;
                st = (JsonObjPtr *)realloc(st, sizeof(JsonObjPtr) * st_cap);
            }
            st[st_index++] = new_obj;
            break;
        case TTYPE_DOWN_ARRAY:
            --st_index;
            continue;
        case TTYPE_END:
            st_index = 0;
            continue;
        default:
            continue;
        }

        if(NULL != name_token)
        {
            new_obj->name = name_token->str;
            name_token->str = NULL;
            name_token = NULL;
        }
        AppendElement(top, new_obj);
    } 

    return result;
}

struct ParseResult Parse(const char *str, int size)
{
    struct ParseResult result;
    const char *cur = str, *end = str + size;
    struct Token *t = (struct Token *)malloc(sizeof(struct Token));
    int count = GetTokenList(cur, end, &t, &result.error_code);
    
    if(result.error_code != SUCCESS)
    {
        if(count > 0)
        {
            result.row = t[count - 1].row;
            result.col = t[count - 1].col;
        }
        else 
        {
            result.row = 0;
            result.col = 0;
        }
    }
    else if(count > 0)
    {
        CheckSyntax(t, count, &result.error_code, &result.row, &result.col);
        if(ParseSuccess(&result))
        {
            result.result = ChangeToJsonObject(t);
        }
    }
    while(count-- > 0)
    {
        if(NULL != t[count].str)
        {
            free(t[count].str);
        }
    }
    free(t);
    return result;
}

const char * ErrorMsg(const struct ParseResult *pr)
{
    if(NULL == pr)
    {
        return "";
    }

    switch (pr->error_code)
    {
    case ERROR:
        return "parse error";
    case UNKONW_CHARACTOR:
        return "unkown charactor";
    case LOST_QUOTATION:
        return "maybe lost quotation";
    case MULTIPLE_POINTS:
        return "decimal point is too many";
    }

    return "unkown error";
}

/*
 * Return the node stored in a parse result, or NULL when pr itself is NULL.
 */
JsonObjPtr GetJsonObjPtr(struct ParseResult *pr)
{
    if(NULL != pr)
    {
        return pr->result;
    }
    return NULL;
}

/*
 * Tell whether a parse result reports success; a NULL pr counts as failure.
 */
bool ParseSuccess(const struct ParseResult *pr)
{
    if(NULL == pr)
    {
        return false;
    }
    return pr->error_code == SUCCESS;
}

/*
 * Return the row stored in a parse result, or -1 when pr is NULL.
 */
int ParseErrorRow(const struct ParseResult *pr)
{
    if(NULL == pr)
    {
        return -1;
    }
    return pr->row;
}

/*
 * Return the column stored in a parse result, or -1 when pr is NULL.
 */
int ParseErrorCol(const struct ParseResult *pr)
{
    if(NULL == pr)
    {
        return -1;
    }
    return pr->col;
}

/*
 * Write num into buff as text with up to 16 significant digits.
 *
 * Uses the standard "%.16g" conversion, which prints neither trailing
 * fractional zeros nor a trailing decimal point. The previous version relied
 * on the non-standard gcvt() and trimmed zeros starting from a fixed offset,
 * which read uninitialized bytes for short numbers, could walk below the
 * start of the buffer, and turned e.g. "100" into "1" on libraries whose
 * gcvt() omits the trailing point.
 *
 * @num  value to convert
 * @buff  destination; receives the text followed by a terminating NUL
 * @return  number of characters written, not counting the terminating NUL
 */
static int NumberToString(double num, char *buff)
{
    char text[32]; // "%.16g" needs at most 23 characters plus the NUL
    int len = snprintf(text, sizeof(text), "%.16g", num);

    if(len < 0)
    {
        len = 0;
        text[0] = '\0';
    }
    else if(len >= (int)sizeof(text))
    {
        len = (int)sizeof(text) - 1;
    }

    memcpy(buff, text, (size_t)len + 1);
    return len;
}

/*
 * Copy the len-byte text str into buff count times, back to back.
 *
 * @return  total number of bytes written (len * count, or 0 when count <= 0)
 */
static int WriteBuffer(char *buff, const char *str, int len, int count)
{
    int written = 0;
    int round;

    for(round = 0; round < count; ++round)
    {
        memcpy(buff + written, str, len);
        written += len;
    }

    return written;
}

/*
 * Serialize the JSON tree rooted at obj into buff as text.
 *
 * The tree is walked iteratively with an explicit stack of the containers
 * (arrays/objects) that are currently open.
 *
 * obj:  root value; NULL produces an empty string.
 * buff: destination. No bounds checking is performed: the caller must supply
 *       a buffer large enough for the whole text plus the terminating '\0'.
 * newl: text written after an opening bracket, before a closing bracket and
 *       after the ',' between object members (NULL means "").
 * newc: indentation unit, repeated once per nesting level (NULL means "").
 * aoc:  text written after the ':' of an object member (NULL means "").
 * aac:  text written after the ',' between array elements (NULL means "").
 *
 * Empty containers are written as "[]" / "{}" without any inner line break.
 * Member names and string values are copied verbatim (no escaping is applied).
 * NOTE(review): relies on FirstElement()/NextElement() keeping the iteration
 * position inside the container itself, so an outer container can be resumed
 * after a nested one was walked - confirm against their definitions.
 *
 * Returns the number of characters written (excluding the '\0'), 0 for a NULL
 * obj, or -1 when buff is NULL or the container stack cannot be allocated
 * (buff, when non-NULL, then holds an empty string).
 */
int ToString(JsonObjPtr obj, char *buff, const char *newl, const char *newc, const char *aoc, const char *aac)
{
    int buff_index = 0; // next write position in buff
    int str_length = 0;
    const char *text = NULL;

    JsonObjPtr cur = obj, next = NULL, parent = NULL;
    int st_cap = 4; // stack capacity
    int st_index = 0; // number of containers currently open
    JsonObjPtr *st = NULL; // stack of open containers
    JsonObjPtr *grown = NULL;

    int newl_len = strlen(NULL == newl ? (newl = "") : newl);
    int newc_len = strlen(NULL == newc ? (newc = "") : newc);
    int aoc_len = strlen(NULL == aoc ? (aoc = "") : aoc);
    int aac_len = strlen(NULL == aac ? (aac = "") : aac);

    if(NULL == buff)
    {
        return -1;
    }
    if(NULL == obj)
    {
        buff[0] = '\0';
        return 0;
    }

    st = (JsonObjPtr *)malloc(sizeof(JsonObjPtr) * st_cap);
    if(NULL == st)
    {
        buff[0] = '\0';
        return -1;
    }

    for(;;)
    {
        if(NULL != parent)
        {
            // Resume iterating the container on top of the stack.
            next = NextElement(parent);
            if(NULL == next)
            {
                // No more children: close the container and pop it.
                buff_index += WriteBuffer(buff + buff_index, newl, newl_len, 1);
                buff_index += WriteBuffer(buff + buff_index, newc, newc_len, st_index - 1);

                buff[buff_index++] = parent->type == Array ? ']' : '}';
                if(--st_index == 0)
                {
                    break;
                }
                parent = st[st_index - 1];
                continue;
            }
            buff[buff_index++] = ',';
            if(IsObject(parent))
            {
                buff_index += WriteBuffer(buff + buff_index, newl, newl_len, 1);
                buff_index += WriteBuffer(buff + buff_index, newc, newc_len, st_index);
            }
            else
            {
                buff_index += WriteBuffer(buff + buff_index, aac, aac_len, 1);
            }
            cur = next;
        }
        else if(st_index > 0)
        {
            // A container was just opened: fetch its first child.
            parent = st[st_index - 1];
            cur = FirstElement(parent);
            if(NULL == cur)
            {
                // Empty container: close it right after the opening bracket.
                buff[buff_index++] = parent->type == Array ? ']' : '}';
                if(--st_index == 0)
                {
                    break;
                }
                parent = st[st_index - 1];
                continue;
            }
            // Line break and indentation in front of the first child.
            buff_index += WriteBuffer(buff + buff_index, newl, newl_len, 1);
            buff_index += WriteBuffer(buff + buff_index, newc, newc_len, st_index);
        }

        if(NULL != parent && IsObject(parent))
        {
            // Object members are written as "name":<aoc><value>.
            text = NULL == cur->name ? "" : cur->name;
            buff[buff_index++] = '"';
            buff_index += WriteBuffer(buff + buff_index, text, strlen(text), 1);
            buff[buff_index++] = '"';
            buff[buff_index++] = ':';
            buff_index += WriteBuffer(buff + buff_index, aoc, aoc_len, 1);
        }

        switch (cur->type)
        {
        case Null:
            buff_index += WriteBuffer(buff + buff_index, "null", 4, 1);
            break;
        case Array:
        case Object:
            buff[buff_index++] = cur->type == Array ? '[' : '{';

            if(st_index >= st_cap)
            {
                // Grow through a temporary so the old block is not lost on failure.
                grown = (JsonObjPtr *)realloc(st, sizeof(JsonObjPtr) * st_cap * 2);
                if(NULL == grown)
                {
                    free(st);
                    buff[0] = '\0';
                    return -1;
                }
                st = grown;
                st_cap *= 2;
            }
            st[st_index++] = cur;
            // parent == NULL makes the next pass start with FirstElement().
            parent = NULL;
            break;
        case Bool:
            if(GetBool(cur))
            {
                buff_index += WriteBuffer(buff + buff_index, "true", 4, 1);
            }
            else
            {
                buff_index += WriteBuffer(buff + buff_index, "false", 5, 1);
            }
            break;
        case Number:
            buff_index += NumberToString(GetNumber(cur), buff + buff_index);
            break;
        case String:
            text = GetString(cur);
            if(NULL == text)
            {
                text = "";
            }
            buff[buff_index++] = '"';
            str_length = strlen(text);
            memcpy(buff + buff_index, text, str_length);
            buff_index += str_length;
            buff[buff_index++] = '"';
            break;
        default:
            break;
        }

        if(0 == st_index)
        {
            // The root is not a container, so there is nothing left to walk.
            break;
        }
    }

    buff[buff_index] = '\0';
    free(st);
    return buff_index;
}

/*
 * Serialize obj into buff in the indented, multi-line layout.
 *
 * Forwards to ToString() with "\n" as line break, four spaces as the
 * indentation unit and a single space after ':' and after array commas.
 * Returns whatever ToString() returns.
 */
int ToFormatString(JsonObjPtr obj, char *buff)
{
    const char *line_break = "\n";
    const char *indent_unit = "    ";
    const char *after_colon = " ";
    const char *after_comma = " ";

    return ToString(obj, buff, line_break, indent_unit, after_colon, after_comma);
}

/*
 * Serialize obj into buff in the compact, single-line layout.
 *
 * Forwards to ToString() with every separator argument set to NULL.
 * Returns whatever ToString() returns.
 */
int ToUnformatString(JsonObjPtr obj, char *buff)
{
    const char *no_separator = NULL;

    return ToString(obj, buff, no_separator, no_separator, no_separator, no_separator);
}

3. 测试

测试代码

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libjson.h"

// Sample JSON document fed to the parser by test(): an object with a number,
// two strings and an array holding null/true/false.
// The literal is spliced together with backslash line continuations, so the
// leading and trailing spaces of every line are part of the string.
static const char *input_json_str =
"                       \
{                       \
    \"aaaa\": 1000,     \
    \"bbbb\": \"value\",\
    \"cccc\": [         \
        null, true, false \
    ],                  \
    \"dddd\": \"wocao\" \
}                       \
";

/*
 * End-to-end demo: parse input_json_str, print it in compact form, append a
 * string member named "new node", then print the tree again in compact and in
 * formatted form, and finally release it.
 *
 * Stops early (with a message) when parsing fails, when the parse result
 * carries no root object, or when the new node cannot be created, so the
 * library is never handed a NULL object.
 */
static void test(void)
{
    char buff[1024];
    JsonObjPtr result = NULL, new_obj = NULL;
    struct ParseResult pr = Parse(input_json_str, strlen(input_json_str));
    if(!ParseSuccess(&pr))
    {
        printf("parse error!!! at row(%d) col(%d)\n", 
            ParseErrorRow(&pr), ParseErrorCol(&pr));
        return;
    }

    printf("parse success!!!\n");
    result = GetJsonObjPtr(&pr);
    if(NULL == result)
    {
        // Everything below dereferences the root, so there is nothing to show.
        printf("parse result has no json object\n");
        return;
    }

    ToUnformatString(result, buff);
    printf("unformat string: %s\n", buff);

    printf("-----------add node-------------\n");
    new_obj = CreateString();
    if(NULL == new_obj)
    {
        printf("create string node failed\n");
        Free(result);
        return;
    }
    SetString(new_obj, "yohohoho");
    AppendObjectElement(result, "new node", new_obj);
    ToUnformatString(result, buff);
    printf("unformat string: %s\n", buff);
    printf("==========================\n");
    ToFormatString(result, buff);
    printf("format string: \n%s\n", buff);

    Free(result);
}

/*
 * Program entry point: runs the demo once and reports success to the caller.
 */
int main(void)
{
    test();
    // system("pause");
    return EXIT_SUCCESS;
}

测试结果

①文法分析过程

 ②测试输出结果:

四、结语

这个json解析器是我在系统地复习了编译原理之后的实验品,如有问题,欢迎指出!

评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值