C语言学习记录20240609-CSDN博客

本文链接：https://blog.csdn.net/SmileBasic/article/details/139566656

功能：把网站地图的网址列表转换成树结构。接受标准输入的网址，输出结果为 json 格式。
命名：根据功能将程序命名为 UrlsToTree，太长不方便输入，太短容易重名，最后命名为 u2tr。
用法：

cat sitemap.txt | u2tr > tree.json
u2tr < sitemap.txt > tree.json

树结构：用结构体表示节点。节点包含子节点，结构体声明中要引用自身，形成递归结构，因此用 typedef 起别名时必须同时写出结构体名（如 struct TreeNode）。

节点数不确定，无法在声明时确定变量占用的内存大小，需要用 malloc 在堆上动态分配内存；动态分配的内存必须用 free 手动释放。

网址在第三个斜线处分割成根网址和路径两部分，并去掉问号之后的查询参数和井号之后的片段（fragment）。例如 https://example.com/a/b?x=1#top 分割为根网址 https://example.com 和路径 a/b。

需要避免重复创建节点。

需要转义网址中影响 json 输出结果的特殊符号，如双引号、反斜线。

最后还是参考《大话数据结构》160页树的孩子表示法，结合 AI 生成修改得到了下面的代码。

主要缺陷：没有考虑网址列表中存在不同根网址的情况（如不同协议、不同域名、不同端口），所有路径都会挂到第一个根网址之下。
（原文此处为一张图片，未保留。）

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

#define BUFFER_SIZE 1024

// A URL split into two parts: the root URL and the path.
typedef struct {
    char *part1; // root URL: the text before the third '/' (set by split_url)
    char *part2; // path: the text after the third '/' (set by split_url)
} URL;

/*
 * Truncate url in place at its first '?' or '#', which drops the query
 * string and the fragment. A string containing neither character is not
 * written to.
 */
void remove_query_fragment(char *url) {
    // strcspn gives the offset of the first '?' or '#', or the string length
    // when neither occurs; in the latter case url[cut] is the terminator.
    size_t cut = strcspn(url, "?#");

    if (url[cut] != '\0') {
        url[cut] = '\0';
    }
}

/*
 * Split a URL at its third '/' into a root URL and a path.
 *
 * The query string and fragment (everything from the first '?' or '#') are
 * removed first. The root URL is the text before the third '/', the path is
 * the text after it; the third '/' itself belongs to neither part.
 * Example: "https://host/a/b?x=1" -> part1 "https://host", part2 "a/b".
 *
 * Returns true on success; result->part1 and result->part2 then point to
 * heap-allocated strings that the caller must release with free().
 * Returns false and leaves *result untouched when url or result is NULL,
 * when fewer than three '/' remain after stripping, or when memory cannot
 * be allocated.
 */
bool split_url(const char *url, URL *result) {
    if (url == NULL || result == NULL) {
        return false;
    }

    // Work on a private, writable copy of the URL. malloc + memcpy is used
    // rather than strdup so that only ISO C library functions are needed.
    size_t url_size = strlen(url) + 1;
    char *root = malloc(url_size);
    if (root == NULL) {
        return false;
    }
    memcpy(root, url, url_size);
    remove_query_fragment(root);

    // Locate the third '/'; scan always points just past the last one found.
    char *slash = NULL;
    char *scan = root;
    for (int i = 0; i < 3; i++) {
        slash = strchr(scan, '/');
        if (slash == NULL) {
            free(root);
            return false;
        }
        scan = slash + 1;
    }

    // Copy the path (everything after the third '/') into its own buffer.
    size_t path_size = strlen(scan) + 1;
    char *path = malloc(path_size);
    if (path == NULL) {
        free(root);
        return false;
    }
    memcpy(path, scan, path_size);

    // Cutting the copy at the third '/' leaves exactly the root URL in it,
    // so the copy itself is handed to the caller as part1.
    *slash = '\0';
    result->part1 = root;
    result->part2 = path;
    return true;
}

/*
 * Return the character that follows the backslash in the two-character JSON
 * escape of byte c, or 0 when c has no two-character escape.
 */
static char json_short_escape(unsigned char c) {
    switch (c) {
    case '"':  return '"';
    case '\\': return '\\';
    case '\b': return 'b';
    case '\f': return 'f';
    case '\n': return 'n';
    case '\r': return 'r';
    case '\t': return 't';
    default:   return 0;
    }
}

/*
 * Return a newly allocated copy of str that can be placed between double
 * quotes in JSON output.
 *
 * '"' and '\\' are backslash-escaped; the control characters \b \f \n \r \t
 * get their two-character escapes; every other byte below 0x20 is written as
 * \u00XX. All remaining bytes (including bytes >= 0x80, e.g. UTF-8 sequences)
 * are copied unchanged.
 *
 * The caller owns the result and must free() it. Returns NULL when str is
 * NULL or when memory cannot be allocated.
 */
char* escape_json_string(const char* str) {
    static const char hex_digits[] = "0123456789abcdef";

    if (str == NULL) {
        return NULL;
    }

    // First pass: compute the exact output size, including the terminator.
    size_t needed = 1;
    for (const char *s = str; *s != '\0'; s++) {
        unsigned char c = (unsigned char)*s;
        size_t width = json_short_escape(c) != 0 ? 2 : (c < 0x20 ? 6 : 1);
        if (needed > (size_t)-1 - width) {
            return NULL; // size computation would overflow size_t
        }
        needed += width;
    }

    char *escaped_str = malloc(needed);
    if (escaped_str == NULL) {
        return NULL;
    }

    // Second pass: emit the escaped bytes.
    char *p = escaped_str;
    for (const char *s = str; *s != '\0'; s++) {
        // Compare as unsigned char so bytes >= 0x80 are never mistaken for
        // control characters on platforms where plain char is signed.
        unsigned char c = (unsigned char)*s;
        char short_escape = json_short_escape(c);

        if (short_escape != 0) {
            *p++ = '\\';
            *p++ = short_escape;
        } else if (c < 0x20) {
            *p++ = '\\';
            *p++ = 'u';
            *p++ = '0';
            *p++ = '0';
            *p++ = hex_digits[c >> 4];
            *p++ = hex_digits[c & 0x0f];
        } else {
            *p++ = (char)c;
        }
    }
    *p = '\0';

    return escaped_str;
}

// Tree node: a data string plus a dynamically sized array of child nodes.
// The struct tag is needed because the struct refers to itself.
typedef struct TreeNode {
    char *data;                 // node text; createNode stores the JSON-escaped string here
    struct TreeNode **children; // array of child pointers; NULL until a child is added
    int childCount;             // number of entries in children
} TreeNode;

/*
 * Create a new node with no children.
 *
 * The node stores its own JSON-escaped copy of data (produced by
 * escape_json_string), so the caller keeps ownership of data.
 *
 * Returns the new node, to be released with destroyTree(), or NULL when the
 * node cannot be allocated or escape_json_string returns NULL.
 */
TreeNode *createNode(const char *data) {
    TreeNode *newNode = malloc(sizeof *newNode);
    if (newNode == NULL) {
        return NULL;
    }

    newNode->data = escape_json_string(data); // Escape JSON special characters
    if (newNode->data == NULL) {
        free(newNode); // do not leak the half-built node
        return NULL;
    }

    newNode->childCount = 0;
    newNode->children = NULL;
    return newNode;
}

/*
 * Free a node and its whole subtree: every descendant, then the node's data
 * string, its child array and the node itself. A NULL root is ignored.
 */
void destroyTree(TreeNode *root) {
    if (!root) {
        return;
    }

    // Walk the child array front to back, releasing each subtree.
    TreeNode **kid = root->children;
    int remaining = root->childCount;
    while (remaining > 0) {
        destroyTree(*kid);
        kid++;
        remaining--;
    }

    free(root->data);
    free(root->children);
    free(root);
}

/*
 * Add child to parent unless parent already has a child with the same data.
 *
 * addChild takes ownership of child: if an equal child already exists, or if
 * the insertion fails, child (with any subtree it owns) is destroyed.
 *
 * Returns the node that now represents child's data under parent, either
 * child itself or the already existing equal child. Returns NULL when parent
 * or child is NULL or when the child array cannot be grown.
 * The data of child and of all existing children must be non-NULL.
 */
TreeNode *addChild(TreeNode *parent, TreeNode *child) {
    if (child == NULL) {
        return NULL;
    }
    if (parent == NULL) {
        destroyTree(child);
        return NULL;
    }

    // Reuse an existing child that carries the same data.
    for (int i = 0; i < parent->childCount; i++) {
        TreeNode *existing = parent->children[i];
        if (existing == child) {
            return existing; // already attached; destroying it would corrupt the tree
        }
        if (strcmp(existing->data, child->data) == 0) {
            destroyTree(child); // the duplicate is not needed
            return existing;
        }
    }

    // Grow the child array by one slot. The result goes through a temporary
    // so that parent->children stays valid if realloc fails.
    size_t newCount = (size_t)parent->childCount + 1;
    TreeNode **grown = realloc(parent->children, newCount * sizeof *grown);
    if (grown == NULL) {
        destroyTree(child);
        return NULL;
    }
    parent->children = grown;
    parent->children[parent->childCount++] = child;
    return child;
}

/*
 * Insert a '/'-separated path into the tree below root.
 *
 * Each non-empty path segment becomes one level of the tree; segments that
 * already exist at a level are reused (see addChild). Empty segments, i.e.
 * leading, trailing or repeated '/', are skipped.
 *
 * path is modified in place: the '/' after each processed segment is
 * overwritten with '\0'. The scan keeps no hidden state between calls,
 * unlike strtok.
 *
 * Does nothing when root or path is NULL. Stops early, keeping the nodes
 * inserted so far, if a node cannot be created or added.
 */
void convertPathToTree(TreeNode *root, char *path) {
    if (root == NULL || path == NULL) {
        return;
    }

    TreeNode *currentNode = root;
    char *cursor = path;

    for (;;) {
        cursor += strspn(cursor, "/"); // skip separators before the segment
        if (*cursor == '\0') {
            break;
        }

        char *segmentEnd = cursor + strcspn(cursor, "/");
        bool isLast = (*segmentEnd == '\0');
        *segmentEnd = '\0'; // terminate the segment so it can be used as a string

        TreeNode *newNode = createNode(cursor);
        if (newNode == NULL) {
            return;
        }
        currentNode = addChild(currentNode, newNode);
        if (currentNode == NULL) {
            return;
        }

        if (isLast) {
            break;
        }
        cursor = segmentEnd + 1;
    }
}

/*
 * Print the subtree rooted at node to standard output as JSON, depth first.
 *
 * A node with non-NULL data is written as
 *   {"data": "<data>", "children": [<child>,<child>,...]}
 * A node whose data is NULL prints only its comma-separated children.
 * A NULL node prints nothing.
 */
void depthFirstTraversal(TreeNode *node) {
    if (node == NULL) {
        return;
    }

    const bool hasData = (node->data != NULL);

    // Opening part of this node's JSON object.
    if (hasData) {
        printf("{\"data\": \"%s\", \"children\": [", node->data);
    }

    // Children, with a comma between consecutive entries.
    int i = 0;
    while (i < node->childCount) {
        if (i > 0) {
            printf(",");
        }
        depthFirstTraversal(node->children[i]);
        i++;
    }

    // Close the child array and the object.
    if (hasData) {
        printf("]}");
    }
}

/*
 * Read URLs from standard input, one per line, merge their paths into a
 * single tree and print that tree to standard output as one line of JSON.
 *
 * Lines that cannot be split by split_url, and lines longer than
 * BUFFER_SIZE - 1 bytes, are reported on standard error and skipped.
 * Nothing is printed when no URL could be processed.
 *
 * NOTE: the root URL of the first accepted line becomes the tree root; the
 * paths of all later lines are inserted below it even if their own root URL
 * (scheme, host or port) differs.
 *
 * Returns 0 normally, EXIT_FAILURE if the root node cannot be created.
 */
int main(void) {
    char buffer[BUFFER_SIZE];
    TreeNode *root = NULL;

    // Read the URL list line by line.
    while (fgets(buffer, BUFFER_SIZE, stdin)) {
        size_t len = strlen(buffer);

        // A full buffer without a newline means the line may continue.
        // Peek at the next character: if the line really is longer, discard
        // the rest instead of treating the tail as a separate URL.
        if (len == BUFFER_SIZE - 1 && buffer[len - 1] != '\n') {
            int ch = getchar();
            if (ch != EOF && ch != '\n') {
                while ((ch = getchar()) != EOF && ch != '\n') {
                    // skip the remainder of the overlong line
                }
                fprintf(stderr, "Skipping line longer than %d bytes\n", BUFFER_SIZE - 1);
                continue;
            }
        }

        // Strip the line ending; handles both "\n" and "\r\n".
        buffer[strcspn(buffer, "\r\n")] = '\0';

        URL result;
        if (!split_url(buffer, &result)) {
            fprintf(stderr, "Failed to split URL: %s\n", buffer);
            continue;
        }

        if (root == NULL) {
            root = createNode(result.part1);
            if (root == NULL) {
                fprintf(stderr, "Failed to create root node\n");
                free(result.part1);
                free(result.part2);
                return EXIT_FAILURE;
            }
        }

        convertPathToTree(root, result.part2);

        free(result.part1);
        free(result.part2);
    }

    if (root != NULL) {
        // Print the tree depth first, then release it.
        depthFirstTraversal(root);
        printf("\n");

        destroyTree(root);
    }

    return 0;
}