通过ignore配置文件定义统计文件大小时需要忽略的文件

本文介绍了一个用于文件系统统计的C++程序:程序读取ini配置文件中的忽略规则(文件扩展名),将其存入Trie树,然后递归遍历指定目录,跳过扩展名被忽略的文件,统计其余文件的大小,并按T/G/M/K/B单位展示明细。此外,还实现了一个Trie树类,用于高效存储字符串并按前缀查找。(正则表达式仅出现在开头被注释掉的示例代码中。)

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

/*#include <iostream>
#include <string>
#include <regex>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/ini_parser.hpp>

int main() {
    std::regex reg1(".*\\.xml");
    bool found = regex_match("value.xml",reg1);
    std::cout << found << std::endl;

    boost::property_tree::ptree properties;
    boost::property_tree::ini_parser::read_ini("./ignore.ini", properties);
    boost::property_tree::basic_ptree<std::string, std::string>items = properties.get_child("");
    for (auto item = begin(items);item != end(items);++item) {
        std::cout << item->first.data() << " " << item->second.data() << std::endl;
    }
    return 0;
}*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <dirent.h>
#include <iostream>
#include <string>
#include <vector>
#include <set>
#include <regex>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/ini_parser.hpp>
#include "trie_tree.hpp"
// Number of size units kept per entry: bytes, K, M, G and T.
static const int SIZE_UNIT_NUM = 5;

/**
 * Size record for one file-system entry.
 *
 * total_size holds the size in bytes; detail_size holds the same value split
 * into per-unit parts, index 0 being bytes and index 4 being T.
 */
struct file_info {
    /** Creates a record named `str` with every size field set to zero. */
    file_info(const std::string &str)
        : file_name(str),
          total_size(0),
          detail_size(static_cast<size_t>(SIZE_UNIT_NUM), static_cast<size_t>(0)) {
    }

    /** Prints the name, the byte total and the per-unit breakdown on one stdout line. */
    inline void show_info() const {
        // Unit suffix for each detail_size slot, indexed the same way.
        static const char unit_suffix[SIZE_UNIT_NUM + 1] = "BKMGT";

        std::cout << "file name = " << file_name;
        std::cout << " size = " << total_size << "B";
        std::cout << " detail size:";
        // Largest unit is printed first.
        for (int unit = SIZE_UNIT_NUM - 1; unit >= 0; --unit) {
            std::cout << detail_size[unit] << unit_suffix[unit];
        }
        std::cout << std::endl;
    }

    std::string file_name;
    size_t total_size;
    std::vector<size_t> detail_size; // Byte KB MB GB TB
};
class file_system {
public:
    file_system() = default;
    file_system(const file_system &) = delete;
    file_system & operator = (const file_system &) = delete;
    ~file_system() = default;
public:
    bool init(const char *path = "./ignore.ini") {
        if (false == trieTree_.init()) {
            std::cerr << "trie tree init failed." << std::endl;
            return false;
        }
        parse_ini_file(path);
        return true;
    }
    void stat_file_info(const char *dir_path) {
        get_file_info(dir_path);
        for (const auto &info : file_info_set_) {
            total_size_ += info.total_size;
            info.show_info();
        }
        file_info fileinfo(dir_path);
        stat_file_size(fileinfo, total_size_);
        fileinfo.show_info();
    }
    size_t get_total_dir_size() const {
        return total_size_;
    }
private:
    void parse_ini_file(const char *path) {
        boost::property_tree::ptree properties;
        try {
            boost::property_tree::ini_parser::read_ini(path, properties);
        }
        catch (std::exception &e) {
            std::cerr << e.what() << std::endl;
            return;
        }
        boost::property_tree::basic_ptree<std::string, std::string>items = properties.get_child("");
        for (auto item = begin(items);item != end(items);++item) {
            trieTree_.insert(item->second.data().c_str());
        }
    }
    bool need_to_ignore(const char *name) {
        int j = strlen(name) - 1;
        for (;j >= 0;j--) {
            if ('.' == name[j]) {
                break;
            }
        }
        if (j < 0) {
            return false;
        }
        name += (j + 1);
        return trieTree_.find(name);
    }
    void stat_file_size(file_info &fileinfo, size_t bytes) {
        fileinfo.total_size = bytes;
        for (auto i = 0;i < SIZE_UNIT_NUM;i++) {
            auto x = bytes / 1024;
            auto y = bytes % 1024;
            fileinfo.detail_size[i] = y;
            if (0 == x) {
                break;
            }
            bytes = x;
        }
    }
    void get_file_info(const char *dir_path) {
        DIR *dirp = nullptr;
        struct stat stat_buf;
        dirp = ::opendir(dir_path);
        if (nullptr == dirp) {
            std::cerr << dir_path << " opendir failed." << std::endl;
            ::exit(-1);
        }
        ::chdir(dir_path);
        struct dirent *entry = nullptr;
        while (nullptr != (entry = readdir(dirp))) {
            ::lstat(entry->d_name, &stat_buf);
            if (true == need_to_ignore(entry->d_name)) {
                continue;
            }
            if (!S_ISDIR(stat_buf.st_mode)) {
                file_info fileinfo(entry->d_name);
                stat_file_size(fileinfo, stat_buf.st_size);
                file_info_set_.emplace_back(fileinfo);
                continue;
            }
            if (0 == strcmp(entry->d_name, ".") || 0 == strcmp(entry->d_name, "..")) {
                continue;
            }
            file_info fileinfo(entry->d_name);
            stat_file_size(fileinfo, stat_buf.st_size);
            file_info_set_.emplace_back(fileinfo);
            get_file_info(entry->d_name);
        }
        ::chdir("..");
        ::closedir(dirp);
    }
 
private:
    size_t total_size_ = 0;
    std::vector<file_info>file_info_set_;
    trie_tree trieTree_;
};
 
/**
 * Loads the ignore rules from ./ignore.ini and prints the size statistics of
 * a directory tree.
 *
 * Usage: <program> [directory]
 * When no directory is given, "/root/rel/log" is scanned.
 *
 * @return 0 on success, -1 when the scanner cannot be initialised.
 */
int main(int argc, char *argv[]) {
    const char *dir_path = (argc > 1) ? argv[1] : "/root/rel/log";

    file_system filesystem;
    if (false == filesystem.init()) {
        return -1;
    }
    filesystem.stat_file_info(dir_path);

    return 0;
}
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <vector>
#include <string>
#include <thread>
// Deletes p when it is non-null and resets it to nullptr.
#define SAFE_DEL(p) do { if (nullptr != p) { delete p; p = nullptr; }} while (0)
// Tells whether the path from the root to a node spells a stored word.
enum class TrieNodeType {
    UNCOMPLETED,    // the path is at most a prefix of stored words
    COMPLETED       // a stored word ends at this node
};
// One trie node: a completion flag plus one child slot per byte value.
struct trie_node {
    static const size_t MAX_NODE_NUM = 256;
    TrieNodeType type = TrieNodeType::UNCOMPLETED;
    trie_node *links[MAX_NODE_NUM] = { nullptr };
};
/**
 * Byte-wise trie storing C strings.
 *
 * init() must succeed before any other call; until then insert() and both
 * find() overloads return false. Words are treated as raw byte sequences, so
 * every byte value (including bytes >= 0x80, e.g. UTF-8) is accepted.
 */
class trie_tree {
public:
    trie_tree() = default;
    trie_tree(const trie_tree &) = delete;
    trie_tree & operator = (const trie_tree &) = delete;
    virtual ~trie_tree() {
        destory(root_);
    }
public:
    /**
     * Allocates the root node.
     *
     * @return true on success (also when the tree is already initialised),
     *         false when the allocation fails.
     */
    bool init() {
        if (init_succ) {
            // A repeated call must not replace, and thereby leak, the root.
            return true;
        }
        try {
            root_ = new trie_node;
        }
        catch (...) {
            return false;
        }
        init_succ = true;
        return true;
    }
    /**
     * Stores word.
     *
     * @return true when the word was added; false when the tree is not
     *         initialised, word is null, or the word is already stored (the
     *         latter is also reported on stderr).
     */
    bool insert(const char *word) {
        if (false == init_succ || nullptr == word) {
            return false;
        }
        trie_node *node = root_;
        for (const char *p = word; *p; ++p) {
            trie_node *&link = node->links[slot_of(*p)];
            if (nullptr == link) {
                link = create_node();
            }
            node = link;
        }
        if (TrieNodeType::COMPLETED == node->type) {
            std::cerr << "word:" << word << " reapted." << std::endl;
            return false;
        }
        node->type = TrieNodeType::COMPLETED;
        return true;
    }
    /** @return true when exactly word has been stored. */
    bool find(const char *word) const {
        const trie_node *node = walk(word);
        return nullptr != node && TrieNodeType::COMPLETED == node->type;
    }
    /**
     * Appends every stored word starting with prefix_word to words.
     *
     * Within a subtree, longer words are appended before the word ending at
     * the subtree's own node; siblings are visited in ascending byte order.
     *
     * @return false when the tree is not initialised, prefix_word is null or
     *         no stored word starts with prefix_word; true otherwise.
     */
    bool find(const char *prefix_word, std::vector<std::string>&words) const {
        const trie_node *node = walk(prefix_word);
        if (nullptr == node) {
            return false;
        }
        std::string current(prefix_word);
        find_all_words(node, current, words);
        return true;
    }
private:
    /** Allocates a node; reports the failure and exits the process when out of memory. */
    inline trie_node *create_node() {
        trie_node *ptr = nullptr;
        try {
            ptr = new trie_node;
        }
        catch (const std::exception &e) {
            std::cerr << e.what() << std::endl;
            ::exit(-1);
        }
        return ptr;
    }
    /**
     * Maps a character to its child slot. Going through unsigned char keeps
     * the index inside [0, MAX_NODE_NUM) even where plain char is signed.
     */
    static size_t slot_of(char ch) {
        return static_cast<unsigned char>(ch);
    }
    /**
     * Follows word from the root.
     *
     * @return the node reached, or nullptr when the tree is not initialised,
     *         word is null, or the path does not exist.
     */
    const trie_node *walk(const char *word) const {
        if (false == init_succ || nullptr == word) {
            return nullptr;
        }
        const trie_node *node = root_;
        for (; *word; ++word) {
            node = node->links[slot_of(*word)];
            if (nullptr == node) {
                return nullptr;
            }
        }
        return node;
    }
    /**
     * Collects into words every stored word in the subtree of ptr. str must
     * hold the characters leading to ptr and is left unchanged on return.
     */
    static void find_all_words(const trie_node *ptr, std::string &str, std::vector<std::string>&words) {
        if (nullptr == ptr) {
            return;
        }
        for (size_t index = 0;index < trie_node::MAX_NODE_NUM;index++) {
            const trie_node *link = ptr->links[index];
            if (nullptr == link) {
                continue;
            }
            // Append and remove the child's character around the recursive
            // call, so nothing is ever erased from an empty prefix.
            str.push_back(static_cast<char>(index));
            find_all_words(link, str, words);
            str.pop_back();
        }
        if (TrieNodeType::COMPLETED == ptr->type) {
            words.emplace_back(str);
        }
    }
    /** Frees the subtree rooted at ptr and resets ptr to nullptr. */
    void destory(trie_node *&ptr) {
        if (nullptr == ptr) {
            return;
        }
        for (size_t index = 0;index < trie_node::MAX_NODE_NUM;index++) {
            destory(ptr->links[index]);
        }
        SAFE_DEL(ptr);
    }
private:
    trie_node *root_ = nullptr;
    bool init_succ = false;
};
XML=xml
JSON=json
TXT=txt

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值