基于KMP算法的路径下文本查询程序的C++实现(2.0版)

根据《C++ Primer》中的TextQuery示例改编而来。

#undef UNICODE
#include<iostream>
#include<string>
#include<set>
#include<map>
#include<vector>
#include<fstream>
#include<sstream>
#include<algorithm>
#include<math.h>
#include <memory>
#include <cstring>
#include <windows.h>
using namespace std;
// Global accumulator: fileList() appends the full path of every non-directory
// entry it finds to this vector, and do_it() iterates over it.
std::vector<std::string> folder_files;
// Forward declaration; the definition appears after main().
std::vector<std::string> fileList(const std::string& folder_path);    // collects file paths found under folder_path


// In-memory index of one text file: stores every line of the file and maps
// each whitespace-separated word to the set of lines on which it appears.
class TextQuery{
public:
    // Zero-based index of a line inside the stored file.
    using line_no = std::vector<std::string>::size_type;

    // Loads the whole stream into memory, then builds the word -> lines index.
    void read_file(std::ifstream &is){
        store_file(is);
        build_map();
    }

    // Returns line numbers associated with stored words that contain the
    // given string (substring match performed with KMP).
    std::set<line_no> run_query(const std::string&) const;

    // Returns the text of the given line; throws std::out_of_range when the
    // index is past the last stored line.
    std::string text_line(line_no) const;

private:
    // Appends every line read from the stream to lines_of_text.
    void store_file(std::ifstream&);

    // Fills word_map from the current contents of lines_of_text.
    void build_map();

    // The file's lines, in reading order.
    std::vector<std::string> lines_of_text;

    // Each word -> the line numbers it occurs on.
    std::map<std::string, std::set<line_no>> word_map;
};

// Builds the KMP failure table for the pattern `str`.
//
// Parameters:
//   str  - pattern characters; at least `len` of them must be readable.
//   next - output array with room for `len` ints.
//   len  - number of pattern characters.
//
// On return, next[q] is the index of the last character of the longest proper
// prefix of str[0..q] that is also a suffix of str[0..q], or -1 when no such
// prefix exists. next[0] is always -1.
//
// Nothing is written when `len` is not positive or either pointer is null, so
// a zero-length table is never touched.
void cal_next(const char *str, int *next, int len)
{
    if (len <= 0 || str == nullptr || next == nullptr)
    {
        return;
    }

    next[0] = -1;   // a single character has no proper prefix/suffix
    int k = -1;     // end index of the currently matched prefix, -1 = none
    for (int q = 1; q < len; ++q)
    {
        // Mismatch: fall back to the next shorter border. next[k] < k always,
        // so this loop terminates.
        while (k >= 0 && str[k + 1] != str[q])
        {
            k = next[k];
        }
        // Match: the border grows by one character.
        if (str[k + 1] == str[q])
        {
            ++k;
        }
        next[q] = k;
    }
}

// Finds the first occurrence of the pattern `ptr` (length `plen`) inside the
// text `str` (length `slen`) using the Knuth-Morris-Pratt algorithm.
//
// Returns the zero-based index in `str` where the match starts, or -1 when
// the pattern does not occur. An empty pattern matches at index 0.
//
// The failure table lives in a std::vector, so it is released on every return
// path (the previous raw `new[]` buffer was never deleted).
int KMP(const char *str, int slen, const char *ptr, int plen)
{
    if (plen <= 0)
    {
        return 0;   // empty pattern: trivially found at the start
    }
    if (slen < plen)
    {
        return -1;  // the text is too short to contain the pattern
    }

    std::vector<int> next(plen);
    cal_next(ptr, next.data(), plen);   // build the failure table

    int k = -1;     // index of the last matched pattern character, -1 = none
    for (int i = 0; i < slen; ++i)
    {
        // Mismatch after a partial match: fall back through the failure table.
        while (k >= 0 && ptr[k + 1] != str[i])
        {
            k = next[k];
        }
        if (ptr[k + 1] == str[i])
        {
            ++k;
        }
        if (k == plen - 1)
        {
            // The whole pattern matched, ending at text position i.
            return i - plen + 1;
        }
    }
    return -1;
}

// Reads the stream line by line until getline fails and appends each line,
// in order, to lines_of_text.
void TextQuery::store_file(ifstream &is){
    for (string current; getline(is, current); ){
        lines_of_text.push_back(current);
    }
}

// NOTE(review): this global is not referenced by any code visible in this
// file; confirm nothing else uses it before removing.
int position;
//create a map ,the keys are words of every line ,the value is line number
void TextQuery::build_map(){
    for (line_no line_num = 0; line_num != lines_of_text.size(); line_num++){
        istringstream line(lines_of_text[line_num]);
        string word;
        while (line >> word){
            word_map[word].insert(line_num);                    //vector<string>::size_type unsupport (+-*/) operation
        }
    }
}

set<TextQuery::line_no> TextQuery::run_query(const string &query_word) const{
    map< string, set<line_no>>::const_iterator loc = word_map.begin();
    for (; loc != word_map.end(); ++loc){
        if (-1 != KMP(loc->first.c_str(), loc->first.length(), query_word.c_str(), query_word.length())){
            return loc->second;
        }
    }
    return set<line_no>();  //返回一个空集合?
}

// Returns a copy of the stored line with the given zero-based index.
// Throws std::out_of_range when the index is not below the number of lines.
string TextQuery::text_line(line_no line) const{
    const bool past_end = !(line < lines_of_text.size());
    if (past_end){
        throw std::out_of_range("line number out of range");
    }
    return lines_of_text[line];
}


// Re-targets the stream `in` at the file named `file`.
// Any previously opened file is closed and the stream's error flags are reset
// before opening, so the same ifstream object can be reused. The stream is
// returned so the caller can test it directly for success.
ifstream& open_file(ifstream &in, const string &file){
    in.close();     // close whatever was open before
    in.clear();     // drop leftover fail/eof flags
    const char *path = file.c_str();
    in.open(path);
    return in;
}

// Picks the singular or plural form of a word for a given count.
//
// Parameters:
//   cnt   - how many items there are.
//   word  - the singular form.
//   words - the suffix appended to form the plural (e.g. "s").
//
// Returns `word` when cnt is exactly 1, otherwise `word + words`.
//
// The count is a std::size_t: the previous `rsize_t` is a C11 Annex K /
// MSVC-specific typedef and is not available in standard C++.
string make_plural(std::size_t cnt, const string& word, const string& words){
    return (cnt == 1) ? word : word + words;
}

// Writes a query report to stdout: a summary line with the number of matching
// lines, followed by one line per match showing its 1-based line number and
// the line's text as stored in `file`.
void print_results(const set<TextQuery::line_no>& locs, const string& sought, const TextQuery &file){
    const set<TextQuery::line_no>::size_type hit_count = locs.size();
    cout << sought << " occurs " << hit_count << " " << make_plural(hit_count, "time", "s") << endl;

    for (const TextQuery::line_no line_index : locs){
        // Stored indices are zero-based; show them one-based.
        cout << "\t(line" << (line_index + 1) << ")" << file.text_line(line_index) << endl;
    }
}

// Runs one query against one file: loads `file_path` into a TextQuery, looks
// up `s`, and prints the matches.
// Returns EXIT_FAILURE when the file cannot be opened, 0 otherwise. Nothing is
// printed when std::cin is in a failed state or `s` is "q".
int text_Query(const std::string& file_path, const std::string& s){
    ifstream infile;
    open_file(infile, file_path);
    if (infile.fail()){
        cerr << "No input file!" << endl;
        return EXIT_FAILURE;
    }

    TextQuery tq;
    tq.read_file(infile);

    if (cin && s != "q"){
        const set<TextQuery::line_no> locs = tq.run_query(s);
        print_results(locs, s, tq);
    }
    return 0;
}

// Interactive driver. Repeatedly reads a folder path from stdin, collects the
// files under it with fileList(), reads a query string, and runs the query
// against every collected file. Entering "q" as the folder path asks for
// confirmation ("y/n?") and returns when the answer starts with 'y'.
// The loop also ends when stdin reaches end-of-file.
void do_it(){
    std::cout << "please input the folder path which you want query,then click enter(the separator must be '\\')" << endl;
    std::string folderpath;
    while (getline(cin, folderpath)){                                           //input a folder_path

        if (folderpath == "q"){                                                 //quit the program
            std::cout << "y/n?" << endl;
            // Read the whole answer line. A bare getchar() left the trailing
            // newline in stdin, where it was then consumed as the query string.
            std::string answer;
            if (!getline(cin, answer)) return;
            if (!answer.empty() && answer[0] == 'y') return;
            // Not confirmed: fall through and treat "q" as a folder path,
            // as before.
        }

        // fileList() appends to the global folder_files and never clears it,
        // so start from an empty list; otherwise files from folders entered
        // earlier would be searched again.
        folder_files.clear();
        folder_files = fileList(folderpath);                                    //collect the files under the folder

        string s;
        std::cout << "please input a string which you want to query(click enter as the end):" << endl;
        if (!getline(std::cin, s)) return;                                      //stdin closed: nothing to query

        for (const std::string &path : folder_files){
            cout << path << ":" << endl;
            text_Query(path, s);
            cout << endl;
        }
    }
}

// Manual check helper: reads one folder path from stdin and runs fileList()
// on it, discarding the returned list.
void test(){
    std::string folder_path;
    std::getline(std::cin, folder_path);
    fileList(folder_path);
}
// Program entry point: runs the interactive query loop.
int main()
{
    // To exercise fileList() on its own, call test() here instead.
    do_it();
    return 0;
}

//obtain the all files' path under the folder_path
std::vector<std::string> fileList(const std::string& folder_path)
{
    WIN32_FIND_DATA FindData;
    HANDLE hError;

    int file_count(0);
    std::string file_path(folder_path); //路径名
    std::string full_file_path; //全路径名 

    file_path.append("/*.*");
    hError = FindFirstFile(file_path.c_str(), &FindData);
    if (hError == INVALID_HANDLE_VALUE) {
        std::cout << "failed to search files." << std::endl;
        return std::vector<std::string>();
    }
    while (FindNextFile(hError, &FindData))
    {
        //过虑".", "..", "-q"
        if (0 == strcmp(FindData.cFileName, ".") ||
            0 == strcmp(FindData.cFileName, "..") ||
            0 == strcmp(FindData.cFileName, "-q"))
        {
            continue;
        }

        //完整路径
        full_file_path.append(folder_path);
        full_file_path.append("/");
        full_file_path.append(FindData.cFileName);
        ++file_count;

        if (FindData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY){
            fileList(full_file_path);
        }
        else{
            folder_files.push_back(full_file_path);
        }
        full_file_path.clear(); //清空目录
    }
    return folder_files;
}

喜欢的顶一下,就是对我的支持

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值