基于KMP算法的TXT文本查询工具

#include<math.h>

#include<algorithm>
#include<cstddef>
#include<fstream>
#include<iostream>
#include<map>
#include<set>
#include<sstream>
#include<string>
#include<vector>
using namespace std;

// Holds the lines of a text file in memory together with an index from each
// whitespace-separated word to the (zero-based) numbers of the lines it is on.
class TextQuery{
public:
    // Zero-based index of a stored line.
    using line_no = std::vector<std::string>::size_type;

    // Reads every line of `is` into memory and then indexes the words found.
    void read_file(std::ifstream &is){
        store_file(is);
        build_map();
    }

    // Returns the line numbers associated with `query`.
    std::set<line_no> run_query(const std::string &query) const;

    // Returns the stored text of line `line`; throws std::out_of_range when
    // `line` is not a valid index.
    std::string text_line(line_no line) const;

private:
    // Appends each line read from the stream to lines_of_text.
    void store_file(std::ifstream &is);

    // Fills word_map from the contents of lines_of_text.
    void build_map();

    // The file contents, one element per line, in file order.
    std::vector<std::string> lines_of_text;

    // word -> set of line numbers on which that word appears.
    std::map<std::string, std::set<line_no>> word_map;
};

/**
 * Builds the KMP failure table for the pattern str[0..len-1].
 *
 * After the call, next[q] holds the index of the last character of the longest
 * proper prefix of str[0..q] that is also a suffix of str[0..q], or -1 when no
 * such prefix exists. next[0] is always -1.
 *
 * @param str  pattern characters; str[0..len-1] are read
 * @param next output array; must have room for at least len ints
 * @param len  pattern length; when len <= 0 nothing is read or written
 */
void cal_next(const char *str, int *next, int len)
{
    if (len <= 0)
    {
        // An empty pattern has no table; `next` may have zero capacity, so
        // even next[0] must not be touched.
        return;
    }

    next[0] = -1; // a single character has no proper prefix/suffix
    int k = -1;   // end index of the currently matched prefix, -1 = none
    for (int q = 1; q < len; q++)
    {
        // Mismatch: fall back to the next shorter candidate prefix.
        // next[k] is always smaller than k, so this loop terminates.
        while (k > -1 && str[k + 1] != str[q])
        {
            k = next[k];
        }
        // Match: the prefix grows by one character.
        if (str[k + 1] == str[q])
        {
            k = k + 1;
        }
        next[q] = k;
    }
}

/**
 * Finds the first occurrence of ptr[0..plen-1] inside str[0..slen-1] with the
 * Knuth-Morris-Pratt algorithm.
 *
 * @param str  text to search
 * @param slen number of characters of str to examine
 * @param ptr  pattern to look for
 * @param plen pattern length
 * @return zero-based index in str where the first match starts, or -1 when the
 *         pattern does not occur. An empty pattern (plen <= 0) is reported as
 *         matching at index 0.
 */
int KMP(const char *str, int slen, const char *ptr, int plen)
{
    if (plen <= 0)
    {
        // No failure table can be built for an empty pattern; by convention
        // it matches at the very start of the text.
        return 0;
    }

    // The vector owns the failure table, so it is released on every return path.
    std::vector<int> next(plen);
    cal_next(ptr, next.data(), plen);

    int k = -1; // index of the last pattern character matched so far, -1 = none
    for (int i = 0; i < slen; i++)
    {
        // Mismatch after a partial match: fall back through the failure table.
        while (k > -1 && ptr[k + 1] != str[i])
            k = next[k];
        if (ptr[k + 1] == str[i])
            k = k + 1;
        if (k == plen - 1)
        {
            // The whole pattern is matched and ends at i.
            return i - plen + 1;
        }
    }
    return -1;
}

// Reads the stream line by line until getline fails and appends each line,
// in order, to lines_of_text.
void TextQuery::store_file(ifstream &is){
    for (string current; getline(is, current); ){
        lines_of_text.push_back(current);
    }
}

// NOTE(review): nothing in the visible source reads or writes this global — confirm whether it is still needed.
int position;
//create a map ,the keys are words of every line ,the value is line number
void TextQuery::build_map(){
    for (line_no line_num = 0; line_num != lines_of_text.size(); line_num++){
        istringstream line(lines_of_text[line_num]);
        string word;
        while (line >> word){
            word_map[word].insert(line_num);                    //vector<string>::size_type is not supported (+-*/) operation
        }
    }
}

set<TextQuery::line_no> TextQuery::run_query(const string &query_word) const{
    /*map < string, set<line_no>>::const_iterator loc = word_map.find(query_word);
    if (loc == word_map.end()){
        return set<line_no>();
    }
    else{
        return loc->second;
    }*/
    map< string, set<line_no>>::const_iterator loc = word_map.begin();
    for (; loc != word_map.end(); ++loc){
        if (-1 != KMP(loc->first.c_str(), loc->first.length(), query_word.c_str(), query_word.length())){
            return loc->second;
        }
    }
    return set<line_no>();  //return a void set
}

// Returns a copy of the stored line with zero-based index `line`.
// Throws std::out_of_range when `line` is not below the number of stored lines.
string TextQuery::text_line(line_no line) const{
    if (line >= lines_of_text.size()){
        throw std::out_of_range("line number out of range");
    }
    return lines_of_text[line];
}


// (Re)opens `in` on the file named `file` and returns `in`.
// The stream is first closed and its error state cleared so that a stream
// used earlier can be reused; the caller checks the returned stream to find
// out whether the open succeeded.
ifstream& open_file(ifstream &in, const string &file){
    in.close();      // drop any previous association
    in.clear();      // reset error flags left over from earlier use
    in.open(file);   // failure is reported through the stream state
    return in;
}

// Picks the singular or plural form of a word for a given count.
//   cnt   - how many items are being described
//   word  - the singular form
//   words - the suffix appended to `word` to form the plural (e.g. "s")
// Returns `word` when cnt == 1, otherwise `word` followed by `words`.
//
// The count is std::size_t: rsize_t is a C11 Annex K / vendor name and is not
// available on every C++ toolchain.
string make_plural(std::size_t cnt, const string& word, const string& words){
    return (cnt == 1) ? word : word + words;
}

// Writes a query report to standard output: a summary line with the number of
// lines in `locs`, followed by one tab-indented entry per line showing its
// one-based line number and the text fetched from `file`.
void print_results(const set<TextQuery::line_no>& locs, const string& sought, const TextQuery &file){
    const set<TextQuery::line_no>::size_type hits = locs.size();
    cout << sought << " occurs " << hits << " " << make_plural(hits, "time", "s") << endl;

    for (const TextQuery::line_no line : locs){
        // Stored numbers are zero-based; show them one-based.
        cout << "\t(line" << (line + 1) << ")" << file.text_line(line) << endl;
    }
}


// Entry point: loads the file named by the first command-line argument, then
// repeatedly prompts for a word and prints the lines reported for it until
// the user enters "q" or standard input fails.
int main(int argc, char **argv){
    ifstream infile;

    // A missing argument and an unopenable file are reported the same way.
    const bool opened = (argc >= 2) && open_file(infile, argv[1]);
    if (!opened){
        cerr << "No input file!" << endl;
        return EXIT_FAILURE;
    }

    TextQuery tq;
    tq.read_file(infile);

    string word;
    for (;;){
        cout << "enter word to look for , or q to quit:";
        if (!(cin >> word) || word == "q"){
            break;
        }
        const set<TextQuery::line_no> locs = tq.run_query(word);
        print_results(locs, word, tq);
    }
    return 0;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值