#include <math.h>

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std;
// Holds the lines of a text file plus an index from each whitespace-separated
// word to the set of lines on which that word appears.
class TextQuery {
public:
    // Zero-based index of a line within the stored text.
    using line_no = std::vector<std::string>::size_type;

    // Loads the stream into memory: stores every line, then builds the
    // word -> line-numbers index from the stored lines.
    void read_file(std::ifstream &is) {
        store_file(is);
        build_map();
    }

    // Returns the line numbers associated with the query word; the set is
    // empty when no indexed word matches.
    std::set<line_no> run_query(const std::string &) const;

    // Returns the text of the given (zero-based) line.
    // Throws std::out_of_range when the line number is past the last line.
    std::string text_line(line_no) const;

private:
    // Appends every line read from the stream to lines_of_text.
    void store_file(std::ifstream &);

    // Fills word_map from the contents of lines_of_text.
    void build_map();

    // The stored text, one element per input line.
    std::vector<std::string> lines_of_text;

    // Maps each word to the line numbers on which it occurs.
    std::map<std::string, std::set<line_no>> word_map;
};
// Builds the KMP failure table for the pattern `str` of length `len`.
//
// After the call, next[q] is the index of the last character of the longest
// proper prefix of str[0..q] that is also a suffix of str[0..q], or -1 when no
// such prefix exists. `next` must have room for `len` ints.
//
// A non-positive `len` leaves `next` untouched.
void cal_next(const char *str, int *next, int len)
{
    if (len <= 0) {
        return;  // an empty pattern has no table entries; next[0] may not exist
    }

    next[0] = -1;  // a single character has no proper prefix/suffix
    int k = -1;    // end index of the currently matched prefix (-1 = none)
    for (int q = 1; q < len; ++q) {
        // On a mismatch fall back to the next shorter border; next[k] < k, so
        // this loop always terminates.
        while (k > -1 && str[k + 1] != str[q]) {
            k = next[k];
        }
        // On a match the border grows by one character.
        if (str[k + 1] == str[q]) {
            ++k;
        }
        next[q] = k;
    }
}
int KMP(constchar *str, int slen, constchar *ptr, int plen)
{
int *next = newint[plen];
cal_next(ptr, next, plen);//计算next数组int k = -1;
for (int i = 0; i < slen; i++)
{
while (k >-1 && ptr[k + 1] != str[i])//ptr和str不匹配,且k>-1(表示ptr和str有部分匹配)
k = next[k];//往前回溯if (ptr[k + 1] == str[i])
k = k + 1;
if (k == plen - 1)//说明k移动到ptr的最末端
{
//cout << "在位置" << i-plen+1<< endl;//k = -1;//重新初始化,寻找下一个//i = i - plen + 2;//i定位到找到位置处的下一个位置(这里默认存在两个匹配字符串可以部分重叠)return i - plen + 1;//返回相应的位置
}
}
return -1;
}
//as the name of the function :store the file into a vectorvoid TextQuery::store_file(ifstream &is){
string textline;
while (getline(is, textline)){
lines_of_text.push_back(textline);
}
}
// NOTE(review): file-scope variable; nothing in the visible source reads or
// writes it -- confirm whether it is still needed.
int position;
//create a map ,the keys are words of every line ,the value is line numbervoid TextQuery::build_map(){
for (line_no line_num = 0; line_num != lines_of_text.size(); line_num++){
istringstream line(lines_of_text[line_num]);
string word;
while (line >> word){
word_map[word].insert(line_num); //vector<string>::size_type is not supported (+-*/) operation
}
}
}
set<TextQuery::line_no> TextQuery::run_query(conststring &query_word) const{
/*map < string, set<line_no>>::const_iterator loc = word_map.find(query_word);
if (loc == word_map.end()){
return set<line_no>();
}
else{
return loc->second;
}*/map< string, set<line_no>>::const_iterator loc = word_map.begin();
for (; loc != word_map.end(); ++loc){
if (-1 != KMP(loc->first.c_str(), loc->first.length(), query_word.c_str(), query_word.length())){
return loc->second;
}
}
returnset<line_no>(); //return a void set
}
//return the string of the line_number string TextQuery::text_line(line_no line) const{
if (line < lines_of_text.size()){
return lines_of_text[line];
}
throwstd::out_of_range("line number out of range");
}
// Binds the stream `in` to the file named `file` and returns `in`.
//
// Any file already attached to the stream is closed and its error flags are
// cleared before the new open, so the same stream object can be reused.
// Callers test the returned stream to see whether the open succeeded.
std::ifstream &open_file(std::ifstream &in, const std::string &file) {
    in.close();  // sets failbit when nothing was open ...
    in.clear();  // ... so reset the state before opening
    in.open(file.c_str());
    return in;
}
// Returns `word` unchanged when `cnt` is exactly 1; otherwise returns `word`
// with the suffix `words` appended (e.g. "time" + "s").
std::string make_plural(std::size_t cnt, const std::string &word, const std::string &words) {
    return (cnt == 1) ? word : word + words;
}
//print the result of text-query void print_results(constset<TextQuery::line_no>& locs, conststring& sought, const TextQuery &file){
typedefset<TextQuery::line_no> line_nums;
line_nums::size_type size = locs.size();
cout << sought << " occurs " << size << " " << make_plural(size, "time", "s") << endl;
line_nums::const_iterator it = locs.begin();
for (; it != locs.end(); ++it){
cout << "\t(line" << ((*it) + 1)<< ")" << file.text_line(*it) << endl;
}
}
int main(int argc, char **argv){
ifstream infile;
if (argc < 2 || !open_file(infile, argv[1])){
cerr << "No input file!" << endl;
return EXIT_FAILURE;
}
TextQuery tq;
tq.read_file(infile);
while (true){
cout << "enter word to look for , or q to quit:";
string s;
cin >> s;
if (!cin || s == "q") break;
set<TextQuery::line_no> locs = tq.run_query(s);
print_results(locs, s, tq);
}
return0;
}