写了字符串匹配的 2 种方法:一种是回溯的,一种是不回溯的。不回溯的应该是传说中的类 KMP 算法,具体我也没深究。我写这段代码只是为了锻炼思维和编码能力。
1.回溯法实现字符串匹配:
#ifndef __COMMON_MATCH_HPP__
#define __COMMON_MATCH_HPP__

#include <stdlib.h>
#include "string.h"

/*
 * Naive (backtracking) substring search.
 *
 * Looks for the first occurrence of sub_string inside main_string, considering
 * only candidate positions at or after cmp_start_pos.
 *
 * Parameters:
 *   main_string   - NUL-terminated text to search in.
 *   sub_string    - NUL-terminated text to search for; must not be empty.
 *   cmp_start_pos - zero-based index in main_string where the search begins.
 *
 * Returns the zero-based index of the first match, or -1 when either pointer
 * is NULL, sub_string is empty, cmp_start_pos is negative or lies beyond the
 * end of main_string, or no match exists.
 */
int CommonSubString(const char *main_string, const char *sub_string, int cmp_start_pos)
{
    if (NULL == main_string || NULL == sub_string || cmp_start_pos < 0)
        return -1;

    /* Measure both strings once instead of on every loop iteration. */
    const size_t main_len = strlen(main_string);
    const size_t sub_len = strlen(sub_string);
    const size_t first_pos = (size_t)cmp_start_pos;

    if (0 == sub_len)
        return -1;

    /*
     * Lengths are unsigned: "main_len - first_pos" would wrap around to a huge
     * value when the start position is past the end of the text, so that case
     * has to be rejected before any subtraction is performed.
     */
    if (first_pos > main_len || main_len - first_pos < sub_len)
        return -1;

    /* Last index at which a full copy of sub_string still fits. */
    const size_t last_pos = main_len - sub_len;

    for (size_t pos = first_pos; pos <= last_pos; ++pos) {
        /* Both ranges lie inside their strings, so a raw byte compare is safe. */
        if (0 == memcmp(main_string + pos, sub_string, sub_len))
            return (int)pos;
    }

    return -1;
}

#endif // __COMMON_MATCH_HPP__
2.非回溯法实现字符串匹配。由于不想用动态分配内存的方法来创建数组,next 数组使用固定大小(256 项),所以子串长度有限制(最多 256 个字符)。
#ifndef __KMP_MATCH_HPP__
#define __KMP_MATCH_HPP__

#include <stdlib.h>
#include "string.h"

// Sentinel used both as the "no match" result and as the "restart the
// pattern and advance the text" marker inside the fallback table.
const static int KMP_INVALID_CMP_INDEX = -1;

// Capacity of the fixed-size fallback table, and therefore the longest
// pattern (in characters) that can be searched for.
const static int MAX_KMP_NEXT_CMP_INDEX_LIST_SIZE = 256;

/*
 * Builds the KMP fallback ("next") table for pattern_string.
 *
 * next_cmp_index_list[i] is the pattern index to retry with after a mismatch
 * at pattern index i; KMP_INVALID_CMP_INDEX means "restart the pattern and
 * move on to the next text character". Every index of the leading run of
 * characters equal to pattern_string[0] gets KMP_INVALID_CMP_INDEX, because a
 * text character that differs from one of them differs from all of them.
 *
 * Entries [0, length) are always written. The extra entry at index `length`
 * is written only when it fits inside the table, i.e. when the pattern is
 * shorter than MAX_KMP_NEXT_CMP_INDEX_LIST_SIZE.
 *
 * Parameters:
 *   pattern_string      - NUL-terminated pattern, at most
 *                         MAX_KMP_NEXT_CMP_INDEX_LIST_SIZE characters long.
 *   next_cmp_index_list - caller-provided table with room for
 *                         MAX_KMP_NEXT_CMP_INDEX_LIST_SIZE entries.
 *
 * Returns false when an argument is NULL or the pattern is too long,
 * true otherwise.
 */
bool BuildKMPNextCmpIndexList(const char *pattern_string, int next_cmp_index_list[MAX_KMP_NEXT_CMP_INDEX_LIST_SIZE])
{
    if (NULL == pattern_string || NULL == next_cmp_index_list)
        return false;

    // Measure the pattern once; it is needed by every loop below.
    const size_t raw_len = strlen(pattern_string);
    if (raw_len > static_cast<size_t>(MAX_KMP_NEXT_CMP_INDEX_LIST_SIZE))
        return false;
    const int pattern_len = static_cast<int>(raw_len);

    next_cmp_index_list[0] = KMP_INVALID_CMP_INDEX;

    // Leading run of characters equal to the first one. The bound is checked
    // before the character is read so an empty pattern is never over-read.
    int tmp_index = 1;
    while (tmp_index < pattern_len && pattern_string[tmp_index] == pattern_string[0])
    {
        next_cmp_index_list[tmp_index++] = KMP_INVALID_CMP_INDEX;
    }
    if (tmp_index >= pattern_len)
        return true;

    // First character that breaks the run: retry against the run's last char.
    next_cmp_index_list[tmp_index] = tmp_index - 1;

    // j: index whose successor entry is being computed.
    // k: length of the border currently being extended (-1 = none left).
    int j = tmp_index;
    int k = next_cmp_index_list[j];
    while (j < pattern_len)
    {
        if (k > KMP_INVALID_CMP_INDEX && pattern_string[k] != pattern_string[j])
        {
            // Border cannot be extended; fall back to the next shorter one.
            k = next_cmp_index_list[k];
            continue;
        }

        // The table has no slot for index MAX_KMP_NEXT_CMP_INDEX_LIST_SIZE:
        // a maximum-length pattern must stop here instead of writing past
        // the end of the caller's array.
        if (j + 1 >= MAX_KMP_NEXT_CMP_INDEX_LIST_SIZE)
            break;

        k = (k <= KMP_INVALID_CMP_INDEX) ? 0 : k + 1;
        next_cmp_index_list[++j] = k;
    }

    return true;
}

/*
 * Non-backtracking (KMP-style) substring search.
 *
 * Looks for the first occurrence of sub_string inside main_string, starting
 * the scan at cmp_start_pos. The text index never moves backwards.
 *
 * Parameters:
 *   main_string   - NUL-terminated text to search in.
 *   sub_string    - NUL-terminated pattern; non-empty and at most
 *                   MAX_KMP_NEXT_CMP_INDEX_LIST_SIZE characters long.
 *   cmp_start_pos - zero-based index in main_string where the scan begins.
 *
 * Returns the zero-based index of the first match, or KMP_INVALID_CMP_INDEX
 * when an argument is NULL, the pattern is empty or too long, cmp_start_pos
 * is negative, the pattern cannot fit in the remaining text, or no match
 * exists.
 */
int KMPSubString(const char *main_string, const char *sub_string, int cmp_start_pos)
{
    if (NULL == main_string || NULL == sub_string || cmp_start_pos < 0)
        return KMP_INVALID_CMP_INDEX;

    const size_t main_raw_len = strlen(main_string);
    const size_t sub_raw_len = strlen(sub_string);
    if (0 == sub_raw_len
        || main_raw_len < sub_raw_len + static_cast<size_t>(cmp_start_pos))
        return KMP_INVALID_CMP_INDEX;

    int next_cmp_index_list[MAX_KMP_NEXT_CMP_INDEX_LIST_SIZE] = {0};
    if (!BuildKMPNextCmpIndexList(sub_string, next_cmp_index_list))
        return KMP_INVALID_CMP_INDEX;

    const int main_string_len = static_cast<int>(main_raw_len);
    const int sub_string_len = static_cast<int>(sub_raw_len);

    int main_index = cmp_start_pos;
    int sub_index = 0;

    // Stop as soon as the unmatched tail of the pattern no longer fits in
    // the unread tail of the text.
    while (main_index < main_string_len
           && sub_index < sub_string_len
           && (main_string_len - main_index) >= (sub_string_len - sub_index))
    {
        if (main_string[main_index] == sub_string[sub_index])
        {
            ++main_index;
            ++sub_index;
            if (sub_index >= sub_string_len)
                return main_index - sub_string_len;
        }
        else
        {
            sub_index = next_cmp_index_list[sub_index];
            if (sub_index <= KMP_INVALID_CMP_INDEX)
            {
                // No prefix can be reused: restart the pattern on the
                // next text character.
                sub_index = 0;
                ++main_index;
            }
        }
    }

    return KMP_INVALID_CMP_INDEX;
}

#endif // __KMP_MATCH_HPP__
3.测试代码:


#include <stdio.h>

#include "common_match.hpp"
#include "kmp_match.hpp"

/*
 * Smoke test for both substring searches.
 *
 * Runs CommonSubString and KMPSubString on the same text/pattern pair, prints
 * both results, and reports through the exit status whether they equal the
 * expected match position.
 *
 * Returns 0 when both searches find the pattern at index 4, 1 otherwise.
 */
int main(int argc, char ** argv)
{
    // The command line is not used.
    (void)argc;
    (void)argv;

    char main_string[128] = "acffaaaabaabcacdafadf";
    char sub_string[32] = "aaaabaabcac";

    // The pattern occupies main_string[4..14].
    const int expected_pos = 4;

    int common_result = CommonSubString(main_string, sub_string, 0);
    int kmp_result = KMPSubString(main_string, sub_string, 0);

    printf("CommonSubString: %d\n", common_result);
    printf("KMPSubString: %d\n", kmp_result);

    // Previously both results were computed and silently discarded, so the
    // program could not fail; surface a mismatch through the exit status.
    return (common_result == expected_pos && kmp_result == expected_pos) ? 0 : 1;
}