一维DP解决文本匹配问题-CSDN博客

本文链接：https://blog.csdn.net/fyfcauc/article/details/38090903

    // 740K 266MS   G++
    #include <cstdio>  
    #include <string>  
    #include <cstring>  
    #include <iostream>  
      
    using namespace std;  
      
    // 2-D interval table DP[i][j]; in this file it is only referenced by the
    // commented-out solve() below (where -1 means "no match").
    int DP[305][305]; //  
      
    // 1-D table filled by solve2(): DP2[i] is the fewest characters to remove
    // from message[i..L-1], or -1 when no dictionary word fits in that suffix.
    // solve2() also reads DP2[L] as an "empty suffix" sentinel (-1).
    int DP2[305];  
      
    // Dictionary words, read by main() into dict[0..W-1].
    string dict[605];  
    int W; // num of dict  
    int L; // length of message  
      
    // Text being matched; getRemoveNum() indexes it directly.
    string message;  
      
    // "No candidate found yet" marker for the running minimum in solve2().
    #define INF 999999  
      
    /**
     * Greedily matches cmpStr as a subsequence of message[begin..end] (inclusive).
     *
     * Every character of cmpStr is paired with the earliest not-yet-consumed
     * position of the global `message` inside the range, so a successful match
     * ends as early as possible.
     *
     * @param begin     first index of the range inside message
     * @param end       last index of the range inside message (inclusive)
     * @param cmpStr    word to look for
     * @param lastMatch out-parameter, written only on success: index in message
     *                  of the character paired with the last character of cmpStr
     *                  (begin - 1 when cmpStr is empty, i.e. nothing was consumed)
     * @return number of characters of the range that are NOT part of the match,
     *         i.e. (end - begin + 1 - cmpStr.size()); -1 when cmpStr does not
     *         fit into the range as a subsequence
     */
    int getRemoveNum(int begin, int end, const string & cmpStr, int * lastMatch) { // -1 means no solution
        // Work with signed lengths: comparing size_t against a negative int would
        // silently convert the int to a huge unsigned value.
        const int wordLen = static_cast<int>(cmpStr.size());
        const int rangeLen = end - begin + 1;
        if (wordLen > rangeLen) { // word longer than the message part (also covers an inverted range)
            return -1;
        }

        int scanPos = begin;       // next message index that may still be consumed
        int matchedAt = begin - 1; // index of the most recently matched character
        for (int i = 0; i < wordLen; i++) {
            // Skip message characters that have to be removed before cmpStr[i].
            while (scanPos <= end && message[scanPos] != cmpStr[i]) {
                scanPos++;
            }
            if (scanPos > end) { // range exhausted before the whole word was matched
                return -1;
            }
            matchedAt = scanPos++;
        }

        *lastMatch = matchedAt;
        return rangeLen - wordLen;
    }
      
    /**
     * One-dimensional DP over suffixes of the global `message`; prints the
     * minimum number of characters that have to be removed from message[0..L-1].
     *
     * DP2[i] is the fewest removals for message[i..L-1] when at least one
     * dictionary word is kept, or -1 when no dictionary word fits in that
     * suffix. DP2[L] is never assigned and therefore keeps the -1 written by
     * memset, acting as the "nothing left after the word" sentinel.
     *
     * Reads the globals W, L, dict and message; overwrites DP2; writes one line
     * to stdout. When no word fits anywhere, the answer printed is L (every
     * character has to be removed) instead of the internal -1 marker.
     */
    void solve2() {
        memset(DP2, 0xFF, sizeof(DP2)); // every entry becomes -1

        for (int i = L - 1; i >= 0; i--) {
            int best = INF;
            for (int k = 0; k < W; k++) {
                // Defensive default: if the callee reports a match without
                // writing lastMatch, fall back to "word ends at the last char".
                int lastMatch = L - 1;
                const int res = getRemoveNum(i, L - 1, dict[k], &lastMatch);
                if (res == -1) {
                    continue; // dict[k] is not a subsequence of message[i..L-1]
                }
                // res assumes everything after the word is removed. If the tail
                // message[lastMatch+1..L-1] has its own solution, give back the
                // characters that solution keeps:
                //   kept = (L - 1 - lastMatch) - DP2[lastMatch + 1]
                int cost = res;
                if (DP2[lastMatch + 1] != -1) {
                    cost = res - (L - 1 - lastMatch - DP2[lastMatch + 1]);
                }
                if (cost < best) {
                    best = cost;
                }
            }

            if (best != INF) {
                DP2[i] = best;
            } else if (DP2[i + 1] != -1) {
                DP2[i] = DP2[i + 1] + 1; // drop message[i], reuse the shorter suffix
            } else {
                DP2[i] = -1;             // still no word anywhere in this suffix
            }
        }

        // -1 here means no dictionary word fits at all: the whole message goes.
        printf("%d\n", DP2[0] == -1 ? L : DP2[0]);
    }
      
      
    // void solve() {  
    //     memset(DP, 0xFF, sizeof(DP)); // all init to -1 means no match  
      
    //     for (int i = L-1; i >= 0; i--) {  
    //         for (int j = 0; j <= L-1; j++) {  
      
    //             if (i > j) {  
    //                 continue;  
    //             }  
    //             // printf("%d %d\n", i, j);  
    //             int minDP = INF;  
      
    //             for (int k = 0; k < W; k++) { // process the whole str(i<->j)  
    //                 int res = getRemoveNum(i, j, dict[k]);  
    //                 if (res != -1) {  
    //                     minDP = minDP < res ? minDP: res;  
    //                 }  
    //             }  
      
    //             for (int k = i; k <= j-1; k++) { // get the sum of i<->k k+1<->j  
    //                 if (DP[i][k] != -1 && DP[k+1][j] != -1) {  
    //                     int sum = DP[i][k] + DP[k+1][j];  
    //                     minDP = sum < minDP ? sum: minDP;  
    //                 }  
    //             }  
    //             DP[i][j] = (minDP == INF ? -1: minDP);  
    //         }  
    //     }  
    //     printf("%d\n", DP[0][L-1]);  
    // }  
      
    int main() {  
        while(scanf("%d %d", &W, &L) !=EOF) {  
            cin>>message;  
            for (int i = 0; i < W; i++) {  
                cin>>dict[i];  
            }  
            solve2();  
        }  
    }

从纯DP的角度看，这是一道简单的DP题（只用了一维的DP数组），不过感觉状态转移方程还是有点难想到。

一开始试了下朴素的DP，即用二维矩阵 DP[i][j] 表示在 message 从 i 到 j 的区间内需要去掉的最少字母数，这样

DP[i][j] 有这些可能的取值：i 到 j 整个字符串（不拆分）直接进行匹配；以及把它拆分成 i 到 k 和 k+1 到 j 两段（i<=k<j）分别匹配后的和（DP[i][k] + DP[k+1][j]）。DP[i][j] 取这些值中的最小值，

当时写的时候就觉得要TLE，果然，不过好歹答案是对的，只能这样安慰自己了。

后来搜了下，发现思路出乎意料的简单：一个一维的DP就够了，DP[i] 表示 message 从 i 到 L（message末尾）这一段进行匹配的最少删除数目（之所以不用从 0 到 i，是因为从后往前处理更方便），那么DP[i]的取值会有这些情况：

如果从 i 到 L 这一段里根本匹配不到任何 dict 中的词，那么 DP[i] = DP[i+1] + 1（代码中如果 DP[i+1] 也是 -1，则 DP[i] 同样记为 -1）;

如果能匹配到dict词（从i开始，最多到L）最少删除数是N，那么这时候有两种情况：

设匹配完的dict词的最后一个字母在message中的位置是k（最后匹配位置），那么从k+1 到 L中间还有一段字符，这段字符的最少删除数之前也已经求出来了，即 DP[k+1]，

如果DP[k+1] = -1（即从k+1到L没有完整的匹配），那么DP[i] = N（只有从i到k这段匹配有效）

如果DP[k+1] = M，那么说明在k+1到L这段字符还有别的匹配，N一定不是最少的删除数（因为N把整个k+1到L的字符都认为是要删除的，但是其实k+1到L中至少有一个字母是不用删除的，因此一定还有一个比N小至少1的删除数）。

在从k+1到L这段字符串中，DP[k+1]表示此段匹配的最少删除数，那么不应该被删除的字符数V就是 (L - k - 1 + 1 - DP[k+1])，而N其实就是把V多算了进去，

因此这段的最少删除数就是 N - V = N - L + k + DP[k+1]。（这里把 L 当作末尾下标；代码中下标从 0 开始、末尾下标是 L-1，所以对应的写法是 res - (L - 1 - lastMatch - DP2[lastMatch+1])。）