LintCode 107: Word Break (DFS, DP经典题!)

本文深入探讨了WordBreak问题的多种解决方案，包括深度优先搜索(DFS)、带记忆化的深度优先搜索和动态规划(DP)方法。通过实例分析，比较了不同算法的优劣，展示了如何有效利用记忆化和动态规划减少重复计算，提高算法效率。

Word Break
Given a string s and a dictionary of words dict, determine if s can be break into a space-separated sequence of one or more dictionary words.

Example
Given s = “lintcode”, dict = [“lint”, “code”].

Return true because “lintcode” can be break as “lint code”.

解法1：DFS。超时 (95%的时候)

class Solution {
public:
    /*
     * @param s: A string
     * @param dict: A dictionary of words dict
     * @return: A boolean
     */
    bool wordBreak(string &s, unordered_set<string> &dict) {
        int lenS = s.size();
        int lenD = dict.size();
        maxWordLen = 0;
        minWordLen = lenS;
        
        for (auto w : dict) {
            maxWordLen = max(maxWordLen, (int)w.size());
            minWordLen = min(minWordLen, (int)w.size());
        }
        
        vector<string> sol;
        vector<vector<string>> result;
        
        helper(s, dict, sol, result);
        if (result.size() > 0) return true;
        return false;
    }
    
private:

    void helper(string &s, unordered_set<string> &dict, vector<string> &sol, vector<vector<string>> &result) {
        
        int len = s.size();
        if (len == 0) {
            result.push_back(sol);
            return;
        }
        if (len < minWordLen) 
            return;
        
        for (int i = minWordLen; i <= maxWordLen; ++i) {
            string w = s.substr(0, i);
            if (dict.find(w) == dict.end()) continue;
            sol.push_back(w);
            string newStr = s.substr(i, len - i);
            helper(newStr, dict, sol, result);
            sol.pop_back();
        }
    }
    
    int minWordLen;
    int maxWordLen;
};

解法2：还是DFS，s不动index动。Pass。

class Solution {
public:
    /*
     * @param s: A string
     * @param dict: A dictionary of words dict
     * @return: A boolean
     */
    bool wordBreak(string &s, unordered_set<string> &dict) {
        int lenS = s.size();
        int lenD = dict.size();
        maxWordLen = 0;
        minWordLen = lenS;
        
        for (auto w : dict) {
            maxWordLen = max(maxWordLen, (int)w.size());
            minWordLen = min(minWordLen, (int)w.size());
        }
        
        vector<string> sol;
        vector<vector<string>> result;
        
        helper(s, 0, dict, sol, result);
        if (result.size() > 0) return true;
        return false;
    }
    
private:

    void helper(string &s, int index, unordered_set<string> &dict, vector<string> &sol, vector<vector<string>> &result) {
        
        int len = s.size();
        if (index == len) {
            result.push_back(sol);
            return;
        }
        
        if (index + minWordLen > len)    //剪枝
            return;
        
        for (int i = minWordLen; i <= maxWordLen; ++i) {
            string w = s.substr(index, i);
            if (dict.find(w) == dict.end()) continue;
            sol.push_back(w);
            helper(s, index + i, dict, sol, result);
            sol.pop_back();
        }
    }
    
    int minWordLen;
    int maxWordLen;
};

解法3：DFS+Memorization
即把s分成str1和str2，如果str1在dict中则可调用子问题，输入为str2。
Memozation可以减枝。
代码如下：

class Solution {
public:
    /*
     * @param s: A string
     * @param dict: A dictionary of words dict
     * @return: A boolean
     */
    bool wordBreak(string &s, unordered_set<string> &dict) {
        if (s.size() == 0 && dict.size() == 0) return true;
        unordered_map<string, bool> memo;
        return helper(s, dict, memo);
    }
    
private:
    bool helper(string &s, unordered_set<string> &dict, unordered_map<string, bool> & memo) {
        if (s.size() == 0) return false;
        if (memo.find(s) != memo.end()) return memo[s];
        
        int len = s.size();
        for (int i = 1; i <= len; ++i) {
            string subStr1 = s.substr(0, i);
            
            if (dict.find(subStr1) != dict.end()) {
                if (subStr1 == s) {
                    memo[s] = true;
                    return true;
                }
                string subStr2 = s.substr(i);
                bool result = helper(subStr2, dict, memo);
                if (result) {
                    memo[s] = true;    
                    return true;
                }
            }
        }
        memo[s] = false;
        return false;
    }
   
};

二刷：还是DFS+memorization。上面那个过不了96%。这个能过。

class Solution {
public:
    /*
     * @param s: A string
     * @param dict: A dictionary of words dict
     * @return: A boolean
     */
    bool wordBreak(string &s, unordered_set<string> &dict) {
        map<int, int> memo; //0: uninitialized, 1: true, -1: false
        minLen = dict.size() == 0 ? 0 : INT_MAX;
        maxLen = dict.size() == 0 ? 0 : INT_MIN;
        memo[0] = 1;
        for (auto w : dict) {
            maxLen = max(maxLen, (int)w.size());
            minLen = min(minLen, (int)w.size());
        }
         
        return dfs(s, dict, s.size(), memo); //也可以先dfs(s, dict, s.size(), memo)，然后return memo[s.size()];
    }
private:    
    bool dfs(string &s, unordered_set<string> &dict, int pos, map<int, int> &memo) {
        if (pos == 0){
            return true;
        }        
        int upperlimit = min(maxLen, pos);
        //  |------------------------pos-----------|
        //                      <-----
        //                      len    
        for (int len = minLen; len <= upperlimit; len++){
            string str = s.substr(pos - len, len);
            if (dict.find(str) == dict.end() || memo[pos - len] == -1){
                continue;
            }
            if (memo[pos - len] == 1 || dfs(s, dict, pos - len, memo)) {
                memo[pos] = 1;
                return true;
            }
        }
        memo[pos] = 0;
        return false;
    }
    int maxLen, minLen;
};

用map<int, bool>也可以。但是要注意map[pos] == false有两种情况，一种是map[pos]确实是false，另一种是map[pos]还未赋值，但默认为false。当然，如果用unordered_map会更快。

class Solution {
public:
    /*
     * @param s: A string
     * @param dict: A dictionary of words dict
     * @return: A boolean
     */
    bool wordBreak(string &s, unordered_set<string> &dict) {
        //map<int, int> memo; //0: uninitialized, 1: true, -1: false
        map<int, bool> memo; //<pos, bool>
        minLen = dict.size() == 0 ? 0 : INT_MAX;
        maxLen = dict.size() == 0 ? 0 : INT_MIN;
        memo[0] = true;
        for (auto w : dict) {
            maxLen = max(maxLen, (int)w.size());
            minLen = min(minLen, (int)w.size());
        }
        dfs(s, dict, s.size(), memo);
        return memo[s.size()];
    }
private:    
    bool dfs(string &s, unordered_set<string> &dict, int pos, map<int, bool> &memo) {
        if (pos == 0){
            return true;
        }        
        int upperlimit = min(maxLen, pos);
        for (int len = minLen; len <= upperlimit; len++){
            string str = s.substr(pos - len, len);
            if (dict.find(str) == dict.end() || (memo.find(pos - len) != memo.end() && !memo[pos - len])){
                continue;
            }
            if (memo[pos - len] || dfs(s, dict, pos - len, memo)) {
                memo[pos] = 1;
                return true;
            }
        }
        memo[pos] = false;
        return false;
    }
    int maxLen, minLen;
};

上面的思路是pos从尾扫到头，最后看memo[s.size()]。也可以让pos从头扫到尾，最后看memo[0]。两者都是属于dfs里面的分治法。

class Solution {
public:
    /*
     * @param s: A string
     * @param dict: A dictionary of words dict
     * @return: A boolean
     */
    bool wordBreak(string &s, unordered_set<string> &dict) {
        unordered_map<int, bool> memo; //<pos, bool>
        minLen = dict.size() == 0 ? 0 : INT_MAX;
        maxLen = dict.size() == 0 ? 0 : INT_MIN;
        memo[s.size()] = true;
        for (auto w : dict) {
            maxLen = max(maxLen, (int)w.size());
            minLen = min(minLen, (int)w.size());
        }
        dfs(s, dict, 0, memo);
        return memo[0];
    }
private:    
    bool dfs(string &s, unordered_set<string> &dict, int pos, unordered_map<int, bool> &memo) {
        if (pos == s.size()){
            return true;
        }        
        int upperlimit = maxLen;
        //  |---------pos--------------------|
        //              ---------->
        //                len  

        for (int len = minLen; len <= upperlimit; len++){
            string str = s.substr(pos, len);
            if (dict.find(str) == dict.end() || (memo.find(pos + len) != memo.end() && !memo[pos + len])){
                continue;
            }
            if (memo[pos + len] || dfs(s, dict, pos + len, memo)) {
                memo[pos] = 1;
                return true;
            }
        }
        memo[pos] = false;
        return false;
    }
    int maxLen, minLen;
};

解法4：DP。j是表示i的某个长度的前缀。

class Solution {
public:
    bool wordBreak(string s, unordered_set<string>& wordSet) {
        int n = s.size();
        vector<bool> dp(n + 1, false);
        dp[0] = true;
        for (int i = 0; i <= n; ++i) {
            for (int j = 0; j < i; ++j) {
                if (dp[j] && wordSet.find(s.substr(j, i - j)) != wordSet.end()) {
                    dp[i] = true;
                    break;
                }
            }
        }
        return dp.back();
    }
};

解法5: 还是DP。但是j是表示可能匹配的字符串的长度，所以加以minSize和maxSize的上限。显然，这个方法更好，因为字符串的长度不会很长，所以j循环次数少而且效率很高，因为一旦匹配上，就可以退出循环。
i从头扫到尾,dp[0]一开始等于true，最后看dp[len].

class Solution {
public:
    bool wordBreak(string s, unordered_set<string>& wordSet) {
        int n = s.size();
        vector<bool> dp(n + 1, false);
        int minSize = INT_MAX, maxSize = INT_MIN;
        for (auto w : wordSet) {
            minSize = min(minSize, (int)w.size());
            maxSize = max(maxSize, (int)w.size());
        }
        dp[0] = true;
        //  ------------i-->---------
        //       <--j---| 
        //
        for (int i = 1; i <= n; ++i) {
            for (int j = minSize; j <= maxSize; j++) {
                if (i >= j && dp[i - j] && wordSet.find(s.substr(i - j, j)) != wordSet.end()) {
                    dp[i] = true;
                    break;
                }
            }
            //for (int j = 0; j < i; ++j) {
            //    if (dp[j] && wordSet.find(s.substr(j, i - j)) != wordSet.end()) {
            //        dp[i] = true;
            //        break;
            //    }
            //}
        }
        return dp[n];
    }
};

还是DP，不过是pos从尾扫到头，一开始dp[len] = true，最后看dp[0]。

class Solution {
public:
    /*
     * @param s: A string
     * @param dict: A dictionary of words dict
     * @return: A boolean
     */
    bool wordBreak(string &s, unordered_set<string> &dict) {
        vector<bool> dp(s.size() + 1, false);
        int len = s.size();
        dp[len] = true;
        int minSize = INT_MAX, maxSize = 0;
        for (auto w : dict) {
            minSize = min(minSize, (int)w.size());
            maxSize = max(maxSize, (int)w.size());
        }
        
        //  ------<--pos-----------------------
        //            |----i-----> 
        for (int pos = len - 1; pos >= 0; pos--) {
            for (int i = minSize; i <= maxSize && pos + i <= len; i++) {
                string str = s.substr(pos, i);
                if (dict.find(str) == dict.end()) continue;
                if (dp[pos + i]) {
                    dp[pos] = true;
                    break;
                }
            }
        }
        return dp[0];
    }
};