- Word Break
Given a string s and a dictionary of words dict, determine whether s can be broken into a space-separated sequence of one or more dictionary words.
Example
Given s = “lintcode”, dict = [“lint”, “code”].
Return true because “lintcode” can be broken up as “lint code”.
解法1:DFS。超时 (95%的时候)
class Solution {
public:
    /**
     * Plain depth-first search: peel a dictionary word off the front of the
     * string and recurse on the remaining suffix. No memoization is used, so
     * the running time is exponential in the worst case.
     *
     * @param s: A string
     * @param dict: A dictionary of words dict
     * @return: true if s can be split into words that all appear in dict
     */
    bool wordBreak(std::string &s, std::unordered_set<std::string> &dict) {
        const int lenS = static_cast<int>(s.size());
        maxWordLen = 0;
        minWordLen = lenS;
        for (const auto &w : dict) {
            maxWordLen = std::max(maxWordLen, static_cast<int>(w.size()));
            minWordLen = std::min(minWordLen, static_cast<int>(w.size()));
        }
        // An empty dictionary word consumes nothing, so trying it would recurse
        // forever on the same string. Never try a length below 1.
        minWordLen = std::max(minWordLen, 1);
        return helper(s, dict);
    }

private:
    /**
     * @param s: the part of the input that is still unsegmented
     * @param dict: the dictionary
     * @return: true as soon as one complete segmentation of s is found
     */
    bool helper(const std::string &s, const std::unordered_set<std::string> &dict) {
        const int len = static_cast<int>(s.size());
        if (len == 0) {
            return true; // everything has been consumed
        }
        // Never ask for a prefix longer than the text that is left: substr()
        // would silently clamp it, and taking the suffix past the end throws.
        const int longest = std::min(maxWordLen, len);
        for (int i = minWordLen; i <= longest; ++i) {
            if (dict.find(s.substr(0, i)) == dict.end()) continue;
            if (helper(s.substr(i), dict)) {
                return true; // stop at the first successful split
            }
        }
        return false;
    }

    int minWordLen; // shortest candidate length to try (at least 1)
    int maxWordLen; // longest word length in the dictionary
};
解法2:还是DFS,s不动index动。Pass。
class Solution {
public:
    /**
     * Depth-first search that keeps s fixed and advances an index instead of
     * copying suffixes. No memoization is used, so the running time is still
     * exponential in the worst case.
     *
     * @param s: A string
     * @param dict: A dictionary of words dict
     * @return: true if s can be split into words that all appear in dict
     */
    bool wordBreak(std::string &s, std::unordered_set<std::string> &dict) {
        const int lenS = static_cast<int>(s.size());
        maxWordLen = 0;
        minWordLen = lenS;
        for (const auto &w : dict) {
            maxWordLen = std::max(maxWordLen, static_cast<int>(w.size()));
            minWordLen = std::min(minWordLen, static_cast<int>(w.size()));
        }
        // An empty dictionary word would leave the index unchanged and recurse
        // forever, so the shortest length ever tried is 1.
        minWordLen = std::max(minWordLen, 1);
        return helper(s, 0, dict);
    }

private:
    /**
     * @param s: the full input string
     * @param index: start of the part of s that is still unsegmented
     * @param dict: the dictionary
     * @return: true as soon as s[index..] is found to be segmentable
     */
    bool helper(const std::string &s, int index, const std::unordered_set<std::string> &dict) {
        const int len = static_cast<int>(s.size());
        if (index == len) {
            return true; // reached the end: a full segmentation exists
        }
        // Pruning: only lengths that fit in the remaining text are tried, which
        // also covers the case where fewer than minWordLen characters remain.
        const int longest = std::min(maxWordLen, len - index);
        for (int i = minWordLen; i <= longest; ++i) {
            if (dict.find(s.substr(index, i)) == dict.end()) continue;
            if (helper(s, index + i, dict)) {
                return true; // one answer is enough; no need to enumerate all
            }
        }
        return false;
    }

    int minWordLen; // shortest candidate length to try (at least 1)
    int maxWordLen; // longest word length in the dictionary
};
解法3:DFS+Memoization(记忆化搜索)
即把s分成str1和str2,如果str1在dict中则可调用子问题,输入为str2。
Memoization可以剪枝。
代码如下:
class Solution {
public:
    /**
     * Memoized DFS keyed by the remaining string.
     *
     * @param s: A string
     * @param dict: A dictionary of words dict
     * @return: A boolean
     */
    bool wordBreak(std::string &s, std::unordered_set<std::string> &dict) {
        // Empty text with an empty dictionary is the only empty-text case
        // that is accepted.
        if (s.size() == 0 && dict.size() == 0) {
            return true;
        }
        std::unordered_map<std::string, bool> memo;
        return helper(s, dict, memo);
    }

private:
    /**
     * @param s: the string still to be segmented
     * @param dict: the dictionary
     * @param memo: answers already computed, keyed by the string itself
     * @return: true if s is a concatenation of one or more dictionary words
     */
    bool helper(std::string &s, std::unordered_set<std::string> &dict,
                std::unordered_map<std::string, bool> &memo) {
        if (s.size() == 0) {
            return false;
        }
        const auto cached = memo.find(s);
        if (cached != memo.end()) {
            return cached->second;
        }

        const int len = static_cast<int>(s.size());
        bool answer = false;
        for (int i = 1; i <= len && !answer; ++i) {
            const std::string subStr1 = s.substr(0, i);
            if (dict.find(subStr1) == dict.end()) {
                continue;
            }
            if (i == len) {
                // The whole string is itself a dictionary word.
                answer = true;
            } else {
                std::string subStr2 = s.substr(i);
                answer = helper(subStr2, dict, memo);
            }
        }
        memo[s] = answer;
        return answer;
    }
};
二刷:还是DFS+memoization。上面那个过不了96%。这个能过。
class Solution {
public:
    /**
     * Memoized DFS over prefix lengths, scanning from the end of s toward the
     * front. memo[pos] describes the prefix s[0..pos):
     *   0 = not computed yet, 1 = can be segmented, -1 = cannot be segmented.
     *
     * @param s: A string
     * @param dict: A dictionary of words dict
     * @return: true if s can be split into words that all appear in dict
     */
    bool wordBreak(std::string &s, std::unordered_set<std::string> &dict) {
        std::map<int, int> memo; // 0: uninitialized, 1: true, -1: false
        minLen = dict.size() == 0 ? 0 : INT_MAX;
        maxLen = dict.size() == 0 ? 0 : INT_MIN;
        memo[0] = 1;
        for (const auto &w : dict) {
            maxLen = std::max(maxLen, static_cast<int>(w.size()));
            minLen = std::min(minLen, static_cast<int>(w.size()));
        }
        // A zero-length word would make dfs() call itself with the same pos
        // forever, so the shortest length ever tried is 1.
        minLen = std::max(minLen, 1);
        return dfs(s, dict, static_cast<int>(s.size()), memo);
    }

private:
    /**
     * @param s: the full input string
     * @param dict: the dictionary
     * @param pos: length of the prefix of s being tested
     * @param memo: tri-state cache described on wordBreak
     * @return: true if s[0..pos) can be segmented
     */
    bool dfs(std::string &s, std::unordered_set<std::string> &dict, int pos,
             std::map<int, int> &memo) {
        if (pos == 0) {
            return true;
        }
        const int upperlimit = std::min(maxLen, pos);
        // |------------------------pos-----------|
        //                                  <-----
        //                                    len
        for (int len = minLen; len <= upperlimit; ++len) {
            const int prev = pos - len;
            if (dict.find(s.substr(prev, len)) == dict.end()) {
                continue;
            }
            const int state = memo[prev]; // reads 0 when prev was never visited
            if (state == -1) {
                continue; // already known to be a dead end
            }
            if (state == 1 || dfs(s, dict, prev, memo)) {
                memo[pos] = 1;
                return true;
            }
        }
        // Record the failure as -1 (not 0) so later visits really skip it.
        memo[pos] = -1;
        return false;
    }

    int maxLen, minLen;
};
用map<int, bool>也可以。但是要注意map[pos] == false有两种情况,一种是map[pos]确实是false,另一种是map[pos]还未赋值,但默认为false。当然,如果用unordered_map会更快。
class Solution {
public:
    /**
     * Memoized DFS over prefix lengths with a boolean cache, scanning from
     * the end of s toward the front.
     *
     * @param s: A string
     * @param dict: A dictionary of words dict
     * @return: A boolean
     */
    bool wordBreak(std::string &s, std::unordered_set<std::string> &dict) {
        // Word-length bounds; both are 0 when the dictionary is empty.
        const bool noWords = (dict.size() == 0);
        minLen = noWords ? 0 : INT_MAX;
        maxLen = noWords ? 0 : INT_MIN;
        for (const auto &w : dict) {
            const int wordLen = static_cast<int>(w.size());
            if (wordLen > maxLen) maxLen = wordLen;
            if (wordLen < minLen) minLen = wordLen;
        }

        std::map<int, bool> memo; // <pos, can s[0..pos) be segmented>
        memo[0] = true;
        dfs(s, dict, s.size(), memo);
        return memo[s.size()];
    }

private:
    /**
     * @param s: the full input string
     * @param dict: the dictionary
     * @param pos: length of the prefix of s being tested
     * @param memo: cache; a missing key means "not computed yet"
     * @return: true if s[0..pos) can be segmented
     */
    bool dfs(std::string &s, std::unordered_set<std::string> &dict, int pos,
             std::map<int, bool> &memo) {
        if (pos == 0) {
            return true;
        }
        const int upperlimit = std::min(maxLen, pos);
        for (int len = minLen; len <= upperlimit; ++len) {
            const int prev = pos - len;
            if (dict.find(s.substr(prev, len)) == dict.end()) {
                continue;
            }
            const auto known = memo.find(prev);
            if (known == memo.end()) {
                // Mark prev as false before descending, so a zero-length step
                // back onto the same position is skipped instead of looping.
                memo[prev] = false;
                if (!dfs(s, dict, prev, memo)) {
                    continue;
                }
            } else if (!known->second) {
                continue; // cached failure
            }
            memo[pos] = true;
            return true;
        }
        memo[pos] = false;
        return false;
    }

    int maxLen, minLen;
};
上面的思路是pos从尾扫到头,最后看memo[s.size()]。也可以让pos从头扫到尾,最后看memo[0]。两者都是属于dfs里面的分治法。
class Solution {
public:
    /**
     * Memoized DFS over suffix start positions, scanning from the front of s
     * toward the end.
     *
     * @param s: A string
     * @param dict: A dictionary of words dict
     * @return: A boolean
     */
    bool wordBreak(std::string &s, std::unordered_set<std::string> &dict) {
        // Word-length bounds; both are 0 when the dictionary is empty.
        const bool noWords = (dict.size() == 0);
        minLen = noWords ? 0 : INT_MAX;
        maxLen = noWords ? 0 : INT_MIN;
        for (const auto &w : dict) {
            const int wordLen = static_cast<int>(w.size());
            if (wordLen > maxLen) maxLen = wordLen;
            if (wordLen < minLen) minLen = wordLen;
        }

        std::unordered_map<int, bool> memo; // <pos, can s[pos..] be segmented>
        memo[s.size()] = true;
        dfs(s, dict, 0, memo);
        return memo[0];
    }

private:
    /**
     * @param s: the full input string
     * @param dict: the dictionary
     * @param pos: start of the suffix of s being tested
     * @param memo: cache; a missing key means "not computed yet"
     * @return: true if s[pos..] can be segmented
     */
    bool dfs(std::string &s, std::unordered_set<std::string> &dict, int pos,
             std::unordered_map<int, bool> &memo) {
        if (pos == static_cast<int>(s.size())) {
            return true;
        }
        // |---------pos--------------------|
        //           ---------->
        //              len
        for (int len = minLen; len <= maxLen; ++len) {
            const int next = pos + len;
            if (dict.find(s.substr(pos, len)) == dict.end()) {
                continue;
            }
            const auto known = memo.find(next);
            if (known == memo.end()) {
                // Mark next as false before descending, so a zero-length step
                // onto the same position is skipped instead of looping.
                memo[next] = false;
                if (!dfs(s, dict, next, memo)) {
                    continue;
                }
            } else if (!known->second) {
                continue; // cached failure
            }
            memo[pos] = true;
            return true;
        }
        memo[pos] = false;
        return false;
    }

    int maxLen, minLen;
};
解法4:DP。dp[i]表示s的前i个字符能否被切分;j枚举i的各个前缀长度(即最后一个单词的起点),当dp[j]为true且s[j..i)在字典中时dp[i]为true。
class Solution {
public:
    /**
     * Bottom-up DP over prefix lengths: dp[i] is true when the first i
     * characters of s can be segmented into dictionary words.
     *
     * @param s: the text to segment
     * @param wordSet: the dictionary
     * @return: true if the whole of s can be segmented
     */
    bool wordBreak(std::string s, std::unordered_set<std::string>& wordSet) {
        const int n = static_cast<int>(s.size());
        std::vector<bool> dp(n + 1, false);
        dp[0] = true; // the empty prefix is trivially segmentable

        for (int i = 1; i <= n; ++i) {
            // j is the split point: s[0..j) must be segmentable and s[j..i)
            // must be a dictionary word. Stop at the first j that works.
            int j = 0;
            while (j < i && !dp[i]) {
                if (dp[j] && wordSet.find(s.substr(j, i - j)) != wordSet.end()) {
                    dp[i] = true;
                }
                ++j;
            }
        }
        return dp[n];
    }
};
解法5: 还是DP。但是j是表示可能匹配的字符串的长度,所以加以minSize和maxSize的上限。显然,这个方法更好,因为字符串的长度不会很长,所以j循环次数少而且效率很高,因为一旦匹配上,就可以退出循环。
i从头扫到尾,dp[0]一开始等于true,最后看dp[len].
class Solution {
public:
    /**
     * Bottom-up DP over prefix lengths where the inner loop iterates over
     * candidate word lengths (bounded by the shortest and longest dictionary
     * word) instead of over every split point.
     *
     * @param s: the text to segment
     * @param wordSet: the dictionary
     * @return: true if the whole of s can be segmented
     */
    bool wordBreak(std::string s, std::unordered_set<std::string>& wordSet) {
        int minSize = INT_MAX;
        int maxSize = INT_MIN;
        for (const auto &w : wordSet) {
            const int wordLen = static_cast<int>(w.size());
            if (wordLen < minSize) minSize = wordLen;
            if (wordLen > maxSize) maxSize = wordLen;
        }

        const int n = static_cast<int>(s.size());
        std::vector<bool> dp(n + 1, false);
        dp[0] = true;
        // ------------i-->---------
        //      <--j---|
        for (int i = 1; i <= n; ++i) {
            // A word ending at i cannot be longer than i characters.
            const int longest = std::min(maxSize, i);
            for (int j = minSize; j <= longest; ++j) {
                const int start = i - j;
                if (!dp[start]) {
                    continue;
                }
                if (wordSet.find(s.substr(start, j)) != wordSet.end()) {
                    dp[i] = true;
                    break;
                }
            }
        }
        return dp[n];
    }
};
还是DP,不过是pos从尾扫到头,一开始dp[len] = true,最后看dp[0]。
class Solution {
public:
    /**
     * Bottom-up DP over suffix start positions, filled from the end of s
     * toward the front: dp[pos] is true when s[pos..] can be segmented.
     *
     * @param s: A string
     * @param dict: A dictionary of words dict
     * @return: A boolean
     */
    bool wordBreak(std::string &s, std::unordered_set<std::string> &dict) {
        int minSize = INT_MAX;
        int maxSize = 0;
        for (const auto &w : dict) {
            const int wordLen = static_cast<int>(w.size());
            if (wordLen < minSize) minSize = wordLen;
            if (wordLen > maxSize) maxSize = wordLen;
        }

        const int len = static_cast<int>(s.size());
        std::vector<bool> dp(len + 1, false);
        dp[len] = true; // the empty suffix is trivially segmentable
        // ------<--pos-----------------------
        //          |----i----->
        for (int pos = len; pos-- > 0; ) {
            // A word starting at pos cannot run past the end of s.
            const int longest = std::min(maxSize, len - pos);
            for (int i = minSize; i <= longest; ++i) {
                if (dp[pos + i] && dict.find(s.substr(pos, i)) != dict.end()) {
                    dp[pos] = true;
                    break;
                }
            }
        }
        return dp[0];
    }
};
本文深入探讨了WordBreak问题的多种解决方案,包括深度优先搜索(DFS)、带记忆化的深度优先搜索和动态规划(DP)方法。通过实例分析,比较了不同算法的优劣,展示了如何有效利用记忆化和动态规划减少重复计算,提高算法效率。

被折叠的 条评论
为什么被折叠?



