https://leetcode.com/problems/substring-with-concatenation-of-all-words/
You are given a string, s, and a list of words, words, that are all of the same length. Find all starting indices of substring(s) in s that are a concatenation of each word in words exactly once and without any intervening characters.
For example, given:
s: "barfoothefoobarman"
words: ["foo", "bar"]
You should return the indices: [0,9]
(order does not matter).
思路1:
只用Hash
class Solution {
public:
    // Brute force: try every start offset in s and check whether the next
    // words.size() chunks (each words[0].size() characters long) use every
    // word exactly as often as it appears in `words`.
    //
    // s      text to search
    // words  words to concatenate; the chunk length is taken from words[0]
    // return start indices of all matches, in ascending order; empty when s,
    //        words or words[0] is empty
    vector<int> findSubstring(string s, vector<string>& words) {
        vector<int> result;
        if(s.empty() || words.empty() || words[0].empty()) {
            return result;
        }

        const int wordLen = words[0].size();
        const int textLen = s.size();
        const int wordTotal = words.size();

        // How many times each word has to appear in a match.
        map<string, int> required;
        for(const string& w : words) {
            required[w] += 1;
        }

        // Last offset at which a full concatenation still fits into s.
        const int lastStart = textLen - wordTotal*wordLen;
        for(int start = 0; start <= lastStart; ++start) {
            map<string, int> seen;
            int matched = 0;
            for(; matched < wordTotal; ++matched) {
                const string piece = s.substr(start + matched*wordLen, wordLen);
                const auto need = required.find(piece);
                // Stop on a chunk that is not a word, or on a word used more
                // often than `words` provides it.
                if(need == required.end() || ++seen[piece] > need->second) {
                    break;
                }
            }
            if(matched == wordTotal) {
                result.push_back(start);
            }
        }
        return result;
    }
};
思路2:
自定义hash + 双指针。
这个方法比较复杂,时间快很多。当然,复杂度还是一样的,不过,这种方法其实是不严格的,因为hash的计算在不取模的情况才对,实际计算机最多是64位的,那么一次hash的结果是1/2^64的冲突概率。参考一下。
取质数p=151,对于word, wd = foo, hash[foo] = 'f'*p^2 + 'o'*p + 'o';
1.按这个方法求hash,然后建立从wd到id,id到count的映射。
2.在s中使用同样的方法建立每个位置到后面wlen长单词的hash。
3.最后,然后用双指针去寻找。
这个算法比较复杂,具体看代码。
class Solution {
private:
    // Rolling-hash value type. Unsigned, so the arithmetic wraps modulo 2^64
    // (well defined) instead of overflowing a signed int (undefined behaviour).
    typedef unsigned long long Hash;
    static constexpr Hash p = 151; // prime base of the polynomial hash

    // hash -> ids of the distinct words having that hash. Normally one id per
    // bucket; more than one only when two different words collide.
    unordered_map<Hash, vector<int> > htoi;
    // id -> word text. Slot 0 is unused because id 0 means "not a word".
    vector<string> itow;
    // ans:  result indices
    // id:   id[i] is the id of the word starting at s[i], or 0 if none
    // itoc: itoc[k] is how many times the word with id k occurs in `words`
    vector<int> ans, id, itoc;
    // Lengths of s / of one word, number of words, number of ids handed out.
    int slen = 0, wlen = 0, wcnt = 0, icnt = 0;
    // p^(wlen-1): weight of the character that leaves the rolling window.
    Hash mh = 1;

    // Resets all per-call state. Returns false when the answer is trivially
    // empty: empty input, s too short, or words of differing lengths.
    bool init(const string &s, const vector<string> &words){
        ans.clear();
        if(words.empty() || words[0].empty() || s.empty()){
            return false;
        }
        wcnt = words.size();
        wlen = words[0].size();
        slen = s.size();
        // Same test as slen < wlen*wcnt, written so the product cannot overflow.
        if(slen / wlen < wcnt){
            return false;
        }
        // A word of another length can never equal a wlen-sized window, so no
        // concatenation can use every word; hashing it would also read past
        // its end.
        for(const string &w : words){
            if(static_cast<int>(w.size()) != wlen){
                return false;
            }
        }
        icnt = 0;
        htoi.clear();
        itow.assign(1, string());
        itoc.assign(1, 0);
        id.assign(slen, 0);
        mh = 1;
        for(int i = 1; i < wlen; ++i){
            mh *= p;
        }
        return true;
    }

    // Hash of the first wlen characters of str.
    Hash hashPrefix(const string &str) const {
        Hash t = 0;
        for(int i = 0; i < wlen; ++i){
            t = t*p + static_cast<unsigned char>(str[i]);
        }
        return t;
    }

    // Id of the known word equal to str[pos, pos+wlen), or 0 if there is none.
    // h must be the hash of that window. Every hash hit is confirmed by a text
    // comparison, so a collision can never produce a false match.
    int lookup(const string &str, int pos, Hash h) const {
        const auto bucket = htoi.find(h);
        if(bucket == htoi.end()){
            return 0;
        }
        for(int cand : bucket->second){
            if(str.compare(pos, wlen, itow[cand]) == 0){
                return cand;
            }
        }
        return 0;
    }

    // Assigns an id to every distinct word and counts its occurrences.
    void genHashForWords(const vector<string> &words){
        for(const string &w : words){
            const Hash h = hashPrefix(w);
            int wid = lookup(w, 0, h);
            if(wid == 0){
                wid = ++icnt; // first time this word is seen: new id
                htoi[h].push_back(wid);
                itow.push_back(w);
                itoc.push_back(0);
            }
            ++itoc[wid];
        }
    }

    // Fills id[i] for every window start i in s using a rolling hash.
    void genHashForStr(const string &s){
        Hash h = hashPrefix(s);
        id[0] = lookup(s, 0, h);
        for(int i = 1; i <= slen - wlen; ++i){
            // Drop s[i-1], shift the window, append s[i+wlen-1].
            h = (h - mh * static_cast<unsigned char>(s[i - 1])) * p
                + static_cast<unsigned char>(s[i + wlen - 1]);
            id[i] = lookup(s, i, h);
        }
    }

    // Two-pointer scan. For each alignment i in [0, wlen) the window [st, ed]
    // holds whole words and never uses a word more often than itoc allows.
    void find(){
        for(int i = 0; i < wlen; ++i){
            int st = i, ed = i;
            vector<int> use(icnt + 1, 0);
            while(ed <= slen - wlen){
                int tid = id[ed];
                if(tid == 0){
                    // Not a word: no match can span it, restart right after it.
                    use.assign(icnt + 1, 0);
                    st = ed = ed + wlen;
                } else {
                    ++use[tid];
                    if(use[tid] > itoc[tid]){
                        // Too many copies of tid: drop words from the front up
                        // to and including the earliest copy of tid.
                        while(id[st] != tid){
                            --use[id[st]];
                            st += wlen;
                        }
                        --use[id[st]];
                        st += wlen;
                    }
                    // The window now spans exactly wcnt words.
                    if(ed - st == wlen * (wcnt - 1)){
                        ans.push_back(st);
                    }
                    ed += wlen;
                }
            }
        }
    }

public:
    // Returns the start indices of every substring of s that is a
    // concatenation of all strings in `words`, each used exactly once.
    // Indices are grouped by alignment (start % word length), not sorted.
    // The returned reference refers to a member of this object, so it is
    // overwritten by the next call and must not outlive the object.
    vector<int> &findSubstring(string s, vector<string> &words) {
        if(!init(s, words)){
            return ans;
        }
        genHashForWords(words);
        genHashForStr(s);
        find();
        return ans;
    }
};
实际上,直接用map + 双指针就好了。
class Solution {
public:
    // Sliding window over word-aligned chunks of s.
    //
    // For each alignment i in [0, wdLen) the window [st, ed) always consists
    // of whole words from `words`, none used more often than `words` provides
    // it. The window only ever moves forward, so each chunk of s is examined
    // a constant number of times per alignment.
    //
    // s      text to search
    // words  words to concatenate; the chunk length is taken from words[0]
    // return start indices of all matches, grouped by alignment (ascending
    //        within each alignment); empty when s, words or words[0] is empty
    vector<int> findSubstring(string s, vector<string>& words) {
        vector<int> indices;
        if(s.empty() || words.empty() || words[0].empty()) {
            return indices;
        }
        const int wdLen = words[0].size(), sLen = s.size(), wdNum = words.size();
        // Same test as sLen < wdNum*wdLen, written so the product cannot overflow.
        if(sLen / wdLen < wdNum) {
            return indices;
        }
        const int subLen = wdNum*wdLen, lastSt = sLen-subLen, lastEd = sLen-wdLen;

        unordered_map<string, int> wdCnt, wdFd;
        for(const string& wd : words) {
            ++wdCnt[wd];
        }

        for(int i=0; i<wdLen; ++i) {
            wdFd.clear();
            int st = i, ed = i;
            while(st <= lastSt && ed <= lastEd) {
                const string sub = s.substr(ed, wdLen);
                ed += wdLen;

                const auto need = wdCnt.find(sub);
                if(need == wdCnt.end()) {
                    // No match can span a non-word chunk: restart right after it.
                    wdFd.clear();
                    st = ed;
                    continue;
                }

                // References into an unordered_map stay valid across later
                // lookups and insertions.
                int& seen = wdFd[sub];
                ++seen;
                // Too many copies of sub: drop words from the front until the
                // earliest copy of sub has left the window.
                while(seen > need->second) {
                    --wdFd[s.substr(st, wdLen)];
                    st += wdLen;
                }

                if(ed - st == subLen) {
                    indices.push_back(st);
                    // Slide by one word and keep the remaining counts, so the
                    // following chunks are not scanned again.
                    --wdFd[s.substr(st, wdLen)];
                    st += wdLen;
                }
            }
        }
        return indices;
    }
};