Substring with Concatenation of All Words
class Solution:
    """Solution to "Substring with Concatenation of All Words"."""

    def findSubstring(self, S, L):
        """Find every start index in S of a concatenation of all words in L.

        A match is a substring of S made of each word of L exactly as many
        times as it appears in L, in any order, with nothing in between.
        The word length is taken from L[0]; the other words are assumed to
        have the same length (this is not checked).

        @param S, a string
        @param L, a list of string
        @return a list of integer start indices, grouped by alignment
                (index modulo word length) and ascending within each group
        """
        if not L:
            return []
        word_len = len(L[0])
        span = word_len * len(L)
        # Zero-length words have no alignment to scan, and a text shorter
        # than one full concatenation cannot contain a match.
        if word_len == 0 or len(S) < span:
            return []

        # Give each distinct word a compact id and record how many copies
        # of it a complete window has to contain.
        word_id = {}
        required = []
        for word in L:
            if word in word_id:
                required[word_id[word]] += 1
            else:
                word_id[word] = len(required)
                required.append(1)

        # token_at[p] is the id of the word starting at S[p], or -1 when the
        # slice at p is not one of the words.
        last_start = len(S) - word_len
        token_at = [
            word_id.get(S[p:p + word_len], -1) for p in range(last_start + 1)
        ]

        starts = []
        for offset in range(word_len):
            # Window [left, right) only ever holds known words, and never
            # more copies of a word than `required` allows, so it is a full
            # match exactly when it is `span` characters wide.
            seen = [0] * len(required)
            left = right = offset
            while right <= last_start:
                if right - left == span:
                    starts.append(left)
                token = token_at[right]
                if token == -1:
                    # Unknown word: drain the window and restart after it.
                    while left < right:
                        seen[token_at[left]] -= 1
                        left += word_len
                    left = right + word_len
                elif seen[token] < required[token]:
                    seen[token] += 1
                else:
                    # One copy too many: drop words from the left up to and
                    # including the earliest copy of this word, so the new
                    # copy takes its place and the count stays unchanged.
                    while token_at[left] != token:
                        seen[token_at[left]] -= 1
                        left += word_len
                    left += word_len
                right += word_len
            # The loop stops before testing the window that ends at the
            # last word of this alignment.
            if right - left == span:
                starts.append(left)
        return starts
class Solution {
public:
    // Scans a sequence of word ids for windows that use every id exactly as
    // often as required.
    //
    // tokens: one entry per word-sized slice of the text at a fixed
    //         alignment; a word id (index into mp), or -1 for a slice that
    //         is not one of the words.
    // mp:     required count per word id. Taken by value on purpose: it is
    //         used as the working "remaining quota" table and modified.
    //
    // Returns the token indices (not character offsets) at which a complete
    // window starts, in ascending order.
    vector<int> overlap(const vector<int> &tokens, vector<int> mp) {
        vector<int> ans;
        const int total = static_cast<int>(tokens.size());
        // Number of word ids whose remaining quota is still above zero; the
        // window [st, ed] is a complete match exactly when this hits zero.
        int unmatch = static_cast<int>(mp.size());
        int st = 0;
        for (int ed = 0; ed < total; ++ed) {
            const int cur = tokens[ed];
            if (cur == -1) {
                // Unknown word: give back everything the window consumed
                // and restart just after it.
                for (; st < ed; ++st) {
                    if (mp[tokens[st]]++ == 0) {
                        ++unmatch;
                    }
                }
                st = ed + 1;
            } else if (mp[cur] > 0) {
                // Quota left for this word: consume one copy.
                --mp[cur];
                if (mp[cur] == 0) {
                    --unmatch;
                    if (unmatch == 0) {
                        ans.push_back(st);
                    }
                }
            } else {
                // mp[cur] is never below zero, so the quota is exhausted:
                // release words from the left up to the earliest copy of
                // this word, then let the new copy replace that one.
                while (st < ed && tokens[st] != cur) {
                    if (mp[tokens[st]]++ == 0) {
                        ++unmatch;
                    }
                    ++st;
                }
                ++st;
                if (unmatch == 0) {
                    ans.push_back(st);
                }
            }
        }
        return ans;
    }

    // Returns every index of s at which a concatenation of all strings in
    // words (each used as often as it appears, in any order) starts.
    // The word length is taken from words[0]; the remaining words are
    // assumed to have the same length (not checked). Results are grouped by
    // alignment (index modulo word length), ascending within each group.
    vector<int> findSubstring(string s, vector<string>& words) {
        vector<int> ans;
        if (words.empty()) {
            return ans;
        }

        // Assign a compact id to each distinct word and count its copies.
        map<string, int> strid;
        vector<int> occurtimes;
        for (size_t i = 0; i < words.size(); ++i) {
            map<string, int>::iterator known = strid.find(words[i]);
            if (known == strid.end()) {
                strid[words[i]] = static_cast<int>(occurtimes.size());
                occurtimes.push_back(1);
            } else {
                ++occurtimes[known->second];
            }
        }

        const size_t len = words[0].length();
        vector<int> tokens;
        for (size_t i = 0; i < len; ++i) {
            // Cut s into word-sized slices starting at alignment i and
            // translate each slice into its word id (or -1).
            tokens.clear();
            for (size_t j = i; j + len <= s.length(); j += len) {
                map<string, int>::const_iterator hit = strid.find(s.substr(j, len));
                tokens.push_back(hit == strid.end() ? -1 : hit->second);
            }
            const vector<int> matchpositions = overlap(tokens, occurtimes);
            // Convert token indices back into character offsets in s.
            for (size_t k = 0; k < matchpositions.size(); ++k) {
                ans.push_back(static_cast<int>(i + matchpositions[k] * len));
            }
        }
        return ans;
    }
};