LeetCode 日经贴，Python / C++ code - Substring with Concatenation of All Words - CSDN博客

本文介绍了一种高效的字符串匹配算法，该算法能在给定的字符串中找到所有由特定单词列表构成的子串的位置。通过构建匹配表并使用滑动窗口技术，算法能够有效地处理重复单词，并在O(N)的时间复杂度内完成任务。

Substring with Concatenation of All Words

class Solution:
    def findSubstring(self, S, L):
        """Return the start indices in S of every concatenation of all words in L.

        Each word of L must be used exactly as many times as it appears in L
        (duplicates count). Chunks are cut with the length of L[0].

        S -- the text to search (a string)
        L -- the list of words (a list of strings)

        Returns a list of integers. Indices are grouped by their alignment
        (start index modulo the word length) and ascend within each group.
        """
        hits = []
        wordCount = len(L)
        if not wordCount:
            return hits
        wordLen = len(L[0])
        textLen = len(S)
        if wordCount * wordLen > textLen:
            return hits

        # Every distinct word is identified by the index of its first
        # appearance in L; quota[slot] is how often that word is required.
        slotOf = {}
        quota = [0] * wordCount
        for pos, word in enumerate(L):
            quota[slotOf.setdefault(word, pos)] += 1

        # slotAt[p] is the slot of the word starting at S[p], or -1 when the
        # chunk at p is not one of the words.
        lastStart = textLen - wordLen
        slotAt = [slotOf.get(S[p:p + wordLen], -1) for p in range(lastStart + 1)]

        span = wordLen * wordCount
        for offset in range(wordLen):
            # Window [left, right) advances in whole-word steps on this
            # alignment; used[slot] counts the words it currently holds.
            left = right = offset
            used = [0] * wordCount
            while right <= lastStart:
                if right - left == span:
                    hits.append(left)
                slot = slotAt[right]
                if slot == -1:
                    # Unknown chunk: drain the window and restart after it.
                    for p in range(left, right, wordLen):
                        used[slotAt[p]] -= 1
                    right += wordLen
                    left = right
                elif used[slot] < quota[slot]:
                    # Word is still needed: extend the window over it.
                    used[slot] += 1
                    right += wordLen
                else:
                    # Word already used up: shrink from the left until the
                    # earlier copy is reached, then swap it for this one.
                    while slotAt[left] != slot:
                        used[slotAt[left]] -= 1
                        left += wordLen
                    left += wordLen
                    right += wordLen
            # The window may have become complete on the very last step.
            if right - left == span:
                hits.append(left)

        return hits

class Solution {
public:
    /**
     * Finds every window of a token stream that uses each word id exactly as
     * often as required.
     *
     * @param tokens Word ids in scan order; -1 marks a chunk that is not one
     *               of the words.
     * @param mp     Required count per word id. Taken by value because it is
     *               consumed as the "still needed" table during the scan.
     *               Every entry is assumed to be positive on entry, since the
     *               number of unmet ids starts at mp.size().
     * @return Start indices (into tokens) of the matching windows, in the
     *         order they are found.
     */
    std::vector<int> overlap(const std::vector<int> &tokens, std::vector<int> mp) {
        std::vector<int> ans;
        const int tokenCount = static_cast<int>(tokens.size());
        int unmatch = static_cast<int>(mp.size());  // ids whose quota is not met yet
        int st = 0;                                 // first token of the current window
        for (int ed = 0; ed < tokenCount; ++ed) {
            const int id = tokens[ed];
            if (id == -1) {
                // An unknown chunk can never be inside a match: hand back
                // everything the window holds and restart just past it.
                for (; st < ed; ++st) {
                    if (mp[tokens[st]]++ == 0) {
                        ++unmatch;
                    }
                }
                st = ed + 1;
            } else if (mp[id] > 0) {
                // Word is still needed: take it. The window is complete the
                // moment the last unmet quota reaches zero.
                if (--mp[id] == 0 && --unmatch == 0) {
                    ans.push_back(st);
                }
            } else {
                // Quota for this id is already used up (mp never drops below
                // 0). Release tokens from the left until the earlier copy of
                // this id is reached, then drop that copy so the current
                // token takes its place.
                for (; st < ed && tokens[st] != id; ++st) {
                    if (mp[tokens[st]]++ == 0) {
                        ++unmatch;
                    }
                }
                ++st;
                if (unmatch == 0) {
                    ans.push_back(st);
                }
            }
        }
        return ans;
    }

    /**
     * Returns the start indices in s of every substring that is a
     * concatenation of all entries of words, each used exactly as often as it
     * appears in words.
     *
     * Chunks are cut with the length of words[0]; all words are expected to
     * share that length.
     *
     * @param s     Text to search.
     * @param words Words to concatenate (duplicates count).
     * @return Matching start indices, grouped by alignment (index modulo the
     *         word length) in the order each alignment is scanned.
     */
    std::vector<int> findSubstring(std::string s, std::vector<std::string>& words) {
        std::vector<int> ans;
        if (words.empty()) {
            return ans;
        }
        // Nothing can match when the word is empty or the text is shorter
        // than the full concatenation (division avoids overflow).
        const std::string::size_type wordLen = words[0].length();
        if (wordLen == 0 || s.length() / wordLen < words.size()) {
            return ans;
        }

        // Give every distinct word a dense id and count how often it is
        // required. insert() tells us in one lookup whether the word is new.
        typedef std::map<std::string, int> WordIds;
        WordIds strid;
        std::vector<int> occurtimes;
        for (std::vector<std::string>::size_type w = 0; w < words.size(); ++w) {
            const int nextId = static_cast<int>(occurtimes.size());
            const std::pair<WordIds::iterator, bool> slot =
                strid.insert(WordIds::value_type(words[w], nextId));
            if (slot.second) {
                occurtimes.push_back(1);
            } else {
                ++occurtimes[slot.first->second];
            }
        }

        const int len = static_cast<int>(wordLen);
        const int textLen = static_cast<int>(s.length());
        std::vector<int> tokens;
        // Each alignment 0..len-1 yields an independent stream of whole-word
        // chunks; scan them one after another, reusing the token buffer.
        for (int i = 0; i < len; ++i) {
            tokens.clear();
            for (int j = i; j + len <= textLen; j += len) {
                const WordIds::iterator hit = strid.find(s.substr(j, len));
                tokens.push_back(hit == strid.end() ? -1 : hit->second);
            }
            const std::vector<int> matchpositions = overlap(tokens, occurtimes);
            for (std::vector<int>::size_type k = 0; k < matchpositions.size(); ++k) {
                // Convert a token index back to a character index in s.
                ans.push_back(i + matchpositions[k] * len);
            }
        }
        return ans;
    }
};