LeetCode 日经贴(Python、C++ 代码)- Substring with Concatenation of All Words

本文介绍了一种高效的字符串匹配算法:在给定字符串中找出所有由指定单词列表(每个单词恰好使用其在列表中出现的次数)拼接而成的子串的起始位置。算法先为每个位置建立单词匹配表,再对每个对齐偏移量使用滑动窗口,从而正确处理重复单词;窗口的左右指针在每个偏移量下都只向前移动,因此窗口扫描总共只需 O(N) 次单词级操作(不含切片与查表本身的开销)。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Substring with Concatenation of All Words



class Solution:
    # @param S, a string
    # @param L, a list of string
    # @return a list of integer
    def findSubstring(self, S, L):
        """Return every start index in S of a concatenation of all words in L.

        Every word is treated as having the length of L[0], and each word must
        be used exactly as many times as it appears in L. Indices are reported
        grouped by alignment offset (0 .. word length - 1), ascending within
        each group.
        """
        starts = []
        word_total = len(L)
        if word_total == 0:
            return starts
        width = len(L[0])
        text_len = len(S)
        span = width * word_total
        if text_len < span:
            return starts

        # Each distinct word gets the index of its first appearance in L as its
        # id; required[id] counts how many times that word has to be used.
        word_id = {}
        required = [0] * word_total
        for position, word in enumerate(L):
            required[word_id.setdefault(word, position)] += 1

        # id_at[p] is the id of the word that starts at S[p], or -1 if none.
        slice_starts = text_len - width + 1
        id_at = [word_id.get(S[p:p + width], -1) for p in range(slice_starts)]

        for offset in range(width):
            # The window covers the word slots left, left + width, ... < right.
            left = right = offset
            in_window = [0] * word_total
            while right < slice_starts:
                if right - left == span:
                    starts.append(left)
                current = id_at[right]
                if current == -1:
                    # Not a word: release everything held and restart after it.
                    for p in range(left, right, width):
                        in_window[id_at[p]] -= 1
                    right += width
                    left = right
                    continue
                if in_window[current] >= required[current]:
                    # Word already used up: drop slots from the left up to and
                    # including its earliest occurrence, which the new slot
                    # replaces (so its count stays the same).
                    while id_at[left] != current:
                        in_window[id_at[left]] -= 1
                        left += width
                    left += width
                else:
                    in_window[current] += 1
                right += width
            # The window may have become complete on the very last slot.
            if right - left == span:
                starts.append(left)

        return starts


class Solution {
public:
    /// Scans one word-aligned token stream for windows that use every word
    /// exactly as often as required.
    ///
    /// @param tokens  Word ids of consecutive, non-overlapping slices of the
    ///                text; -1 marks a slice that is not one of the words.
    /// @param mp      Required multiplicity per word id. Taken by value because
    ///                it is consumed as the "still missing" counter of the
    ///                current window.
    /// @return Token indices at which a complete window starts, ascending.
    std::vector<int> overlap(const std::vector<int> &tokens, std::vector<int> mp) {
        std::vector<int> ans;
        const std::size_t count = tokens.size();
        std::size_t st = 0;
        // Number of word ids whose missing count is still above zero.
        std::size_t unmatch = mp.size();
        for (std::size_t ed = 0; ed < count; ++ed) {
            const int cur = tokens[ed];
            if (cur == -1) {
                // Not a word: hand back everything the window holds and
                // restart just after this slot.
                for (; st < ed; ++st) {
                    if (mp[tokens[st]]++ == 0) {
                        ++unmatch;
                    }
                }
                st = ed + 1;
            } else if (mp[cur] > 0) {
                // Word still needed: take it; the window is complete once the
                // last missing id reaches zero.
                if (--mp[cur] == 0 && --unmatch == 0) {
                    ans.push_back(static_cast<int>(st));
                }
            } else {
                // Word already used up (mp[i] never drops below 0): release
                // slots from the left up to its earliest occurrence, which the
                // new slot then replaces without changing its count.
                while (st < ed && tokens[st] != cur) {
                    if (mp[tokens[st]]++ == 0) {
                        ++unmatch;
                    }
                    ++st;
                }
                ++st;
                if (unmatch == 0) {
                    ans.push_back(static_cast<int>(st));
                }
            }
        }
        return ans;
    }

    /// Finds every start index in `s` of a concatenation of all `words`.
    ///
    /// Every word is treated as having the length of `words[0]`, and each word
    /// must be used exactly as many times as it appears in `words`.
    ///
    /// @param s      Text to search.
    /// @param words  Words to concatenate; duplicates are allowed.
    /// @return Start indices grouped by alignment offset (0 .. word length - 1),
    ///         ascending within each group. Empty if `words` is empty.
    std::vector<int> findSubstring(std::string s, std::vector<std::string>& words) {
        std::vector<int> ans;
        if (words.empty()) {
            return ans;
        }

        // Give each distinct word a dense id and count its multiplicity.
        typedef std::map<std::string, int> WordIds;
        WordIds strid;
        std::vector<int> occurtimes;
        for (std::size_t i = 0; i < words.size(); ++i) {
            const std::pair<WordIds::iterator, bool> slot = strid.insert(
                std::make_pair(words[i], static_cast<int>(occurtimes.size())));
            if (slot.second) {
                occurtimes.push_back(1);
            } else {
                ++occurtimes[slot.first->second];
            }
        }

        const std::size_t len = words[0].length();
        const std::size_t total = s.length();
        // A text shorter than the full concatenation cannot contain a match;
        // the zero-length check also keeps the division below well defined.
        if (len == 0 || total < len * words.size()) {
            return ans;
        }

        std::vector<int> tokens;
        tokens.reserve(total / len);
        // Each alignment offset yields an independent stream of word slots.
        for (std::size_t i = 0; i < len; ++i) {
            tokens.clear();
            for (std::size_t j = i; j + len <= total; j += len) {
                const WordIds::const_iterator hit = strid.find(s.substr(j, len));
                tokens.push_back(hit == strid.end() ? -1 : hit->second);
            }
            const std::vector<int> matchpositions = overlap(tokens, occurtimes);
            for (std::size_t k = 0; k < matchpositions.size(); ++k) {
                // Convert a token index back into a character index of s.
                ans.push_back(static_cast<int>(i + matchpositions[k] * len));
            }
        }
        return ans;
    }
};


 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值