【leetcode】Substring with Concatenation of All Words

最新推荐文章于 2021-01-05 17:33:28 发布

Cabinathor

最新推荐文章于 2021-01-05 17:33:28 发布

阅读量305

点赞数

CC 4.0 BY-SA版权

分类专栏： Leetcode

本文链接：https://blog.youkuaiyun.com/lmxmimihuhu/article/details/47829471

Leetcode 专栏收录该内容

222 篇文章

订阅专栏

本文介绍了两种子串拼接查找算法，一种仅使用哈希，另一种结合自定义哈希和双指针方法，用于在给定字符串中查找由一组单词组成的连续子串。详细解释了算法步骤和实现细节。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

https://leetcode.com/problems/substring-with-concatenation-of-all-words/

You are given a string, s, and a list of words, words, that are all of the same length. Find all starting indices of substring(s) in s that is a concatenation of each word in words exactly once and without any intervening characters.

For example, given:
s: "barfoothefoobarman"
words: ["foo", "bar"]

You should return the indices: [0,9].
(order does not matter).

思路1：

只用Hash

/**
 * Brute-force solution using only ordered maps.
 *
 * For every possible start offset in s, the next words.size() chunks of
 * words[0].size() characters are read one by one and counted; the offset is
 * reported when every chunk is a listed word and no word is used more often
 * than it appears in words.
 *
 * Precondition (from the problem statement): all words have the same length.
 */
class Solution {
public:
    /**
     * @param s      text to search
     * @param words  words that must each appear exactly once per occurrence in words
     * @return start indices, in ascending order, of every substring of s that
     *         is a concatenation of all words; empty if s or words is empty
     *         or the words are empty strings
     */
    std::vector<int> findSubstring(std::string s, std::vector<std::string>& words) {
        std::vector<int> result;
        if (s.empty() || words.empty() || words.front().empty()) {
            return result;
        }

        const int wordLen = words.front().size();
        const int textLen = s.size();
        const int wordCount = words.size();

        // How many times each word has to be used.
        std::map<std::string, int> required;
        for (const std::string& w : words) {
            required[w] += 1;
        }

        // Last offset at which a full concatenation still fits; negative
        // when s is too short, in which case the loop body never runs.
        const int lastStart = textLen - wordCount * wordLen;
        for (int start = 0; start <= lastStart; ++start) {
            std::map<std::string, int> seen;
            int matched = 0;
            for (; matched < wordCount; ++matched) {
                const std::string piece = s.substr(start + matched * wordLen, wordLen);
                const std::map<std::string, int>::const_iterator need = required.find(piece);
                if (need == required.end()) {
                    break;  // chunk is not one of the words
                }
                if (++seen[piece] > need->second) {
                    break;  // word used more often than allowed
                }
            }
            if (matched == wordCount) {
                result.push_back(start);
            }
        }
        return result;
    }
};

思路2：

自定义hash + 双指针。

这个方法比较复杂，但运行时间快很多。当然，复杂度还是一样的。不过，仅凭hash值来判断两个串相等其实是不严格的：只有在不取模（不溢出）的情况下，hash才与字符串一一对应；而下面的代码用int（通常是32位）保存hash，溢出后相当于取模，两个不同的串大约有1/2^32的概率得到相同的hash。仅供参考。

取质数 p=151。对于一个 word，例如 wd = foo，hash[foo] = 'f'*p^2 + 'o'*p + 'o'。

1.按这个方法求hash，然后建立从wd到id，id到count的映射。

2.用同样的方法，为s中每个起始位置计算其后wlen长子串的hash。

3.最后，用双指针去寻找。

这个算法比较复杂，具体看代码。

/**
 * Rolling hash + two pointers.
 *
 * Every word gets a small integer id (equal words share one id). A polynomial
 * rolling hash is then slid over s to find, for each position, the id of the
 * word that starts there (0 if none). Finally, for each of the wlen possible
 * alignments, a window of word-sized steps is slid over those ids.
 *
 * The hash is only used to find candidates quickly: every hash hit is
 * confirmed by comparing the actual characters, so hash collisions cannot
 * produce wrong answers. Hash arithmetic is done in unsigned integers, where
 * wrap-around is well defined.
 *
 * Precondition (from the problem statement): all words have the same length.
 */
class Solution {
private:
    // Base of the polynomial hash.
    const unsigned p = 151;
    // hash -> ids of the distinct words that have this hash value
    std::unordered_map<unsigned, std::vector<int> > htoi;
    // ans : result of the most recent findSubstring call
    // id  : id[i] = id of the word starting at s[i], or 0 if no word starts there
    // itoc: itoc[k] = number of times the word with id k occurs in words
    // rep : rep[k] = index in words of one occurrence of the word with id k
    std::vector<int> ans, id, itoc, rep;
    // slen/wlen/wcnt: length of s, length of one word, number of words.
    // icnt: last id handed out (ids start at 1; 0 means "not a word").
    int slen = 0, wlen = 0, wcnt = 0, icnt = 0;
    // p^(wlen-1): weight of the leading character, used to roll the hash.
    unsigned mh = 1;

    // Resets all state for a new query. Returns false when the answer is
    // trivially empty (no words, empty words, empty s, or s too short).
    bool init(const std::string &s, const std::vector<std::string> &words) {
        ans.clear();
        if (words.empty() || words[0].empty() || s.empty()) {
            return false;
        }
        wcnt = static_cast<int>(words.size());
        wlen = static_cast<int>(words[0].size());
        slen = static_cast<int>(s.size());
        // Same meaning as slen < wlen * wcnt, but cannot overflow.
        if (wcnt > slen / wlen) {
            return false;
        }
        icnt = 0;
        htoi.clear();
        itoc.assign(wcnt + 1, 0);
        rep.assign(wcnt + 1, -1);
        id.assign(slen, 0);
        mh = 1;
        for (int i = 1; i < wlen; ++i) {
            mh *= p;
        }
        return true;
    }

    // Hash of the first wlen characters of str.
    unsigned hashPrefix(const std::string &str) const {
        unsigned t = 0;
        for (int i = 0; i < wlen; ++i) {
            // Go through unsigned char so the value does not depend on
            // whether plain char is signed on this platform.
            t = t * p + static_cast<unsigned char>(str[i]);
        }
        return t;
    }

    // Assigns an id to every distinct word and counts its occurrences.
    void genHashForWords(const std::vector<std::string> &words) {
        for (int i = 0; i < wcnt; ++i) {
            std::vector<int> &bucket = htoi[hashPrefix(words[i])];
            int wid = 0;
            // Words with the same hash share a bucket; compare the text to
            // decide whether this is really a repeat of an earlier word.
            for (int candidate : bucket) {
                if (words[rep[candidate]] == words[i]) {
                    wid = candidate;
                    break;
                }
            }
            if (wid == 0) {
                wid = ++icnt;
                rep[wid] = i;
                bucket.push_back(wid);
            }
            ++itoc[wid];
        }
    }

    // Id of the word equal to s[pos, pos + wlen), given that substring's
    // hash h; 0 when the substring is not one of the words.
    int lookup(unsigned h, const std::string &s, int pos,
               const std::vector<std::string> &words) const {
        const auto it = htoi.find(h);  // find(): never inserts into the map
        if (it == htoi.end()) {
            return 0;
        }
        for (int wid : it->second) {
            if (s.compare(pos, wlen, words[rep[wid]]) == 0) {
                return wid;
            }
        }
        return 0;
    }

    // Fills id[] for every position of s where a full word still fits.
    void genHashForStr(const std::string &s, const std::vector<std::string> &words) {
        unsigned h = hashPrefix(s);
        id[0] = lookup(h, s, 0, words);
        for (int i = 1; i <= slen - wlen; ++i) {
            // Roll: drop s[i-1] from the front, append s[i+wlen-1] at the back.
            h = (h - mh * static_cast<unsigned char>(s[i - 1])) * p
                + static_cast<unsigned char>(s[i + wlen - 1]);
            id[i] = lookup(h, s, i, words);
        }
    }

    // Sliding window over id[], once per alignment 0..wlen-1.
    // Invariant: use[k] counts the words with id k at st, st+wlen, ..., ed-wlen.
    void find() {
        const int lastStart = slen - wlen;
        const int fullSpan = wlen * (wcnt - 1);
        std::vector<int> use;
        for (int i = 0; i < wlen; ++i) {
            int st = i, ed = i;
            use.assign(wcnt + 1, 0);
            while (ed <= lastStart) {
                const int tid = id[ed];
                if (tid == 0) {
                    // Not a word: no window may contain it, restart after it.
                    use.assign(wcnt + 1, 0);
                    st = ed = ed + wlen;
                    continue;
                }
                if (++use[tid] > itoc[tid]) {
                    // One occurrence too many: shrink from the left until
                    // the earliest occurrence of this word has been dropped.
                    while (id[st] != tid) {
                        --use[id[st]];
                        st += wlen;
                    }
                    --use[tid];
                    st += wlen;
                }
                if (ed - st == fullSpan) {
                    ans.push_back(st);
                }
                ed += wlen;
            }
        }
    }

public:
    /**
     * @param s      text to search
     * @param words  words that must each be used exactly as often as listed
     * @return reference to an internal vector holding the start indices of
     *         all concatenations (grouped by alignment, ascending within each
     *         alignment); it is overwritten by the next call on this object
     */
    std::vector<int> &findSubstring(std::string s, std::vector<std::string> &words) {
        if (!init(s, words)) {
            return ans;
        }
        genHashForWords(words);
        genHashForStr(s, words);
        find();
        return ans;
    }
};

实际上，直接用map + 双指针就好了。

/**
 * Hash map + two pointers (sliding window).
 *
 * For each of the wdLen possible alignments, s is read in word-sized steps
 * while a window [st, ed] of consecutive listed words is maintained together
 * with a count of each word inside it. The window only ever moves forward:
 * a chunk that is not a word makes it restart just after that chunk, a
 * surplus word makes it shrink from the left, and after a hit only the
 * leftmost word is dropped. Each chunk is therefore added to and removed
 * from the window at most once per alignment.
 *
 * Precondition (from the problem statement): all words have the same length.
 */
class Solution {
public:
    /**
     * @param s      text to search
     * @param words  words that must each be used exactly as often as listed
     * @return start indices of every substring of s that is a concatenation
     *         of all words (grouped by alignment, ascending within each
     *         alignment); empty if s or words is empty or the words are
     *         empty strings
     */
    std::vector<int> findSubstring(std::string s, std::vector<std::string>& words) {
        std::vector<int> indices;
        if (s.empty() || words.empty() || words[0].empty()) {
            return indices;
        }
        const int wdLen = static_cast<int>(words[0].size());
        const int sLen = static_cast<int>(s.size());
        const int wdNum = static_cast<int>(words.size());
        // s is too short to hold all words (written so it cannot overflow).
        if (wdNum > sLen / wdLen) {
            return indices;
        }
        const int subLen = wdNum * wdLen;
        const int lastEd = sLen - wdLen;  // last offset where a word still fits

        // How many times each word has to be used.
        std::unordered_map<std::string, int> wdCnt;
        for (const std::string& wd : words) {
            ++wdCnt[wd];
        }

        // Words currently inside the window.
        std::unordered_map<std::string, int> wdFd;
        for (int i = 0; i < wdLen; ++i) {
            wdFd.clear();
            int st = i;
            for (int ed = i; ed <= lastEd; ed += wdLen) {
                const std::string sub = s.substr(ed, wdLen);
                const auto need = wdCnt.find(sub);
                if (need == wdCnt.end()) {
                    // Not a word: no window may contain this chunk, so
                    // restart right after it instead of rescanning.
                    wdFd.clear();
                    st = ed + wdLen;
                    continue;
                }
                // References into an unordered_map stay valid while other
                // entries are modified, so this can be kept across the loop.
                int& seen = wdFd[sub];
                ++seen;
                while (seen > need->second) {
                    // Too many copies of sub: drop words from the left until
                    // its earliest occurrence has left the window.
                    --wdFd[s.substr(st, wdLen)];
                    st += wdLen;
                }
                if (ed + wdLen - st == subLen) {
                    indices.push_back(st);
                    // Slide on by one word; the rest of the window is kept.
                    --wdFd[s.substr(st, wdLen)];
                    st += wdLen;
                }
            }
        }
        return indices;
    }
};