All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.
Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.
For example, given s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT", return ["AAAAACCCCC", "CCCCCAAAAA"].
/// Finds the 10-letter DNA sequences that occur more than once in a strand.
class Solution {
public:
    /// @brief Collects every 10-character window of `s` that appears at least twice.
    ///
    /// Each nucleotide is packed into 2 bits (A=0, C=1, G=2, T=3), so a
    /// 10-letter window becomes a 20-bit integer that is updated in O(1) as the
    /// window slides one character to the right.
    ///
    /// @param s  The strand to scan. Characters other than 'A', 'C', 'G', 'T'
    ///           are not nucleotides: any window containing one is skipped
    ///           rather than being silently treated as 'A'.
    /// @return   Each repeated window exactly once, in the order in which its
    ///           second occurrence ends in `s`. Empty when `s` has fewer than
    ///           10 characters or nothing repeats.
    std::vector<std::string> findRepeatedDnaSequences(std::string s) {
        const std::size_t kWindow = 10;  // length of the sequences we look for
        const int kMask = 0xFFFFF;       // keeps the low 20 bits = 10 letters * 2 bits

        std::vector<std::string> result;
        if (s.size() < kWindow)
            return result;

        std::map<int, int> occurrences;  // packed window -> times seen so far
        int code = 0;                    // packed encoding of the trailing letters
        std::size_t valid = 0;           // consecutive valid nucleotides ending at i

        for (std::size_t i = 0; i < s.size(); ++i) {
            int bits = -1;
            switch (s[i]) {
                case 'A': bits = 0; break;
                case 'C': bits = 1; break;
                case 'G': bits = 2; break;
                case 'T': bits = 3; break;
                default: break;
            }

            if (bits < 0) {
                // An unknown symbol cannot be part of any window: start over so
                // it is never aliased to a real nucleotide.
                code = 0;
                valid = 0;
                continue;
            }

            code = ((code << 2) | bits) & kMask;
            if (++valid < kWindow)
                continue;  // window not full yet; do not touch the counts

            // Report on the transition 1 -> 2 only, so a sequence that occurs
            // three or more times is still listed once.
            if (++occurrences[code] == 2)
                result.push_back(s.substr(i + 1 - kWindow, kWindow));
        }
        return result;
    }
};