-:Sunday算法
最后一个是Sunday 算法,实际上比Boyer-Moore 还快,呵呵。长江后浪推前浪。
原始论文:Daniel M. Sunday, A very fast substring search algorithm, Communications of the ACM, v.33 n.8, p.132-142, Aug. 1990
看原始论文的题目,D.M. Sunday 貌似是故意想气气Boyer-Moore 两位大牛似的。呵呵。不过实际上Sunday 算法的确比BM 算法要快,而且更简单。
Sunday 的算法思想和Horspool 有些相似,但是当出现不匹配的时候,却不是去找匹配串中不匹配的字符在模式串的位置,而是直接找匹配串中与模式串最右端对齐位置的右一位的那个字符在模式串的位置。
比如:
匹配串:abcbc zdxzc
模式串:zbcac
嗯,这里我们看到b-a 没有对上,我们就看匹配串中的z 在模式串的位置,然后把模式串中的z 和它对齐,嘿嘿。
匹配串:abcbczdxzc
模式串:     zbcac
如果模式串中没有那个字符怎么办呢?很简单,跳过去呗。
匹配串:abcbc edxzcs
模式串:zbcac
e 不在模式串中出现
那么我们就直接把模式串移到e 的后面:
匹配串:abcbcedxzcs
模式串:      zbcac
// Sunday substring search.
//
// Returns the zero-based offset of the first occurrence of `pattern` in
// `text`, or -1 when there is no occurrence or either pointer is NULL.
// An empty pattern matches at offset 0.
//
// Both arguments must be NUL-terminated strings; lengths are held in `int`,
// so inputs are expected to be shorter than INT_MAX bytes.
int findStr(const char* text,const char* pattern)
{
    if(text==NULL||pattern==NULL)
        return -1;

    const int textLen=static_cast<int>(strlen(text));
    const int patternLen=static_cast<int>(strlen(pattern));

    // Without this guard the search loop would start at index -1 and read
    // before the beginning of both buffers.
    if(patternLen==0)
        return 0;

    // One slot per possible byte value. Kept on the stack so that no return
    // path can leak it.
    const int size=1<<(sizeof(char)*8);
    int moveTable[size];

    // Precompute the shift table: a byte that does not occur in the pattern
    // lets the window jump completely past it (patternLen+1 steps).
    for(int k=0;k<size;k++)
        moveTable[k]=patternLen+1;
    // For bytes that do occur, the rightmost occurrence wins because later
    // iterations overwrite earlier ones. Index through unsigned char so that
    // bytes >= 0x80 never become a negative index when char is signed.
    for(int k=0;k<patternLen;k++)
        moveTable[static_cast<unsigned char>(pattern[k])]=patternLen-k;

    // `i` is the text index aligned with the last character of the pattern.
    for(int i=patternLen-1;i<textLen;)
    {
        int j=patternLen-1;
        int temp=i;
        // Compare backwards; test the bound first so index -1 is never read.
        while(j>=0&&pattern[j]==text[temp])
        {
            j--;
            temp--;
        }
        if(j<0)
            return temp+1;
        // Shift according to the byte just right of the current window.
        // i<textLen guarantees i+1<=textLen, so at worst this reads the
        // terminating NUL, whose table entry is the full jump.
        i+=moveTable[static_cast<unsigned char>(text[i+1])];
    }
    return -1;
}
// Demo driver: prints a greeting line, then the value findStr returns when
// searching a sample sentence for the word "more".
int main()
{
    std::cout << "Hello World!" << std::endl;

    const char* const haystack="we call on the western nations to commit more money to the poorest country";
    const char* const needle="more";

    const int offset=findStr(haystack,needle);
    std::cout << offset;
    return 0;
}
1 import redis
2 import random
3
def fillDB(conn):
    """Fill the database with ``user:<n>`` keys until a write is rejected.

    Eviction is switched off first (``noeviction``), then keys ``user:0``,
    ``user:1``, ... are written until ``conn.execute_command`` raises.
    Afterwards the policy is set to ``allkeys-lru``.

    conn -- client object exposing ``execute_command(command_string)``.

    Returns the number of keys that were written successfully.
    """
    conn.execute_command('CONFIG SET maxmemory-policy noeviction')
    count = 0
    try:
        while True:
            conn.execute_command('SET user:%d super_hero' % count)
            count += 1
    except Exception as e:
        # The first rejected write is the expected way out of the loop
        # (presumably the server's out-of-memory error once maxmemory is
        # reached -- confirm against the server configuration).
        print(e)
    finally:
        # Restore LRU eviction even if the loop is interrupted by something
        # that is not an Exception (e.g. Ctrl-C).
        conn.execute_command('CONFIG SET maxmemory-policy allkeys-lru')
    return count
15
def simulate(conn, dbSize, dataPro, probability, times):
    """Replay a skewed read workload against the cache.

    The key space is ``user:0`` .. ``user:<2*dbSize>``. Its first
    ``dataPro`` fraction is the "hot" range; the rest is the "cold" range.
    Each round picks a hot key with probability ``probability`` and a cold
    key otherwise, issues a GET, and on a miss (falsy reply) writes the key
    back with SET.

    conn         -- the redis client instance
    dbSize       -- the size of the DB, i.e. the number of strings stored
    dataPro      -- the proportion of data that is frequently visited
    probability  -- the probability with which the frequent data is accessed
    times        -- the number of simulated accesses
    """
    dataAmount = dbSize * 2
    # randint needs integer bounds; dataAmount * dataPro is a float and is
    # usually not integral, so truncate it once up front.
    hot_limit = int(dataAmount * dataPro)
    # Keep the cold range non-empty when the hot range already covers the
    # whole key space (dataPro >= 1 or dbSize == 0).
    cold_start = min(hot_limit + 1, dataAmount)

    for _ in range(times):
        if random.random() <= probability:
            item = random.randint(0, hot_limit)
        else:
            item = random.randint(cold_start, dataAmount)

        if not conn.execute_command('GET user:%d' % item):
            conn.execute_command('SET user:%d super_hero' % item)
35
def parse_keyspace_stats(info):
    """Extract ``(keyspace_hits, keyspace_misses)`` from an INFO reply.

    info -- the reply as text or bytes made of ``name:value`` lines, or an
            already-parsed mapping of field name to value.

    A counter that is absent from the reply is reported as 0.
    """
    if isinstance(info, dict):
        return (int(info.get('keyspace_hits', 0)),
                int(info.get('keyspace_misses', 0)))
    if isinstance(info, bytes):
        info = info.decode('utf-8')

    hits = 0
    misses = 0
    for line in info.splitlines():
        # Match the field name exactly instead of by substring.
        name, _, value = line.partition(':')
        if name == 'keyspace_hits':
            hits = int(value)
        elif name == 'keyspace_misses':
            misses = int(value)
    return hits, misses


if __name__ == '__main__':
    print('before running this,please make sure that you have limited the maxmemory')
    conn = redis.StrictRedis()
    # To (re)generate the workload before reading the statistics, enable:
    #     cacheSize = fillDB(conn)
    #     print('the cacheSize is %d' % cacheSize)
    #     simulate(conn, cacheSize, 0.2, 0.8, 10000)
    # NOTE(review): depending on the client version and its settings, this
    # call may return raw text, bytes, or a parsed dict; the helper accepts
    # all three -- confirm against the installed client.
    info = conn.execute_command('info')
    hits, misses = parse_keyspace_stats(info)

    print('keyspace_hits = %d\nkeyspace_misses = %d\n' % (hits, misses))
    total = hits + misses
    if total:
        print('hit_rate = %f' % (hits / float(total)))
    else:
        # No lookups recorded yet; the ratio would be a division by zero.
        print('hit_rate = n/a (no keyspace lookups recorded)')