#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re

import redis
conn = redis.Redis()


# Text tokenization
def tokenize(content):
    """Return the set of distinct words found in *content*.

    The text is lower-cased first; a word is any run of two or more
    ASCII letters ``a``-``z``.  Everything else (digits, punctuation,
    non-ASCII characters, single letters) acts as a separator and is
    dropped.

    Args:
        content: The text to split into words.

    Returns:
        A ``set`` of lower-case words, each at least two letters long.
    """
    # Implement the required functionality below
    #********* Begin *********#
    # The {2,} quantifier already guarantees the minimum length, so no
    # additional length check is needed on the matches.
    return set(re.findall(r"[a-z]{2,}", content.lower()))
#********* End *********#


# Build the inverted index for a piece of text
def index_document(content):
    """Store *content* and add it to the inverted index of its words.

    A new id is taken from the ``content:id`` counter, the text is saved
    in the ``contents`` hash under that id, and the id is added to the
    set ``keyword:<word>`` for every word returned by ``tokenize``.

    Args:
        content: The text to store and index.
    """
    # Implement the required functionality below
    #********* Begin *********#
    content_id = conn.incr("content:id")
    conn.hset("contents", content_id, content)

    # Queue one SADD per word and send them together in a single
    # pipeline (created with its first argument set to True).
    pipeline = conn.pipeline(True)
    for word in tokenize(content):
        pipeline.sadd("keyword:" + word, content_id)
    pipeline.execute()
    #********* End *********#
# 第2关:基本搜索操作 (Level 2: basic search operations)
#!/usr/bin/env python #-*- coding:utf-8 -*-import re
import uuid
import redis
conn = redis.Redis()


# Parse a search query
def parse(query):
    """Split *query* into groups of wanted words and a list of unwanted words.

    The query is lower-cased and scanned for words of two or more ASCII
    letters, each optionally preceded by ``+`` or ``-``:

    * ``-word`` marks the word as unwanted.
    * ``+word`` adds the word to the current synonym group.
    * A word without a prefix closes the current synonym group (if it is
      non-empty) and starts a new one.

    Args:
        query: The raw search string.

    Returns:
        A tuple ``(wanted, unwanted)``.  ``wanted`` is a list of synonym
        groups, each a list of words; ``unwanted`` is a list of words.
        Both are built from sets, so the order of words inside a group
        and inside ``unwanted`` is not defined.
    """
    # Implement the required functionality below
    #********* Begin *********#
    unwanted = set()
    wanted = []
    synonym = set()
    for qword in re.findall(r"[+-]?[a-z]{2,}", query.lower()):
        # Separate the optional +/- marker from the word itself.
        prefix = qword[:1]
        if prefix in "+-":
            qword = qword[1:]
        else:
            prefix = None

        if prefix == "-":
            unwanted.add(qword)
            continue

        # An unprefixed word starts a new group, so flush the one that
        # has been collected so far.
        if synonym and not prefix:
            wanted.append(list(synonym))
            synonym = set()
        synonym.add(qword)

    # Flush the last group.
    if synonym:
        wanted.append(list(synonym))
    return wanted, list(unwanted)
    #********* End *********#


# Run an intersection/union/difference on sets and keep the result in a
# temporary set
def set_common(method, names):
    """Apply a set-store operation to keyword sets and return the result id.

    Every entry of *names* is prefixed with ``keyword:`` and passed, after
    the destination key ``keyword:<new uuid4>``, to the method of ``conn``
    called *method*.  The destination key is then given an expiry of 60.

    Args:
        method: Name of the ``conn`` method to call; it is invoked as
            ``method(destination, *source_keys)``.
        names: Key suffixes (without the ``keyword:`` prefix) of the
            sets to combine.

    Returns:
        The id (a UUID4 string) of the temporary set; the result is
        stored under ``"keyword:" + id``.
    """
    # Implement the required functionality below
    #********* Begin *********#
    common_id = str(uuid.uuid4())
    names = ["keyword:" + name for name in names]
    getattr(conn, method)("keyword:" + common_id, *names)
    # The result is only a scratch value, so let Redis drop it.
    conn.expire("keyword:" + common_id, 60)
    return common_id
    #********* End *********#
# 第3关:实现搜索 (Level 3: implementing search)
#!/usr/bin/env python #-*- coding:utf-8 -*-import re
import uuid
import redis
conn = redis.Redis()


# Run a search
def search(query):
    """Return the members of the keyword sets that match *query*.

    The query is parsed with ``parse``.  Each synonym group with more
    than one word is replaced by the union of its keyword sets; the
    groups are then intersected; finally the sets of all unwanted words
    are subtracted from that result.

    Args:
        query: The raw search string (see ``parse`` for the syntax).

    Returns:
        ``None`` when the query contains no wanted words; otherwise
        whatever ``conn.smembers`` returns for the final result set.
    """
    # Implement the required functionality below
    #********* Begin *********#
    wanted, unwanted = parse(query)
    if not wanted:
        return None

    # One key suffix per synonym group: a temporary union for groups
    # with several words, the word itself for single-word groups.
    to_intersect = []
    for qwords in wanted:
        if len(qwords) > 1:
            to_intersect.append(set_common("sunionstore", qwords))
        else:
            to_intersect.append(qwords[0])

    if len(to_intersect) > 1:
        result = set_common("sinterstore", to_intersect)
    else:
        result = to_intersect[0]

    if unwanted:
        # The first key is the set to subtract from; the rest are the
        # sets being removed.
        result = set_common("sdiffstore", [result] + unwanted)

    return conn.smembers("keyword:" + result)
    #********* End *********#


# Parse a search query
def parse(query):
    """Split *query* into groups of wanted words and a list of unwanted words.

    The query is lower-cased and scanned for words of two or more ASCII
    letters, each optionally preceded by ``+`` or ``-``:

    * ``-word`` marks the word as unwanted.
    * ``+word`` adds the word to the current synonym group.
    * A word without a prefix closes the current synonym group (if it is
      non-empty) and starts a new one.

    Args:
        query: The raw search string.

    Returns:
        A tuple ``(wanted, unwanted)``.  ``wanted`` is a list of synonym
        groups, each a list of words; ``unwanted`` is a list of words.
        Both are built from sets, so the order of words inside a group
        and inside ``unwanted`` is not defined.
    """
    unwanted = set()
    wanted = []
    synonym = set()
    for qword in re.findall(r"[+-]?[a-z]{2,}", query.lower()):
        # Separate the optional +/- marker from the word itself.
        prefix = qword[:1]
        if prefix in "+-":
            qword = qword[1:]
        else:
            prefix = None

        if prefix == "-":
            unwanted.add(qword)
            continue

        # An unprefixed word starts a new group, so flush the one that
        # has been collected so far.
        if synonym and not prefix:
            wanted.append(list(synonym))
            synonym = set()
        synonym.add(qword)

    # Flush the last group.
    if synonym:
        wanted.append(list(synonym))
    return wanted, list(unwanted)


# Run an intersection/union/difference on sets and keep the result in a
# temporary set
def set_common(method, names):
    """Apply a set-store operation to keyword sets and return the result id.

    Every entry of *names* is prefixed with ``keyword:`` and passed, after
    the destination key ``keyword:<new uuid4>``, to the method of ``conn``
    called *method*.  The destination key is then given an expiry of 60.

    Args:
        method: Name of the ``conn`` method to call; it is invoked as
            ``method(destination, *source_keys)``.
        names: Key suffixes (without the ``keyword:`` prefix) of the
            sets to combine.

    Returns:
        The id (a UUID4 string) of the temporary set; the result is
        stored under ``"keyword:" + id``.
    """
    common_id = str(uuid.uuid4())
    names = ["keyword:" + name for name in names]
    getattr(conn, method)("keyword:" + common_id, *names)
    # The result is only a scratch value, so let Redis drop it.
    conn.expire("keyword:" + common_id, 60)
    return common_id