from collections import Counter
d = dict()
d["t1"] = ["L1", "L2", "L5", ]
d["t2"] = ["L2", "L4", ]
d["t3"] = ["L2", "L3", ]
d["t4"] = ["L1", "L2", "L4", ]
d["t5"] = ["L1", "L3", ]
d["t6"] = ["L2", "L3", ]
d["t7"] = ["L1", "L3", ]
d["t8"] = ["L1", "L2", "L3", "L5", ]
d["t9"] = ["L1", "L2", "L3", ]
# 定义类
class Apriori:
# 构造函数
def __init__(self, d, min_sup):
“”“
d:要处理的数据字典
min_sup:最小支持度
”“”
self.d = d
self.min_sup = min_sup
all_vals = []
for v in d.values():
all_vals += v
self.init_count = Counter(all_vals)
self.init_keys = self.init_count.keys()
self.init_vals = self.init_count.values()
self.d_temp = dict()
self.C_keys_temp = None
self.C_val_temp = None
self.L_keys_temp = []
self.L_val_temp = []
self.not_in_L = []
self.L_final_keys = None
self.L_final_vals = None
def get_C1(self):
return self.init_count
def get_L1(self):
for k in self.init_count.keys():
if self.init_count[k] >= self.min_sup:
self.d_temp[k] = self.init_count[k]
else:
self.not_in_L.append(set(k))
return self.d_temp
def run(self):
count = 0
C1 = self.get_C1()
L1 = self.get_L1()
self.C_keys_temp = C1.keys()
self.C_val_temp = C1.values()
self.L_keys_temp = L1.keys()
self.L_val_temp = L1.values()
# 如果L1 为空,停掉
if len(self.L_keys_temp) == 0:
return None
# 否则获取C2
else:
C = []
n = len(C1)
C = [[]]*int(n*(n - 1)/2)
count = 0
for i in range(n -1):
for j in range(i+1, n):
C[count]={list(C1)[i], list(C1)[j]}
count += 1
self.C_keys_temp = []
self.C_val_temp = [0] * len(C)
for i in C:
flag = True
for j in self.d.values():
if i.issubset(set(j)):
flag = False
if i not in self.C_keys_temp:
self.C_keys_temp.append(i)
self.C_val_temp[self.C_keys_temp.index(i)] = 1
else:
self.C_val_temp[self.C_keys_temp.index(i)] += 1
if flag:
self.C_keys_temp.append(i)
self.C_val_temp[self.C_keys_temp.index(i)] = 0
self.C_val_temp = self.C_val_temp[:len(self.C_keys_temp)]
flag = True
while(flag):
self.L_final_keys = self.L_keys_temp
self.L_final_vals = self.L_val_temp
self.result = zip(self.L_final_keys,self.L_final_vals)
# 都L 的临时变量清零
self.L_keys_temp = []
self.L_val_temp = []
# 由 C 构建 L
for i in range(len(self.C_keys_temp)):
if self.C_val_temp[i] >= self.min_sup:
self.L_keys_temp.append(self.C_keys_temp[i])
self.L_val_temp.append(self.C_val_temp[i])
else:
self.not_in_L.append(self.C_keys_temp[i])
# 由 L 构造 C
if len(self.L_keys_temp) != 0:
# 构造下一个C
###################################################################
C = []
# 外连接
for i in range(len(self.L_keys_temp) - 1):
for j in range(i+1, len(self.L_keys_temp)):
if self.L_keys_temp[i].intersection(self.L_keys_temp[j]) != set():
union = self.L_keys_temp[i].union(self.L_keys_temp[j])
flag_in = True
for nt in self.not_in_L:
if nt.issubset(union):
flag_in = False
self.not_in_L.append(union)
break
if union not in C and flag_in:
C.append(union)
self.C_keys_temp = []
self.C_val_temp = [0] * len(C)
# 统计
for i in C:
flag_in = True
for j in self.d.values():
if i.issubset(set(j)):
flag_in = False
if i not in self.C_keys_temp:
self.C_keys_temp.append(i)
self.C_val_temp[self.C_keys_temp.index(i)] = 1
else:
self.C_val_temp[self.C_keys_temp.index(i)] += 1
if flag_in:
self.C_keys_temp.append(i)
self.C_val_temp[self.C_keys_temp.index(i)] = 0
self.C_val_temp = self.C_val_temp[:len(self.C_keys_temp)]
#################################
else:
flag = False
def print_result(self):
print("最终结果:")
for i in range(len(self.L_final_keys)):
print(self.L_final_keys[i]," ",self.L_final_vals[i])
def print_confidence(self):
for i in range(len(self.L_final_keys)):
for j in list(self.L_final_keys[i]):
n_k = 0
n_v = 0
for d_v in self.d.values():
if j in d_v:
n_k +=1
if set([v for v in list(self.L_final_keys[i]) if v!=j]).issubset(set(d_v)):
n_v +=1
if self.confidence <= round(self.L_final_vals[i]/n_k,2):
print(j," ===> ", set([v for v in list(self.L_final_keys[i]) if v!=j])," confidence_val = ",round(self.L_final_vals[i]/n_k,2))
if self.confidence <= round(self.L_final_vals[i]/n_v,2):
print(set([v for v in list(self.L_final_keys[i]) if v!=j])," ===> ", j," confidence_val = ",round((self.L_final_vals[i]/n_v),2))
print("\n")
# 支持度为 3
a = Apriori(d,3)
a.run()
a.print_result()
# 支持度为 2
aa = Apriori(d,2)
aa.run()
aa.print_result()
# 支持度为 2,置信度为0.5
aa = Apriori(d,2, 0.5)
aa.run()
aa.print_result()