import numpy as np
from collections import defaultdict
from operator import itemgetter
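
# Simple affinity analysis: for every ordered feature pair
# (premise -> conclusion), count how often a transaction that contains the
# premise also contains the conclusion, then rank the rules by support
# (raw co-occurrence count) and confidence (count / premise occurrences).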
def Affinity_Analysis(dataset, features, nums_feature):
    valid_rules = defaultdict(int)
    invalid_rules = defaultdict(int)
    num_occurrences = defaultdict(int)
    for sample in dataset:
        for premise in range(nums_feature):
            # Skip this premise if the sample does not contain the feature.
            if sample[premise] == 0:
                continue
            num_occurrences[premise] += 1
            for conclusion in range(nums_feature):
                if conclusion == premise:
                    continue
                if sample[conclusion] == 1:
                    valid_rules[(premise, conclusion)] += 1
                else:
                    # Tracked for completeness; not used below.
                    invalid_rules[(premise, conclusion)] += 1
    # Support is the raw co-occurrence count; confidence normalises it by
    # how often the premise occurs at all.
    support = valid_rules
    confidence = defaultdict(float)
    for premise, conclusion in valid_rules.keys():
        confidence[(premise, conclusion)] = (
            valid_rules[(premise, conclusion)] / num_occurrences[premise]
        )
    sorted_support = sorted(support.items(), key=itemgetter(1), reverse=True)
    sorted_confidence = sorted(confidence.items(), key=itemgetter(1), reverse=True)
    print('\nSupport, highest to lowest:')
    for (premise, conclusion), count in sorted_support:
        print("[{0} -> {1}]\t- Support: {2}".format(
            features[premise], features[conclusion], count))
    print('\nConfidence, highest to lowest:')
    for (premise, conclusion), ratio in sorted_confidence:
        print("[{0} -> {1}]\t- Confidence: {2:.3f}".format(
            features[premise], features[conclusion], ratio))


if __name__ == "__main__":
    dataset_filename = "path/to/dataset"  # placeholder (original: "dataset path")
    X = np.loadtxt(dataset_filename)
    features = ['bread', 'milk', 'cheese', 'apple', 'banana']
    nums_feature = 5
    Affinity_Analysis(X, features, nums_feature)