sklearn实战-乳腺癌细胞数据挖掘(博客主亲自录制视频教程)
https://study.163.com/course/introduction.htm?courseId=1005269003&utm_campaign=commission&utm_source=cp-400000000398149&utm_medium=share
censor_xwgj_function.py 脚本
需要 list_xwgj_function.py 和 chi_square.py 两个脚本支持(两者的代码见下文)
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 25 10:06:52 2016
审核性味归经与功能的脚本
@author: Administrator
"""
import xlrd,csv
import list_xwgj_function,chi_square
# Candidate pairs to test. Each entry is [xwgj_keywords, function], where
# xwgj_keywords is a three-element combination such as [寒,苦,肝].
thelist_xwgj_function=list_xwgj_function.thelist_xwgj_function
# Alternative source with multi-element combinations (disabled):
#thelist_xwgj_function=list_xwgj_function_multiElements.list_xwgj2_function
# Path of the CSV file the results are written to.
fileName="性味归经_功能_关系.csv"
# Input workbook and the sheet read from it.
excelFilename="中药表.xlsx"
sheetName="Sheet1"
# Open the Excel workbook and select the sheet.
# NOTE(review): recent xlrd releases reportedly read only .xls files --
# confirm the installed xlrd version can open this .xlsx workbook.
excelFile=xlrd.open_workbook(excelFilename)
sheet=excelFile.sheet_by_name(sheetName)
# Number of rows and columns in the sheet.
number_rows=sheet.nrows
number_columns=sheet.ncols
# Data rows of the sheet; filled by Get_sheetData().
list_sheetData=[]
# One result entry per tested pair; filled by Get_all_chiSquare().
list_xingWeiFunction_relation=[]
# Load the sheet's data rows.
def Get_sheetData():
    """Append every sheet row except the first to ``list_sheetData``.

    Reading starts at row index 1, so row 0 (presumably the header row) is
    skipped. The module-level ``list_sheetData`` list is extended in place
    and the same list object is returned.
    """
    list_sheetData.extend(
        sheet.row_values(row_index) for row_index in range(1, number_rows)
    )
    return list_sheetData
# Populate list_sheetData once at import time; the counting code below reads it.
list_sheetData=Get_sheetData()
# Chi-square statistics for the relation between keyWord1 and keyWord2.
def Get_single_chiSquare(keyWord1, keyWord2):
    """Build the 2x2 contingency table for one pair and test it.

    Every row of ``list_sheetData`` is classified by two yes/no questions:
    does column 0 contain *all* elements of ``keyWord1``, and does column 1
    contain ``keyWord2``?

        a: all of keyWord1 present,      keyWord2 present
        b: all of keyWord1 present,      keyWord2 absent
        c: not all of keyWord1 present,  keyWord2 present
        d: not all of keyWord1 present,  keyWord2 absent

    The four cells are mutually exclusive, so a + b + c + d equals the
    number of rows. (The earlier version wrote the c/d conditions as
    ``x or y or z and w``; because ``and`` binds tighter than ``or`` this
    counted many rows in both c and d.)

    Args:
        keyWord1: sequence of substrings that must all occur in column 0.
            Normally three elements, but any length is accepted.
        keyWord2: substring looked for in column 1.

    Returns:
        ``[[a, b, c, d], k2, relation, rows]`` where ``k2`` is the value of
        ``chi_square.value_independence``, ``relation`` the verdict of
        ``chi_square.judge_independence``, and ``rows`` the spreadsheet row
        numbers of the rows counted in ``a``.
    """
    matched_rows = []
    a = b = c = d = 0
    for index, row in enumerate(list_sheetData):
        has_all_keywords = all(word in row[0] for word in keyWord1)
        has_function = keyWord2 in row[1]
        if has_all_keywords and has_function:
            a += 1
            # +2 converts the 0-based data index to a spreadsheet row number:
            # +1 for the skipped first row, +1 for 1-based numbering.
            matched_rows.append(index + 2)
        elif has_all_keywords:
            b += 1
        elif has_function:
            c += 1
        else:
            d += 1
    k2 = chi_square.value_independence(a, b, c, d)
    relation = chi_square.judge_independence(a, b, c, d)
    return [[a, b, c, d], k2, relation, matched_rows]
# Compute the relation for every candidate pair.
def Get_all_chiSquare(thelist_xwgj_function):
    """Run ``Get_single_chiSquare`` on each pair and collect the results.

    Args:
        thelist_xwgj_function: iterable of ``[keyWord1, keyWord2]`` pairs.

    Returns:
        The module-level ``list_xingWeiFunction_relation`` list, extended
        in place with one ``[pair, result, verdict]`` entry per pair that
        was processed successfully (``verdict`` is ``result[2]``).

    Pairs that fail are reported on stdout and skipped. Only ``Exception``
    subclasses are caught, so Ctrl-C and ``SystemExit`` still stop the run.
    """
    for pair in thelist_xwgj_function:
        try:
            keyWord1, keyWord2 = pair[0], pair[1]
            result = Get_single_chiSquare(keyWord1, keyWord2)
        except Exception:
            print("wrong at:", pair)
            continue
        list_xingWeiFunction_relation.append([pair, result, result[2]])
    return list_xingWeiFunction_relation
def Write_table_to_csv(list1, fileName):
    """Write the rows in ``list1`` to ``fileName`` as CSV, replacing the file.

    Args:
        list1: iterable of rows, each row an iterable of cell values.
        fileName: path of the CSV file to create or overwrite.

    The file is opened with ``newline=''`` as the csv module requires, and
    inside a ``with`` block so the handle is closed even when writing fails.
    """
    # NOTE(review): the original author remarked that the written format is
    # not right and the list needs reshaping first; nested lists are passed
    # to the writer as single cells -- confirm that is the intended layout.
    with open(fileName, 'w', newline='') as csv_file:
        csv.writer(csv_file).writerows(list1)
# Test every candidate pair, then write the collected results to the CSV file.
list_xingWeiFunction_relation=Get_all_chiSquare(thelist_xwgj_function)
Write_table_to_csv(list_xingWeiFunction_relation,fileName)
chi_square 脚本
#coding=utf-8
# Test for independence (chi-square test) on a 2x2 contingency table.
# Precondition: each of a, b, c, d must be at least 5.
# 2.706 is the decision threshold (90% probability): the larger the value,
# the stronger the relation; the smaller the value, the weaker the relation.
def value_independence(a, b, c, d):
    """Return the chi-square statistic of the 2x2 table ``[[a, b], [c, d]]``.

    Returns ``None`` when any of the four counts is below 5, in which case
    the statistic is not computed.
    """
    if min(a, b, c, d) < 5:
        return None
    total = a + b + c + d
    cross_difference = a * d - b * c
    margin_product = (a + b) * (c + d) * (a + c) * (b + d)
    return (total * cross_difference ** 2) / float(margin_product)
# True means the two factors are related, False means they are not.
# The default threshold 2.706 corresponds to 90% probability.
def judge_independence(a, b, c, d, threshold=2.706):
    """Decide whether the 2x2 table ``[[a, b], [c, d]]`` shows a relation.

    Args:
        a, b, c, d: the four cell counts of the contingency table.
        threshold: critical value the statistic must exceed to count as
            related. Defaults to 2.706, the value previously hard-coded.

    Returns:
        ``"wrong"`` when ``value_independence`` returns ``None`` (the
        statistic is not applicable), otherwise ``True`` if the statistic
        is greater than ``threshold`` and ``False`` if it is not.

    The statistic and the verdict are also printed to stdout.
    """
    num_independence = value_independence(a, b, c, d)
    print("chi_square:", num_independence)
    if num_independence is None:
        print("not suit for chi_square statistics")
        return "wrong"
    if num_independence > threshold:
        print("there is relationship")
        return True
    # Everything not above the threshold is reported as unrelated, so the
    # function never falls off the end and returns None implicitly.
    print("there is no relationship")
    return False
list_xwgj_function脚本
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 23 11:38:35 2016
高频率的性味归经和功能相关性分析
算法:卡方
@author: Administrator
"""
import xlrd,csv
# Input workbook and the sheet read from it.
excelFilename="性味归经与功能.xlsx"
sheetName="Sheet1"
# Open the Excel workbook and select the sheet.
# NOTE(review): recent xlrd releases reportedly read only .xls files --
# confirm the installed xlrd version can open this .xlsx workbook.
excelFile=xlrd.open_workbook(excelFilename)
sheet=excelFile.sheet_by_name(sheetName)
# Number of rows and columns in the sheet.
number_rows=sheet.nrows
number_columns=sheet.ncols
# Column 0 without its first cell: the xwgj (nature/flavour/meridian) entries.
list_xwgj=sheet.col_values(0)[1:]
# Split form of list_xwgj; filled by Get_list_xwgj().
list_xwgj1=[]
# Column 1 without its first cell: the function entries.
list_function=sheet.col_values(1)[1:]
# Drop empty cells (entries equal to the empty string).
list_function1=[i for i in list_function if i!=""]
# Every [xwgj, function] combination; filled by Get_list_xwgj_function().
list_xwgj_function=[]
def Get_list_xwgj_function():
    """Append every ``[xwgj, function]`` combination to ``list_xwgj_function``.

    Each entry of ``list_xwgj1`` is paired with each entry of
    ``list_function1``, the xwgj entry varying slowest. The module-level
    list is extended in place and the same list object is returned.
    """
    list_xwgj_function.extend(
        [xwgj, function]
        for xwgj in list_xwgj1
        for function in list_function1
    )
    return list_xwgj_function
# Bring the xwgj entries into a uniform list-of-lists format.
def Get_list_xwgj():
    """Split each ``list_xwgj`` entry on ``","`` and collect the parts.

    One list of substrings is appended to the module-level ``list_xwgj1``
    per entry, in order; the same list object is returned.
    """
    list_xwgj1.extend(entry.split(",") for entry in list_xwgj)
    return list_xwgj1
# Build the split xwgj list first; Get_list_xwgj_function() reads list_xwgj1,
# so the order of these two calls matters.
list_xwgj1=Get_list_xwgj()
thelist_xwgj_function=Get_list_xwgj_function()
list_xwgj_function 脚本需要读取 Excel 文件“性味归经与功能.xlsx”
censor_xwgj_function.py 脚本
需要导入 Excel 文件“中药表.xlsx”
python风控评分卡建模和风控常识(博客主亲自录制视频教程)