基础
数据声明 list=[] (列表),i=0(int型),a='123'(str字符串),dist={}(字典),do=()(元组)
循环for i in range(5):
    print(i)
list.append(z)# 添加add
df.fillna("其他")#缺失值用"其他"填充(fillna是pandas的DataFrame/Series方法,list没有该方法)
len(list)#列表长度
list(list1)转list
del list[index]#删除
1.list操作
list1 = [[1, 2, 3, 4], [2, 5, 3, 4], [1, 6, 3, 4], [1, 7, 3, 4], [1, 8, 3, 4]]
print([i[1] for i in list1])#按列读取
print(list1[0])
#遍历
for x in list1:
    print(x)
for x in range(len(list1)):
    print(list1[x])
List2=[x for x in list1 if x[0]>1]#列表推导式
2.字典操作
dist1={}
dist1[1]='1232'
print(dist1)
print(dist1[1])
del dist1[1]
print(dist1)
3.导包
import image.utils as utils #全部导入
from image.utils import add_sytle #导入函数 add_sytle
高级
涉及模块
pandas,xlrd,xlwt,sklearn,matplotlib,numpy
xlrd,xlwt(excel读取写入),pyinstaller
安装:pip install 模块名称 -i http://pypi.douban.com/simple --trusted-host pypi.douban.com
如果安装不成功,访问 https://www.lfd.uci.edu/~gohlke/pythonlibs/ 下载对应版本的whl文件,再用 pip install *.whl 安装
pandas使用
pandas拥有两种数据结构:Series和DataFrame
Series:
data = Series([1,2,3,4],index = ['a','b','c','d'])
print(data)
data = {'a':[1,2,3],'b':['we','you','they'],'c':['btc','eos','ae']}
df = DataFrame(data)
print(df)
dic = {'a':1,'b':2,'c':'as'}
dicSeries = Series(dic)
DataFrame:
import pandas as pd
data = {'name': ['BTC', 'ETH', 'EOS'], 'price':[50000, 4000, 150]}
data = pd.DataFrame(data)
data1 = {'name': ['BTC', 'ETH', 'EOS'], 'price':[50000, 4000, 150]}
data1 = pd.DataFrame(data1)
#拼接
df1=pd.merge(data,data1, how='left',on='name')#左右以name关联拼接,how='left'表示以左表为主
df2=pd.concat([data,data1])#上下拼接
data = data[~data['price'].isin([150])]#去除'price'列值为150的行(price列是整数,需用150而不是字符串'150')
data['price']=[50000, 4000, 150]#
#列相加
data['总和']=data['price']+data['price1']
#列求和
I=data['总和'].sum()
#分组
df=data.groupby(by='price')['price'].sum()#根据price分组并对price求和
df=pd.DataFrame({'price':df.index,'总价':df.values})
#生成excel
writer = pd.ExcelWriter('生成文件路径')
workbook = writer.book
df1.to_excel(writer, sheet_name=u'各学校通过率', encoding='utf8', index=False, startcol=0, startrow=1)#startrow空行
format1 = workbook.add_format({'num_format': '#,##0.00','valign': 'vcenter', 'align': 'center', 'border': 1})#添加样式
worksheet1 = writer.sheets[u'各学校通过率']#获取to_excel写入的工作表对象
worksheet1.set_column('D:D', 16, format1)#列样式
worksheet1.set_row(0, 30)#行样式
note_fmt = workbook.add_format(
{'bold': True, 'font_name': u'微软雅黑', 'align': 'center', 'valign': 'vcenter', 'border': 1})
worksheet1.merge_range(strcolumn, c, note_fmt)#添加标题
writer.save()
writer.close()
#去重
df= df.drop_duplicates(['考号'])
import pandas as pd
dir = u".\\"
df = pd.read_excel(dir + u"待处理原始数据.xlsx")# pd.read_excel(dir + u"待处理原始数据.xlsx",sheet_name=0)
#df=df.values.tolist()直接转二维list
df['原始题号'].dropna().unique().tolist()# 访问列数据并转list(单列)
案例:https://www.cnblogs.com/peng104/p/10398490.html
NumPy使用
import numpy as np
a = np.array([1, 2, 3])
# print (a)
a = np.array([[1, 2], [3, 4]])
# print (a)
a = np.array([1, 2, 3, 4, 5], ndmin=2)
# print (a)
a = np.array([1, 2, 3], dtype=complex)
# print (a)
dt = np.dtype([('age', np.int8)])
a = np.array([(10,), (20,), (30,)], dtype=dt)
# print(a['age'])
student = np.dtype([('name', 'S20'), ('age', 'i1'), ('marks', 'f4')])
# a = np.array([('abc', 21, 50),('xyz', 18, 75)], dtype = student)
# print(a['name'])
x = np.arange(0, 5, 2, dtype=float) # numpy.arange(start, stop, step, dtype)
print(x)
a = np.linspace(1, 100, 10) # 创建由等差数列构成的数组;endpoint=False 时不包含终止值(默认包含)
print(a)
a = np.logspace(0, 9, 10, base=2) # 创建一个等比数列数组 np.logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None)
print(a)
a = np.arange(10)
s = slice(2, 7, 2) # 从索引 2 开始到索引 7 停止,间隔为2
print(a[s])
a = np.arange(10)
b = a[2:7:2] # 从索引 2 开始到索引 7 停止,间隔为 2
print(b)
a = np.arange(10) # [0 1 2 3 4 5 6 7 8 9]
b = a[5]
print(b)
print(np.std([1, 2, 3, 4]))# 标准差
print (np.var([1,2,3,4]))# 方差
a = np.array([[1, 2], [3, 4]])
b = np.array([[11, 12], [13, 14]])
# vdot 将数组展开计算内积
print(np.vdot(a, b))
#https://www.runoob.com/numpy/numpy-matplotlib.html
#https://www.runoob.com/numpy/numpy-dtype.html
pyinstaller打包
pip install pyinstaller
pyinstaller -F xx.py #-F打包为单个可执行文件(默认显示控制台窗口)
#-F:所有文件打包到demo.exe
#-w/--noconsole:不显示黑窗口
#-i <file.ico>:配置demo.exe的图标,本人测试256*256图标时,文件浏览界
#面中图标,小图标显示正常,大图标,特大图标显示异常