python数据挖掘
castingA3T
这个作者很懒,什么都没留下…
展开
专栏收录文章
- 默认排序
- 最新发布
- 最早发布
- 最多阅读
- 最少阅读
-
matplotlib绘制函数曲线
import numpy as np import matplotlib.pyplot as plt plt.rcParams['axes.unicode_minus']=False x = np.linspace(-30, 30, 10000) y = (x**2-5*x+10)#方程式 z = (2*x-5)#导数方程 z = (-5*x+10)#二次函数的0点切线方程 plt.figure( 原创 2017-12-25 23:42:25 · 6172 阅读 · 0 评论 -
pandas中concat用法
import numpy as npimport pandas as pdarr=np.arange(12).reshape((3,4))In[2]: arrOut[2]: array([[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11]])In[2]: np.concatenate([arr,arr]原创 2018-01-08 00:34:43 · 13913 阅读 · 2 评论 -
matplotlib双y轴绘图
# -*- coding: utf-8 -*-"""Created on Wed Jan 31 14:06:26 2018@author: Administrator"""import numpy as npimport pandas as pdimport matplotlib.pyplot as pltplt.rcParams['font.sans-serif']=['S原创 2018-01-31 14:50:22 · 14001 阅读 · 1 评论 -
pandas分区间,算频率
import pandas as pdpath='F:/python/python数据分析与挖掘实战/图书配套数据、代码/chapter3/demo/data/catering_fish_congee.xls'data=pd.read_excel(path,header=None,index_col=0)data.index.name='日期'data.columns=['销售额(元)']原创 2018-01-16 15:16:32 · 22376 阅读 · 0 评论 -
matplotlib箱形图
seed()用法 import numpy as np np.random.seed()#seed()随机数变化,seed(2)#里面有数字,数字不变,随机数不变 a=np.random.randn(5,4) In [26]:a Out[26]: array([[ 0.17835632, 0.28546591, 0.65957699, -0.3424716 ], [ 1.3 原创 2018-01-06 21:56:15 · 1819 阅读 · 0 评论 -
箱型图判断异常值
import pandas as pdcatering_sale='F:/python/python数据分析与挖掘实战/图书配套数据、代码/chapter3/demo/data/catering_sale.xls'data=pd.read_excel(catering_sale,index_col=u'日期')print(data.head())print(data.tail())pri原创 2018-01-15 22:32:33 · 2729 阅读 · 0 评论 -
链家网二手房数据分析(承接上篇爬虫)
import pandas as pdimport numpy as npimport matplotlib.pyplot as pltplt.rcParams['font.sans-serif']=['SimHei']#用来正常显示中文标签path='F:/python/从零开始学python网络爬虫/链家网/lianjia-3.xls'lj=pd.read_excel(path)原创 2017-12-28 22:43:05 · 4124 阅读 · 0 评论 -
pandas中groupby
import numpy as npimport pandas as pddf=pd.DataFrame({'key1':['a','a','b','b','a'], 'key2':['one','two','one','two','one'], 'data1':[1,4,7,3,9], 'd原创 2018-01-15 15:04:48 · 417 阅读 · 0 评论 -
pandas层次化索引
import pandas as pdimport numpy as npfrom numpy import nan as NAdf=pd.DataFrame(np.random.randn(7,3),index=['a','b','c','d','e','f','g'],columns=['q','w','t'])In [106]:dfOut[120]: q原创 2017-12-29 18:03:12 · 429 阅读 · 0 评论 -
pandas过滤缺失值
import pandas as pd import numpy as np obj=pd.Series(range(5),index=['a','a','b','b','c']) print(obj) print(obj.index.is_unique)#是否是唯一值 print(obj['b']) print(obj['c']) print(obj[['a','b']])#输出多个列 print 原创 2017-12-28 18:59:14 · 2720 阅读 · 0 评论 -
pandas运算、排序、排名
import pandas as pdimport numpy as np#Series运算s1=pd.Series([7.3,-2.5,3.4,1.5],index=['a','c','d','e'])s2=pd.Series([-2.1,3.6,-1.5,4,3.1],index=['a','c','e','f','g'])print(s1+s2)print(s1.add(s2,fi原创 2017-12-25 14:27:08 · 1650 阅读 · 0 评论 -
meshgrid计算函数sqrt(x^2+y^2)
import numpy as nppoints=np.arange(-5,5,0.01)#1000个间隔相同的点xs,ys=np.meshgrid(points,points)#生成两个二维矩阵print(ys)import matplotlib.pyplot as pltz=np.sqrt(xs ** 2 + ys ** 2)#计算函数print(z)plt.imshow(z原创 2017-12-21 12:18:46 · 1932 阅读 · 0 评论 -
前程无忧python工作薪资爬取及数据分析
# -*- coding: utf-8 -*-"""Created on Fri Dec 15 15:31:51 2017@author: Administrator"""'''获取前程无忧python相关工作地点、薪水、公司、职位'''import requestsfrom bs4 import BeautifulSoupheaders={ 'UserAgent':原创 2017-12-17 21:33:50 · 6858 阅读 · 1 评论 -
scatter绘制散点图
import numpy as np import matplotlib.pyplot as plt x = np.arange(1,10) y = x fig = plt.figure() ax1 = fig.add_subplot(111) ax1.set_title('Scatter Plot') plt.xlabel('X') plt.ylabel('Y')原创 2017-12-26 13:04:36 · 2638 阅读 · 0 评论 -
matplotlib条形图
from matplotlib import pyplot as pltimport numpy as npfig=plt.figure()ax=fig.add_subplot(111)x=np.arange(4)data=np.array([15,20,18,25])rect=ax.bar(x,data,width=0.5,color="lightblue")for rec in r原创 2017-12-31 10:48:39 · 429 阅读 · 0 评论 -
pandas合并、转换、映射、替换
import numpy as npimport pandas as pddf1=pd.DataFrame({'a':[1,np.nan,5,np.nan], 'b':[np.nan,2,np.nan,6], 'c':range(2,18,4)})df2=pd.DataFrame({'a':[5,4,np.nan,3,7原创 2018-01-09 17:08:53 · 3603 阅读 · 0 评论
分享