第八章 matplotlib画图_iloc[:,3]-优快云博客

本文链接：https://blog.youkuaiyun.com/yunini2/article/details/73613611

本文介绍Pandas库的数据选取与处理方法，包括使用iloc和loc进行数据筛选，以及通过Matplotlib进行多种图表的绘制，如线图、柱状图、直方图、密度图和散点图等。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

#数据读取
df.iloc[:,0:2]#选取第一列和第二列
df.iloc[[1,3],:]#选取第一行和第四行的全部列
df.iloc[[1,3],0:2]#选取第二行和第四行的第一列到第二列
df.iloc[[1,3],[0,3]]#选取第二行和第四列的第一列和第四列
df.iat[2,2]#只能是某行某列的一个数
df
df.loc[[[10,30],['A','C']]#读取10，30这两行的A列、C列
df.loc[10:50,['A','C']]#读取从10到50的每一行的A列、C列
#开始画图
from datetime import datetime
import matplotlib.pyplot as plt
import pandas as pd
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
data=pd.read_csv('E:/spx.csv',index_col=0,parse_dates=True)
spx=data['SPX']
spx.plot(ax=ax,style='k-')
#fig.show()
crisis_data=[
             (datetime(2007,10,11),'Peak of bull market'),
             (datetime(2008,3,12),'Bear Stearns Fails'),
             (datetime(2008,9,15),'Lehman Bankruptcy')
             ]
for date,label in crisis_data:
    ax.annotate(label,xy=(date,spx.asof(date)+50),
                xytext=(date,spx.asof(date)+50),arrowprops=dict(facecolor='black'),horizontalalignment='left',
verticalalignment='top')
#放大到2007-2010
ax.set_xlim(['1/1/2007','1/1/2011'])    
ax.set_ylim([600,1800])
ax.set_title('Import dates in 2008-2009 financial crisis')

>>> import numpy as np
>>> import pandas as pd
>>> s=pd.Series(np.random.randn(10).cumsum(),index=np.arange(0,100,10))
>>> s.plot()
<matplotlib.axes._subplots.AxesSubplot object at 0x040C41D0>#显示不出来图
>>> import matplotlib.pyplot as plt
>>> fig=plt.figure()
>>> ax=fig.add_subplot(1,1,1)
>>> s=pd.Series(np.random.randn(10).cumsum(),index=np.arange(0,100,10))
>>> s.plot()
<matplotlib.axes._subplots.AxesSubplot object at 0x03FE4750>
>>> fig.show()

该Series对象索引index会被传给matplotlib，并用以绘制x轴。可通过use_index=False禁用该功能。

X轴的刻度和界限可通过xticks和xlim选项调节

1、线图

import pandas as pd
import numpy as np
from pandas import DataFrame
df=DataFrame(np.random.randn(10,4).cumsum(0),columns=['A','B','C','D'],index=np.arange(0,100,10))
import matplotlib.pyplot as plt
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
df.plot()

2、柱状图

fig,axes=plt.subplots(2,1,figsize=(12,10))
data=pd.Series(np.random.randn(16),index=list('abcdefghijklmnop'))
data.plot(kind='bar',ax=axes[0],color='k',alpha=0.7)
data.plot(kind='barh',ax=axes[1],color='k',alpha=0.7)

data.value_counts().plot(kind='bar')，value_counts()函数计算频率。

3、直方图

tips=pd.read_csv('e:/tips.csv')
tips['tip_pct']=tips['tip']/tips['total_bill']
tips['tip_pct'].hist(bins=50)

4、密度图

import scipy
tips['tip_pct'].plot(kind='kde')

5、散点图

macro=pd.read_csv('e:/macrodata.csv')
data=macro.iloc[:,7:11]
trans_data=np.log(data).dropna()
trans_data[-5:]
plt.scatter(trans_data[[1]],trans_data[[3]])
plt.title('Changes in log %s vs. log %s' % ('m1','uemp'))

pd.scatter_matrix(trans_data,diagonal='kde',c='k',alpha=.3)

画地图的例子：

data=pd.read_csv('e:/haiti.csv')

#CATEGORY含有一组以逗号分隔的代码，这些代码表示消息的类型

data[['INCIDENT DATE','LATITUDE','LONGITUDE']][:10]
Out[172]: 
      INCIDENT DATE   LATITUDE   LONGITUDE
0  05/07/2010 17:26  18.233333  -72.533333
1  28/06/2010 23:06  50.226029    5.729886
2  24/06/2010 16:21  22.278381  114.174287
3  20/06/2010 21:59  44.407062    8.933989
4  18/05/2010 16:26  18.571084  -72.334671
5  26/04/2010 13:14  18.593707  -72.310079
6  26/04/2010 14:19  18.482800  -73.638800
7  26/04/2010 14:27  18.415000  -73.195000
8  15/03/2010 10:58  18.517443  -72.236841
9  15/03/2010 11:00  18.547790  -72.410010

data['CATEGORY'][:6]
Out[173]: 
0          1. Urgences | Emergency, 3. Public Health, 
1    1. Urgences | Emergency, 2. Urgences logistiqu...
2    2. Urgences logistiques | Vital Lines, 8. Autr...
3                            1. Urgences | Emergency, 
4                            1. Urgences | Emergency, 
5                       5e. Communication lines down, 
Name: CATEGORY, dtype: object
#调用describe发现数据中一些异常的地理位置

data.describe()
Out[175]: 
            Serial     LATITUDE    LONGITUDE
count  3593.000000  3593.000000  3593.000000
mean   2080.277484    18.611495   -72.322680
std    1171.100360     0.738572     3.650776
min       4.000000    18.041313   -74.452757
25%    1074.000000    18.524070   -72.417500
50%    2163.000000    18.539269   -72.335000
75%    3088.000000    18.561820   -72.293570
max    4052.000000    50.226029   114.174287
In [176]: data=data[(data.LATITUDE>18)&(data.LATITUDE<20)&(data.LONGITUDE>-75)&(data.LONGITUDE<-70)&data.CATEGORY.notnull()]#编写两个函数，一个用于所有分类的列表获取，另一个用于将各个分类信息拆分为编码和英语
In [177]: def to_cat_list(catstr):
     ...:     stripped=(x.strip() for x in catstr.split(','))
     ...:     return [x for x in stripped if x]
     ...: 
     ...: def get_all_categories(cat_series):
     ...:     cat_sets=(set(to_cat_list(x)) for x in cat_series)
     ...:     return sorted(set.union(*cat_sets))
     ...: 
     ...: def get_english(cat):
     ...:     code,names=cat.split('.')
     ...:     if '|' in names:
     ...:         names=names.split('|')[1]
     ...:     return code,names.strip()
     ...: 
     ...: 
#测试一下get_english函数工作是否正常
In [178]: get_english('2.Urgences logistiqued|Vital Lines')
Out[178]: ('2', 'Vital Lines')
#将编码和名称映射起来的字典
In [185]: all_cats=get_all_categories(data.CATEGORY)
In [186]: #生成器表达式

In [187]: english_mapping=dict(get_english(x) for x in all_cats)

In [188]: english_mapping['2a']

Out[188]: 'Food Shortage'In [189]: english_mapping['6c']
Out[189]: 'Earthquake and aftershocks'

In [190]: english_mapping['2b']
Out[190]: 'Water shortage'#分类选取记录的方式有很多，其一是添加指标列，每个分类一列。首先抽取出唯一的分类编码，并构造一个全0DataFrame
def get_code(seq):
    return [x.split('.')[0] for x in seq if x]

all_codes=get_code(all_cats)
code_index=pd.Index(np.unique(all_codes))
dummy_frame=DataFrame(np.zeros((len(data),len(code_index))),index=data.index,columns=code_index)
dummy_frame.ix[:,:6]
Out[191]: 
        1   1a   1b   1c   1d    2
0     0.0  0.0  0.0  0.0  0.0  0.0
4     0.0  0.0  0.0  0.0  0.0  0.0
5     0.0  0.0  0.0  0.0  0.0  0.0
6     0.0  0.0  0.0  0.0  0.0  0.0
7     0.0  0.0  0.0  0.0  0.0  0.0
8     0.0  0.0  0.0  0.0  0.0  0.0
9     0.0  0.0  0.0  0.0  0.0  0.0
10    0.0  0.0  0.0  0.0  0.0  0.0
11    0.0  0.0  0.0  0.0  0.0  0.0
12    0.0  0.0  0.0  0.0  0.0  0.0
13    0.0  0.0  0.0  0.0  0.0  0.0
14    0.0  0.0  0.0  0.0  0.0  0.0
15    0.0  0.0  0.0  0.0  0.0  0.0
16    0.0  0.0  0.0  0.0  0.0  0.0
17    0.0  0.0  0.0  0.0  0.0  0.0
18    0.0  0.0  0.0  0.0  0.0  0.0
19    0.0  0.0  0.0  0.0  0.0  0.0
20    0.0  0.0  0.0  0.0  0.0  0.0
21    0.0  0.0  0.0  0.0  0.0  0.0
22    0.0  0.0  0.0  0.0  0.0  0.0
23    0.0  0.0  0.0  0.0  0.0  0.0
24    0.0  0.0  0.0  0.0  0.0  0.0
25    0.0  0.0  0.0  0.0  0.0  0.0
26    0.0  0.0  0.0  0.0  0.0  0.0
27    0.0  0.0  0.0  0.0  0.0  0.0
28    0.0  0.0  0.0  0.0  0.0  0.0
29    0.0  0.0  0.0  0.0  0.0  0.0
30    0.0  0.0  0.0  0.0  0.0  0.0
31    0.0  0.0  0.0  0.0  0.0  0.0
32    0.0  0.0  0.0  0.0  0.0  0.0
  ...  ...  ...  ...  ...  ...
3563  0.0  0.0  0.0  0.0  0.0  0.0
3564  0.0  0.0  0.0  0.0  0.0  0.0
3565  0.0  0.0  0.0  0.0  0.0  0.0
3566  0.0  0.0  0.0  0.0  0.0  0.0
3567  0.0  0.0  0.0  0.0  0.0  0.0
3568  0.0  0.0  0.0  0.0  0.0  0.0
3569  0.0  0.0  0.0  0.0  0.0  0.0
3570  0.0  0.0  0.0  0.0  0.0  0.0
3571  0.0  0.0  0.0  0.0  0.0  0.0
3572  0.0  0.0  0.0  0.0  0.0  0.0
3573  0.0  0.0  0.0  0.0  0.0  0.0
3574  0.0  0.0  0.0  0.0  0.0  0.0
3575  0.0  0.0  0.0  0.0  0.0  0.0
3576  0.0  0.0  0.0  0.0  0.0  0.0
3577  0.0  0.0  0.0  0.0  0.0  0.0
3578  0.0  0.0  0.0  0.0  0.0  0.0
3579  0.0  0.0  0.0  0.0  0.0  0.0
3580  0.0  0.0  0.0  0.0  0.0  0.0
3581  0.0  0.0  0.0  0.0  0.0  0.0
3582  0.0  0.0  0.0  0.0  0.0  0.0
3583  0.0  0.0  0.0  0.0  0.0  0.0
3584  0.0  0.0  0.0  0.0  0.0  0.0
3585  0.0  0.0  0.0  0.0  0.0  0.0
3586  0.0  0.0  0.0  0.0  0.0  0.0
3587  0.0  0.0  0.0  0.0  0.0  0.0
3588  0.0  0.0  0.0  0.0  0.0  0.0
3589  0.0  0.0  0.0  0.0  0.0  0.0
3590  0.0  0.0  0.0  0.0  0.0  0.0
3591  0.0  0.0  0.0  0.0  0.0  0.0
3592  0.0  0.0  0.0  0.0  0.0  0.0

[3569 rows x 6 columns]
#将各行中适当的项设置为1，再与data进行连接
for row,cat in zip(data.index,data.CATEGORY):
    codes=get_code(to_cat_list(cat))
    dummy_frame.ix[row,codes]=1
data=data.join(dummy_frame.add_prefix('category_'))
data.ix[:,10:15]
category_1  category_1a  category_1b  category_1c  category_1d0            1.0          0.0          0.0          0.0          0.0
4            1.0          0.0          0.0          0.0          0.0
5            0.0          0.0          0.0          0.0          0.0
6            0.0          0.0          0.0          0.0          0.0
7            0.0          0.0          0.0          0.0          0.0
8            0.0          0.0          0.0          0.0          0.0
9            0.0          0.0          0.0          0.0          0.0
10           0.0          1.0          0.0          0.0          0.0
11           0.0          0.0          0.0          0.0          0.0
12           0.0          0.0          0.0          0.0          0.0
13           0.0          0.0          0.0          0.0          0.0
14           0.0          0.0          0.0          0.0          0.0
15           0.0          0.0          0.0          0.0          0.0
16           1.0          0.0          0.0          0.0          0.0
17           0.0          0.0          0.0          0.0          0.0
18           0.0          0.0          0.0          0.0          0.0
19           1.0          0.0          0.0          0.0          0.0
20           1.0          0.0          0.0          0.0          0.0
21           0.0          0.0          0.0          0.0          0.0
22           0.0          0.0          0.0          0.0          0.0
23           0.0          0.0          0.0          0.0          0.0
24           0.0          0.0          0.0          0.0          0.0
25           0.0          0.0          1.0          0.0          0.0
26           0.0          0.0          0.0          0.0          0.0
27           0.0          0.0          0.0          0.0          0.0
28           0.0          0.0          0.0          0.0          0.0
29           0.0          0.0          0.0          0.0          0.0
30           0.0          0.0          0.0          0.0          0.0
31           0.0          0.0          0.0          0.0          0.0
32           0.0          0.0          0.0          0.0          0.0
...          ...          ...          ...          ...          ...
3563         0.0          0.0          0.0          0.0          0.0
3564         0.0          0.0          0.0          0.0          0.0
3565         1.0          0.0          0.0          0.0          0.0
3566         1.0          0.0          0.0          0.0          0.0
3567         0.0          0.0          0.0          0.0          0.0
3568         0.0          0.0          0.0          0.0          0.0
3569         0.0          0.0          0.0          1.0          0.0
3570         0.0          0.0          0.0          0.0          0.0
3571         0.0          0.0          0.0          0.0          0.0
3572         0.0          0.0          0.0          0.0          0.0
3573         0.0          0.0          0.0          0.0          0.0
3574         0.0          0.0          0.0          1.0          0.0
3575         0.0          0.0          0.0          0.0          0.0
3576         0.0          0.0          0.0          0.0          0.0
3577         0.0          0.0          0.0          0.0          0.0
3578         0.0          0.0          0.0          0.0          0.0
3579         0.0          0.0          0.0          0.0          0.0
3580         0.0          0.0          0.0          0.0          0.0
3581         0.0          0.0          0.0          1.0          0.0
3582         0.0          0.0          0.0          0.0          0.0
3583         0.0          0.0          0.0          0.0          0.0
3584         0.0          0.0          0.0          1.0          0.0
3585         0.0          0.0          0.0          0.0          0.0
3586         1.0          0.0          0.0          0.0          0.0
3587         1.0          0.0          0.0          0.0          0.0
3588         0.0          0.0          0.0          0.0          0.0
3589         0.0          0.0          0.0          1.0          0.0
3590         0.0          0.0          0.0          0.0          0.0
3591         0.0          0.0          0.0          0.0          0.0
3592         0.0          0.0          0.0          0.0          0.0

[3569 rows x 5 columns]