继续学习pandas库。上一节主要介绍了Series,这一节主要介绍DataFrame结构的用法。运行环境:Python 2.7。
#!/usr/bin/python2
# coding:utf-8
# pandas 使用DataFrame
import numpy as np
import pandas as pd
dates = pd.date_range('20171231', periods=6)
print "dates:\n", dates
# a b c d
# 2017-12-31 0.544078 0.389521 0.097052 0.942329
# 2018-01-01 0.474514 0.456605 0.750682 0.683513
# 2018-01-02 0.537973 0.230534 0.216569 0.015208
# 2018-01-03 0.320855 0.295421 0.342874 0.808681
# 2018-01-04 0.649339 0.678842 0.390282 0.692622
# 2018-01-05 0.041877 0.197155 0.384499 0.301309
# index 为纵向坐标的,column为横向坐标
df = pd.DataFrame(np.random.rand(6, 4), index=dates, columns=['a', 'b', 'c', 'd'])
print "df:\n", df
print "dtypes:\n", df.dtypes
print "columns:\n", df.columns
print "values:\n", df.values
# describe是DataFrame的详细信息,包括count、mena、std等值
print "describe:\n", df.describe()
# a b c d
# count 6.000000 6.000000 6.000000 6.000000
# mean 0.428106 0.374680 0.363660 0.573944
# std 0.217850 0.177776 0.220942 0.347394
# min 0.041877 0.197155 0.097052 0.015208
# 25% 0.359270 0.246756 0.248145 0.396860
# 50% 0.506243 0.342471 0.363687 0.688067
# 75% 0.542552 0.439834 0.388836 0.779666
# max 0.649339 0.678842 0.750682 0.942329
print "转置:\n", df.T
# 按照索引排序 ascending=False 倒序列
print "按照索引排序:\n", df.sort_index(axis=1, ascending=False)
# 按照值排序
print "按照值排序:\n", df.sort_values(by='a')
print "______________________"
df1 = pd.DataFrame(np.arange(12).reshape((3, 4)))
print "df1:\n", df1
print "df1.dtypes:\n", df1.dtypes
df2 = pd.DataFrame({'A': 1, 'B': pd.Timestamp('20180101'), 'C': pd.Series(1, index=list(range(4)), dtype=float)})
print "df2:\n", df2
# DataFrame的构造
# pd.DataFrame() 参数: 1、二维array; 2、Series 列表; 3、value为Series的字典;
# 1、二维array
s1 = np.array([1, 2, 3, 4])
s2 = np.array([5, 6, 7, 8])
dataframe1 = pd.DataFrame([s1, s2])
print "dataframe1:\n", dataframe1
# 2、Series 列表
s1 = pd.Series(np.array([1, 2, 3, 4]))
s2 = pd.Series(np.array([5, 6, 7, 8]))
dataframe2 = pd.DataFrame([s1, s2])
print "dataframe2:\n", dataframe2
# 3、value为Series的字典
s1 = pd.Series(np.array([1, 2, 3, 4]))
s2 = pd.Series(np.array([5, 6, 7, 8]))
dataframe3 = pd.DataFrame({"a": s1, "b": s2});
print "dataframe3:\n", dataframe3
# DataFrame的属性
print "dataframe3的columns的值:\n", dataframe3.columns
print "dataframe3的形状:\n", dataframe3.shape
print "dataframe3的index的值:\n", list(dataframe3.index)
print "dataframe3的value的值:\n", dataframe3.values
# DataFrame的if-then操作
# df.ix[条件,then操作区域]
df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8], "C": [1, 1, 1, 1]})
print "修改前:\n", df
# if(df.A > 2),"B"=-1
df.ix[df.A > 2, 'B'] = -1
print "修改后:\n", df
# 使用numpy.where
df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8], "C": [1, 1, 1, 1]})
# np.where(条件,then,else)
df["then"] = np.where(df.A < 3, 1, 0)
print "修改后:\n", df
# 直接取值df.[]
df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8], "C": [1, 1, 1, 1]})
df = df[df.A >= 2]
print "df[df.A >= 2]:\n", df
# 使用.loc[]
df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8], "C": [1, 1, 1, 1]})
df = df.loc[df.A > 2]
print "df.loc[df.A > 2]:\n", df
df = pd.DataFrame({'animal': 'cat dog cat fish dog cat cat'.split(),
'size': list('SSMMMLL'),
'weight': [8, 10, 11, 1, 20, 12, 12],
'adult': [False] * 5 + [True] * 2})
print "df:\n", df
以下是上述代码的运行结果
D:\software\Anaconda2\python.exe D:/PycharmProjects/Learn/learn_panda/learn_pd2.py
dates:
DatetimeIndex(['2017-12-31', '2018-01-01', '2018-01-02', '2018-01-03',
'2018-01-04', '2018-01-05'],
dtype='datetime64[ns]', freq='D')
df:
a b c d
2017-12-31 0.138818 0.723637 0.095903 0.368734
2018-01-01 0.887222 0.371972 0.269801 0.250211
2018-01-02 0.121923 0.861907 0.955060 0.770644
2018-01-03 0.886326 0.606760 0.418799 0.745113
2018-01-04 0.181974 0.611008 0.144484 0.385850
2018-01-05 0.422556 0.294514 0.143726 0.420354
dtypes:
a float64
b float64
c float64
d float64
dtype: object
columns:
Index([u'a', u'b', u'c', u'd'], dtype='object')
values:
[[0.13881833 0.72363691 0.09590252 0.36873399]
[0.88722168 0.37197237 0.26980146 0.25021065]
[0.12192265 0.86190715 0.95506014 0.77064439]
[0.88632573 0.6067601 0.41879897 0.74511262]
[0.1819737 0.61100784 0.14448442 0.38585008]
[0.42255628 0.29451366 0.1437259 0.42035442]]
describe:
a b c d
count 6.000000 6.000000 6.000000 6.000000
mean 0.439803 0.578300 0.337962 0.490151
std 0.362761 0.212857 0.324267 0.215288
min 0.121923 0.294514 0.095903 0.250211
25% 0.149607 0.430669 0.143916 0.373013
50% 0.302265 0.608884 0.207143 0.403102
75% 0.770383 0.695480 0.381550 0.663923
max 0.887222 0.861907 0.955060 0.770644
转置:
2017-12-31 2018-01-01 ... 2018-01-04 2018-01-05
a 0.138818 0.887222 ... 0.181974 0.422556
b 0.723637 0.371972 ... 0.611008 0.294514
c 0.095903 0.269801 ... 0.144484 0.143726
d 0.368734 0.250211 ... 0.385850 0.420354
[4 rows x 6 columns]
按照索引排序:
d c b a
2017-12-31 0.368734 0.095903 0.723637 0.138818
2018-01-01 0.250211 0.269801 0.371972 0.887222
2018-01-02 0.770644 0.955060 0.861907 0.121923
2018-01-03 0.745113 0.418799 0.606760 0.886326
2018-01-04 0.385850 0.144484 0.611008 0.181974
2018-01-05 0.420354 0.143726 0.294514 0.422556
按照值排序:
a b c d
2018-01-02 0.121923 0.861907 0.955060 0.770644
2017-12-31 0.138818 0.723637 0.095903 0.368734
2018-01-04 0.181974 0.611008 0.144484 0.385850
2018-01-05 0.422556 0.294514 0.143726 0.420354
2018-01-03 0.886326 0.606760 0.418799 0.745113
2018-01-01 0.887222 0.371972 0.269801 0.250211
______________________
df1:
0 1 2 3
0 0 1 2 3
1 4 5 6 7
2 8 9 10 11
df1.dtypes:
0 int32
1 int32
2 int32
3 int32
dtype: object
df2:
A B C
0 1 2018-01-01 1.0
1 1 2018-01-01 1.0
2 1 2018-01-01 1.0
3 1 2018-01-01 1.0
dataframe1:
0 1 2 3
0 1 2 3 4
1 5 6 7 8
dataframe2:
0 1 2 3
0 1 2 3 4
1 5 6 7 8
dataframe3:
a b
0 1 5
1 2 6
2 3 7
3 4 8
dataframe3的columns的值:
Index([u'a', u'b'], dtype='object')
dataframe3的形状:
(4, 2)
dataframe3的index的值:
[0, 1, 2, 3]
dataframe3的value的值:
[[1 5]
[2 6]
[3 7]
[4 8]]
修改前:
A B C
0 1 5 1
1 2 6 1
2 3 7 1
3 4 8 1
修改后:
A B C
0 1 5 1
1 2 6 1
2 3 -1 1
3 4 -1 1
修改后:
A B C then
0 1 5 1 1
1 2 6 1 1
2 3 7 1 0
3 4 8 1 0
df[df.A >= 2]:
A B C
1 2 6 1
2 3 7 1
3 4 8 1
df.loc[df.A > 2]:
A B C
2 3 7 1
3 4 8 1
df:
adult animal size weight
0 False cat S 8
1 False dog S 10
2 False cat M 11
3 False fish M 1
4 False dog M 20
5 True cat L 12
6 True cat L 12
Process finished with exit code 0