一,设置DataFrame索引值 以及 时间索引如何构造
1,设置DataFrame索引值
import numpy as np
import pandas as pd
# Draw a 500 x 507 matrix of samples from a normal distribution (mean 0, std 1).
day_data = np.random.normal(loc=0, scale=1, size=(500, 507))
# Wrapping the raw array without labels yields default integer row/column labels.
day_data1 = pd.DataFrame(data=day_data)
n_rows, n_cols = day_data.shape
# Row labels: one "股票<i>" string per row of the matrix.
stock_list = [f"股票{i}" for i in range(n_rows)]
# Column labels: one timestamp per column, stepping by business day ('B')
# from 2018-01-01.
date = pd.date_range(start="2018-01-01", periods=n_cols, freq='B')
# Build the labelled frame by supplying both the row and the column labels.
day_data2 = pd.DataFrame(data=day_data, index=stock_list, columns=date)
print(day_data2)

2,时间索引如何构造
pd.date_range():用于生成一组连续的时间序列
date_range(start=None, end=None, periods=None, freq='D')
- start:开始时间
- end:结束时间
- periods:要生成的时间点个数(不一定是天数,相邻时间点的间隔由 freq 决定)
- freq:相邻时间点的间隔(频率),默认为 'D'(1天);例如 'B' 表示工作日
二,dataframe修改索引
1,直接修改
import numpy as np
import pandas as pd
# Same labelled frame as before: 500 rows x 507 columns of normal samples,
# "股票<i>" row labels and business-day column labels.
day_data = np.random.normal(loc=0, scale=1, size=(500, 507))
day_data1 = pd.DataFrame(data=day_data)
n_rows, n_cols = day_data.shape
stock_list = [f"股票{i}" for i in range(n_rows)]
date = pd.date_range(start="2018-01-01", periods=n_cols, freq='B')
day_data2 = pd.DataFrame(data=day_data, index=stock_list, columns=date)
print(day_data2)
# Labels cannot be edited one element at a time; to relabel, assign a
# complete new list covering every column.
date = [f"第{i}天" for i in range(n_cols)]
day_data2.columns = date
print(day_data2)
2,重设索引( reset_index() )
import numpy as np
import pandas as pd
# Demonstrates DataFrame.reset_index(): build a frame with custom string row
# labels, then swap those labels for the default 0..n-1 integer index.
day_data = np.random.normal(0, 1, (500, 507))
n_rows, n_cols = day_data.shape
stock_list = [f"股票{i}" for i in range(n_rows)]
date = [f"第{i}天" for i in range(n_cols)]
# The labelled frame is built directly; an unlabelled intermediate frame would
# be overwritten here before ever being used.
day_data1 = pd.DataFrame(day_data, index=stock_list, columns=date)
# With the default drop=False, the old row labels are moved into the data as
# a regular column and the row index becomes the default integer index:
# day_data1 = day_data1.reset_index()
# With drop=True, the old row labels are discarded and the row index becomes
# the default integer index.
day_data1 = day_data1.reset_index(drop=True)
3,以某列值设为新的索引( set_index() )
import pandas as pd
# Column-wise sample records: every key becomes one DataFrame column.
d1 = dict(
    name=["xiaoming", "xiaofang", "xiaoli"],
    age=[20, 32, 24],
    tel=[10086, 10000, 10010],
    agent=["male", "female", "male"],
)
a = pd.DataFrame(data=d1)
print(a)
# Promote the "name" and "age" columns to a two-level row index; drop=True
# removes them from the data columns once they are part of the index.
index_columns = ["name", "age"]
a = a.set_index(index_columns, drop=True)
print(a)
#--------output-----------------
       name  age    tel   agent
0  xiaoming   20  10086    male
1  xiaofang   32  10000  female
2    xiaoli   24  10010    male
                tel   agent
name     age
xiaoming 20   10086    male
xiaofang 32   10000  female
xiaoli   24   10010    male
三,dataframe排序(按内容排序、按索引排序)
排序有两种,一种对索引进行排序,一种对内容进行排序
- 使用df.sort_values给内容排序(默认是从小到大)
- 单个键进行排序
- 多个键进行排序
- 使用df.sort_index给索引进行排序
1,对内容进行排序-df.sort_values()
# encoding=utf-8
import numpy as np
import pandas as pd
# Demonstrates DataFrame.sort_values(): ordering rows by the values stored in
# one or more columns.
day_data = np.random.normal(0, 1, (500, 507))
n_rows, n_cols = day_data.shape
# Integer row labels 0..n_rows-1; range() already yields ints, so no per-item
# int() conversion is needed.
stock_list = list(range(n_rows))
date = [f"第{i}天" for i in range(n_cols)]
# The labelled frame is built directly; an unlabelled intermediate frame would
# be overwritten here before ever being used.
day_data1 = pd.DataFrame(day_data, index=stock_list, columns=date)
# sort_values orders rows by cell contents; ascending defaults to True
# (smallest first).
# Sorting by a single key:
# print(day_data1.sort_values(by="第1天", ascending=True))
# Sorting by several keys: later keys only break ties left by earlier keys.
print(day_data1.sort_values(by=["第1天", "第2天"], ascending=False))
2,对索引进行排序-df.sort_index()
# encoding=utf-8
import numpy as np
import pandas as pd
# Demonstrates DataFrame.sort_index(): ordering rows by their index labels
# rather than by the stored values.
day_data = np.random.normal(0, 1, (500, 507))
n_rows, n_cols = day_data.shape
# Integer row labels 0..n_rows-1; range() already yields ints, so no per-item
# int() conversion is needed.
stock_list = list(range(n_rows))
date = [f"第{i}天" for i in range(n_cols)]
# The labelled frame is built directly; an unlabelled intermediate frame would
# be overwritten here before ever being used.
day_data1 = pd.DataFrame(day_data, index=stock_list, columns=date)
# sort_index orders rows by index label; ascending defaults to True (smallest
# first), so ascending=False lists the largest label first.
print(day_data1.sort_index(ascending=False))