应用pandas创建DataFrame数据类型，以及基本操作_使用pandas创建一个dataframe,4行两列-CSDN博客

本文链接：https://blog.csdn.net/qq_43279936/article/details/87967898

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

# Method 1: build a DataFrame from a nested list (each inner list is one row).
li = [[1, 2, 3, 4], [2, 3, 4, 5]]

# A DataFrame carries two indexes: the row index (axis=0) and the
# column index (axis=1).
# d1 = pd.DataFrame(data=li, index=['A', 'B'], columns=['views', 'loves', 'comments', 'tranfers'])
# print(d1)

# Method 2: build a DataFrame from a NumPy array.
row_labels = ['A', 'B']
column_labels = ['views', 'loves', 'comments', 'tranfers']
narr = np.arange(8).reshape(2, 4)  # 2 rows x 4 columns holding 0..7
print(narr)
# Row labels go to the row index (axis=0), column labels to the column
# index (axis=1).
d2 = pd.DataFrame(narr, index=row_labels, columns=column_labels)
print(d2)

# Method 3: build a DataFrame from a dict (keys become column labels).
# dict = {
#     'views': [1, 2, ],
#     'loves': [2, 3, ],
#     'comments': [3, 4, ]
#
# }
# d3 = pd.DataFrame(data=dict, index=['粉条', "粉丝"])
# print(d3)
#

# Special case: working with dates.
# pd.date_range()
# dates = pd.date_range(start='1/1/2018', end='1/08/2018')
# print(dates)


# Row index
# dates = pd.date_range(start='today', periods=6)
# Data
# data_arr = np.random.randn(6, 4)
# Column index
# columns = ['A', 'B', 'C', 'D']
# d4 = pd.DataFrame(data_arr, index=dates, columns=columns)
# print(d4)



# One-dimensional object: a Series indexed by every day of 2019, with
# random values.
# dates = pd.date_range(start='1/1/2019', end='12/31/2019', freq='D')
# datas = np.random.randn(len(dates))
# s1 = pd.Series(datas, index=dates)
# print(s1[:3])

DataFrame的基本操作

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt


# Walk through the basic DataFrame operations on a small 2x4 example frame.
narr = np.arange(8).reshape(2, 4)  # 2 rows x 4 columns holding 0..7
# A DataFrame carries two indexes: the row index (axis=0) and the
# column index (axis=1).
d2 = pd.DataFrame(data=narr, index=['A', 'B'], columns=['views', 'loves', 'comments', 'tranfers'])
print(d2)



# ********************** 1). Basic attributes ***********************
print(d2.shape)  # (number of rows, number of columns)
print(d2.dtypes)  # dtype of each column
print(d2.ndim)  # number of dimensions
print(d2.index)  # row index
print(d2.columns)  # column index
print(d2.values, type(d2.values))   # underlying values as a 2-D ndarray



# ****************************** 2). Overall data summary *************
print(d2.head(1))  # first rows (defaults to 5 when no count is given)
print(d2.tail(1))  # last rows (defaults to 5 when no count is given)

print("*"*10)
# Overview: row count, column count, column dtypes, memory usage.
# DataFrame.info() writes its report to stdout itself and returns None, so
# it must not be wrapped in print() (that would emit a stray "info: None").
print("info:")
d2.info()

print("统计".center(50, '*'))
# Quick summary statistics: count, mean, std, min, quartiles, max.
print(d2.describe())


# 3). Transpose
print(d2.T)

# 4). Sort by column
print(d2)
# Sort by the given column; ascending by default, pass ascending=False for
# descending order.
print(d2.sort_values(by="views", ascending=False))


# 5). Slicing and lookup
print(d2[:1])   # [] supports row slicing, but not single-row indexing
print('1:\n', d2['views'])   # select a single column by label
print('2:\n', d2.views)   # attribute access, equivalent to the line above
print(d2[['views', 'comments']])  # select several columns by label



# 6). Index-style row access:
#       - iloc: select rows by integer position
#       - loc: select rows by label
# print(d2[0])
print(d2)
print(d2.iloc[0])
print(d2.iloc[-1:])


# print(d2['A'])    # raises: 'A' is a row label, not a column label
print(d2)
print(d2.loc['A'])



# 7). Modify values in the frame.
# The columns are integer-typed and cannot hold NaN. Cast to float first so
# the assignment does not rely on implicit upcasting, which newer pandas
# versions warn about or reject.
d2 = d2.astype('float64')
d2.loc['A'] = np.nan
print(d2)

# print(d2.info())