# pandas数据结构基础知识学习 (pandas data-structure fundamentals)
# !/usr/bin/env python
# -*-encoding:utf-8-*-
# @Time:21:10
# @File:day2.py
# @Author:秦时明月
# pandas 数据结构学习
'''
1、预备知识
a、容器
b、使用函数
c、取子集和建索引
2、加载人工数据
3、Series
a、创建series
dict
ndarray
scalar
lists
b、切片
4、dataframe
'''
import pandas as pd
# A Series built from a plain list; no index is given, so pandas assigns
# the default integer labels.
s = pd.Series(data=["banana", 42])
# print(s)

# Supply the row labels explicitly through `index`.
a = pd.Series(
    data=["Wes Mckinney", "Creator of Pandas"],
    index=["Person", "Who"],
)
# print(a)
# DataFrame built from a mapping of column name -> list of values, with the
# two rows labelled 'one' and 'two' instead of the default integer index.
sec = pd.DataFrame(
    data=dict(
        name=["coco", "william"],
        sex=["nan", "nv"],
        age=[32, 89],
        born=["1991-08-10", "1992-07-24"],
        school=["daxue", "dada"],
    ),
    index=["one", "two"],
)
# print(sec)
# Use an ordered dict to pin the column order; OrderedDict has to be imported
# from the collections module.
from collections import OrderedDict

# Build the OrderedDict from (key, value) pairs rather than from a dict
# literal: a literal is already a plain dict by the time OrderedDict sees it,
# so its key order is only as reliable as the interpreter's dict ordering
# (not guaranteed before Python 3.7). A list of pairs is ordered everywhere.
se = pd.DataFrame(OrderedDict([
    ('name', ['coco', 'william']),
    ('sex', ['nan', 'nv']),
    ('age', [32, 89]),
    ('born', ['1991-08-10', '1992-07-24']),
    ('school', ['daxue', 'dada']),
]))
# print(se)
# Same data again, but `columns` picks which dict keys become columns and in
# what order; 'born' is not listed, so it is left out of the resulting frame.
qw = pd.DataFrame(
    data={
        "name": ["coco", "william"],
        "sex": ["nan", "nv"],
        "age": [32, 89],
        "born": ["1991-08-10", "1992-07-24"],
        "school": ["daxue", "dada"],
    },
    columns=["name", "sex", "age", "school"],
    index=["one", "two"],
)
# print(qw)
'''
first_row=qw.loc['two']
print(first_row)
print(first_row.index)
print(first_row.values)
print(first_row.keys)
'''
# Get the first index label
# print(first_row.index[0]) # attribute
# print(first_row.keys()[0]) # method
# Get the age column
# ages=qw['age']
# print(ages)
#
# print(ages.mean())
#
# print(ages.min())
#
# print(ages.max())
# print(ages.std())
# Boolean subsetting of a Series.
# NOTE(review): hard-coded absolute Windows path; the file must exist at this
# exact location for the script to run — confirm the path on your machine.
ser_file = pd.read_csv(r'E:\ython\demo\data_anysis\pandas\data\scientists.csv')
ages = ser_file['Age']
# print(ages)
# Basic summary statistics
# print(ages.describe())

# Mean of all ages, computed once and reused below.
mean_age = ages.mean()
print(mean_age)

# Element-wise comparison yields a boolean Series; indexing with it keeps
# only the rows where the flag is True.
above_mean = ages > mean_age
print(ages[above_mean])
print(above_mean)

# A hand-written boolean mask works the same way, but it needs exactly one
# flag per row: pandas raises IndexError when the lengths differ. The row
# count of the CSV is not visible here, so pad the mask with False (or trim
# it) to match len(ages) instead of assuming the file has three rows.
manual = ([True, True, False] + [False] * len(ages))[:len(ages)]
print(ages[manual])