data_analysis28

最新推荐文章于 2025-12-09 16:47:37 发布

原创 · 最新推荐文章于 2025-12-09 16:47:37 发布 · 152 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#python #开发语言 #pycharm #visual studio code #数据结构

Python 专栏收录该内容

262 篇文章

订阅专栏

笔记

import pandas as pd
import numpy as np

# Path to the Starbucks worldwide store dataset (CSV), relative to the
# working directory.
file_path = "./pandas_data/starbucks_store_worldwide.csv"

# Load the CSV into a DataFrame.
df = pd.read_csv(file_path)
# Quick inspection (disabled):
# print(df.head(1))
# print(df.info())

# Example (disabled): group by a single column.
# grouped = df.groupby(by="Country")
# print(grouped)

# The result is a DataFrameGroupBy, which can be iterated over.
# for i,j in grouped:
#     print(i)
#     print("-"*100)
#     print(j,type(j))
#     print("*"*100)

# Example (disabled): call an aggregation method on the groups.
# country_count=grouped["Brand"].count()
# print(country_count["US"])
# print(country_count["CN"])


# Example (disabled): count the number of stores in each province of China.
# china_data = df[df["Country"] == "CN"]
#
# grouped = china_data.groupby(by="State/Province").count()["Brand"]
# print(grouped)

# Example (disabled): group by multiple keys, returning a Series.
# grouped = df["Brand"].groupby(by=[df["Country"],df["State/Province"]]).count()
# print(grouped)
# print(type(grouped))

# Group by multiple keys.
# NOTE: grouped1 selects the single column df["Brand"] before grouping, so
# (like the disabled example just above) it is a Series. The disabled
# grouped2 / grouped3 variants select [["Brand"]] (a list of columns) to get
# a DataFrame instead.
group_keys = [df["Country"], df["State/Province"]]
brand_column = df["Brand"]
grouped1 = brand_column.groupby(by=group_keys).count()
# grouped2 = df.groupby(by=[df["Country"],df["State/Province"]])[["Brand"]].count()
# grouped3 = df.groupby(by=[df["Country"],df["State/Province"]]).count()[["Brand"]]

# Compare the three results and their types (disabled):
# print(grouped1,type(grouped1))
# print("*"*100)
# print(grouped2,type(grouped2))
# print("*"*100)
# print(grouped3,type(grouped3))


# Index methods and attributes: show the index of the grouped result.
grouped_index = grouped1.index
print(grouped_index)