import pandas as pd
import numpy as np
# ======================
# Series operations
# ======================
# Build a 5-element Series of random integers drawn from [1, 120),
# labelled with string index keys and given a name.
s = pd.Series(
    np.random.randint(1, 120, 5),
    index=["er", "su", "do", "nk", "fd"],
    name="随机数据",
)
print("原始Series:\n", s)

# Core attributes and quick inspection
print("\n索引:", s.index.tolist())
print("\n值:", s.values)
print("\n数据类型:", s.dtype)
print("\n形状:", s.shape)
print("\n前2行:\n", s.head(2))
print("\n统计描述:\n", s.describe())

# Data processing: uniqueness, frequencies, element-wise transform,
# boolean filtering, and sorting by value / by index label
print("\n唯一值:", s.unique())
print("\n值计数:\n", s.value_counts())
print("\n平方变换:\n", s.apply(lambda x: x**2))
print("\n大于50的值:\n", s[s > 50])
print("\n排序(值):\n", s.sort_values())
print("\n排序(索引):\n", s.sort_index())
# ======================
# DataFrame operations
# ======================
# Build a 5x5 DataFrame of random integers drawn from [1, 1000),
# with row labels A-E and columns Col1-Col5.
df = pd.DataFrame(
    np.random.randint(1, 1000, (5, 5)),
    index=["A", "B", "C", "D", "E"],
    columns=["Col1", "Col2", "Col3", "Col4", "Col5"],
)
print("\n原始DataFrame:\n", df)

# Core attributes and quick inspection
print("\n列名:", df.columns.tolist())
print("\n索引:", df.index.tolist())
print("\n数据类型:\n", df.dtypes)
print("\n形状:", df.shape)
print("\n统计描述:\n", df.describe())
print("\n前3行:\n", df.head(3))

# Data manipulation: sorting, row filtering, column selection
print("\nCol1排序:\n", df.sort_values("Col1"))
print("\n索引排序:\n", df.sort_index(ascending=False))
print("\n选择Col1>500的行:\n", df[df["Col1"] > 500])
print("\n选择Col1和Col3:\n", df[["Col1", "Col3"]])

# Aggregations: per-row mean, per-column sum, pairwise correlation
print("\n行均值:\n", df.mean(axis=1))
print("\n列总和:\n", df.sum())
print("\n相关系数矩阵:\n", df.corr())
# ======================
# Supplementary operations
# ======================
# 1. Handling missing values
df_with_nan = df.copy()
# NaN cannot be stored in an integer column. Cast Col1 to float explicitly
# so the assignment below does not rely on pandas' silent upcasting, which
# recent pandas versions deprecate.
df_with_nan["Col1"] = df_with_nan["Col1"].astype(float)
df_with_nan.loc["A", "Col1"] = np.nan
print("\n填充缺失值:\n", df_with_nan.fillna(999))

# 2. Combining data: append one extra row labelled "F"
new_row = pd.DataFrame(
    [[100, 200, 300, 400, 500]],
    columns=df.columns,
    index=["F"],
)
print("\n添加新行:\n", pd.concat([df, new_row]))

# 3. Group-wise aggregation
df["Category"] = ["X", "Y", "X", "Y", "X"]  # add the grouping column
print("\n分组统计:\n", df.groupby("Category").mean())

# 4. Pivot table
# The aggregation is passed by name: recent pandas versions emit a
# FutureWarning when given a callable such as np.mean.
print(
    "\n透视表:\n",
    pd.pivot_table(df, values="Col1", index="Category", aggfunc="mean"),
)

# 5. Time series (supplement): five daily timestamps starting 2023-01-01
dates = pd.date_range("20230101", periods=5)
time_series = pd.Series(np.random.randn(5), index=dates)
print("\n时间序列:\n", time_series)
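# NOTE: the lines below are residue from the web page this script was copied
# from (a view counter and a collapsed-comments notice); they are not code.
# 8330
#
# "... comments have been collapsed"
# "Why were they collapsed?"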



