# -*- coding:utf-8 -*-
import pandas as pd
# One row -> many rows: expand comma-separated cells into one row per value.
df = pd.DataFrame(
    {
        'Code': ['1111', '2222'],
        'Color': ['a, b, c,d', 'blue, pink, yellow'],
        'Size': ['1, 2, 3', '7, 6, 10'],
    }
)
id_df = df[["Code"]]
# Every column after "Code" holds a comma-separated list. Split each cell,
# explode the resulting lists into one row per token (the original row label
# is repeated for each token), then strip the whitespace that surrounds
# tokens such as " b" so the values compare cleanly.
# explode() works on the lists directly, so there is no NaN padding to get rid
# of, unlike split(expand=True) on rows with different token counts.
for col in df.columns[1:]:
    split_df = df[col].str.split(",").explode().str.strip()
    # Joining on the repeated row label pairs every row already in id_df with
    # every token of this column, so each Code ends up with all combinations
    # of its Color and Size values.
    id_df = id_df.join(split_df)
# Drop the repeated row labels and rebuild a clean 0..n-1 index.
convert_df = id_df.reset_index(drop=True)
print(convert_df)
# Sample input for the many-rows-to-one-row example: one record per
# (id, name, sub) combination; the (3, "hw") pair appears three times.
df = pd.DataFrame(
    [
        (2, "tx", "game"),
        (3, "hw", "mobile"),
        (3, "hw", "cell"),
        (3, "hw", "cloud"),
    ],
    columns=["id", "name", "sub"],
)
# Many rows -> one row
def collect(ls):
    """Join the strings in *ls* into a single comma-separated string.

    *ls* is any iterable of ``str``. The items are concatenated with ","
    between them and no surrounding whitespace; an empty iterable yields "".
    """
    separator = ","
    return separator.join(ls)
# Collapse the rows of each (id, name) group into a single entry whose "subs"
# value is the comma-joined string of that group's "sub" values.
df2 = df.groupby(["id", "name"])["sub"].apply(collect).rename("subs")
# reset_index() returns a new DataFrame instead of modifying df2 in place, and
# a bare expression statement is discarded when the file runs as a script, so
# keep the flattened result and print it.
merged_df = df2.reset_index()
print(merged_df)