
import pandas as pd
# Sample data: every material maps to one or more comma-separated product numbers.
df = pd.DataFrame(
    [
        ['A123', '82-0382-07'],
        ['B437', '83-0382-05,84-0393-06'],
        ['C110', '83-0382-05,83-0393-05,85-0022-01'],
    ],
    columns=['Material', 'Product_number'],
)
# Split the multi-valued Product_number column into one row per value.
# split(expand=True) pads the shorter rows with missing values. The explicit
# dropna() removes that padding even when stack() itself keeps missing values
# (the newer pandas stack implementation no longer drops them), so no row
# with an empty Product_number is produced. On the legacy implementation the
# dropna() is a no-op.
df = df.drop(columns=['Product_number']).join(
    df['Product_number']
    .str.split(',', expand=True)
    .stack()
    .dropna()
    .reset_index(level=1, drop=True)
    .rename('Product_number')
)
df
处理后的df:Product_number列中的每个值各占一行,对应的Material值随之重复。
下面对这条公式按步骤拆分,逐一具体讲解:
# The one-liner under discussion, repeated so it can be explained step by step.
# join() aligns on the index, and a frame already exploded by this statement
# carries repeated index labels: re-running the join on it would pair every
# duplicate label with every other one and multiply the rows. In that case
# give the frame a fresh unique index first; frames whose index is already
# unique are left untouched.
if not df.index.is_unique:
    df = df.reset_index(drop=True)
# dropna() removes the padding that split(expand=True) adds to shorter rows,
# independent of whether stack() drops missing values on its own.
df = df.drop(['Product_number'], axis=1).join(
    df['Product_number']
    .str.split(',', expand=True)
    .stack()
    .dropna()
    .reset_index(level=1, drop=True)
    .rename('Product_number')
)
【1】.drop()
# Step 1: keep every column except the multi-valued Product_number column.
res1 = df.drop(columns=['Product_number'])
【2】.str.split()
# Without expand=True (the default), each cell becomes a list of the split pieces.
res2 = df['Product_number'].str.split(pat=',', expand=False)
# With expand=True the list is unpacked: every piece is stored in its own column.
res3 = df['Product_number'].str.split(pat=',', expand=True)
【3】.stack()
# stack() pivots the column labels of the expanded frame into an inner index level.
res4 = (
    df['Product_number']
    .str.split(',', expand=True)
    .stack()
)
【4】.reset_index(level=1, drop =True)
# reset_index(level=1, drop=True) discards index level 1 (the level added by
# stack()), so only the original row labels remain as the index.
res5 = (
    df['Product_number']
    .str.split(',', expand=True)
    .stack()
    .reset_index(level=1, drop=True)
)
【5】.rename()
# rename() names the resulting Series 'Product_number'; join() uses that name
# as the label of the column it adds.
res6 = (
    df['Product_number']
    .str.split(',', expand=True)
    .stack()
    .reset_index(level=1, drop=True)
    .rename('Product_number')
)
【6】.join()
# join() attaches the renamed Series to the remaining columns, aligning the
# two sides on their index labels.
res7 = df.drop(columns=['Product_number']).join(
    df['Product_number']
    .str.split(',', expand=True)
    .stack()
    .reset_index(level=1, drop=True)
    .rename('Product_number')
)
# Equivalent to joining the intermediate results directly: res1.join(res6).
res8 = res1.join(res6)