import pandas as pd
product_info ={"订单号":["2951110000099262111","2181910000909928191","2194560000121355545","1194560000121311126","1483160000121315483"],"数量":[92,61,66,33,15],"价格(USD)":[230,122,150,190,200],"状态":["Not Delivered","Not Delivered","Not Delivered","Not Shipped","Not Delivered"],"订单日期":["2022-02-12","2022-03-02","2022-01-22","2022-02-22","2022-02-15"],"订单编号":["444111","444122","444132","444003","444244"],}
df = pd.DataFrame(product_info, index=['one','two','three','four','five'])print(df)'''
订单号 数量 价格(USD) 状态 订单日期 订单编号
one 2951110000099262111 92 230 Not Delivered 2022-02-12 444111
two 2181910000909928191 61 122 Not Delivered 2022-03-02 444122
three 2194560000121355545 66 150 Not Delivered 2022-01-22 444132
four 1194560000121311126 33 190 Not Shipped 2022-02-22 444003
five 1483160000121315483 15 200 Not Delivered 2022-02-15 444244
'''
方法介绍
按columns过滤
全列名过滤
# 过滤出列名为“订单号”和“订单编号”列
res_df = df.filter(items=["订单号",'订单编号'])print(res_df)'''
订单号 订单编号
one 2951110000099262111 444111
two 2181910000909928191 444122
three 2194560000121355545 444132
four 1194560000121311126 444003
five 1483160000121315483 444244
'''
模糊过滤
# 过滤出列名中包含“订单”的所有列
res_df = df.filter(like='订单', axis=1)print(res_df)'''
订单号 订单日期 订单编号
one 2951110000099262111 2022-02-12 444111
two 2181910000909928191 2022-03-02 444122
three 2194560000121355545 2022-01-22 444132
four 1194560000121311126 2022-02-22 444003
five 1483160000121315483 2022-02-15 444244
'''
正则过滤
# 过滤出以"订单"开头的所有列
res_df = df.filter(regex='^订单', axis=1)print(res_df)'''
订单号 订单日期 订单编号
one 2951110000099262111 2022-02-12 444111
two 2181910000909928191 2022-03-02 444122
three 2194560000121355545 2022-01-22 444132
four 1194560000121311126 2022-02-22 444003
five 1483160000121315483 2022-02-15 444244
'''
按index过滤
全名称过滤
# 过滤出index为"one", "four"的行
res_df = df.filter(items=["one",'four'], axis=0)print(res_df)'''
订单号 数量 价格(USD) 状态 订单日期 订单编号
one 2951110000099262111 92 230 Not Delivered 2022-02-12 444111
four 1194560000121311126 33 190 Not Shipped 2022-02-22 444003
'''
模糊过滤
# 过滤出index包含"e"的所有行
res_df = df.filter(like='e', axis=0)print(res_df)'''
订单号 数量 价格(USD) 状态 订单日期 订单编号
one 2951110000099262111 92 230 Not Delivered 2022-02-12 444111
three 2194560000121355545 66 150 Not Delivered 2022-01-22 444132
five 1483160000121315483 15 200 Not Delivered 2022-02-15 444244
'''
正则过滤
# 过滤出以"t"开头的所有行
res_df = df.filter(regex='^t', axis=0)print(res_df)'''
订单号 数量 价格(USD) 状态 订单日期 订单编号
two 2181910000909928191 61 122 Not Delivered 2022-03-02 444122
three 2194560000121355545 66 150 Not Delivered 2022-01-22 444132
'''
多条件过滤
# 过滤出以"价格"开头或名称为"订单号"的所有列
res_df = df.filter(regex='^价格|订单号', axis=1)print(res_df)'''
订单号 价格(USD)
0 295111000262111 230
1 218191000928191 122
2 21945600355545 150
3 11945600311126 190
4 14831600315483 200
'''# 过滤出以"t"开头或以"f"开头的所有行
res_df = df.filter(regex='^t|^f', axis=0)print(res_df)'''
订单号 数量 价格(USD) 状态 订单日期 订单编号
two 2181910000909928191 61 122 Not Delivered 2022-03-02 444122
three 2194560000121355545 66 150 Not Delivered 2022-01-22 444132
four 1194560000121311126 33 190 Not Shipped 2022-02-22 444003
five 1483160000121315483 15 200 Not Delivered 2022-02-15 444244
'''