from multiprocessing import cpu_count
from joblib import Parallel, delayed
import pandas as pd
# Job count handed to joblib.Parallel (as n_jobs) in apply_parallel:
# the CPU count reported by multiprocessing.cpu_count().
cores = cpu_count()
"""
Function that processes the values of the current column
"""
def proc(x):
    """Compute the new-column value for one value of the source column.

    This is the per-value hook that ``func`` applies to every entry of the
    ``"old"`` column. The original snippet elided the body (it was declared
    without parameters and returned an unbound name), so it could not be
    called as ``proc(x)``. Until real logic is filled in, the value is
    passed through unchanged so the surrounding pipeline runs end to end.

    Args:
        x: A single value taken from the source column.

    Returns:
        The value to store in the new column for ``x``.
    """
    # TODO: replace this pass-through with the real per-value computation.
    results = x
    return results
def apply_parallel(df_grouped, func):
    """Run ``func`` on every group through joblib and concatenate the results.

    Args:
        df_grouped: Iterable yielding ``(key, group)`` pairs, such as the
            object returned by ``DataFrame.groupby``.
        func: Callable invoked once per group; whatever it returns is
            collected and handed to ``pd.concat``.

    Returns:
        The result of ``pd.concat`` over the per-group outputs.
    """
    runner = Parallel(n_jobs=cores)
    # Only the group itself is forwarded to ``func``; the key is unused.
    pieces = runner(delayed(func)(chunk) for _key, chunk in df_grouped)
    combined = pd.concat(pieces)
    return combined
def func(df):
    """Add a ``"new"`` column derived from ``"old"`` and return the frame.

    Every value of ``df["old"]`` is passed through ``proc`` and the outcome
    is stored under ``df["new"]``. The column is assigned on ``df`` itself,
    and that same object is returned.

    Args:
        df: Frame (or group) containing an ``"old"`` column.

    Returns:
        ``df`` with the ``"new"`` column set.
    """
    derived = df["old"].apply(proc)
    df["new"] = derived
    return df
# Split ``df`` into one group per distinct value of the "old" column.
# NOTE(review): ``df`` is not defined in the visible part of this file;
# presumably it is created earlier -- confirm.
# NOTE(review): groupby leaves out rows whose key is NaN by default, so such
# rows would be absent from ``df_new`` -- confirm this is intended.
df_grouped = df.groupby("old")
# Run ``func`` on every group via apply_parallel and keep the combined result.
df_new = apply_parallel(df_grouped, func)
joblib & pandas | 多进程处理 DataFrame：通过某一列计算新列
于 2020-07-23 15:25:09 首次发布