使用 `GroupBy.transform` 和 `gmean`:
from scipy.stats.mstats import gmean
# Optional clean-up: if the prices are stored as strings with a leading `$`
# and `,` thousands separators, strip both and cast to int first:
# df['price'] = df['price'].str.lstrip('$').str.replace(',', '').astype(int)
# Group the price column by the type column and broadcast each group's
# geometric mean back onto every row of that group.
df['new'] = df['price'].groupby(df['type']).transform(gmean)
或者使用自定义函数:
def gmean1(x):
    """Return the geometric mean of the numeric values in ``x``.

    The result is computed in the log domain, ``exp(mean(log(v)))``, rather
    than as ``product ** (1 / n)``: multiplying many prices together
    overflows fixed-width integers (and eventually floats), which silently
    produces a wrong answer.

    Args:
        x: A sized iterable of real numbers, e.g. the ``Series`` holding one
            group's values when used with ``GroupBy.transform``.

    Returns:
        The geometric mean as a ``float``. ``0.0`` if any value is zero;
        ``nan`` if any value is negative (the geometric mean is undefined
        there) or NaN.

    Raises:
        ZeroDivisionError: If ``x`` is empty.
    """
    import math

    values = [float(v) for v in x]
    # A single zero makes the whole product, and therefore the mean, zero;
    # handle it up front because log(0) is undefined.
    if 0.0 in values:
        return 0.0
    if any(v < 0 for v in values):
        return math.nan
    return math.exp(math.fsum(math.log(v) for v in values) / len(values))
# Group the price column by the type column and broadcast the custom
# geometric mean of each group back onto every row of that group.
df['new'] = df['price'].groupby(df['type']).transform(gmean1)
# Hard-coded mean price for each known type code.
_MEAN_PRICE_BY_TYPE = {
    111: 245474,
    222: 194223,
    333: 124122,
}


def meanByGroup(x):
    """Return the hard-coded mean price for type code ``x``.

    Args:
        x: Type code to look up (``111``, ``222`` or ``333``).

    Returns:
        The price stored for ``x``, or ``None`` when ``x`` is not one of
        the known type codes.
    """
    return _MEAN_PRICE_BY_TYPE.get(x)
# Call meanByGroup on every value of the "type" column and store the results
# in a new "geomean_price_bytype" column.
df["geomean_price_bytype"] = df["type"].apply(meanByGroup)
。