# Sample data used by every method below: one record per row. All three
# columns hold strings (including 'Population'), and each date string ends
# with a trailing space.
df1 = pd.DataFrame(
    [
        ('00', '1990-12-31 ', '700'),
        ('01', '1990-12-27 ', '200'),
        ('02', '1990-12-28 ', '300'),
        ('02', '1990-12-28 ', '400'),
        ('01', '1992-12-27 ', '500'),
        ('03', '1990-12-30 ', '100'),
    ],
    columns=['Id', 'date', 'Population'],
)
print(df1)
# Recorded output of the print call above:
"""
Id date Population
0 00 1990-12-31 700
1 01 1990-12-27 200
2 02 1990-12-28 300
3 02 1990-12-28 400
4 01 1992-12-27 500
5 03 1990-12-30 100
"""
# Method 1 (maximum): per-'Id' largest 'Population' value via groupby + apply.
#
# 'Population' is selected as a one-column frame before apply. The callback
# only reads that column, and calling DataFrameGroupBy.apply with no column
# selection while the grouping column is still part of each group raises a
# DeprecationWarning in pandas 2.2+. The result is still an unnamed Series
# indexed by 'Id'.
#
# NOTE: df1['Population'] holds strings, so the values are compared as text.
# That agrees with numeric order here only because every value has three digits.
def _max_population(group):
    """Return the largest 'Population' value found in one group."""
    populations = group['Population'].values
    return populations[populations.argmax()]


Max1 = df1.groupby('Id')[['Population']].apply(_max_population)
print(Max1)
# Recorded output of the print call above:
"""
Id
00 700
01 500
02 400
03 100
dtype: object
"""
# Method 1 (minimum): per-'Id' smallest 'Population' value via groupby + apply.
#
# 'Population' is selected as a one-column frame before apply. The callback
# only reads that column, and calling DataFrameGroupBy.apply with no column
# selection while the grouping column is still part of each group raises a
# DeprecationWarning in pandas 2.2+. The result is still an unnamed Series
# indexed by 'Id'.
#
# NOTE: df1['Population'] holds strings, so the values are compared as text.
# That agrees with numeric order here only because every value has three digits.
def _min_population(group):
    """Return the smallest 'Population' value found in one group."""
    populations = group['Population'].values
    return populations[populations.argmin()]


Min1 = df1.groupby('Id')[['Population']].apply(_min_population)
print(Min1)
# Recorded output of the print call above:
"""
Id
00 700
01 200
02 300
03 100
dtype: object
"""
方法二:
# Method 2: order the rows by 'Population' from high to low, then keep only
# the first row encountered for each 'Id'. Unlike the aggregations above, this
# keeps the whole row (including 'date') of the per-'Id' maximum.
ranked_rows = df1.sort_values(by='Population', ascending=False)
cc = ranked_rows.drop_duplicates(subset=['Id'])
print(cc)
# Recorded output of the print call above:
"""
Id date Population
0 00 1990-12-31 700
4 01 1992-12-27 500
3 02 1990-12-28 400
5 03 1990-12-30 100
"""
方法三:
# Method 3: built-in groupby aggregation. sort=False leaves the groups in
# order of first appearance instead of sorting the 'Id' keys.
groups_by_id = df1.groupby(by=['Id'], sort=False)
aa = groups_by_id['Population'].max()
print(aa)
# Recorded output of the print call above:
"""
Id
00 700
01 500
02 400
03 100
Name: Population, dtype: object
"""
方法四:
# Method 4: broadcast each group's maximum back onto its rows with transform,
# then keep the rows whose own 'Population' equals that maximum. Original row
# order is preserved, and every row tied for a group's maximum is kept.
#
# The aggregation is named by the string 'max' rather than the Python builtin
# max: passing the builtin to transform raises a FutureWarning in pandas 2.1+,
# which recommends the string form to keep the current behaviour.
group_max = df1.groupby(['Id'])['Population'].transform('max')
res = group_max == df1['Population']
print(df1[res])
# Recorded output of the print call above:
"""
Id date Population
0 00 1990-12-31 700
3 02 1990-12-28 400
4 01 1992-12-27 500
5 03 1990-12-30 100
"""
FutureWarning: the take_last=True keyword is deprecated, use keep='last' instead。 - tommy.carstensen
应使用 sort_values 而不是 sort_index,如 @Tamelise 的答案所示。 - Stuart