我认为下面这段代码可以作为你的一个不错的起点:它先为每场比赛生成一份交换了球员位置的镜像记录,再按球员组合和场地类型分组求和,最后计算双方的胜率。
import pandas as pd
# Head-to-head win percentages per (player 1, player 2, surface).
#
# Each raw row records one match from a single perspective. The script adds
# a mirrored copy of every match (players and win flags swapped) so that each
# pairing can be looked up in either order, then sums wins per group and
# turns them into win fractions.

match_columns = ['p1_name', 'p2_name', 'Surface', 'p1_win', 'p2_win']
data = [
    ['Murray', 'Nadal', 'Hard', 1, 0],
    ['Nadal', 'Murray', 'Clay', 1, 0],
    ['Nadal', 'Murray', 'Hard', 0, 1],
    ['Murray', 'Federer', 'Clay', 1, 0],
]
df = pd.DataFrame(data, columns=match_columns)
print(df)

# Relabel the columns so player 1 and player 2 (and their win flags) trade
# places; the underlying values are untouched, only the headers move.
swapped_labels = {
    'p1_name': 'p2_name',
    'p2_name': 'p1_name',
    'p1_win': 'p2_win',
    'p2_win': 'p1_win',
}
df1 = df.rename(columns=swapped_labels)
print(df1)

# concat lines columns up by label, so the relabelled frame contributes the
# mirrored view of every match. The original row index is kept (0-3 twice).
df2 = pd.concat([df, df1])
print(df2)

# Total wins for each side within every (p1, p2, surface) combination.
df3 = df2.groupby(['p1_name', 'p2_name', 'Surface']).sum()
print(df3)

# Matches played in each group; shared denominator for both percentages.
matches_played = df3['p1_win'] + df3['p2_win']
df3['p1_pct'] = df3['p1_win'] / matches_played
df3['p2_pct'] = df3['p2_win'] / matches_played
print(df3)
输出:
p1_name p2_name Surface p1_win p2_win
0 Murray Nadal Hard 1 0
1 Nadal Murray Clay 1 0
2 Nadal Murray Hard 0 1
3 Murray Federer Clay 1 0
p2_name p1_name Surface p2_win p1_win
0 Murray Nadal Hard 1 0
1 Nadal Murray Clay 1 0
2 Nadal Murray Hard 0 1
3 Murray Federer Clay 1 0
p1_name p2_name Surface p1_win p2_win
0 Murray Nadal Hard 1 0
1 Nadal Murray Clay 1 0
2 Nadal Murray Hard 0 1
3 Murray Federer Clay 1 0
0 Nadal Murray Hard 0 1
1 Murray Nadal Clay 0 1
2 Murray Nadal Hard 1 0
3 Federer Murray Clay 0 1
                         p1_win  p2_win
p1_name p2_name Surface
Federer Murray  Clay          0       1
Murray  Federer Clay          1       0
        Nadal   Clay          0       1
                Hard          2       0
Nadal   Murray  Clay          1       0
                Hard          0       2
                         p1_win  p2_win  p1_pct  p2_pct
p1_name p2_name Surface
Federer Murray  Clay          0       1     0.0     1.0
Murray  Federer Clay          1       0     1.0     0.0
        Nadal   Clay          0       1     0.0     1.0
                Hard          2       0     1.0     0.0
Nadal   Murray  Clay          1       0     1.0     0.0
                Hard          0       2     0.0     1.0