如何使用dplyr获取每行最大值所在列

8

我在R中有一个数据框。对于每一行,我想选择具有最高值的列,并将该列的名称粘贴在一起。如果只有两列可供选择,则很简单(请注意,我有一个过滤步骤,如果两个列的值都小于0.1,则不包括行):

set.seed(6)
mat_simple <- matrix(rexp(200, rate=.1), ncol=2) %>%
    as.data.frame() 

head(mat_simple)
         V1         V2
1  2.125366  6.7798683
2  1.832349  8.9610534
3  6.149668 15.7777370
4  3.532614  0.2355711
5 21.110703  1.2927119
6  2.871455 16.7370847
    
mat_simple <- mat_simple %>%
    mutate(
        class = case_when(
            V1 < 0.1 & V2 < 0.1 ~ NA_character_,
            V1 > V2 ~ "V1",
            V2 > V1 ~ "V2"
        )
    )

head(mat_simple)
         V1         V2 class
1  2.125366  6.7798683    V2
2  1.832349  8.9610534    V2
3  6.149668 15.7777370    V2
4  3.532614  0.2355711    V1
5 21.110703  1.2927119    V1
6  2.871455 16.7370847    V2

然而,当列数超过两列时,这种方法效率不高。例如:

set.seed(6)
mat_hard <- matrix(rexp(200, rate=.1), ncol=5) %>%
     as.data.frame() 

head(mat_hard)
         V1        V2         V3         V4        V5
1  2.125366 26.427335 13.7289349  1.7513873  6.297978
2  1.832349 10.241441  5.3084648  0.3347235 29.247774
3  6.149668  5.689442  5.4546072  4.5035747 11.646721
4  3.532614 10.397464  6.5560545  4.4221171  1.713909
5 21.110703  9.928022  0.2284966  0.2101213  1.033498
6  2.871455  4.781357  3.3246585 15.8878010  4.004967

有没有更好的解决方案,最好使用dplyr


你没有指定的一件事:如果多列具有最大值,输出应该是什么? - slamballais
@icedcoffee,你可以考虑接受其中一个答案。 - GuedesBF
4个回答

10

我认为你可以尝试以下方式使用max.col

mat_hard %>%
  mutate(Class = names(.)[max.col(.)])

它提供了

           V1         V2          V3         V4         V5 Class
1   2.1253660 26.4273345 13.72893486  1.7513873  6.2979783    V2
2   1.8323489 10.2414409  5.30846484  0.3347235 29.2477737    V5
3   6.1496678  5.6894422  5.45460715  4.5035747 11.6467207    V5
4   3.5326145 10.3974636  6.55605448  4.4221171  1.7139087    V2
5  21.1107027  9.9280219  0.22849661  0.2101213  1.0334978    V1
6   2.8714553  4.7813566  3.32465853 15.8878010  4.0049670    V4
7   0.6601019 14.6976125  1.37343714 13.4155430  7.5144204    V2
8   5.3986340  9.9330388 28.30681662  5.9243824  8.6695885    V3
9   7.1672128  0.1135649  0.02006355  7.4839158 27.4311080    V5
10  0.3579145  3.3261009  3.59446750 11.3528078 31.4819959    V5
11  3.5569986  1.4915687 11.81571650 12.5108163 10.5650964    V4
12 15.6411692 14.9843178 13.01627289  1.4870455 13.9162441    V1
13  4.0105209 11.6297626 14.03933859  9.1182125 16.6013583    V5
14  0.8267777 19.6671308 25.39573774  1.5730764 22.6813765    V3
15 16.0518859  7.9446867  5.52230477  6.9886905 31.3423870    V5
16 11.1804892  1.2474887 32.80866682  6.0927374  5.4666769    V3
17  1.9020065  0.8736180  0.76056537  6.2290362 22.8229062    V5
18  0.4354699  4.8834713  1.48728908  2.7705605  5.1947573    V5
19 13.9564746  0.4376033 32.46160917 33.5775243  3.6361463    V4
20  0.9488887 11.3126093 21.76888266  1.1800891  9.1619501    V3
21  0.4105029 30.8768108  6.77986834  6.4456033  3.3375528    V2
22  4.8383899  3.3213757  8.96105336  5.3539974  2.9596863    V3
23 23.5980692  0.8854953 15.77773701 17.3438544  3.6268837    V1
24  5.7302813 20.6837055  0.23557108  3.8622885  1.9313057    V2
25 23.7223308  1.6956027  1.29271191  3.6884809  3.7486600    V1
26  0.8390799 11.1018979 16.73708472  1.0896291  5.1491888    V3
27  6.4742757 15.4374730  8.76199843  0.3349979  2.2843753    V2
28  3.0712249  2.8939230  8.65244642  3.1096128  1.3245159    V3
29  8.4365271 30.2740673 30.79814652  5.8697589  1.8603535    V3
30 15.6024932  5.5718871  4.07631202 24.6346215 35.3187257    V5
31  3.7759064  1.6237925 13.80958004  7.4002858 10.5098296    V3
32  2.3559053  8.5405451 11.09127093 16.6616195 10.9618053    V4
33 21.7985378 18.3840789  1.24258382 32.7283077  1.8425573    V4
34  0.5718545 22.2466535  7.35903634  5.6994226 31.8928204    V5
35  0.8731764 11.4922204  1.36448644  0.2167550  8.1839797    V2
36  4.7162801 10.8743625 33.72675944  1.7916643  4.5028127    V3
37 13.7097611 16.1319530  0.84351757  8.1407995  5.7692484    V2
38  0.5347331  7.1313409 10.23327786 24.1837711  0.2850878    V4
39  0.3738863 12.0495186  4.61309257  6.2158783  5.7180108    V2
40 18.9056686  1.7171729  4.53560492  0.8193901  7.8306692    V1

8
您可以使用以下解决方案:
library(dplyr)

mat_hard %>%
  rowwise() %>%
  mutate(max = names(mat_hard)[c_across(everything()) == max(c_across(everything()))])


# A tibble: 40 x 6
# Rowwise: 
       V1    V2      V3      V4      V5 max  
    <dbl> <dbl>   <dbl>   <dbl>   <dbl> <chr>
 1  2.48   1.73  3.97   24.2    12.7    V4   
 2  9.18   8.86 13.8     9.26    7.64   V3   
 3  6.22   5.96  0.0911  6.66    0.274  V4   
 4  1.14  24.0  12.0     7.37    5.39   V2   
 5  8.09   8.30 24.1     4.01    0.0674 V3   
 6  7.97   2.76  2.21   16.0     0.805  V4   
 7  0.135  2.05  1.85    0.645   2.15   V5   
 8 23.9   31.0   6.48    0.0328 15.1    V2   
 9  9.46   5.66 40.8    12.6     0.320  V3   
10  7.23   8.10  2.06    2.61    6.14   V2   
# ... with 30 more rows

您可以使用以下方法中的which.max函数,以减少代码冗长:

mat_hard %>%
  rowwise() %>%
  mutate(max = names(cur_data())[which.max(c_across(everything()))])

# A tibble: 40 x 6
# Rowwise: 
       V1    V2      V3      V4      V5 max  
    <dbl> <dbl>   <dbl>   <dbl>   <dbl> <chr>
 1  2.48   1.73  3.97   24.2    12.7    V4   
 2  9.18   8.86 13.8     9.26    7.64   V3   
 3  6.22   5.96  0.0911  6.66    0.274  V4   
 4  1.14  24.0  12.0     7.37    5.39   V2   
 5  8.09   8.30 24.1     4.01    0.0674 V3   
 6  7.97   2.76  2.21   16.0     0.805  V4   
 7  0.135  2.05  1.85    0.645   2.15   V5   
 8 23.9   31.0   6.48    0.0328 15.1    V2   
 9  9.46   5.66 40.8    12.6     0.320  V3   
10  7.23   8.10  2.06    2.61    6.14   V2   
# ... with 30 more rows

4

使用purrr和dplyr的选项:

library(dplyr)
library(purrr)

output<-mat_hard%>%mutate(class=pmap(., ~{
        v1<-c(...)
        toString(names(v1)[which.max(v1)])}))

head(output)
         V1        V2         V3         V4        V5 class
1  2.125366 26.427335 13.7289349  1.7513873  6.297978    V2
2  1.832349 10.241441  5.3084648  0.3347235 29.247774    V5
3  6.149668  5.689442  5.4546072  4.5035747 11.646721    V5
4  3.532614 10.397464  6.5560545  4.4221171  1.713909    V2
5 21.110703  9.928022  0.2284966  0.2101213  1.033498    V1
6  2.871455  4.781357  3.3246585 15.8878010  4.004967    V4

2
我一直对purrr包中的函数,特别是pmap心存好感。干得好! - Anoushiravan R
1
我逐渐开始在tidyverse中完成所有工作。我用pmap方法替换了每个行例程。对于多参数函数,它的效果非常好,而vector<-c(...)技巧则是解决只接受向量作为参数的函数的不错解决方案。 - GuedesBF
1
非常好!相当一段时间以来,我一直很喜欢pmap及其灵活性,并且它始终是我进行逐行操作的首选。现在,我只是尝试使用其他方法来走出舒适区。 - Anoushiravan R
1
有趣的事情,我正在走相反的方向! - GuedesBF
1
谢谢,我无法让 c_across 仅在选定的列上工作,但是使用 pmap 它就像魔法一样工作! - saQuist

2
我们可以添加一个id,然后使用pivot_longer函数,接着使用summarize函数,最后再与df进行连接。
library(dplyr)
library(tidyr)
df <- mat_hard %>% 
  mutate(id = row_number(), .before = V1)

df %>%  
  pivot_longer(cols = -1) %>% 
  group_by(id) %>% 
  summarize(maxCol = name[which.max(value)]) %>%  
  left_join(df, .)

输出:

   id         V1         V2          V3         V4         V5 maxCol
1   1  2.1253660 26.4273345 13.72893486  1.7513873  6.2979783     V2
2   2  1.8323489 10.2414409  5.30846484  0.3347235 29.2477737     V5
3   3  6.1496678  5.6894422  5.45460715  4.5035747 11.6467207     V5
4   4  3.5326145 10.3974636  6.55605448  4.4221171  1.7139087     V2
5   5 21.1107027  9.9280219  0.22849661  0.2101213  1.0334978     V1
6   6  2.8714553  4.7813566  3.32465853 15.8878010  4.0049670     V4
7   7  0.6601019 14.6976125  1.37343714 13.4155430  7.5144204     V2
8   8  5.3986340  9.9330388 28.30681662  5.9243824  8.6695885     V3
9   9  7.1672128  0.1135649  0.02006355  7.4839158 27.4311080     V5
10 10  0.3579145  3.3261009  3.59446750 11.3528078 31.4819959     V5
11 11  3.5569986  1.4915687 11.81571650 12.5108163 10.5650964     V4
12 12 15.6411692 14.9843178 13.01627289  1.4870455 13.9162441     V1
13 13  4.0105209 11.6297626 14.03933859  9.1182125 16.6013583     V5
14 14  0.8267777 19.6671308 25.39573774  1.5730764 22.6813765     V3
15 15 16.0518859  7.9446867  5.52230477  6.9886905 31.3423870     V5
16 16 11.1804892  1.2474887 32.80866682  6.0927374  5.4666769     V3
17 17  1.9020065  0.8736180  0.76056537  6.2290362 22.8229062     V5
18 18  0.4354699  4.8834713  1.48728908  2.7705605  5.1947573     V5
19 19 13.9564746  0.4376033 32.46160917 33.5775243  3.6361463     V4
20 20  0.9488887 11.3126093 21.76888266  1.1800891  9.1619501     V3
21 21  0.4105029 30.8768108  6.77986834  6.4456033  3.3375528     V2
22 22  4.8383899  3.3213757  8.96105336  5.3539974  2.9596863     V3
23 23 23.5980692  0.8854953 15.77773701 17.3438544  3.6268837     V1
24 24  5.7302813 20.6837055  0.23557108  3.8622885  1.9313057     V2
25 25 23.7223308  1.6956027  1.29271191  3.6884809  3.7486600     V1
26 26  0.8390799 11.1018979 16.73708472  1.0896291  5.1491888     V3
27 27  6.4742757 15.4374730  8.76199843  0.3349979  2.2843753     V2
28 28  3.0712249  2.8939230  8.65244642  3.1096128  1.3245159     V3
29 29  8.4365271 30.2740673 30.79814652  5.8697589  1.8603535     V3
30 30 15.6024932  5.5718871  4.07631202 24.6346215 35.3187257     V5
31 31  3.7759064  1.6237925 13.80958004  7.4002858 10.5098296     V3
32 32  2.3559053  8.5405451 11.09127093 16.6616195 10.9618053     V4
33 33 21.7985378 18.3840789  1.24258382 32.7283077  1.8425573     V4
34 34  0.5718545 22.2466535  7.35903634  5.6994226 31.8928204     V5
35 35  0.8731764 11.4922204  1.36448644  0.2167550  8.1839797     V2
36 36  4.7162801 10.8743625 33.72675944  1.7916643  4.5028127     V3
37 37 13.7097611 16.1319530  0.84351757  8.1407995  5.7692484     V2
38 38  0.5347331  7.1313409 10.23327786 24.1837711  0.2850878     V4
39 39  0.3738863 12.0495186  4.61309257  6.2158783  5.7180108     V2
40 40 18.9056686  1.7171729  4.53560492  0.8193901  7.8306692     V1

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接