在R数据框中,将dplyr函数映射到每个变量对的组合。

12

我想在R中将一个函数映射到数据框中每个变量的组合对,返回每个组合对的函数输出的数据框。我可以这样手动完成:

library(tidyverse)

# Example data: three numeric columns, two rows.
df <- tribble(
  ~a, ~b, ~c,
   1,  4,  5,
   2,  3,  7
)

# Toy pairwise function used for the sake of example: first minus second.
f <- function(a, b) {
  a - b
}

# Spell out every ordered pair of columns by hand and keep only the results.
transmute(
  df,
  a_minus_b = f(a, b),
  a_minus_c = f(a, c),
  b_minus_c = f(b, c),
  b_minus_a = f(b, a),
  c_minus_a = f(c, a),
  c_minus_b = f(c, b)
)

手动完成这个操作显然对于一个包含很多变量的数据框来说是不切实际的。 我应该如何使用迭代将我的函数应用于每一组变量组合?

5个回答

12

使用 dplyr 和 purrr 的另一种方法可能如下所示:

library(tidyverse)

df <- tibble(a = c(1, 2), b = c(4, 3), c = c(5, 7))

f <- function(a, b) a - b # a simple function for sake of example

# For the column named `x` (a single string), compute `f(x, other)` against
# every other column of `df` and return only those results, with each output
# column named "<x>_minus_<other>".
f_help <- function(x) {
  df %>%
    transmute(
      across(
        -all_of(x),                       # every column except `x`
        ~ f(.data[[x]], .x),              # `.data[[x]]` looks `x` up by name
        .names = paste0(x, "_minus_{.col}")
      )
    )
}

# One result table per column, glued together side by side.
map(names(df), f_help) %>%
  bind_cols()
#> # A tibble: 2 x 6
#>   a_minus_b a_minus_c b_minus_a b_minus_c c_minus_a c_minus_b
#>       <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
#> 1        -3        -4         3        -1         4         1
#> 2        -1        -5         1        -4         5         4

8

一个使用 dplyr 和 purrr 的解决方案可能如下:

# Iterate over every ordered pair of column names (reversed-order pairs first,
# then forward-order pairs) and column-bind one result column per pair.
map_dfc(
  c(
    combn(rev(names(df)), 2, simplify = FALSE),
    combn(names(df), 2, simplify = FALSE)
  ),
  function(pair) {
    out_name <- paste(pair, collapse = "_")
    # Row by row, fold the pair's two values with subtraction.
    df %>%
      rowwise() %>%
      transmute(!!out_name := reduce(c_across(all_of(pair)), `-`)) %>%
      ungroup()
  }
)

    c_b   c_a   b_a   a_b   a_c   b_c
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     1     4     3    -3    -4    -1
2     4     5     1    -1    -5    -4

或者使用指定的函数:

# Same iteration over every ordered pair of column names, but folding each
# row's two values with the user-supplied function `f`.
map_dfc(
  c(
    combn(rev(names(df)), 2, simplify = FALSE),
    combn(names(df), 2, simplify = FALSE)
  ),
  function(pair) {
    out_name <- paste(pair, collapse = "_")
    df %>%
      rowwise() %>%
      transmute(!!out_name := reduce(c_across(all_of(pair)), f)) %>%
      ungroup()
  }
)

5
使用 tidyverse 中的 set_names 的解法:
library(tidyverse)
f <- function(a, b) a - b # a simple function for sake of example

# `combn()` on the data frame yields every two-column subset; repeating it on
# the column-reversed frame supplies the opposite ordering of each pair.
c(combn(df, 2, simplify = FALSE),
  combn(rev(df), 2, simplify = FALSE)) %>%
  # Name each two-column piece after its columns, e.g. "a_minus_b".
  set_names(map_chr(., ~ paste(names(.x), collapse = "_minus_"))) %>%
  # `[[` pulls the columns out as plain vectors, so `f` returns a vector
  # directly and no follow-up `pull()` is needed.
  map(~ f(.x[[1]], .x[[2]])) %>%
  bind_cols()
 # A tibble: 2 x 6
  a_minus_b a_minus_c b_minus_c c_minus_b c_minus_a b_minus_a
      <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
1        -3        -4        -1         1         4         3
2        -1        -5        -4         4         5         1

3

以下是一个基于 base R(不依赖额外的包)的解决方案,可以满足您的需求:

# Create combination
# All unordered column-name pairs, followed by the same pairs reversed, so
# that every ordered pair appears exactly once.
combos <- combn(names(df), 2, simplify = FALSE)
combos <- c(combos, lapply(combos, rev))

# Apply function to each element of combos and add names
# `df[j[1]]` keeps a one-column data frame, so each result can be renamed to
# "<first>_minus_<second>" before everything is combined column-wise.
as.data.frame(lapply(combos, function(j) {
  setNames(f(df[j[1]], df[j[2]]), paste0(j, collapse = "_minus_"))
}))
  a_minus_b a_minus_c b_minus_c b_minus_a c_minus_a c_minus_b
1        -3        -4        -1         3         4         1
2        -1        -5        -4         1         5         4


# Same thing but easier to read
# For each ordered pair, apply `f` to the two one-column data frames and label
# the result "<first>_minus_<second>".
l <- lapply(combos, function(pair) {
  label <- paste0(pair, collapse = "_minus_")
  setNames(f(df[pair[1]], df[pair[2]]), label)
})

as.data.frame(l)

或者,如果您想使用purrr等效方法:

# Tidyverse equivalent
# One renamed single-column result per ordered pair, bound column-wise.
map_dfc(combos, function(pair) {
  setNames(f(df[pair[1]], df[pair[2]]), paste0(pair, collapse = "_minus_"))
})

1

通过元编程和 data.table 实现

library(data.table)
setDT(df)

# All unordered pairs of column names.
df_combn <- combn(names(df), 2, simplify = FALSE)

# Build one unevaluated call `f(<first>, <second>)` per pair. Constructing the
# calls from symbols, rather than parsing pasted text, keeps this working for
# column names that are not syntactic R names.
f_vector <- lapply(df_combn, function(x) {
  call("f", as.name(x[1]), as.name(x[2]))
})

# Output column names, e.g. "a_minus_b"; `vapply` guarantees a character
# vector even when there are no pairs.
out_names <- vapply(df_combn, paste0, character(1), collapse = "_minus_")

# Evaluate each call with the columns of `df` in scope and add the result to
# `df` by reference. `seq_along` makes the loop a no-op on empty input.
for (i in seq_along(f_vector)) {
  set(df, j = out_names[i], value = eval(f_vector[[i]], df))
}

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接