以下是一种基本方法:
- 将所有列名添加到每个列表项中
- 将列表转换为数组。
- 使用 `aperm` 转置数组以匹配您的预期输出。
- (可选)使用 `apply` 将数组转换为列表。
# Build a 3-D array from a list of data frames that may have different
# columns, then transpose it so the values are grouped by original row.
myListBase <- myList

# Union of every column name that appears in any data frame of the list.
all_cols <- Reduce(base::union, lapply(myListBase, names))

# Pad each data frame with zero-filled columns for the names it lacks and
# put the columns into one common order.
myListBase <- lapply(
  myListBase,
  function(DF) {
    DF[, base::setdiff(all_cols, names(DF))] <- 0
    # drop = FALSE keeps a data frame even when there is only one column.
    DF[, all_cols, drop = FALSE]
  }
)

# array() recycles or truncates its data without complaint when the length
# does not match `dim`, so require the same row count in every data frame.
n_rows <- nrow(myListBase[[1]])
stopifnot(all(vapply(myListBase, nrow, integer(1)) == n_rows))

# Stack the data frames: rows x columns x list element.
myArrayBase <- array(
  unlist(myListBase, use.names = FALSE),
  dim = c(n_rows, length(all_cols), length(myListBase)),
  dimnames = list(NULL, all_cols, NULL)
)

# Reorder the dimensions to: list element x columns x rows.
myPermBase <- aperm(myArrayBase, c(3, 2, 1))
myPermBase

# Optional: turn the array into a list with one data frame per original row.
apply(myPermBase, 3, data.frame)
性能
答案的第一个版本包括了 `data.table` 和 `abind` 方法,但我已将其删除——`base` 版本更快,而且前者在代码清晰度上也没有带来多少额外的好处。
Unit: microseconds
expr min lq mean median uq max neval
camille_purrr_dplyr 7910.9 8139.25 8614.956 8246.30 8387.20 60159.5 1000
cole_DT_abind 2555.8 2804.75 3012.671 2917.95 3061.55 6602.3 1000
cole_base 600.3 634.40 697.987 663.00 733.10 3761.6 1000
参考完整代码:
library(dplyr)
library(purrr)
library(data.table)
library(abind)
library(microbenchmark)
# Sample input: two data frames with the same number of rows (3) but a
# different number of columns (2 and 3).
df1 <- as.data.frame(matrix(1:6, nrow = 3, ncol = 2))
df2 <- as.data.frame(matrix(7:15, nrow = 3, ncol = 3))

# Unnamed list holding both data frames, in order.
myList <- list(df1, df2)
# Time three ways of padding the data frames in `myList` to a common set of
# columns and regrouping the values by original row.
microbenchmark(
  # purrr/dplyr: row-bind with a row id, replace NA by 0, split by row id.
  camille_purrr_dplyr = {
    myList %>%
      map_dfr(tibble::rownames_to_column, var = "id") %>%
      mutate_at(vars(-id), ~ifelse(is.na(.), 0, .)) %>%
      split(.$id) %>%
      map(select, -id)
  },
  # data.table/abind: add the missing columns by reference, bind into a 3-D
  # array, then transpose it.
  cole_DT_abind = {
    myListDT <- copy(myList)
    all_cols <- Reduce(base::union, lapply(myListDT, names))
    lapply(myListDT, setDT)
    lapply(
      myListDT,
      function(DT) {
        DT[, base::setdiff(all_cols, names(DT)) := 0]
        # Put the columns into the common order. setorderv() would sort the
        # rows by these columns instead, which is not what is wanted here.
        setcolorder(DT, all_cols)
      }
    )
    myArray <- abind(myListDT, along = 3)
    myPermArray <- aperm(myArray, c(3, 2, 1))
    apply(myPermArray, 3, data.frame)
  },
  # base R: pad the columns, flatten into a 3-D array, then transpose it.
  cole_base = {
    myListBase <- myList
    all_cols <- Reduce(base::union, lapply(myListBase, names))
    myListBase <- lapply(
      myListBase,
      function(DF) {
        DF[, base::setdiff(all_cols, names(DF))] <- 0
        # drop = FALSE keeps a data frame even when there is only one column.
        DF[, all_cols, drop = FALSE]
      }
    )
    myArrayBase <- array(
      unlist(myListBase, use.names = FALSE),
      dim = c(nrow(myListBase[[1]]), length(all_cols), length(myListBase)),
      dimnames = list(NULL, all_cols, NULL)
    )
    myPermBase <- aperm(myArrayBase, c(3, 2, 1))
    apply(myPermBase, 3, data.frame)
  },
  times = 1000
)