我希望找到将矩阵转换为长格式数据框的最快方法。
这里我比较了三种解决方案,但我想知道是否存在更快的方法,例如使用data.table
。
以下是我比较的三种方法的可重现代码:
# Generate matrix -------------------------------------------------------------
set.seed(1)
ex <- matrix(data = round(runif(100000), 1), nrow = 1000, ncol = 100)
rownames(ex) <- paste0("row", 1:nrow(ex))
colnames(ex) <- paste0("col", 1:ncol(ex))
ex[1:5, 1:5]
col1 col2 col3 col4 col5
row1 0.3 0.5 0.9 0.8 0.2
row2 0.4 0.7 1.0 0.5 0.5
row3 0.6 0.4 0.9 0.2 0.0
row4 0.9 1.0 0.4 0.4 0.5
row5 0.2 0.1 0.2 0.8 0.9
# table solution --------------------------------------------------------------
df1 <- as.data.frame(as.table(ex))
# reshape solution ------------------------------------------------------------
df2 <- reshape2::melt(ex)
# dplyr solution --------------------------------------------------------------
library(dplyr)
library(tidyr)
df3 <- ex %>%
as.data.frame() %>%
tibble::rownames_to_column("Var1") %>%
gather("Var2", "value", -Var1)
# check for equality ----------------------------------------------------------
colnames(df1)[colnames(df1) == "Freq"] <- "value"
head(df1)
Var1 Var2 value
1 row1 col1 0.3
2 row2 col1 0.4
3 row3 col1 0.6
4 row4 col1 0.9
5 row5 col1 0.2
6 row6 col1 0.9
df1$Var1 <- as.character(df1$Var1)
df1$Var2 <- as.character(df1$Var2)
df2$Var1 <- as.character(df2$Var1)
df2$Var2 <- as.character(df2$Var2)
identical(df1, df2); identical(df1, df3)
TRUE
# Microbenchmark --------------------------------------------------------------
library(microbenchmark)
comp <- microbenchmark(
table = {
df1 <- as.data.frame(as.table(ex))
},
reshape = {
df2 <- reshape2::melt(ex)
},
dplyr = {
df3 <- ex %>%
as.data.frame() %>%
tibble::rownames_to_column("Var1") %>%
gather("Var2", "value", -Var1)
}
)
library(ggplot2)
autoplot(comp)
reshape
方法目前是最快的。
melt(as.data.table(ex)[, rn := seq_len(.N)], id.var = 'rn')
。 - akrundt = melt(data.table(ex, keep.rownames = TRUE) , id.vars = c("rn"))
- Kipras Kančysdata.frame(Var1 = rownames(ex), Var2 = colnames(ex), value = c(ex))
- markus