使用apply(df, 1, fun)
可能很容易解决这个问题,但出于性能考虑,我尝试按列而不是按行解决它(我曾经看到@alexis_laz做过类似的事情,但现在找不到了)。
tmp <- df[-1] != 0
res <- rep(NA, nrow(tmp))
for(j in colnames(tmp)){
res[tmp[, j]] <- paste(res[tmp[, j]], j, sep = "-")
}
gsub("NA-", "", res, fixed = TRUE)
一些关于更大数据集的基准测试
set.seed(123)
BigDF <- as.data.frame(matrix(sample(0:1, 1e4, replace = TRUE), ncol = 10))
library(microbenchmark)
MM <- function(df) {
var_names <- names(df)[-1]
res <- character(nrow(df))
for (i in 1:nrow(df)){
non_zero_names <- var_names[df[i, -1] > 0]
res[i] <- paste(non_zero_names, collapse = '-')
}
res
}
ZX <- function(df) {
res <-
apply(df[,2:ncol(df)]>0, 1,
function(i)paste(colnames(df[, 2:ncol(df)])[i], collapse = "-"))
res[res == ""] <- NA
res
}
DA <- function(df) {
tmp <- df[-1] != 0
res <- rep(NA, nrow(tmp))
for(j in colnames(tmp)){
res[tmp[, j]] <- paste(res[tmp[, j]], j, sep = "-")
}
gsub("NA-", "", res, fixed = TRUE)
}
microbenchmark(MM(BigDF), ZX(BigDF), DA(BigDF))