这里有一个保留列顺序的新函数。与 tidyr 原始的 spread 实现相比,只需要进行一处小改动(即 my_spread_ 中 col_id 的计算方式):
#' Spread a key-value pair across columns (order-preserving variant)
#'
#' Front end that accepts unquoted `key` and `value` column names, resolves
#' them to column names via tidyr's internal `col_name()`, and delegates the
#' reshaping to `my_spread_()`. The result is wrapped with `tbl_df()`.
#'
#' @param data A data frame.
#' @param key Unquoted name of the column whose values become column names.
#' @param value Unquoted name of the column whose values fill the cells.
#' @param fill,convert,drop,sep Forwarded unchanged to `my_spread_()`.
#' @return The output of `my_spread_()` passed through `tbl_df()`.
my_spread <- function(data, key, value, fill = NA, convert = FALSE, drop = TRUE,
                      sep = NULL) {
  # Capture the unevaluated column arguments and turn them into names.
  key_name <- tidyr:::col_name(substitute(key))
  value_name <- tidyr:::col_name(substitute(value))

  wide <- my_spread_(
    data, key_name, value_name,
    fill = fill, convert = convert, drop = drop, sep = sep
  )
  tbl_df(wide)
}
#' Standard-evaluation worker behind `my_spread()`
#'
#' Reshapes `data` from long to wide using tidyr's internal helpers. Unlike a
#' key-derived identifier, the column identifier here is simply the row
#' position (`seq_len(nrow(data))`), so output columns follow the order in
#' which rows appear in `data`.
#'
#' NOTE(review): because every input row receives its own column id, this
#' appears to assume each key value occurs only once in `data` -- confirm
#' before using it on data with repeated keys.
#'
#' @param data A data frame.
#' @param key_col Name (string) of the column supplying the new column names.
#' @param value_col Name (string) of the column supplying the cell values.
#' @param fill If not `NA`, missing cells in the result are replaced by it.
#' @param convert If `TRUE`, values are coerced to character and each output
#'   column is then passed through `type.convert(as.is = TRUE)`.
#' @param drop Forwarded to `tidyr:::split_labels()` and `tidyr:::id()`.
#' @param sep Forwarded to `tidyr:::col_names()` when building column names.
#' @return The row-label columns with the spread value columns appended, as
#'   produced by `tidyr:::append_df()`.
my_spread_ <- function(data, key_col, value_col, fill = NA, convert = FALSE,
                       drop = TRUE, sep = NULL) {
  col <- data[key_col]
  # Column ids are row positions rather than ids derived from the key values;
  # this is what keeps the output columns in input order.
  col_id <- seq_len(nrow(data))
  attr(col_id, "n") <- nrow(data)
  col_labels <- tidyr:::split_labels(col, col_id, drop = drop)

  # Every column that is neither key nor value identifies an output row.
  rows <- data[setdiff(names(data), c(key_col, value_col))]
  if (length(rows) == 0) {
    # No identifying columns: the result is a single row with no labels.
    row_id <- structure(1L, n = 1L)
    row_labels <- as.data.frame(matrix(nrow = 1, ncol = 0))
  } else {
    # Qualified explicitly: a bare `id` is not visible outside the tidyr
    # namespace unless some other attached package happens to export one.
    row_id <- tidyr:::id(rows, drop = drop)
    row_labels <- tidyr:::split_labels(rows, row_id, drop = drop)
    rownames(row_labels) <- NULL
  }

  # Combined (column, row) id locating each value in the output grid.
  overall <- tidyr:::id(list(col_id, row_id), drop = FALSE)
  n <- attr(overall, "n")

  if (anyDuplicated(overall)) {
    # Report the input row numbers that collide on the same output cell.
    groups <- split(seq_along(overall), overall)
    groups <- groups[vapply(groups, length, integer(1)) > 1]
    str <- vapply(
      groups,
      function(x) paste0("(", paste0(x, collapse = ", "), ")"),
      character(1)
    )
    stop("Duplicate identifiers for rows ", paste(str, collapse = ", "),
         call. = FALSE)
  }

  if (length(overall) < n) {
    # Fewer values than cells: map each cell to its source row (NA if none).
    overall <- match(seq_len(n), overall, nomatch = NA)
  } else {
    overall <- order(overall)
  }

  value <- data[[value_col]]
  ordered <- value[overall]
  if (!is.na(fill)) {
    ordered[is.na(ordered)] <- fill
  }
  if (convert && !is.character(ordered)) {
    # type.convert() below operates on character input.
    ordered <- as.character(ordered)
  }

  # Reshape the flat vector into a rows x columns matrix, then a data frame.
  dim(ordered) <- c(attr(row_id, "n"), attr(col_id, "n"))
  colnames(ordered) <- enc2utf8(tidyr:::col_names(col_labels, sep = sep))
  ordered <- tidyr:::as_data_frame_matrix(ordered)
  if (convert) {
    ordered[] <- lapply(ordered, type.convert, as.is = TRUE)
  }

  tidyr:::append_df(row_labels, ordered)
}
也可以直接在管道末尾加上 %>% .[gtools::mixedorder(names(.))]。此外,此处不需要使用 rowwise(),只需改为 mutate(rnd = sample(100))(sample 是向量化的)。 - David Arenburg
library(tidyverse)
data.frame(time = paste0("t_", 1:100)) %>% rowwise() %>% mutate(rnd = sample(1:100, size=1)) %>% mutate(time = factor(time, levels=paste0("t_", 1:100))) %>% spread(time, rnd) - Marco Sandri