如何在保持列名顺序的情况下扩展tidyr::spread()函数？

Question

如何在保持列名顺序的情况下扩展tidyr::spread()函数？

3

如何在使用spread函数时保持数字排名并扩展？

library(tidyverse)

data.frame(time = paste0("t_", 1:100)) %>% 
  rowwise() %>% 
  mutate(rnd = sample(1:100, size=1)) %>% 
  spread(time, rnd)

上述代码执行结果的列名为t_1, t_11, t_100, ....。我想按数字顺序获取列名(t_1, t_2, t_3, ...)。

- ogw

3

你可以在结尾处添加 %>% .[gtools::mixedorder(names(.))]。此外，此处不需要使用 rowwise()，只需改为 mutate(rnd = sample(100))（sample是向量化的）。 - David Arenburg

尝试使用 library(tidyverse) 库data.frame(time = paste0("t_", 1:100)) %>% rowwise() %>% mutate(rnd = sample(1:100, size=1)) %>% mutate(time = factor(time, levels=paste0("t_", 1:100))) %>% spread(time, rnd) - Marco Sandri

2个回答

0

这里有一个保留列顺序的新函数。只需要进行一处小改动（见注释）：

my_spread <- function (data, key, value, fill = NA, convert = FALSE, drop = TRUE, 
          sep = NULL) {
  key_col <- tidyr:::col_name(substitute(key))
  value_col <- tidyr:::col_name(substitute(value))
  tbl_df(my_spread_(data, key_col, value_col, fill = fill, convert = convert, 
                    drop = drop, sep = sep))
}

my_spread_ <- function (data, key_col, value_col, fill = NA, convert = FALSE, 
                       drop = TRUE, sep = NULL) {
  col <- data[key_col]
  #col_id <- tidyr:::id(col, drop = drop)                                   # Old line
  col_id <- seq_len(nrow(data))                                             # New line 1
  attr(col_id, 'n') <- nrow(data)                                           # New line 2
  col_labels <- tidyr:::split_labels(col, col_id, drop = drop)
  rows <- data[setdiff(names(data), c(key_col, value_col))]
  if (length(rows) == 0) {
    row_id <- structure(1L, n = 1L)
    row_labels <- as.data.frame(matrix(nrow = 1, ncol = 0))
  }
  else {
    row_id <- id(rows, drop = drop)
    row_labels <-  tidyr:::split_labels(rows, row_id, drop = drop)
    rownames(row_labels) <- NULL
  }
  overall <- tidyr:::id(list(col_id, row_id), drop = FALSE)
  n <- attr(overall, "n")
  if (anyDuplicated(overall)) {
    groups <- split(seq_along(overall), overall)
    groups <- groups[vapply(groups, length, integer(1)) > 
                       1]
    str <- vapply(
      groups, 
      function(x) paste0("(", paste0(x, collapse = ", "), ")"), character(1)
    )
    stop("Duplicate identifiers for rows ", paste(str, collapse = ", "), 
         call. = FALSE)
  }
  if (length(overall) < n) {
    overall <- match(seq_len(n), overall, nomatch = NA)
  }
  else {
    overall <- order(overall)
  }
  value <- data[[value_col]]
  ordered <- value[overall]
  if (!is.na(fill)) {
    ordered[is.na(ordered)] <- fill
  }
  if (convert && !is.character(ordered)) {
    ordered <- as.character(ordered)
  }
  dim(ordered) <- c(attr(row_id, "n"), attr(col_id, "n"))
  colnames(ordered) <- enc2utf8( tidyr:::col_names(col_labels, sep = sep))
  ordered <- tidyr:::as_data_frame_matrix(ordered)
  if (convert) {
    ordered[] <- lapply(ordered, type.convert, as.is = TRUE)
  }
  tidyr:::append_df(row_labels, ordered)
}

- Axeman

网页内容由stack overflow 提供, 点击上面的

可以查看英文原文，
原文链接

- Hadley Is My Hero · Accepted Answer

您可以尝试以下两种方法：

（1）将“时间”作为因子，并设置与您想要的顺序相匹配的级别：

data.frame(time = factor(paste0("t_", 1:100), levels = paste0("t_", 1:100))) %>% 
  rowwise() %>% 
  mutate(rnd = sample(1:100, size=1)) %>% 
  spread(time, rnd)

(2) 使用select语句强制顺序：

data.frame(time = paste0("t_", 1:100)) %>% 
  rowwise() %>% 
  mutate(rnd = sample(1:100, size=1)) %>% 
  spread(time, rnd) %>% 
  select(paste0("t_", 1:100))