整洁宇宙:在数据框中将一个变量与所有其他变量交叉制表。

4

我想在数据框中用一个变量与所有其他变量制作交叉表。

library(tidyverse)
library(janitor)

humans <- starwars %>%
  filter(species == "Human")

humans %>%
  janitor::tabyl(gender, eye_color)



gender blue blue-gray brown dark hazel yellow
 female    3         0     5    0     1      0
   male    9         1    12    1     1      2

humans %>%
  dplyr::select_if(is.character) %>%
  dplyr::select(-name, -gender) %>%
  purrr::map(.f = ~janitor::tabyl(dat = humans, gender, .x))

Error: Unknown columns `blond`, `none`, `brown`, `brown, grey`, `brown` and ... 
Call `rlang::last_error()` to see a backtrace
4个回答

6

tably 接受名称作为参数,而你传递了一个向量。

如果使用 imap,你将可以访问列的名称,然后将其转换为符号。由于 janitor 支持类引用(quasi-quotation),因此可以编写:

humans %>%
  select_if(is.character) %>%
  select(-name, -gender) %>%
  imap(.f = ~janitor::tabyl(dat = humans, !!sym(.y), gender))
#$`hair_color`
#     hair_color female male
#         auburn      1    0
#   auburn, grey      0    1
#  auburn, white      0    1
#          black      1    7
#          blond      0    3
#          brown      6    8
#    brown, grey      0    1
#           grey      0    1
#           none      0    3
#          white      1    1
# 
# $skin_color
#  skin_color female male
#        dark      0    4
#        fair      3   13

有趣的是,tabyl.data.frame调用了一个处理符号的未导出函数,因此通过直接调用它,我们可以跳过解引号并使用基本R。
cols <- setdiff(names(Filter(is.character,humans)), c("name","gender"))
lapply(cols, function(x) janitor:::tabyl_2way(humans, as.name(x), quote(gender)))
# [[1]]
#     hair_color female male
#         auburn      1    0
#   auburn, grey      0    1
#  auburn, white      0    1
#          black      1    7
#          blond      0    3
#          brown      6    8
#    brown, grey      0    1
#           grey      0    1
#           none      0    3
#          white      1    1
# 
# [[2]]
#  skin_color female male
#        dark      0    4

要让其与xtable协同工作,@akrun的建议同样适用:

humans %>%
  select_if(is.character) %>%
  select(-name, -gender) %>%
  imap(.f = ~tabyl(dat = humans, !!sym(.y), gender) %>% rename_at(1,~"x")) %>%
  xtableList

或者

cols <- setdiff(names(Filter(is.character,humans)), c("name","gender"))
l <- lapply(cols, function(x) {
  res <- janitor:::tabyl_2way(humans, as.name(x), quote(gender))
  names(res)[1] <- "x"
  res
})
xtableList(l)

感谢@Moody_Mudskipper的回答。然而,仍然无法使用xtable将输出呈现为.Rnw,如下所示: humans%>% select_if(is.character)%>% select(-name,-gender)%>% imap(.f = ~ janitor :: tabyl(dat = humans,!! sym(.y),gender))%>% xtableList - MYaseen208

3
假设我们需要一个带有“性别”字段的成对数据表。
humans %>%
  dplyr::select_if(is.character) %>%
  dplyr::select(-name, -gender) %>%
  imap(~ tibble(!! .y := .x) %>% 
             mutate(gender = humans[['gender']]) %>% 
             janitor::tabyl(!!rlang::sym(names(.)[1]), gender))
#$hair_color
#    hair_color female male
#        auburn      1    0
#  auburn, grey      0    1
# auburn, white      0    1
#         black      1    7
#         blond      0    3
#        brown      6    8
#  brown, grey      0    1
#         grey      0    1
#         none      0    3
#        white      1    1

#$skin_color
# skin_color female male
#       dark      0    4
#       fair      3   13
#      light      6    5
#...

更新

xtable::xtableList 要求 list 元素的名称相同。为了实现这一点,需要将第一列的名称在 list 元素中保持一致,并创建一个标识符列。

library(xtable)
humans %>%
 dplyr::select_if(is.character) %>%
 dplyr::select(-name, -gender) %>%
 imap(~ tibble(!! .y := .x) %>% 
         mutate(gender = humans[['gender']]) %>% 
         janitor::tabyl(!!rlang::sym(names(.)[1]), gender) %>%  
         mutate(colNname = .y) %>% 
         rename_at(1, ~ 'Variable')) %>%
 xtableList

感谢 @akrun 提供的非常有用的答案。但是,使用 xtable:: xtableList 无法在 .Rnw 中呈现输出。有任何想法吗? - MYaseen208
@MYaseen208 我认为问题出在那些不常见的名称上。您可以将这些名称变得普遍并创建一个新的列作为标识符,即 humans %>% dplyr::select_if(is.character) %>% dplyr::select(-name, -gender) %>% imap(~ tibble(!! .y := .x) %>% mutate(gender = humans[['gender']]) %>% janitor::tabyl(!!rlang::sym(names(.)[1]), gender) %>% mutate(colNname = .y) %>% rename_at(1, ~ 'Variable')) %>% xtableList - akrun

0

starwars中的列表列增加了复杂性,但这里有一个关于mtcars的示例:交叉表cyl与所有其他变量。

mtcars %>%
  tidyr::gather(var, value, -cyl) %>%
  janitor::tabyl(cyl, value, var, show_missing_levels = FALSE) %>%
  purrr::map2(.x = ., .y = names(.), ~ janitor::adorn_title(.x, col_name = .y))

返回一个交叉表列表。 cyl x am, cyl x carb 等等。:

$`am`
     am  
 cyl  0 1
   4  3 8
   6  4 3
   8 12 2

$carb
     carb          
 cyl    1 2 3 4 6 8
   4    5 6 0 0 0 0
   6    2 0 0 4 1 0
   8    0 4 3 6 0 1

...

如果您将进一步操作这些数据框,您可能会发现此标题选项更加友好:

purrr::map2(.x = ., .y = names(.), ~ janitor::adorn_title(.x, col_name = .y, placement = "combined"))

这将给你:

$vs
 cyl/vs  0  1
      4  1 10
      6  3  4
      8 14  0

0

仅使用 data.table(和一个 %>%):

library(data.table)
swDT <- data.table(starwars)
setkey(swDT, gender, hair_color)


swDT[species == "Human"
     ][CJ(gender, hair_color, unique =TRUE), .N, .EACHI] %>% 
  dcast(hair_color ~ gender, value.var = "N")


       hair_color female male
 1:        auburn      1    0
 2:  auburn, grey      0    1
 3: auburn, white      0    1
 4:         black      1    7
 5:         blond      0    3
 6:         brown      6    8
 7:   brown, grey      0    1
 8:          grey      0    1
 9:          none      0    3
10:         white      1    1

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接