在R中将特定列转置为行

4

我需要针对某些特定的行,把若干列的值转置成多行。

这是一个样本数据集:

# Sample data: two students with three questions each. The sub-score columns
# A, B and C are filled in only on each student's third question (12 and 22);
# on every other row they are NA.
df <- data.frame(
  student_id = rep(c(1, 2), each = 3),
  question_id = c(10, 11, 12, 20, 21, 22),
  score = c(2, 3, 4, 2, 1, 5),
  A = rep(c(NA, NA, 1), times = 2),
  B = c(NA, NA, 2, NA, NA, 1),
  C = c(NA, NA, 1, NA, NA, 3)
)

> df
  student_id question_id score  A  B  C
1          1          10     2 NA NA NA
2          1          11     3 NA NA NA
3          1          12     4  1  2  1
4          2          20     2 NA NA NA
5          2          21     1 NA NA NA
6          2          22     5  1  1  3

对于那些在ABC列中有值的行,我需要将这些值抓取到分数列中,并添加后缀以定义question_id。例如,第三行的分数是这三列(A、B、C)的总和。

我的期望输出如下。

> df
   student_id question_id score
1           1          10     2
2           1          11     3
3           1        12_A     1
4           1        12_B     2
5           1        12_C     1
6           2          20     2
7           2          21     1
8           2        22_A     1
9           2        22_B     1
10          2        22_C     3
4个回答

2
数据
# Sample data for this answer. The sub-score columns are named C, E and O and
# are filled in only on each student's third question (12 and 22).
df <- data.frame(
  student_id = rep(c(1, 2), each = 3),
  question_id = c(10, 11, 12, 20, 21, 22),
  score = c(2, 3, 4, 2, 1, 5),
  C = rep(c(NA, NA, 1), times = 2),
  E = c(NA, NA, 2, NA, NA, 1),
  O = c(NA, NA, 1, NA, NA, 3)
)

如何
library(tidyverse)

# Reshape C/E/O into long form, then decide row by row what to keep:
#   * a row that carries a sub-score gets the id "<question_id>_<column>" and
#     the sub-score becomes its score;
#   * a row without a sub-score keeps its original id and score.
# pivot_longer() produces three all-NA copies of every plain question; after
# the mutate() they are identical, so distinct() collapses them to one row.
# (Filtering on !is.na(value) instead would drop the plain questions entirely,
# and keeping `score` instead of `value` would repeat the summed score.)
df %>%
  pivot_longer(cols = c(C, E, O)) %>%
  mutate(
    question_id = if_else(
      is.na(value),
      as.character(question_id),
      str_c(question_id, name, sep = "_")
    ),
    score = coalesce(value, score)
  ) %>%
  distinct(student_id, question_id, score)
#> # A tibble: 10 x 3
#>    student_id question_id score
#>         <dbl> <chr>       <dbl>
#>  1          1 10              2
#>  2          1 11              3
#>  3          1 12_C            1
#>  4          1 12_E            2
#>  5          1 12_O            1
#>  6          2 20              2
#>  7          2 21              1
#>  8          2 22_C            1
#>  9          2 22_E            1
#> 10          2 22_O            3

2
library(dplyr)
library(tidyr)

# Go long over A/B/C. Where a sub-score exists, suffix the question id with
# the column name and use the sub-score as the score; otherwise keep the row
# as it was. The duplicated plain rows are then removed with distinct().
df %>%
  pivot_longer(cols = c(A, B, C)) %>%
  mutate(
    question_id = if_else(
      is.na(value),
      as.character(question_id),
      paste(question_id, name, sep = "_")
    ),
    score = coalesce(value, score)
  ) %>%
  select(-name, -value) %>%
  distinct()
#> # A tibble: 10 x 3
#>    student_id question_id score
#>         <dbl> <chr>       <dbl>
#>  1          1 10              2
#>  2          1 11              3
#>  3          1 12_A            1
#>  4          1 12_B            2
#>  5          1 12_C            1
#>  6          2 20              2
#>  7          2 21              1
#>  8          2 22_A            1
#>  9          2 22_B            1
#> 10          2 22_C            3

此文档由reprex包(v2.0.0)于2021-09-15创建。

数据

# Input used by the reprex above: sub-scores A, B and C exist only for
# questions 12 and 22.
df <- data.frame(
  student_id = rep(c(1, 2), each = 3),
  question_id = c(10, 11, 12, 20, 21, 22),
  score = c(2, 3, 4, 2, 1, 5),
  A = rep(c(NA, NA, 1), times = 2),
  B = c(NA, NA, 2, NA, NA, 1),
  C = c(NA, NA, 1, NA, NA, 3)
)


2

另一个选择是先把已求和的 `score`(即问题 12 和 22)替换为 NA。然后对 `score:C` 这几列使用 `pivot_longer`,并设置 `values_drop_na = TRUE` 以丢弃值为 NA 的行。接着把 `name` 列中取值为 "score" 的行转换为 NA。最后,用 `unite` 合并 `question_id` 和 `name` 两列。

library(dplyr)
library(tidyr)

df %>%
  # Blank out the summed score on rows that carry any sub-score, so that the
  # total disappears when NA values are dropped below.
  mutate(
    score = if_else(!is.na(A) | !is.na(B) | !is.na(C), NA_real_, score)
  ) %>%
  # Stack score, A, B and C; the NA cells (missing sub-scores and blanked
  # totals) are discarded.
  pivot_longer(score:C, values_drop_na = TRUE) %>%
  # Rows that came from `score` need no suffix: turn their name into NA so
  # that unite(na.rm = TRUE) leaves the question id untouched.
  mutate(name = na_if(name, "score")) %>%
  unite("question_id", c(question_id, name), na.rm = TRUE) %>%
  # Give the value column the name requested in the question.
  rename(score = value)
#> # A tibble: 10 x 3
#>    student_id question_id score
#>         <dbl> <chr>       <dbl>
#>  1          1 10              2
#>  2          1 11              3
#>  3          1 12_A            1
#>  4          1 12_B            2
#>  5          1 12_C            1
#>  6          2 20              2
#>  7          2 21              1
#>  8          2 22_A            1
#>  9          2 22_B            1
#> 10          2 22_C            3

1

我不确定这是否比在另一个答案中只添加 distinct() 更好,但我很好奇是否可以在不使用带有 NA 值的行进行枢轴操作的情况下完成此操作,因此这里是:

# Example input: per student, only the third question (12 and 22) has values
# in the sub-score columns A, B and C.
df <- data.frame(
  student_id = rep(c(1, 2), each = 3),
  question_id = c(10, 11, 12, 20, 21, 22),
  score = c(2, 3, 4, 2, 1, 5),
  A = rep(c(NA, NA, 1), times = 2),
  B = c(NA, NA, 2, NA, NA, 1),
  C = c(NA, NA, 1, NA, NA, 3)
)

library(dplyr, warn.conflicts = FALSE)
library(tidyr)

# Flag the rows where A, B and C are all present, then handle the flagged and
# unflagged rows as two separate groups so that only the flagged ones are
# pivoted. Groups are processed in key order (FALSE before TRUE), which is why
# the plain questions come first in the result.
df %>%
  mutate(make_long = !is.na(A) & !is.na(B) & !is.na(C)) %>%
  group_by(ml = make_long) %>%
  group_modify(function(rows, key) {
    if (!first(rows$make_long)) {
      # No sub-scores in this group: keep id and score, only convert the id
      # to character so both groups share the same column types.
      return(
        transmute(
          rows,
          student_id,
          question_id = as.character(question_id),
          score
        )
      )
    }
    # Sub-scores present: one output row per sub-score column.
    rows %>%
      pivot_longer(c(A, B, C)) %>%
      transmute(
        student_id,
        question_id = paste(question_id, name, sep = "_"),
        score = value
      )
  }) %>%
  ungroup() %>%
  select(-ml)
#> # A tibble: 10 × 3
#>    student_id question_id score
#>         <dbl> <chr>       <dbl>
#>  1          1 10              2
#>  2          1 11              3
#>  3          2 20              2
#>  4          2 21              1
#>  5          1 12_A            1
#>  6          1 12_B            2
#>  7          1 12_C            1
#>  8          2 22_A            1
#>  9          2 22_B            1
#> 10          2 22_C            3

此文档由reprex包(v2.0.1)于2021-09-15创建。


网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接