在R中,如何根据列表元素对数据框的某些列进行降序排列,而其他列进行升序排列?

3
现在我有一个数据框和两个列表,每个列表包含一些数据框列名,我需要添加新的列,其中包含数据框中每列的排名。问题是我必须按照 Deslist1 列表中的列降序排列,并按与 Asclist1 匹配的列名升序排列。相应地,最终所需的输出结果如下:this 我尝试了使用 any()within() 但它们不起作用。我的问题不是如何添加包含排名的新列,而是如何基于列表元素进行排名,正如您可以看到 Asclist1 包含的元素不存在于 DF1 列中。
DF1 <- data.frame("name" = c("john", "adam", "leo", "lena", "Di"),
                 "sex" = c("m", "m", "m", "f", "f"),
                 "age" = c(99, 46, 23, 54, 23),
                 "grade" = c(96, 46, 63, 54, 23),
                 "income" = c(59, 36, 93, 34, 23),
                 "score" = c(99, 46, 23, 54, 23))
                 print(DF1)

Asclist1<-list("score","income","spending")
Asclist1
Deslist2<-list("age","grade")
Deslist2

更新——代码1

library(readr)
library(tidyr)
library(purrr)
library(rlang)
library(glue)
library(dplyr)
library(miscTools)
library(matrixStats)
library(shiny)
library(reshape2)
library(dplyr)


hotdrinks<-list("tea","green tea") 
juices<-list("orange","mango") 
energydrinks<-list("powerhorse","redbull") 
drinks<-list("hotdrinks"=hotdrinks,"juices"=juices,"energydrinks"=energydrinks) 

biscuits<-list("loacker","tuc") 
choc<-list("aftereight","lindt") 
gum<-list("trident","clortes") 
sweets<-list("gum"=gum,"biscuits"=biscuits,"choc"=choc)

all_products<-list("sweets"=sweets,"drinks"=drinks)
mt<-melt(all_products)
mt2<-mt%>%mutate("Price"=c(23,34,23,23,54,32,45,23,12,56,76,43),
                 "Quantity"=c(10,20,26,22,51,52,45,23,12,56,76,43),
                 "amount"=c(23,34,23,23,54,32,45,23,12,56,76,43))

t1<-mt2[,c(4,3,1,5,6,7)]
t1
colnames(t1)<-c("CAT","PN","SP","Quantity","Price","amount")

t2<-list(unique(t1$CAT))
t2
QL<-c("Quantity","Price")
QD<-c("Quantity","amount")
QS<-c("amount","Price")


all <- list("drinks"=drinks, "sweets"=sweets)

FCX<-data.frame("sbo"=c("w","q","a"),
                "Quantity_fcx"=c(3,2,5),
                "Price_fcx"=c(7,8,5),
                "amount_fcx"=c(4,7,3)
)
#DF1<-Y
DF1 <- t1
DF1
#print(DF1)
DFCXL<-list(colnames(DF1[-c(1:3)]))
DFCXL
DFCX1<-lapply(DFCXL, paste0, "_fcx")
DFCX1
DFCXM<-colMeans(FCX[,unlist(DFCX1)],na.rm = FALSE)
DFCXM
DFCXMd<-colMedians(data.matrix(FCX[,unlist(DFCX1)]),na.rm = FALSE )
DFCXMddf<-as.data.frame(t(DFCXMd))
DFCXMddf
DFCX1l<-as.list(DFCX1)
colnames(DFCXMddf)<-unlist(DFCX1l)
DFCXMddf
#median repeated tibble
rDFCXMddf<-DFCXMddf[rep(seq_len(nrow(DFCXMddf)), each = nrow(DF1)), ]
rDFCXMddf
DFCX<-data.frame(t(DFCXM))
DFL<-as.vector(colnames(DF1))
DFL
DFCX
#mean repeated tibble
rDFCX<-DFCX[rep(seq_len(nrow(DFCX)), each = nrow(DF1)), ]
#rDFCX
#ascending rank form smallest to largest where the smallest is the most competitive
Asclist1<-list("Quantity","Price")
#Asclist1
#descending rank form largest to smallest where the largest is the most competitive
Deslist2<-list("xyz","amount")
#Deslist2
#DF3 contains orginal dataframe with rank for each column descending & ascending 
DF3<-
  DF1 %>% mutate_if(grepl(paste(Deslist2, collapse = "|"), names(.)), list(rank=~rank(-.))) %>% 
  mutate_if(grepl(paste(Asclist1, collapse = "|"), names(.)), list(rank=~rank( .)))

DF3
#DF4 contains only determinants columns
DF4<-DF3%>%select(-one_of(DFL))
DF4
#DF5 contains all deterements with their ranks columns
DF5<-cbind(rDFCX,DF4)
DF5
#getting final rank for each column based on multiplying CX columns "weight" * normal rank to get weighted ranking
dynamic_mutate = function(DF5,  
                          col_names = gsub("(.*)_\\w+$", "\\1", names(DF5)), 
                          expression = "({x}_rank*{x}_fcx)",
                          prefix = "FINAL"){
  name_list = col_names %>% unique() %>% as.list()
  expr_list = name_list %>% lapply(function(x) parse_quosure(glue(expression))) %>% 
    setNames(paste(prefix, name_list, sep = "_")) 
  DF5 %>% mutate(!!!expr_list)}
DF6<-DF5 %>% dynamic_mutate()
#DF6
#getting mean for ranks 
DFL2<-as.vector(colnames(DF5))
DF7<-DF6%>%select(-one_of(DFL2))
#DF7
#final limit ranking 
DF8<-mutate(DF7,fnl_scr=rowMeans(DF7))
#DF8
#final rank 
Ranking<-rank(DF8$fnl_scr)
#Ranking
#final dataframe
DF9<-as_tibble(cbind(DF1,Ranking))
DF9

代码 2

library(readr)
library(tidyr)
library(purrr)
library(rlang)
library(glue)
library(dplyr)
library(miscTools)
library(matrixStats)
library(shiny)
library(reshape2)
library(dplyr)


hotdrinks<-list("tea","green tea") 
juices<-list("orange","mango") 
energydrinks<-list("powerhorse","redbull") 
drinks<-list("hotdrinks"=hotdrinks,"juices"=juices,"energydrinks"=energydrinks) 

biscuits<-list("loacker","tuc") 
choc<-list("aftereight","lindt") 
gum<-list("trident","clortes") 
sweets<-list("gum"=gum,"biscuits"=biscuits,"choc"=choc)

all_products<-list("sweets"=sweets,"drinks"=drinks)
mt<-melt(all_products)
mt2<-mt%>%mutate("Price"=c(23,34,23,23,54,32,45,23,12,56,76,43),
                 "Quantity"=c(10,20,26,22,51,52,45,23,12,56,76,43),
                 "amount"=c(23,34,23,23,54,32,45,23,12,56,76,43))

t1<-mt2[,c(4,3,1,5,6,7)]
t1
colnames(t1)<-c("CAT","PN","SP","Quantity","Price","amount")

t2<-list(unique(t1$CAT))
t2
QL<-c("Quantity","Price")
QD<-c("Quantity","amount")
QS<-c("amount","Price")


all <- list("drinks"=drinks, "sweets"=sweets)

FCX<-data.frame("sbo"=c("w","q","a"),
                "Quantity_fcx"=c(3,2,5),
                "Price_fcx"=c(7,8,5),
                "amount_fcx"=c(4,7,3)
)
#DF1<-Y
DF1 <- t1
DF1
#print(DF1)
DFCXL<-list(colnames(DF1[-c(1:3)]))
DFCXL
DFCX1<-lapply(DFCXL, paste0, "_fcx")
DFCX1
DFCXM<-colMeans(FCX[,unlist(DFCX1)],na.rm = FALSE)
DFCXM
DFCXMd<-colMedians(data.matrix(FCX[,unlist(DFCX1)]),na.rm = FALSE )
DFCXMddf<-as.data.frame(t(DFCXMd))
DFCXMddf
DFCX1l<-as.list(DFCX1)
colnames(DFCXMddf)<-unlist(DFCX1l)
DFCXMddf
#median repeated tibble
rDFCXMddf<-DFCXMddf[rep(seq_len(nrow(DFCXMddf)), each = nrow(DF1)), ]
rDFCXMddf
DFCX<-data.frame(t(DFCXM))
DFL<-as.vector(colnames(DF1))
DFL
DFCX
#mean repeated tibble
rDFCX<-DFCX[rep(seq_len(nrow(DFCX)), each = nrow(DF1)), ]
#rDFCX
#ascending rank form smallest to largest where the smallest is the most competitive
Asclist1<-list("Quantity","Price","amount")
#Asclist1
#descending rank form largest to smallest where the largest is the most competitive
Deslist2<-list("xyz")
#Deslist2
#DF3 contains orginal dataframe with rank for each column descending & ascending 
DF3<-
  DF1 %>% mutate_if(grepl(paste(Deslist2, collapse = "|"), names(.)), list(rank=~rank(-.))) %>% 
  mutate_if(grepl(paste(Asclist1, collapse = "|"), names(.)), list(rank=~rank( .)))

DF3
#DF4 contains only determinants columns
DF4<-DF3%>%select(-one_of(DFL))
DF4
#DF5 contains all deterements with their ranks columns
DF5<-cbind(rDFCX,DF4)
DF5
#getting final rank for each column based on multiplying CX columns "weight" * normal rank to get weighted ranking
dynamic_mutate = function(DF5,  
                          col_names = gsub("(.*)_\\w+$", "\\1", names(DF5)), 
                          expression = "({x}_rank*{x}_fcx)",
                          prefix = "FINAL"){
  name_list = col_names %>% unique() %>% as.list()
  expr_list = name_list %>% lapply(function(x) parse_quosure(glue(expression))) %>% 
    setNames(paste(prefix, name_list, sep = "_")) 
  DF5 %>% mutate(!!!expr_list)}
DF6<-DF5 %>% dynamic_mutate()
#DF6
#getting mean for ranks 
DFL2<-as.vector(colnames(DF5))
DF7<-DF6%>%select(-one_of(DFL2))
#DF7
#final limit ranking 
DF8<-mutate(DF7,fnl_scr=rowMeans(DF7))
#DF8
#final rank 
Ranking<-rank(DF8$fnl_scr)
#Ranking
#final dataframe
DF9<-as_tibble(cbind(DF1,Ranking))
DF9

问题具体在哪里? - akrun
2个回答

2
我们可以使用 mutate_ifgrepl 来应用 rank
library(dplyr)
DF1 %>% 
    mutate_if(grepl(paste(Asclist1, collapse = "|"), names(.)), list(rank=~rank( .))) %>% 
    mutate_if(grepl(paste(Deslist2, collapse = "|"), names(.)), list(rank=~rank(-.)))

  name sex age grade income score age_rank grade_rank income_rank score_rank
1 john   m  99    96     59    99      1.0          1           4        5.0
2 adam   m  46    46     36    46      3.0          4           3        3.0
3  leo   m  23    63     93    23      4.5          2           5        1.5
4 lena   f  54    54     34    54      2.0          3           2        4.0
5   Di   f  23    23     23    23      4.5          5           1        1.5

为什么不使用mutate_if?您之后就不必使用bind_cols了。DF1 %>% mutate_if(grepl(paste(Deslist2, collapse = "|"), names(.)), list(rank=~order(., decreasing = FALSE))) %>% mutate_if(grepl(paste(Asclist1, collapse = "|"), names(.)), list(rank=~order(., decreasing = TRUE))) - Edo
1
@Edo非常感谢你,我觉得我把问题搞复杂了。 - A. Suliman

2
另一种选择是使用map同时创建一个包含1和-1的列来实现此操作。
library(dplyr)
library(tidyr)
library(purrr)
library(stringr)
tibble(col1 = list(Asclist1, Deslist2), col2 = c(1, -1)) %>% 
    unnest_longer(col1) %>% 
    group_split(col2) %>%
    map_dfc(~ DF1 %>% 
              mutate(tmp = first(.x$col2)) %>% 
              select(one_of(.x$col1), tmp)  %>% 
              transmute_at(vars(-tmp), list(rank = ~rank(tmp * .)))) %>% 
   bind_cols(DF1, .)
# name sex age grade income score age_rank grade_rank income_rank score_rank
#1 john   m  99    96     59    99      1.0          1           4        5.0
#2 adam   m  46    46     36    46      3.0          4           3        3.0
#3  leo   m  23    63     93    23      4.5          2           5        1.5
#4 lena   f  54    54     34    54      2.0          3           2        4.0
#5   Di   f  23    23     23    23      4.5          5           1        1.5
#Warning message:
#Unknown columns: `spending` 

它还会将未知的列作为警告通知

更新

如果只有一个列使用了transmute_at,它不会将列名添加到list中作为后缀。要解决这个问题,我们可以使用rename_if创建一个函数。

f1 <- function(dat) {
     nm1 <- setdiff(names(dat), "tmp")   
     n1 <- length(nm1)
    

      dat %>%
          transmute_at(vars(-tmp), list(rank = ~rank(tmp * .))) %>%
          rename_if(rep(n1 == 1, n1), ~ str_c(nm1, "_", .))       
                               
    }
            

tibble(col1 = list(Asclist1, Deslist2), col2 = c(1, -1)) %>% 
     unnest_longer(col1) %>% 
     group_split(col2) %>%
     map_dfc(~ DF1 %>% 
               mutate(tmp = first(.x$col2)) %>% 
               select(one_of(.x$col1), tmp)  %>% 
               f1(.)) %>% 
    bind_cols(DF1, .)
#  CAT           PN         SP Quantity Price amount amount_rank Quantity_rank Price_rank
#    1  sweets          gum    trident       23    10     23         9.5           3.5          1
#    2  sweets          gum    clortes       34    20     34         6.0           7.0          3
#    3  sweets     biscuits    loacker       23    26     23         9.5           3.5          6
#    4  sweets     biscuits        tuc       23    22     23         9.5           3.5          4
#    5  sweets         choc aftereight       54    51     54         3.0          10.0          9
#    6  sweets         choc      lindt       32    52     32         7.0           6.0         10
#    7  drinks    hotdrinks        tea       45    45     45         4.0           9.0          8
#    8  drinks    hotdrinks  green tea       23    23     23         9.5           3.5          5
#    9  drinks       juices     orange       12    12     12        12.0           1.0          2
#    10 drinks       juices      mango       56    56     56         2.0          11.0         11
#    11 drinks energydrinks powerhorse       76    76     76         1.0          12.0         12
#    12 drinks energydrinks    redbull       43    43     43         5.0           8.0          7

我尝试了,但是出现了新的错误,很抱歉打扰你。 - John Smith
@JohnSmith 我在更新中避开了DF1,没有发现任何错误。 - akrun
请检查代码1、DF3,您会发现第四列应该被命名为amount_rank,但是您会发现它只是“rank”。 - John Smith
@JohnSmith 在code1中,我将DF3的第四列称为Quantity - akrun
让我们在聊天中继续这个讨论 - John Smith

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接