在R中,我有两个数据框,我需要逐行计算元素匹配情况,最终得到一列,其中包含两个表的笛卡尔积长度和两行的ID。同时,这两个表都很大,行数不同,但列数相同。以下是我目前的代码,但运行多次时速度较慢。
library(data.table)
table_1<-data.table(matrix(c(1:24),nrow = 4))
table_2<-data.table(matrix(c(11:34),nrow = 4))
names(table_1)<-c("s1", "s2","s3","s4","s5","s6")
names(table_2)<-c("a1","a2","a3","a4","a5","a6")
table_1$ID<-seq.int(nrow(table_1))
table_2$ID_ap<-seq.int(nrow(table_2))
setcolorder(table_1, c("ID", "s1", "s2","s3","s4","s5","s6"))
setcolorder(table_2, c("ID_ap","a1","a2","a3","a4","a5","a6"))
CJ.table<-function(X,Y) setkey(X[,c(k=1,.SD)],k)[Y[,c(k=1,.SD)],allow.cartesian=TRUE][,k:=NULL]
join<-CJ.table(table_1,table_2)
R<-subset(join, select=c("ID_ap","ID"))
R$Ac<- (join$s1 == join$a1) + (join$s1 ==join$a2) + (join$s1 ==join$a3) + (join$s1 ==join$a4) + (join$s1 ==join$a5) + (join$s1 ==join$a6)+
(join$s2 == join$a1) + (join$s2 ==join$a2) + (join$s2 ==join$a3) + (join$s2 ==join$a4) + (join$s2 ==join$a5) + (join$s2 ==join$a6)+
(join$s3 == join$a1) + (join$s3 ==join$a2) + (join$s3 ==join$a3) + (join$s3 ==join$a4) + (join$s3 ==join$a5) + (join$s3 ==join$a6)+
(join$s4 == join$a1) + (join$s4 ==join$a2) + (join$s4 ==join$a3) + (join$s4 ==join$a4) + (join$s4 ==join$a5) + (join$s4 ==join$a6)+
(join$s5 == join$a1) + (join$s5 ==join$a2) + (join$s5 ==join$a3) + (join$s5 ==join$a4) + (join$s5 ==join$a5) + (join$s5 ==join$a6)+
(join$s6 == join$a1) + (join$s6 ==join$a2) + (join$s6 ==join$a3) + (join$s6 ==join$a4) + (join$s6 ==join$a5) + (join$s6 ==join$a6)
这提供了
R
ID_ap ID Ac
1: 1 1 0
2: 1 2 0
3: 1 3 4
4: 1 4 0
5: 2 1 0
6: 2 2 0
7: 2 3 0
8: 2 4 4
9: 3 1 3
10: 3 2 0
11: 3 3 0
12: 3 4 0
13: 4 1 0
14: 4 2 3
15: 4 3 0
16: 4 4 0