我有一个R中的数据集,它是“Formal class enrichResult”的类别。我使用DOSE包中的
我的代码如下:
我正在编写代码来叠加药物形状在图表上,像这样:
cnetplot()
绘制了该数据集中的基因 - 该函数应该是基于ggplot图形的。它会以一个相互作用通路中的基因网络为图表展示:
![enter image description here](https://istack.dev59.com/eeBnr.webp)
kegg_organism = "hsa"
kegg_enrich <- enrichKEGG(gene = df$geneID,
organism = 'hsa',
pvalueCutoff = 0.05,
pAdjustMethod = 'fdr')
kegg <- setReadable(kegg_enrich, 'org.Hs.eg.db', 'ENTREZID')
kegg_genes <- kegg[,]
gene_of_interest <- dplyr::filter(kegg_genes, grepl('CALML6', geneID))
gene_of_interest <- enrichDF2enrichResult(gene_of_interest)
gene_list_scores <- df$Score
names(gene_list_scores) <- df$geneID
gene_list_scores <- na.omit(gene_list_scores)
gene_list_scores <- sort(gene_list_scores, decreasing = TRUE)
plot <- cnetplot(gene_of_interest, foldChange = gene_list_scores)
plot <- plot + scale_color_gradient2(name='Score', low='steelblue', high='firebrick')
我希望在这个图形上叠加表示药物类型类别的形状,以表示基因所对应的药物类型类别,但我在尝试时遇到了困难。
我的药物数据与enrichResult数据是分开的,我的药物数据如下:
drugs <- structure(list(Gene = c("ACE", "AQP1", "IRS2", "SMAD3",
"HLA-B"), Druggability = c("KINASE", "DRUGGABLE GENOME", "CLINICALLY ACTIONABLE",
"KINASE", "CLINICALLY ACTIONABLE")), row.names = c(NA, -5L), class = c("data.table",
"data.frame"))
#drugs data is 2 columns like:
Gene Druggability
TLN2 KINASE
PDGFC DRUGGABLE GENOME
我正在编写代码来叠加药物形状在图表上,像这样:
drugs <- fread('genes_dgidb_export.tsv')
drugs <- dplyr::select(drugs, Gene, Druggability)
drugs <- drugs[1:80,] #making data same size otherwise I get an Aesthetics number mismatch error
Druggability <- drugs$Druggability
names(Druggability) <- drugs$Gene
options(ggrepel.max.overlaps = Inf)
pother <- cnetplot(gene_of_interest,
categorySize ='pvalue',
foldChange = gene_list_scores,
)
pother <- pother + scale_color_gradient2(name='Score', low='steelblue', high='red') +
scale_size_continuous(range = c(2, 8))
#Overlaying shapes by drug:
library(ggraph)
pother + geom_node_point(aes(shape=Druggability)) +
scale_shape_manual(values=c(2, 5, 3, 4))
但是,这里叠加的形状不匹配/位置错误,并且将药物分配给了通路节点中的米色部分,而它们应该只被分配给红色基因节点。
除了现有函数之外,是否还有其他函数可用于在此图中叠加形状点以正确对应基因?
示例输入数据和用于获取enrichResult的软件包:
library(data.table)
library(clusterProfiler)
library(enrichplot)
library(tidyverse)
library(DOSE)
library(clusterProfiler)
library(multienrichjam)
library(RColorBrewer)
df <- structure(list(geneID = c(107986084L, 100874369L, 100506380L, 100288797L,
100137049L, 100130742L, 723788L, 643136L, 641649L, 497258L),
Score = c(0.809859097, 0.913054347, 0.423823357, 0.369738668,
0.798110485, 0.78013134, 0.764999211, 0.231925398, 0.317150593,
0.754656732)), row.names = c(NA, -10L), class = c("data.table",
"data.frame"))
kegg_genes <- structure(list(ID = c("hsa04924", "hsa04925", "hsa04022", "hsa04934",
"hsa05166", "hsa04218", "hsa05410", "hsa04024", "hsa05414", "hsa04933"
), Description = c("Renin secretion", "Aldosterone synthesis and secretion",
"cGMP-PKG signaling pathway", "Cushing syndrome", "Human T-cell leukemia virus 1 infection",
"Cellular senescence", "Hypertrophic cardiomyopathy", "cAMP signaling pathway",
"Dilated cardiomyopathy", "AGE-RAGE signaling pathway in diabetic complications"
), GeneRatio = c("22/381", "25/381", "31/381", "28/381", "33/381",
"27/381", "20/381", "32/381", "20/381", "20/381"), BgRatio = c("69/8093",
"98/8093", "167/8093", "155/8093", "222/8093", "156/8093", "90/8093",
"219/8093", "96/8093", "100/8093"), pvalue = c(2.67752556162864e-13,
1.75172505637096e-12, 3.06146322777988e-11, 5.54589140412457e-10,
2.99051127478473e-09, 3.0449722225371e-09, 4.26754375382907e-09,
8.08219152477885e-09, 1.39375994107875e-08, 2.90351315318027e-08
), p.adjust = c(8.19322821858365e-11, 2.68013933624758e-10, 3.12269249233547e-09,
4.2426069241553e-08, 1.55293583349392e-07, 1.55293583349392e-07,
1.86552626953099e-07, 3.09143825822791e-07, 4.73878379966775e-07,
8.88475024873162e-07), qvalue = c(4.70680809254719e-11, 1.53967412849448e-10,
1.79391003171663e-09, 2.43727332760211e-08, 8.92123440638062e-08,
8.92123440638062e-08, 1.0716989577285e-07, 1.77595524294483e-07,
2.72231473871522e-07, 5.10407049032742e-07), geneID = c("CALML6/CALML4/PLCB1/CLCA2/PPP3CA/PLCB3/PDE3A/PDE1A/NPR1/NPPA/GNAS/EDNRA/EDN1/ACE/CACNA1D/CACNA1C/AQP1/AGT/ADRB2/ADRB1/ADORA1/ADCY5",
"CALML6/CALML4/KCNK9/PRKD3/PLCB1/CREB5/PRKD1/PRKCE/PLCB3/NPR1/NPPA/LDLR/KCNK3/GNAS/CYP21A2/CYP11B2/CAMK2B/CACNA1D/CACNA1C/ATP2B1/ATF1/AGT/ADCY9/ADCY5/ADCY3",
"CALML6/CALML4/PLCB1/CREB5/IRS2/PDE5A/SLC8A1/MAP2K2/PRKCE/PPP3CA/PLCB3/PDE3A/NPR1/NPPB/NPPA/NFATC2/MEF2A/INSR/GTF2I/EDNRA/CACNA1D/CACNA1C/ATP2B1/AKT2/ADRB2/ADRB1/ADRA2B/ADORA1/ADCY9/ADCY5/ADCY3",
"WNT3A/LEF1/PDE11A/PLCB1/CREB5/TCF7L2/MAP2K2/PLCB3/PBX1/LDLR/KCNK3/GNAS/DVL1/CYP21A2/CYP17A1/CYP11B1/CDKN2C/CDKN1A/CDK6/CCNE1/CAMK2B/CACNA1D/CACNA1C/CCND1/AGT/ADCY9/ADCY5/ADCY3",
"TLN2/VAC14/CREB5/CDC16/KAT2B/PIK3R3/MAD1L1/XPO1/TLN1/TGFB2/TGFB1/TERT/STAT5B/RELA/PTEN/MAP2K2/PPP3CA/NFKBIA/NFATC2/SMAD3/IL6/HLA-DQB1/HLA-DQA1/HLA-B/CDKN2C/CDKN1A/CDC20/CCNE1/CCND1/AKT2/ADCY9/ADCY5/ADCY3",
"CALML6/CALML4/TRPM7/HIPK2/BTRC/PIK3R3/TGFB2/TGFB1/RRAS/RELA/PTEN/MAP2K2/PPP3CA/NFATC2/SMAD3/IL6/IGFBP3/HLA-B/FOXO3/CDKN1A/CDK6/CDC25A/CCNE1/CACNA1D/ZFP36L1/CCND1/AKT2",
"PRKAG2/CACNA2D2/TGFB2/TGFB1/SLC8A1/SGCD/PRKAG1/ITGB5/ITGA9/ITGA1/IL6/IGF1/EDN1/ACE/DAG1/CACNB4/CACNB2/CACNA1D/CACNA1C/AGT",
"CALML6/CALML4/PDE10A/CREB5/PIK3R3/RRAS/RELA/MAP2K2/PDE3A/NPR1/NPPA/NFKBIA/HTR1D/GRIN2B/GNAS/GIPR/GIP/EDNRA/EDN1/CHRM2/CAMK2B/CACNA1D/CACNA1C/ATP2B1/AKT2/ADRB2/ADRB1/ADORA1/ADCY9/ADCY5/ADCY3/ACOX1",
"CACNA2D2/TGFB2/TGFB1/SLC8A1/SGCD/ITGB5/ITGA9/ITGA1/IGF1/GNAS/DAG1/CACNB4/CACNB2/CACNA1D/CACNA1C/AGT/ADRB1/ADCY9/ADCY5/ADCY3",
"PLCD3/NOX4/PLCB1/PIK3R3/VEGFA/TGFB2/TGFB1/STAT5B/RELA/PRKCE/PLCB3/SMAD3/IL6/FN1/EDN1/COL4A4/BCL2/CCND1/AKT2/AGT"
), Count = c(22L, 25L, 31L, 28L, 33L, 27L, 20L, 32L, 20L, 20L
)), row.names = c("hsa04924", "hsa04925", "hsa04022", "hsa04934",
"hsa05166", "hsa04218", "hsa05410", "hsa04024", "hsa05414", "hsa04933"
), class = "data.frame")
使用上面示例数据运行后输出的图表(之前的图表是整个数据的):
我在红色下划线处用Paint标出了已分配可药性的基因,但由于某种原因,这些形状会连接到通路节点。
gene_of_interest
为空。 - zx8754geom_node_point
,尝试在绘图之前对数据进行子集化,仅包括在“可药性”中存在的基因。 - zx8754kegg_genes
作为样本,现在应该可以运行到绘图了(输出不同的通路但原理相同)。代码现在应该都能运行并得出结果,唯一的问题是在df中只给出了10个样本基因和它们的Scores
,所以绘图点将不会被标记为红色,但我知道这一步对我来说是有效的,所以我确保我的Druggability
数据中的基因应该在示例图上显示形状。 - DN1Druggability
基因列表仅由进入cnetplot的基因组成(没有具有Druggability
的基因在该列中被分配为“NA”),但没有任何未出现在图中的额外基因。 - DN1