我有一些数据,想要制作箱线图,并在其上叠加抖动的散点。我的问题出在这些点上,所以下面只关注这一部分。
以下是数据:
> dput(test)
structure(list(var1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L), .Label = c("A", "B", "C", "D",
"E", "F", "G", "H", "I"), class = "factor"), var2 = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L), .Label = c("V1",
"V2", "V3", "V4", "V5", "V6", "V7"), class = "factor"), response1 = c(5L,
6L, 5L, 5L, 5L, 5L, 4L, 6L, 6L, 5L, 5L, 6L, 6L, 4L, 1L, 1L, NA,
1L, NA, NA, 1L, 1L, 1L, NA, 1L, NA, NA, 1L, 5L, 5L, 4L, 5L, 3L,
2L, 3L, 1L, 1L, NA, 1L, NA, NA, 1L, NA, NA, 2L, NA, 3L, 1L, NA,
NA, NA, 4L, NA, 4L, 5L, NA, NA, NA, 1L, NA, 1L, 1L, NA), response2 = c(2L,
2L, 2L, 2L, 2L, 2L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 5L, 5L, NA,
5L, NA, NA, 5L, 5L, 5L, NA, 5L, NA, NA, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, NA, 5L, NA, NA, 5L, NA, NA, 5L, NA, 5L, 5L, NA,
NA, NA, 5L, NA, 5L, 5L, NA, NA, NA, 5L, NA, 5L, 5L, NA), response3 = c(4L,
5L, 1L, 1L, 4L, 1L, 1L, 4L, 5L, 1L, 1L, 5L, NA, 1L, 4L, NA, NA,
NA, 3L, 2L, NA, 4L, NA, NA, NA, 3L, NA, NA, 4L, NA, 1L, NA, 3L,
NA, 2L, 4L, NA, NA, NA, NA, NA, NA, NA, 2L, 1L, 1L, NA, NA, 1L,
NA, 3L, 1L, NA, NA, NA, 1L, NA, 3L, 1L, NA, NA, NA, 1L)), .Names = c("var1",
"var2", "response1", "response2", "response3"), class = "data.frame", row.names = c(NA,
-63L))
我使用了 `reshape2` 来对我的数据进行融合(melt),以便于在绘图命令中进行分面/简化:
library(reshape2)
# Reshape to long format: one row per (var1, var2, response column) with the
# response name in `variable` and its value in `value`; rows whose value is NA
# are dropped. The argument name is spelled out in full (`id.vars`) instead of
# relying on partial matching, and TRUE is used instead of the reassignable T.
test_melted <- melt(test, id.vars = c("var1", "var2"), na.rm = TRUE)
下面是我绘制的图:
library(ggplot2)
# Plot `value` against `var1` for the full melted data, one panel per
# `variable`, with the axes flipped. Two point layers are added, in this
# order: geom_point() at the exact positions and geom_jitter() at positions
# jittered by up to 0.2 in each direction.
p <- ggplot(test_melted, aes(x = var1, y = value)) +
  geom_point() +
  geom_jitter(position = position_jitter(width = 0.2, height = 0.2)) +
  facet_grid(~variable) +
  coord_flip()
p
这将产生以下结果:
看起来很正常,但我注意到每个分面/因子水平中的点似乎都比应有的多。于是我把范围缩小到 `var1` 的一个水平。
# Keep only the rows of the melted data whose var1 level is "E",
# then report how many rows remain.
is_level_e <- test_melted$var1 == "E"
test_subset <- test_melted[is_level_e, ]
nrow(test_subset)
[1] 18
# Per-column summary of the subset: level counts for the factor columns
# and quantiles for `value`.
summary(test_subset)
var1 var2 variable value
E :18 V1:3 response1:7 Min. :1
A : 0 V2:2 response2:7 1st Qu.:3
B : 0 V3:3 response3:4 Median :5
C : 0 V4:2 Mean :4
D : 0 V5:3 3rd Qu.:5
F : 0 V6:2 Max. :5
(Other): 0 V7:3
因此,我们应该总共绘制18个点(7个用于 `response1`,7个用于 `response2`,4个用于 `response3`)。让我们试试:
# Same plot as for the full data, restricted to `test_subset`: `value`
# against `var1`, one panel per `variable`, axes flipped. Two point layers
# are added, in this order: geom_point() at the exact positions and
# geom_jitter() at positions jittered by up to 0.2 in each direction.
p <- ggplot(test_subset, aes(x = var1, y = value)) +
  geom_point() +
  geom_jitter(position = position_jitter(width = 0.2, height = 0.2)) +
  facet_grid(~variable) +
  coord_flip()
p
在 `response1`、`response2` 和 `response3` 这三个分面中,我分别数出了11个点、8个点和8个点。
这一定是我忽略的一些愚蠢的问题。我已经用过很多点图来进行分面处理,但从来没有发生过这种情况(或者从来没有注意到!)。
我尝试的事情:
- 删除 `coord_flip()`
- 加上 `test_subset <- droplevels(test_subset)`,以防空因子水平影响了某些内容
- 尝试使用 `facet_grid(~variable)` vs. `facet_grid(.~variable)` vs. `facet_grid(variable~)` vs. `facet_grid(variable~.)`
最后需要注意的是,根据是否进行分面处理,我得到的点数不同。如果进行分面处理,则得到 `11 + 8 + 8 = 27` 个点;如果删除 `facet_grid(~variable)`,则得到23个点。
感谢任何建议!
ggplot(test_melted, aes(x = var1, y = value, color = var2))
。 - agstudy