使用 directlabels 和 ggplot2 避免标签重叠

14

我的图中出现了标签重叠的问题。我查看了类似的问题,但都没能帮我解决。下面提供一个可重现的例子。另外,我不明白为什么 directlabels 没有为每个标签选择相应的颜色。

# Example data in long format: a data.frame with 91 rows and three columns.
#   ano   - year of observation (13 distinct years per field, 1970-2009)
#   field - factor with 7 levels naming the field of study
#   value - numeric count plotted on the y axis
# NOTE(review): the factor labels "Humanites" and "Scial Sciences" look
# misspelled; they are left untouched here because they are shown verbatim
# as the direct labels on the plot.
mydf <- structure(list(ano = c(1970, 1975, 1980, 1985, 1990, 1995, 2000, 
2004, 2005, 2006, 2007, 2008, 2009, 1970, 1975, 1980, 1985, 1990, 
1995, 2000, 2004, 2005, 2006, 2007, 2008, 2009, 1970, 1975, 1980, 
1985, 1990, 1995, 2000, 2004, 2005, 2006, 2007, 2008, 2009, 1970, 
1975, 1980, 1985, 1990, 1995, 2000, 2004, 2005, 2006, 2007, 2008, 
2009, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2004, 2005, 2006, 
2007, 2008, 2009, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2004, 
2005, 2006, 2007, 2008, 2009, 1970, 1975, 1980, 1985, 1990, 1995, 
2000, 2004, 2005, 2006, 2007, 2008, 2009), field = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("Humanites", 
"Scial Sciences", "Natural Sciences", "Computer science and engineering", 
"education", "business", "Other fields"), class = "factor"), 
value = c(143549, 150736, 134139, 132891, 172485, 193404, 
214107, 254847, 261696, 265808, 274535, 278387, 280993, 193511, 
176674, 141581, 134468, 183762, 199895, 201681, 242506, 249619, 
254222, 259950, 262771, 269996, 81916, 91596, 78092, 76228, 
70209, 93443, 89772, 99370, 105899, 113077, 117200, 121009, 
125809, 52570, 52328, 90476, 139459, 104910, 102503, 117011, 
133655, 128886, 124024, 122084, 122408, 128318, 176307, 154437, 
108074, 87147, 110807, 105384, 105458, 105451, 107238, 105641, 
102582, 101708, 101265, 115396, 143171, 200521, 236700, 249165, 
226623, 263515, 311574, 318042, 327531, 335254, 347985, 358293, 
76481, 156804, 182257, 180930, 203200, 243540, 252627, 291861, 
313862, 333789, 351464, 367100, 385340)), .Names = c("ano", 
"field", "value"), row.names = c(NA, -91L), class = "data.frame")

require(ggplot2)
require(directlabels)
require(scales)

# Base plot: one line per field showing `value` against `ano`.
# `show.legend` replaces the deprecated `show_guide` argument.
p <- ggplot(mydf, aes(y = value, x = ano, group = field)) +
  geom_line(aes(group = field, colour = field), show.legend = FALSE) +
  ylab("Number of B.A. degrees awarded") +
  xlab("year") +
  theme_hyper() +
  theme(legend.key = element_rect(colour = "white")) +
  scale_y_continuous(labels = comma)

# Add a direct label at the last point of each line.
# The positioning method must be passed as `method =`: in current
# directlabels the second positional parameter of geom_dl() is `data`,
# so an unnamed list is not picked up as the method.
# Only `label` is mapped here (no colour aesthetic), exactly as in the
# question being asked.
p1 <- p +
  geom_dl(
    aes(label = field),
    method = list("last.points", cex = 1.3, hjust = 1)
  )
p1

函数theme_hyper的定义如下:

#' Blank-panel theme with bold text
#'
#' Removes the major and minor panel grid and the panel background, and sets
#' legend, title and axis text to bold in the requested font family and size.
#'
#' @param angle Rotation angle applied to the x-axis tick labels.
#' @param fonte Font family used for every text element set here.
#' @param size Font size used for every text element set here.
#' @return A ggplot2 theme object, to be added to a plot with `+`.
theme_hyper <- function(angle = 0, fonte = "arial", size = 14) {
  # Every text element shares family, size and bold face; only the
  # axis tick labels need extra arguments.
  bold_text <- function(...) {
    element_text(family = fonte, size = size, face = "bold", ...)
  }

  theme(
    panel.grid.major = element_blank(),
    panel.background = element_blank(),
    panel.grid.minor = element_blank(),
    legend.title = bold_text(),
    legend.text = bold_text(),
    plot.title = bold_text(),
    axis.text.x = bold_text(angle = angle, vjust = .5),
    axis.text.y = bold_text(hjust = .5),
    axis.title.x = bold_text(),
    axis.title.y = bold_text()
  )
}
3个回答

12

使用"last.bumpup"

添加colour=field来为标签添加颜色。

# Map colour as well as label so each label takes its line's colour, and
# name `method =` explicitly (the second positional parameter of geom_dl()
# is `data` in current directlabels).
geom_dl(
  aes(label = field, colour = field),
  method = list("last.bumpup", cex = 1.3, hjust = 1)
)

去掉颜色图例/指南。

# Keep the discrete colour scale but suppress its legend.
scale_colour_discrete(guide = "none")

最终代码:

# Complete plot: one coloured line per field, with a direct label in the
# matching colour at the end of each line.
p <- ggplot(mydf, aes(y = value, x = ano, group = field)) +
  # `show.legend` replaces the deprecated `show_guide` argument.
  geom_line(aes(group = field, colour = field), show.legend = FALSE) +
  ylab("Number of B.A. degrees awarded") +
  xlab("year") +
  theme_hyper() +
  theme(legend.key = element_rect(colour = "white")) +
  scale_y_continuous(labels = comma) +
  # `method =` must be named: the second positional parameter of geom_dl()
  # is `data` in current directlabels.
  geom_dl(
    aes(label = field, colour = field),
    method = list("last.bumpup", cex = 1.3, hjust = 1)
  ) +
  # Mapping colour on the labels would add a legend; switch it off.
  scale_colour_discrete(guide = "none")

10
你有没有考虑使用 ggrepel 来定位文本标签,避免它们互相重叠呢?

每年授予的学士学位数量

读取数据:

library(ggrepel)
#> Loading required package: ggplot2
library(scales)
library(readr)
#> 
#> Attaching package: 'readr'
#> The following object is masked from 'package:scales':
#> 
#>     col_factor
library(sitools)
library(tools)

# Inline copy of the data: columns `ano` (year), `field` and `value`.
# The columns in the literal below are separated by runs of spaces rather
# than tab characters, so read_tsv() would read every row as a single field.
# Collapse each run of two or more spaces into one tab before parsing; the
# single spaces inside field names (e.g. "Social Sciences") are untouched.
# I() marks the string as literal data rather than a file path.
mydf <- read_tsv(I(gsub(" {2,}", "\t", "ano   field   value
1970    Humanities  143549
1975    Humanities  150736
1980    Humanities  134139
1985    Humanities  132891
1990    Humanities  172485
1995    Humanities  193404
2000    Humanities  214107
2004    Humanities  254847
2005    Humanities  261696
2006    Humanities  265808
2007    Humanities  274535
2008    Humanities  278387
2009    Humanities  280993
1970    Social Sciences 193511
1975    Social Sciences 176674
1980    Social Sciences 141581
1985    Social Sciences 134468
1990    Social Sciences 183762
1995    Social Sciences 199895
2000    Social Sciences 201681
2004    Social Sciences 242506
2005    Social Sciences 249619
2006    Social Sciences 254222
2007    Social Sciences 259950
2008    Social Sciences 262771
2009    Social Sciences 269996
1970    Natural Sciences    81916
1975    Natural Sciences    91596
1980    Natural Sciences    78092
1985    Natural Sciences    76228
1990    Natural Sciences    70209
1995    Natural Sciences    93443
2000    Natural Sciences    89772
2004    Natural Sciences    99370
2005    Natural Sciences    105899
2006    Natural Sciences    113077
2007    Natural Sciences    117200
2008    Natural Sciences    121009
2009    Natural Sciences    125809
1970    Computer science and engineering    52570
1975    Computer science and engineering    52328
1980    Computer science and engineering    90476
1985    Computer science and engineering    139459
1990    Computer science and engineering    104910
1995    Computer science and engineering    102503
2000    Computer science and engineering    117011
2004    Computer science and engineering    133655
2005    Computer science and engineering    128886
2006    Computer science and engineering    124024
2007    Computer science and engineering    122084
2008    Computer science and engineering    122408
2009    Computer science and engineering    128318
1970    education   176307
1975    education   154437
1980    education   108074
1985    education   87147
1990    education   110807
1995    education   105384
2000    education   105458
2004    education   105451
2005    education   107238
2006    education   105641
2007    education   102582
2008    education   101708
2009    education   101265
1970    business    115396
1975    business    143171
1980    business    200521
1985    business    236700
1990    business    249165
1995    business    226623
2000    business    263515
2004    business    311574
2005    business    318042
2006    business    327531
2007    business    335254
2008    business    347985
2009    business    358293
1970    Other fields    76481
1975    Other fields    156804
1980    Other fields    182257
1985    Other fields    180930
1990    Other fields    203200
1995    Other fields    243540
2000    Other fields    252627
2004    Other fields    291861
2005    Other fields    313862
2006    Other fields    333789
2007    Other fields    351464
2008    Other fields    367100
2009    Other fields    385340")))

创建图表:
# Line chart of `value` per year with one line per field; each line is
# labelled at its last year instead of using a legend.
# Layer order matters: later layers are drawn on top of earlier ones.
p <- ggplot(mydf, aes(x = ano, y = value, group = field)) +
  geom_line(aes(group = field, color = field), size = 1.5, show.legend = FALSE) + 
  # Extend the x axis 50 years past the last data year to leave room for
  # the text labels on the right; axis breaks stop at 2010.
  scale_x_continuous(
    expand = c(0, 1),
    limits = c(min(mydf$ano), max(mydf$ano) + 50),
    breaks = seq(1970, 2010, by = 10)
  ) +
  scale_y_continuous(labels = sitools::f2si) +
  # White rectangle covering everything right of x = 2010.5 -- presumably to
  # hide the grid lines in the label area (it is drawn before the labels).
  annotate(
    geom = "rect", xmin = 2010.5, xmax = Inf, ymin = -Inf, ymax = Inf,
    fill = "white"
  ) +
  # Mark the final observation of every field with a point.
  geom_point(
    data = subset(mydf, ano == max(ano)),
    aes(color = field),
    size = 3,
    show.legend = FALSE
  ) +
  # One label per field at the final year: comma-formatted value followed by
  # the title-cased field name, nudged right of the point. Repulsion is
  # restricted to the y direction and no connecting segment is coloured in.
  geom_text_repel(
    data = subset(mydf, ano == max(ano)),
    aes(label = sprintf("%s %s", comma(value), toTitleCase(field)), color = field),
    size = 7,
    hjust = 0,
    direction = "y",
    nudge_x = 2,
    segment.color = NA,
    show.legend = FALSE
  ) +
  theme_minimal(base_size = 24) +
  theme(panel.grid.minor.x = element_blank()) +
  # Axis titles are dropped; the plot title carries the description.
  labs(
    x = NULL,
    y = NULL,
    title = "B.A. degrees awarded per year"
  )

# Write plot `p` to a PNG file, 12.5 wide by 6 high in ggsave()'s default units.
ggsave(
  filename = "stackoverflow-21004491.png",
  plot = p,
  height = 6,
  width = 12.5
)

由 reprex 包 (v0.2.1) 创建于 2018-12-29


2
也许可以尝试一下。
# Apply direct labels to the existing plot `p`, using smaller text
# placed at the last point of each line.
direct.label(p, method = list("last.points", cex = 0.7, hjust = 1))

或者在这里查看其他可能性。


网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接