如何使用ggplot2和R将一系列坐标绘制为矩形而不重叠?

4

我最近在Stack Overflow上提出了一个问题,询问如何根据一系列坐标创建矩形,链接在这里

答案很完美,使我能够非常好地生成我的矩形:

# Example spans: one row per rectangle, described by its left and right edge
plot.data <- data.frame(
  start.points = c(5, 32),
  end.points = c(15, 51),
  text.label = c("Sample A", "Sample B")
)
# Horizontal label position: the midpoint of each span
plot.data[["text.position"]] <- with(plot.data, (start.points + end.points) / 2)

# Draw every span as a yellow rectangle of height 3 with its label centred
# inside, on a black-and-white theme without axis titles
library(ggplot2)
p <- ggplot(plot.data)
p +
  geom_rect(
    aes(xmin = start.points, xmax = end.points, ymin = 0, ymax = 3),
    fill = "yellow"
  ) +
  theme_bw() +
  geom_text(aes(x = text.position, y = 1.5, label = text.label)) +
  labs(x = NULL, y = NULL)

然而,我意识到我的数据经常有重叠的坐标,并且我希望能够可视化每个单独的跨度而不会淹没重叠的跨度。因此,我们使用以下数据集作为示例:2-3、5-10、7-10。

当前的代码将产生类似于以下内容:

    ----    -----------------             
----|  |----|               |-------------
    ----    -----------------             

然而,我希望以某种方式更改代码,使得重叠数据可以在新的轨道上可视化:

    ----    -----------------             
----|  |----|               |-------------
    ----    -----------------             

                -------------             
----------------|           |---------
                -------------             

抱歉,这里的ASCII艺术有点幼稚!

有人有建议吗?如果这样更容易,我可以独立生成多张图片然后将它们叠加在一起。谢谢!

1个回答

5
您可以手动计算非重叠区间序列,并相应地排列矩形。使用 intervals 包可以实现此功能:(请注意,我们假设您的点已按 start.points 排序 - 这很容易做到)。
library(intervals)
# Six sample spans, already ordered by their start point; several of them
# overlap their neighbours
plot.data <- data.frame(
  start.points = c(1, 2, 4, 6, 8, 11),
  end.points = c(3, 5, 9, 10, 12, 13),
  text.label = paste0("Sample ", LETTERS[1:6])
)
# Horizontal label position: the midpoint of each span
plot.data[["text.position"]] <- with(plot.data, (start.points + end.points) / 2)

# Build a first grouping of the rows of plot.data into "tracks" (vectors of row
# indices) stored in the list non_overlaps; rows within one track are chained
# so that each row starts after the previous one has stopped overlapping.
#
# NOTE(review): this relies on plot.data being sorted by start.points, and on
# Intervals() turning the concatenated (starts, ends) vector into one
# (start, end) pair per row -- confirm against the intervals package docs.
# overlap is expected to hold, for every interval, the indices of the intervals
# it overlaps (itself included).
overlap <- interval_overlap(tmp <- Intervals(c(plot.data$start.points, plot.data$end.points)), tmp)
# Find the next non-overlapping interval
# (one past the highest-indexed overlapping interval; may exceed the number of
# intervals, which the inner loop below treats as "end of chain")
nexts <- lapply(overlap, function(x) max(x) + 1)
non_overlaps <- list()
# Entries of nexts are set to NA once their interval has been placed on a
# track, so keep going while at least one non-NA entry remains.
while(sum(sapply(nexts, Negate(is.na))) > 0) {
  consec <- c()
  # Start a new track at the first interval that has not been placed yet
  i <- which(sapply(nexts, Negate(is.na)))[1]

  # Find a stretch of consecutive non-overlapping intervals
  # The chain stops when it runs past the last interval or reaches an interval
  # that already belongs to an earlier track.
  while(!is.na(i) && i <= length(nexts) && !any(sapply(non_overlaps, function(y) i %in% y))) {
    consec <- c(consec, i); i <- nexts[[i]]
  }

  non_overlaps <- append(non_overlaps, list(consec))
  # Wipe out that stretch since we're no longer looking at it
  nexts[consec] <- NA
}

# Squash remaining non-overlapping intervals -- the packing is not yet compact
# For each track i, search the later tracks for the first track j whose
# intervals do not overlap any interval of track i, and merge j into i.
# The loop condition is i < length(non_overlaps), so the last track is never
# used as a merge target (there is nothing after it to merge in).
i <- 1
while (i < length(non_overlaps)) {
  # ints1 first holds the row indices of track i, then is replaced by the
  # Intervals object built from those rows
  ints1 <- non_overlaps[[i]]
  ints1 <- Intervals(c(plot.data$start.points[ints1], plot.data$end.points[ints1]))
  j <- i + 1
  while(j <= length(non_overlaps)) {
    ints2 <-  Intervals(c(plot.data$start.points[non_overlaps[[j]]],
                  plot.data$end.points[non_overlaps[[j]]]))
    iv <- interval_overlap(ints1, ints2)
    # Flatten the per-interval overlap lists; an empty result means that no
    # interval of track i overlaps any interval of track j
    if (length(c(iv, recursive = TRUE)) == 0) break;
    j <- j + 1
  }

  if (j <= length(non_overlaps)) {
    # we can merge non_overlaps[[i]] and non_overlaps[[j]]
    # (i is deliberately not advanced: the enlarged track i is rebuilt and
    # re-checked against the remaining tracks on the next pass)
    non_overlaps[[i]] <- c(non_overlaps[[i]], non_overlaps[[j]])
    # Assigning NULL removes element j and shifts the later tracks down by one
    non_overlaps[[j]] <- NULL
  } else {
    # we are done non_overlaps[[i]] -- nothing else can be squashed!
    i <- i + 1
  }
}

现在我们拥有:
 # Each list element is one track: the row indices of plot.data that are drawn
 # at the same height. Every row index appears in exactly one track.
 print(non_overlaps)
 # [[1]]
 # [1] 1 3 6
 #
 # [[2]]
 # [1] 2 4
 #
 # [[3]]
 # [1] 5

我们可以将这些不重叠的区间分别在不同的高度上绘制图形。
 # Convert the track assignment in `non_overlaps` into vertical plot
 # coordinates and draw one labelled rectangle per row of `plot.data`.
 #
 # Inputs (defined earlier in the script):
 #   plot.data    - data frame with start.points, end.points, text.label and
 #                  text.position columns
 #   non_overlaps - list of integer vectors; element k holds the row indices
 #                  of plot.data that are drawn on track k
 # Results: the numeric vectors ymin, ymax and text.position.y (one value per
 # row of plot.data), the base ggplot object p, and the printed plot.
 n_tracks <- length(non_overlaps)
 members <- unlist(non_overlaps)

 # Every row must sit on exactly one track; otherwise its height would be
 # missing or ambiguous, so fail early with a clear message.
 if (anyDuplicated(members) > 0 ||
     !setequal(members, seq_len(nrow(plot.data)))) {
   stop("non_overlaps must contain every row index of plot.data exactly once",
        call. = FALSE)
 }

 # track[r] is the index of the track that holds row r of plot.data.
 track <- integer(nrow(plot.data))
 track[members] <- rep(seq_len(n_tracks), lengths(non_overlaps))

 # Track 1 is drawn at the top, so count rows upwards from the bottom track.
 rows_up <- n_tracks - track

 # Each rectangle fills 0.9 of its row (leaving a gap between tracks) and the
 # label sits at the vertical centre of the rectangle. Dividing by the number
 # of tracks and multiplying by 3 rescales everything into the height 0 to 3.
 ymin <- rows_up / n_tracks * 3
 ymax <- (rows_up + 0.9) / n_tracks * 3
 text.position.y <- (rows_up + 0.45) / n_tracks * 3

 library(ggplot2)
 p <- ggplot(plot.data)
 # ymin, ymax and text.position.y are not columns of plot.data; aes() picks
 # them up from the enclosing environment, row-aligned with plot.data.
 p +
   geom_rect(
     aes(xmin = start.points, xmax = end.points, ymin = ymin, ymax = ymax),
     fill = "yellow"
   ) +
   theme_bw() +
   geom_text(aes(x = text.position, y = text.position.y, label = text.label)) +
   labs(x = NULL, y = NULL)

最终结果:

在这里输入图片描述

更多示例:
# Seven spans of length 3 that start every 2 units, so each one overlaps its
# immediate neighbours
plot.data <- data.frame(
  start.points = seq(1, 13, by = 2),
  end.points = seq(4, 16, by = 2),
  text.label = paste0("Sample ", LETTERS[1:7])
)

enter image description here

# Four spans of length 3 placed one after another without any overlap
plot.data <- data.frame(
  start.points = c(1, 5, 9, 13),
  end.points = c(4, 8, 12, 16),
  text.label = paste0("Sample ", LETTERS[1:4])
)

enter image description here

# 26 reproducible random spans: start points drawn uniformly from [1, 15] and
# sorted, each span between 1 and 3 units long
set.seed(100)
tmp <- sort(runif(26, 1, 15))
plot.data <- data.frame(
  start.points = tmp,
  end.points = tmp + runif(26, 1, 3),
  text.label = paste0("Sample ", LETTERS)
)

在此输入图片描述

附言:抱歉代码写得比较潦草,我是匆忙完成的——我相信其中一些步骤可以用更巧妙的方式实现!


网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接