使用R重新创建在Igor中制作的等高线图。

6
这个等高线图是用Igor程序制作的,它在大气化学和污染研究中很受欢迎:

enter image description here

我正在尝试用R重新创建它,为了一个想停止使用Igor的朋友,但我们还无法完全做到。这里是数据集(与使用Igor制作图表所使用的相同数据),以及我目前在使用R制作图表方面所做的:

# read in the data
dat <- read.csv("contour_plot_data.csv")

# focus on the untransformed values
dat <- dat[, 1:108]

# get Diameter value from col names
Diameter <- as.numeric(gsub("X", "", names(dat)[-1]))

# interpolate between the Diameter values for a smoother contour,
# a seperate interpolation for each row (date value)
# this takes a moment or two...

interp <-  seq(min(Diameter), max(Diameter), 0.2)
dat_interp <- data.frame(matrix(0, ncol =  length(interp), nrow = nrow(dat)))
for(i in 1:nrow(dat)){
  # get the values from row i
  vec <- unlist(dat[i, 2:108], use.names = FALSE)
  # compute loess interpolations
  lo <- loess(vec ~ Diameter)
  # predict interpolated values
  pr <- predict(lo, newdata = data.frame(Diameter = interp))
  # store in a data frame
  df <- data.frame(ct = unname(pr), Diameter = interp)
  # add as new row to new data frame
  dat_interp[i, ] <- df$ct
  print(i) # so we can see that it's working
}

# add date col and col names to the interpolated data
names(dat_interp) <- interp
dat_interp$date <- as.character(dat$Time)

# melt data into long format
# see http://www.cookbook-r.com/Manipulating_data/Converting_data_between_wide_and_long_format/
library(tidyr)
gather_cols <- interp
dat_long <- gather_(dat_interp, "Diameter", "dN_dlogDp", gather_cols)

# we want diameter as a numeric
dat_long$Diameter <- as.numeric(as.character(dat_long$Diameter))
# we want date as a date format
x <-  as.character(dat_long$date)
date_ <- as.Date(x, format = "%d/%m/%Y") 
time_ <- gsub(" ", "", substr(x, nchar(x) - 4, nchar(x)))
dat_long$date_time <- as.POSIXct(paste0(date_, " ", time_))

# The Igor plot seems to use log dN_dlogDp values, so let's get those
dat_long$dN_dlogDp_log <- log10(dat_long$dN_dlogDp)
dat_long$dN_dlogDp_log <- ifelse(dat_long$dN_dlogDp_log == "NaN", 0, dat_long$dN_dlogDp_log)

# get on with plottong...
library(ggplot2)   
library(scales)

labels_breaks <- seq(0, max(Diameter), 100)
mytheme <- theme_bw(base_size = 14) +  theme(aspect.ratio = 1/4) 
ggplot(dat_long, aes(y = Diameter, x = date_time,  fill=dN_dlogDp_log)) +
  geom_raster(interpolate = TRUE)  +
  scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = rainbow(7)) +
  scale_y_continuous(expand = c(0,0), breaks = labels_breaks ) +
  scale_x_datetime(expand = c(0,0), breaks = date_breaks("12 hours")) +
    ylab("Diameter (nm)") +
  xlab("Date and time") +
  mytheme

enter image description here

我的图表需要更精细的标签和刻度线等。然而,我的主要问题是为什么我的填充轮廓看起来与Igor的图表如此不同。比例似乎被反转了,插值看起来非常不同。
我该如何使我的图表看起来更像Igor的图表?
请注意,我提出的这些其他问题与重新创建此图表的任务密切相关:
- geom_raster插值对数比例尺 - 用于类别的2d密度图
在我提出这个问题之后,我一直在更新R代码的gist,将这些问题的答案详细介绍,并成功地复制了这些图表(gist中包含示例输出)。该gist在这里: https://gist.github.com/benmarwick/9a54cbd325149a8ff405。

更新:我现在已经制作了一个能够生成这些图的软件包:https://github.com/benmarwick/smps


我不会从那个网站下载任何东西。你能生成一些示例数据吗?反转颜色比例应该很容易,只需要使用 scale_fill_gradientn(colours = rev(rainbow(7))。要忠实地重新创建它,您需要了解该颜色比例的详细信息。过渡似乎非常锐利,所以我不确定是否值得详细重现。我建议使用 viridis 包中的 scale_fill_viridis - Axeman
你会从哪个网站下载数据?我在 Meta 上找到了一个建议的网站。 - Ben
@Ben 你好,我正在学习你的方法。如何下载你的数据?当我打开你的链接时,只有一个上传页面。 - YQ.Wang
@YQ.Wang 您好,是的,数据文件包含在我制作的软件包中,您可以按照这里的说明进行下载:https://github.com/benmarwick/smps - Ben
1个回答

7
我可以使用akima::interp进行插值,相比于loess,我能更接近Igor图。
# read in the data
dat <- read.csv("contour_plot_data.csv")

# focus on the untransformed values
dat <- dat[, 1:108]

# get Diameter value from col names
Diameter <- as.numeric(gsub("X", "", names(dat)[-1]))

# melt data into long format
# see http://www.cookbook-r.com/Manipulating_data/Converting_data_between_wide_and_long_format/
library(tidyr)
dat_long <- gather(dat, "Diameter", "dN_dlogDp", 2:108)

# we want diameter as a numeric
dat_long$Diameter <- as.numeric(gsub("X", "", dat_long$Diameter ))
# we want time as a date-formatted variable
x <-  as.character(dat_long$Time)
date_ <- as.Date(x, format = "%d/%m/%Y") 
time_ <- gsub(" ", "", substr(x, nchar(x) - 4, nchar(x)))
dat_long$Time <- as.POSIXct(paste0(date_, " ", time_))

# The Igor plot seems to use log dN_dlogDp values, so let's get those
dat_long$dN_dlogDp_log <- log10(dat_long$dN_dlogDp)
dat_long$dN_dlogDp_log <- ifelse(dat_long$dN_dlogDp_log == "NaN" |
                                   dat_long$dN_dlogDp_log == "-Inf"  , 0, dat_long$dN_dlogDp_log)


# interpolate between the  values for a smoother contour
# this takes a moment or two...

library(akima)
xo <- with(dat_long, seq(min(Time), max(Time), 120))
yo <- with(dat_long, seq(min(Diameter), max(Diameter), 0.5))
dat_interp <- with(dat_long, interp(Time, Diameter, dN_dlogDp_log, xo = xo, yo = yo) )

# get on with plotting...

# make into a data frame for ggplot
dat_interp_df <-  data.frame(matrix(data = dat_interp$z, ncol = length(dat_interp$y), nrow = length(dat_interp$x)))
names(dat_interp_df) <- dat_interp$y
dat_interp_df$Time <- as.POSIXct(dat_interp$x, origin = "1970-01-01")

# wide to long
dat_interp_df_long <- gather(dat_interp_df, "Diameter", "dN_dlogDp_log", 1:(ncol(dat_interp_df)-1))
dat_interp_df_long$Diameter <- as.numeric(as.character(dat_interp_df_long$Diameter))

# plot
library(ggplot2) 
library(scales)
y_labels_breaks <- seq(0, max(Diameter), 100)
ggplot(dat_interp_df_long, aes(y = Diameter, x = Time,  fill = dN_dlogDp_log)) +
  geom_raster(interpolate = TRUE)  +
  scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = rev(rainbow(50))) +
  scale_y_continuous(expand = c(0,0), breaks = y_labels_breaks ) +
  scale_x_datetime(expand = c(0,0), breaks = date_breaks("1 day"))

在这里输入图片描述

但是颜色映射仍然存在相当大的差异,Igor 绘图具有宽阔的带状区域和清晰的边界,而我的绘图则只有较少数量的颜色带,并且它们之间的边界模糊。所以我想我并没有使用 Igor 绘图中的插值方法。

更新:在尝试了一堆颜色梯度之后,我发现 colorRamps::blue2green2red 很匹配。我还花了一些功夫设计了一些漂亮的刻度标记:

# plot
library(ggplot2) 
library(scales) # for date_breaks
library(colorRamps) # for blue2green2red

# function for minor tick marks
every_nth <- function(x, nth, empty = TRUE, inverse = FALSE) 
{
  if (!inverse) {
    if(empty) {
      x[1:nth == 1] <- ""
      x
    } else {
      x[1:nth != 1]
    }
  } else {
    if(empty) {
      x[1:nth != 1] <- ""
      x
    } else {
      x[1:nth == 1]
    }
  }
}

# add tick marks every two hours
start_date <- min(dat_interp_df_long$Time)
end_date <-  max(dat_interp_df_long$Time)
date_breaks_2h <-  seq(from = start_date, to = end_date, by = "2 hours")
date_breaks_1_day <- seq(from = start_date, to = end_date, by = "1 day")
multiple <- length(date_breaks_2h) / length(date_breaks_1_day)

insert_minor <- function(major_labs, n_minor) {labs <- 
  c( sapply( major_labs, function(x) c(x, rep("", multiple) ) ) )
labs[1:(length(labs)-n_minor)]}


y_labels_breaks <- seq(0, max(Diameter), 100)
mytheme <- theme_bw(base_size = 14) +  theme(aspect.ratio = 1/5)
ggplot(dat_interp_df_long, aes(y = Diameter, x = Time,  fill = dN_dlogDp_log)) +
  geom_raster(interpolate = TRUE)  +
  scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = blue2green2red(100)) +
  scale_y_continuous(expand = c(0,0), 
                     labels = every_nth(y_labels_breaks, 2, inverse = TRUE),
                     breaks = y_labels_breaks) +
  scale_x_datetime(expand = c(0,0), 
                   breaks=date_breaks_2h, 
                   labels=insert_minor(format(date_breaks_1_day, "%d %b"),  
                                       length(date_breaks_1_day))) +
  xlab("Day and time") +
  ylab("Diameter (nm)") +
  mytheme

在这里输入图片描述

绿蓝渐变与伊戈尔图有些不同。我的绿色非常少!也许进一步尝试颜色栅格可以改善匹配问题。

要将y轴放在对数刻度上,需要额外的努力。我们必须使用geom_rect并调整每个矩形的大小以适应对数比例:

##################  y-axis with log scale ###########################
# get visually diminishing axis ticks
base_breaks <- function(n = 10){
  function(x) {
    axisTicks(log10(range(x, na.rm = TRUE)), log = TRUE, n = n)
  }
}

# Now with log axis, we need to replace the ymin and ymax
distance <- diff((unique(dat_interp_df_long$Diameter)))/2
upper <- (unique(dat_interp_df_long$Diameter)) + c(distance, distance[length(distance)])
lower <- (unique(dat_interp_df_long$Diameter)) - c(distance[1], distance) 

# Create xmin, xmax, ymin, ymax
dat_interp_df_long$xmin <- dat_interp_df_long$Time - 1000 # default of geom_raster is 0.5
dat_interp_df_long$xmax <- dat_interp_df_long$Time + 1000
idx <- rle(dat_interp_df_long$Diameter)$lengths[1]
dat_interp_df_long$ymin <- unlist(lapply(lower, function(i) rep(i, idx)))
dat_interp_df_long$ymax <- unlist(lapply(upper, function(i) rep(i, idx)))


ggplot(dat_interp_df_long, aes(y = Diameter, x = Time, 
                               xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, 
                               fill = dN_dlogDp_log)) +
  geom_rect()  +
  scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = blue2green2red(1000)) +
  scale_y_continuous(expand = c(0,0), 
                     trans = log_trans(), breaks = base_breaks()) +
  scale_x_datetime(expand = c(0,0), 
                   breaks=date_breaks_2h, 
                   labels=insert_minor(format(date_breaks_1_day, "%d %b"),  
                                       length(date_breaks_1_day))) +
  xlab("Day and time") +
  ylab("Diameter (nm)") +
  mytheme

在这里输入图片描述

更新 经过一些颜色梯度实验,我找到了一个相当接近的匹配:

# adjust the colour ramp to match the Igor plot (their colour ramp is pretty uneven! lots of red and blue, it seems.)
colfunc <- colorRampPalette(c( rep("red", 3), 
                               rep("yellow", 1), 
                               rep("green", 2), 
                               "cyan", 
                               rep("blue", 3), 
                               "purple"))
y_labels_breaks <- seq(0, max(Diameter), 100)
mytheme <- theme_bw(base_size = 14) +  theme(aspect.ratio = 1/5)
ggplot(dat_interp_df_long, aes(y = Diameter, x = Time,  fill = dN_dlogDp_log)) +
  geom_raster(interpolate = TRUE)  +
  scale_fill_gradientn(name=expression(log(dN/dlogD[p])), colours = rev(colfunc(100))) +
  scale_y_continuous(expand = c(0,0), 
                     labels = every_nth(y_labels_breaks, 2, inverse = TRUE),
                     breaks = y_labels_breaks) +
  scale_x_datetime(expand = c(0,0), 
                   breaks=date_breaks_2h, 
                   labels=insert_minor(format(date_breaks_1_day, "%d %b"),  
                                       length(date_breaks_1_day))) +
  xlab("Day and time") +
  ylab("Diameter (nm)") +
  mytheme

enter image description here

这篇文章中的代码也在https://gist.github.com/benmarwick/9a54cbd325149a8ff405上。

更新 我现在做了一个包,可以生成这些图:https://github.com/benmarwick/smps


网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接