给定从 https://blogs.sas.com/content/graphicallyspeaking/files/2014/06/Swimmer_93.txt 获取并已转换为数据框的游泳者(swimmer)数据:
# Print a compact overview (dimensions, column names, leading values) of `df`.
dplyr::glimpse(df)
## 观察数:15
## 变量数:9
## $ subjectID "1", "2", "3", "3", "4", "4", "5", "5", "5",...
## $ stage 阶段1,阶段2,阶段3,阶段3,阶段4,...
## $ startTime 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ endTime 18.5, 17.0, 14.0, 14.0, 13.5, 13.5, 12.5, 12...
## $ isContinued TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, T...
## $ responseType "Complete response", "Complete response", "Partial response",...
## $ responseStartTime 6.5, 10.5, 2.5, 6.0, 7.0, 11.5, 3.5, 6.5, 10...
## $ responseEndTime 13.5, 17.0, 3.5, NA, 11.0, NA, 4.5, 8.5, NA,...
## $ Durable -0.25, -0.25, -0.25, -0.25, NA, NA, -0.25, -...
# Build `df.shapes`: one row per symbol drawn on top of the swimmer bars, with
# columns `subjectID`, `responseType` (factor) and `time`.
# The four kinds of marker are derived from `df` independently and stacked in
# the same order as before, so the resulting rows are unchanged.

# Response start markers; the label becomes "<responseType> start".
response_starts <- df %>%
  dplyr::filter(!is.na(responseStartTime)) %>%
  dplyr::transmute(subjectID,
                   responseType = paste(responseType, "start", sep = " "),
                   time = responseStartTime)

# Response end markers. When treatment is not continued and no explicit
# response end was recorded, the end of treatment is used instead.
response_ends <- df %>%
  dplyr::transmute(subjectID,
                   time = dplyr::if_else(
                     !isContinued & is.na(responseEndTime),
                     endTime, responseEndTime),
                   responseType = "Response end") %>%
  dplyr::filter(!is.na(time))

# Durable-responder markers, plotted at the position stored in `Durable`.
durable_marks <- df %>%
  dplyr::filter(!is.na(Durable)) %>%
  dplyr::transmute(subjectID,
                   time = Durable,
                   responseType = "Durable")

# Continued-treatment markers, shifted 0.25 past `endTime`
# (presumably so the arrow sits just beyond the end of the bar).
continued_marks <- df %>%
  dplyr::filter(isContinued) %>%
  dplyr::transmute(subjectID,
                   time = endTime + 0.25,
                   responseType = "Continued Treatment")

# Factor levels fix the legend order; the manual colour and shape scales in the
# plot are matched to the levels positionally.
responseLevels <- c("Complete response start",
                    "Partial response start",
                    "Response end", "Durable", "Continued Treatment")

df.shapes <- dplyr::bind_rows(response_starts, response_ends,
                              durable_marks, continued_marks) %>%
  dplyr::mutate(responseType = factor(responseType,
                                      levels = responseLevels)) %>%
  # `desc` is namespace-qualified so this works without attaching dplyr.
  dplyr::arrange(dplyr::desc(responseType))
设置Unicode变量。
# Unicode glyphs used as point shapes in the plot, keyed by a readable name.
unicode <- list(
  triangle = "\u25B2",
  circle = "\u25CF",
  square = "\u25A0",
  arrow = "\u2794"
)
df.shapes数据框应该长成这样。
# Inspect the assembled marker table (not the raw `df`).
df.shapes %>% dplyr::glimpse()
## 观察数:45
## 变量数:3
## $ subjectID "1", "3", "3", "4", "4", "5", "5", "5", "6", "6",...
## $ responseType Continued Treatment, Continued Treatment, Continued Treatment, Continued Treatment, ...
## $ time 18.75, 14.25, 14.25, 13.75, 13.75, 12.75, 12.75, ...
现在将数据框传递到 ggplot 中。
# Draw the swimmer plot: one horizontal bar per subject (length = endTime,
# fill = disease stage) with the response / treatment markers from `df.shapes`
# overlaid as Unicode glyphs.
df %>%
  dplyr::select(subjectID, endTime, stage) %>%
  # `df` can hold several rows per subject; keep one row per bar.
  dplyr::distinct() %>%
  # Order subjects numerically, descending, before the axes are flipped.
  dplyr::mutate(subjectID = forcats::fct_reorder(.f = subjectID,
                                                 .x = as.numeric(subjectID),
                                                 .desc = TRUE)) %>%
  ggplot(aes(subjectID, endTime)) +
  geom_col(aes(fill = factor(stage))) +
  geom_point(data = df.shapes, size = 5,
             aes(subjectID, time, colour = responseType,
                 shape = responseType)) +
  coord_flip() +
  # Values are matched positionally to the levels of `responseType`:
  # the two response-start levels get colours, the other three are black.
  scale_colour_manual(values = c(RColorBrewer::brewer.pal(3, "Set1")[1:2],
                                 rep("black", 3))) +
  scale_shape_manual(values = c(rep(unicode[["triangle"]], 2),
                                unicode[["circle"]], unicode[["square"]],
                                unicode[["arrow"]])) +
  scale_y_continuous(limits = c(-0.5, 20), breaks = 0:20) +
  # Sharing one title between colour and shape merges them into one legend.
  labs(fill = "Disease Stage", colour = "Symbol Key", shape = "Symbol Key",
       x = "Subject ID ", y = "Months since diagnosis",
       title = "Swimmer Plot",
       caption = paste("Durable defined as subject with six months",
                       "or more of confirmed response", sep = " ")) +
  theme(plot.title = element_text(hjust = 0.5),
        plot.caption = element_text(size = 7, hjust = 0))
![enter image description here](https://istack.dev59.com/9j0XA.webp)
完整描述可以在这里找到:http://rpubs.com/alexiswl/swimmer