library(swimplot)
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.0.3
A swimmer plot is a graphical tool used to display individual trajectories over time.
A swimmer plot is able to tell a full story using horizontal bars to represent each subject (or study unit), while lines, points, and arrows are utilized to display additional information.
The “swimmer” package has a variety of functions which add layers to a swimmer plot by implementing ggplot functions.
This vignette goes through examples to create swimmers plots, and demonstrates converting a dataframe to the required format.
ClinicalTrial.Arm
, ClinicalTrial.AE
, and ClinicalTrial.Response
knitr::kable(head(ClinicalTrial.Arm,10))
id | Arm | End_trt | Continued_treatment | Sex | Age |
---|---|---|---|---|---|
1 | Arm A | 3.26 | NA | F | >=65 |
2 | Arm A | 2.00 | NA | F | <65 |
2 | Off Treatment | 10.00 | NA | F | <65 |
2 | Arm A | 15.45 | NA | F | <65 |
3 | Arm B | 5.00 | NA | F | >=65 |
3 | Arm A | 14.84 | NA | F | >=65 |
4 | Arm B | 3.51 | NA | F | <65 |
5 | Arm B | 6.00 | NA | F | >=65 |
5 | Arm A | 7.44 | NA | F | >=65 |
6 | Arm B | 3.70 | NA | F | <65 |
knitr::kable(head(ClinicalTrial.AE,10))
id | time | event | Related | Sex | Age |
---|---|---|---|---|---|
33 | 2.20 | AE | Likely | M | >=65 |
14 | 1.00 | SAE | Possibly | F | <65 |
14 | 3.67 | Death | Not Likely | F | <65 |
3 | 14.58 | AE | Likely | F | >=65 |
29 | 5.44 | SAE | Possibly | M | >=65 |
21 | 0.50 | AE | Possibly | M | >=65 |
10 | 5.00 | SAE | Not Likely | F | <65 |
10 | 6.06 | SAE | Possibly | F | <65 |
2 | 2.00 | SAE | Not Likely | F | <65 |
3 | 14.84 | Death | Likely | F | >=65 |
knitr::kable(head(ClinicalTrial.Response,10))
id | Response_start | Response_end | Response | Continued_response | Sex | Age |
---|---|---|---|---|---|---|
10 | 3.74 | 6.02 | CR | NA | F | <65 |
11 | 1.48 | 2.21 | PR | NA | F | >=65 |
12 | 0.20 | 0.67 | CR | NA | F | <65 |
13 | 2.07 | 2.59 | CR | NA | F | >=65 |
15 | 0.35 | 4.05 | PR | NA | M | >=65 |
16 | 4.49 | 7.30 | CR | NA | M | <65 |
17 | 1.09 | 1.44 | PR | NA | M | >=65 |
18 | 0.55 | 1.32 | CR | NA | M | <65 |
19 | 0.00 | 0.96 | PR | 1 | M | >=65 |
2 | 3.36 | 8.77 | PR | NA | F | <65 |
swimmer_plot()
function creates the base of the swimmer plotgeom_bar()
arguments (eg. fill,width, alpha)swimmer_plot(df=ClinicalTrial.Arm,id='id',end='End_trt',fill='lightblue',width=.85)
The swimmer_plot()
function includes the option to have the bars change colours. Each section of the bars should be in a different row, where each row includes the time that section ends. By default the bars are plotted in increasing order, a column name can be used in the argument id_order to have the bars sorted first by a column, or string of IDs can be specified to have the bars in a specific order. Here the bars are ordered by the starting treatment, and follow up time.
arm_plot <- swimmer_plot(df=ClinicalTrial.Arm,id='id',end='End_trt',name_fill='Arm',
id_order='Arm',col="black",alpha=0.75,width=.8)
arm_plot
Plots can be stratified by any variables in the dataframe
swim_plot_stratify <-swimmer_plot(df=ClinicalTrial.Arm,id='id',end='End_trt',name_fill='Arm',
col="black",alpha=0.75,width=.8,base_size = 14,stratify= c('Age','Sex'))
swim_plot_stratify
swimmer_points()
functiongeom_point()
argumentsAE_plot <- arm_plot + swimmer_points(df_points=
ClinicalTrial.AE,id='id',time='time',name_shape =
'event',size=2.5,fill='white',col='black')
AE_plot
Multiple aesthetics can be mapped to different columns
arm_plot + swimmer_points(df_points=
ClinicalTrial.AE,id='id',time='time',name_shape =
'event',size=2.5,fill='white',name_col = 'Related')
swimmer_lines()
functiongeom_segment()
argumentsResponse_plot <- arm_plot +
swimmer_lines(df_lines=ClinicalTrial.Response,id='id',start =
'Response_start',end='Response_end',name_col='Response',size=1)
Response_plot
swimmer_points_from_lines()
adds points to a plot at the start and end of each lineswimmer_lines()
geom_point()
argumentsResponse_plot_with_points <- Response_plot+
swimmer_points_from_lines(df_lines=ClinicalTrial.Response,id='id',start =
'Response_start',end = 'Response_end', cont =
'Continued_response',name_col='Response',size=2)
Response_plot_with_points
swimmer_arrows()
functiongeom_segment()
argumentsThe example below uses arrows to demonstrate patients remaining on treatment after the end of follow up
AE_plot+
swimmer_arrows(df_arrows=ClinicalTrial.Arm,id='id',arrow_start='End_trt',
cont = 'Continued_treatment',name_col='Arm',type =
"open",cex=1)
Since none of the patients continue on “Off treatment” the arrow colours do not match the bars, this can be fixed by adding the layer scale_color_discrete(drop=FALSE)
, the option show.legend = FALSE can also be employed as the arrow legend is not necessary
AE_plot <- AE_plot+
swimmer_arrows(df_arrows=ClinicalTrial.Arm,id='id',arrow_start='End_trt',
cont = 'Continued_treatment',name_col='Arm',show.legend = FALSE,type =
"open",cex=1) + scale_color_discrete(drop=FALSE)
AE_plot
Another arrow example, here the arrows are also used to demonstrate a continued treatment
Response_plot_with_points <- Response_plot_with_points+
swimmer_arrows(df_arrows=ClinicalTrial.Response,id='id',arrow_start='Response_end',
cont = 'Continued_response',name_col='Response',show.legend = FALSE,type =
"open",cex=1)
Response_plot_with_points
AE_plot <- AE_plot +
scale_fill_manual(name="Treatment",values=c("Arm A" = "#e41a1c", "Arm B"="#377eb8","Off Treatment"='#4daf4a'))+
scale_color_manual(name="Treatment",values=c("Arm A"="#e41a1c", "Arm B" ="#377eb8","Off Treatment"='#4daf4a')) +
scale_shape_manual(name="Adverse event",values=c(AE=21,SAE=24,Death=17),breaks=c('AE','SAE','Death'))
AE_plot
Response_plot_with_points <- Response_plot_with_points +
scale_fill_manual(name="Treatment",values=c("Arm A" ="#e41a1c", "Arm B"="#377eb8","Off Treatment"='#4daf4a'))+
scale_color_manual(name="Response",values=c("grey20","grey80"))+
scale_shape_manual(name='',values=c(17,15),breaks=c('Response_start','Response_end'),
labels=c('Response start','Response end'))
Response_plot_with_points
Sometimes there will be points within the fill of the legend, this can be turned off with the layer guides()
Response_plot_with_points <- Response_plot_with_points+guides(fill = guide_legend(override.aes = list(shape = NA)))
Response_plot_with_points
A work around to add arrows to the legend is using the symbol for an arrow within annotate()
Response_plot_with_points <- Response_plot_with_points+
annotate("text", x=3.5, y=20.45, label="Continued response",size=3.25)+
annotate("text",x=2.5, y=20.25, label=sprintf('\u2192'),size=8.25)+
coord_flip(clip = 'off', ylim = c(0, 17))
Response_plot_with_points
The swimmer plot is a bar plot that has been turned on its side, so to modify the x axis it is actually required to change the y axis. This is also the case for adding axis labels
Response_plot_with_points + scale_y_continuous(name = "Time since enrollment (months)",breaks = seq(0,18,by=3))
Sometimes multiple layers of the swimmers plot will include the same aesthetic the plot below uses “fill” with both the points and with the bars. Using guides, and override.aes the legends can be manipulated to divide the layers in the legend
#Overriding legends to have colours for the events and no points in the lines
p1 <- arm_plot + swimmer_points(df_points=ClinicalTrial.AE,id='id',time='time',name_shape =
'event',size=2.5,col='black',name_fill = 'event') +
scale_shape_manual(values=c(21,22,23),breaks=c('AE','SAE','Death'))
p1 +scale_fill_manual(name="Treatment",values=c("AE"='grey90',"SAE" ="grey40","Death" =1,"Arm A"="#e41a1c", "Arm B" ="#377eb8","Off Treatment"="#4daf4a"))
This plot legend is difficult to follow
However, by removing the AE fills from the legend, and adding them to the points it is much easier to follow the plot
#First step is to correct the fill legend
p2 <- p1 + scale_fill_manual(name="Treatment",values=c("AE"='grey90',"SAE" ="grey40","Death" =1,"Arm A"="#e41a1c", "Arm B" ="#377eb8","Off Treatment"="#4daf4a"),breaks = c("Arm A","Arm B","Off Treatment"))
p2
##Then use guides to add the colours to the
#Setting the colours of the filled points to match the AE type
p2 + guides(shape = guide_legend(override.aes = list(fill=c('grey90','grey40',1))),fill = guide_legend(override.aes = list(shape = NA)))
There may be situations where you want to include gaps between sections of colours in a single bar, or have bars that do not start at time zero.
Gap_data <- data.frame(patient_ID=c('ID:3','ID:1','ID:1','ID:1','ID:2',
'ID:2','ID:2','ID:3','ID:3'),
start=c(10,1,2,7,2,10,14,5,0),
end=c(20,2,4,10,7,14,22,7,3),
treatment=c("A","B","C","A","A","C","A","B","C"))
knitr::kable(Gap_data)
patient_ID | start | end | treatment |
---|---|---|---|
ID:3 | 10 | 20 | A |
ID:1 | 1 | 2 | B |
ID:1 | 2 | 4 | C |
ID:1 | 7 | 10 | A |
ID:2 | 2 | 7 | A |
ID:2 | 10 | 14 | C |
ID:2 | 14 | 22 | A |
ID:3 | 5 | 7 | B |
ID:3 | 0 | 3 | C |
When a start and end are specified any spaces in between are filled in with a section of “NA”
swimmer_plot(df=Gap_data,id='patient_ID',name_fill="treatment",col=1,
id_order = c('ID:1','ID:2','ID:3')) +theme_bw()
Additional “NA” information can be added to the end of a bar when the colour variables is NA
Gap_data <- rbind(Gap_data,data.frame(patient_ID='ID:2',start=22,end=26,treatment=NA))
knitr::kable(Gap_data)
patient_ID | start | end | treatment |
---|---|---|---|
ID:3 | 10 | 20 | A |
ID:1 | 1 | 2 | B |
ID:1 | 2 | 4 | C |
ID:1 | 7 | 10 | A |
ID:2 | 2 | 7 | A |
ID:2 | 10 | 14 | C |
ID:2 | 14 | 22 | A |
ID:3 | 5 | 7 | B |
ID:3 | 0 | 3 | C |
ID:2 | 22 | 26 | NA |
scale_fill_manual can be used to have the NA sections filled in transparently with the argument na.value=NA
swimmer_plot(df=Gap_data,id='patient_ID',name_fill="treatment",col=1,
id_order = c('ID:1','ID:2','ID:3')) +
ggplot2::theme_bw()+ggplot2::scale_fill_manual(name="Treatment",
values=c("A"="#e41a1c", "B"="#377eb8","C"="#4daf4a",na.value=NA),breaks=c("A","B","C"))+
ggplot2::scale_y_continuous(breaks=c(0:26))
For all of the function to run, the data must be in the long format. This means that each event must be on a new row. An event would be a single point, a line segment, or an arrow. If a study unit has multiple events occur they must be recorded over multiple rows. Often times data is given in the long format (eg. One row per patient).
Here is an example data.frame in the long format.
wide_example <- structure(list(ID = c("ID:001", "ID:002", "ID:003"), Date.begin.Treatment = structure(c(14307,
14126, 15312), class = "Date"), AE = structure(c(16133, 14491,
NA), class = "Date"), SAE = structure(c(16316, NA, 16042), class = "Date"),
Death.date = structure(c(16499, NA, 17869), class = "Date"),
Response1 = c("SD", "SD", NA), Response1.Start = structure(c(14745,
14345, NA), class = "Date"), Response1.End = structure(c(15111,
14418, NA), class = "Date"), Response2 = c("CR", "PR", NA
), Response2.Start = structure(c(15768, 14674, NA), class = "Date"),
Response2.End = structure(c(16133, 14856, NA), class = "Date"),
Response3 = c(NA, "CR", NA), Response3.Start = structure(c(NA,
14856, NA), class = "Date"), Response3.End = structure(c(NA,
15587, NA), class = "Date"), Last.follow.up = structure(c(16499,
17048, 17869), class = "Date")), class = "data.frame", row.names = c(NA,
-3L))
ID | Date.begin.Treatment | AE | SAE | Death.date | Response1 | Response1.Start | Response1.End | Response2 | Response2.Start | Response2.End | Response3 | Response3.Start | Response3.End | Last.follow.up |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ID:001 | 2009-03-04 | 2014-03-04 | 2014-09-03 | 2015-03-05 | SD | 2010-05-16 | 2011-05-17 | CR | 2013-03-04 | 2014-03-04 | NA | NA | NA | 2015-03-05 |
ID:002 | 2008-09-04 | 2009-09-04 | NA | NA | SD | 2009-04-11 | 2009-06-23 | PR | 2010-03-06 | 2010-09-04 | CR | 2010-09-04 | 2012-09-04 | 2016-09-04 |
ID:003 | 2011-12-04 | NA | 2013-12-03 | 2018-12-04 | NA | NA | NA | NA | NA | NA | NA | NA | NA | 2018-12-04 |
All of the dates need to be converted to time. For each patient the Date.begin.Treatment is the starting point (Time 0)
date_cols <- c("Date.begin.Treatment","AE","SAE",'Death.date','Response1.Start', 'Response1.End','Response2.Start', 'Response2.End',
'Response3.Start' ,'Response3.End' ,'Last.follow.up') # Getting the columns with dates
wide_example[date_cols] <- lapply(wide_example[date_cols], as.numeric) # Converting to numbers
wide_example[date_cols] <- round((wide_example[date_cols]-wide_example$Date.begin.Treatment)/365.25,1) #Calcuating the time in years since the start of treatment
knitr::kable(wide_example)
ID | Date.begin.Treatment | AE | SAE | Death.date | Response1 | Response1.Start | Response1.End | Response2 | Response2.Start | Response2.End | Response3 | Response3.Start | Response3.End | Last.follow.up |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ID:001 | 0 | 5 | 5.5 | 6 | SD | 1.2 | 2.2 | CR | 4.0 | 5 | NA | NA | NA | 6 |
ID:002 | 0 | 1 | NA | NA | SD | 0.6 | 0.8 | PR | 1.5 | 2 | CR | 2 | 4 | 8 |
ID:003 | 0 | NA | 2.0 | 7 | NA | NA | NA | NA | NA | NA | NA | NA | NA | 7 |
The wide data can be used to create the bars of the swimmer plot
plot <- swimmer_plot(df=wide_example,id='ID',end='Last.follow.up',col='black',fill='grey')
plot
The gather_()
function from the tidyr
package can be used to change data from the wide to long format. When each event type has its only column with the exact time, the function only needs to be run once
library(tidyr)
data_time_points <- wide_example[,c('ID','AE','SAE','Death.date')]
points_long <- gather_(data=data_time_points,"point", "time",
gather_cols=c('AE','SAE','Death.date'),na.rm=T)
knitr::kable(points_long,align='c',row.names = F)
ID | point | time |
---|---|---|
ID:001 | AE | 5.0 |
ID:002 | AE | 1.0 |
ID:001 | SAE | 5.5 |
ID:003 | SAE | 2.0 |
ID:001 | Death.date | 6.0 |
ID:003 | Death.date | 7.0 |
The points can now be added to the plot
plot+ swimmer_points(df=points_long,id='ID',name_shape = 'point',size=8)
When there are separate columns for the data, and event type it is more complex. In this data the response start, end, and response types are all stored in different columns, but must be kept together per patient and event.
long_start <- gather_(data=wide_example[,c('ID','Response1.Start','Response2.Start','Response3.Start')],
"response_number", "start_time", gather_cols=c('Response1.Start','Response2.Start',
'Response3.Start'),na.rm=T)
long_start$response_number <- substring(long_start$response_number,1,9) # Will be used to match to the end and types
ID | response_number | start_time |
---|---|---|
ID:001 | Response1 | 1.2 |
ID:002 | Response1 | 0.6 |
ID:001 | Response2 | 4.0 |
ID:002 | Response2 | 1.5 |
ID:002 | Response3 | 2.0 |
Separate dataframes are created for the end time, and response, then they are all merged together by the id, and response_number
long_end <- gather_(data=wide_example[,c('ID','Response1.End','Response2.End','Response3.End')],
"response_number", "end_time", gather_cols=c('Response1.End','Response2.End',
'Response3.End'),na.rm=T)
long_end$response_number <- substring(long_end$response_number,1,9)
long_response <- gather_(data=wide_example[,c('ID','Response1','Response2','Response3')],
"response_number", "Response", gather_cols=c('Response1','Response2','Response3'),
na.rm=T)
long_response_full <- Reduce(function(...) merge(..., all=TRUE,by=c('ID','response_number')),
list(long_start, long_end, long_response))
ID | response_number | start_time | end_time | Response |
---|---|---|---|---|
ID:001 | Response1 | 1.2 | 2.2 | SD |
ID:001 | Response2 | 4.0 | 5.0 | CR |
ID:002 | Response1 | 0.6 | 0.8 | SD |
ID:002 | Response2 | 1.5 | 2.0 | PR |
ID:002 | Response3 | 2.0 | 4.0 | CR |
The lines can then be added to the plot
plot+
swimmer_points(df=points_long,id='ID',name_shape = 'point',size=8)+
swimmer_lines(df_lines = long_response_full,id='ID',start = 'start_time',end='end_time',
name_col='Response',size=25)