library(tidyverse)
library(ggthemes)
library(ggHoriPlot)
Sports and leisure activities happen at different times during the day (sources: https://github.com/halhen/viz-pub/blob/master/sports-time-of-day/activity.tsv, https://eagereyes.org/blog/2017/joy-plots). Horizon plots can be used to condense this time series data into a highly informative and easily interpretable chart:
::data(sports_time)
utils
%>% ggplot() +
sports_time geom_horizon(aes(time/60, p), origin = 'min', horizonscale = 4) +
facet_wrap(~activity, ncol = 1, strip.position = 'right') +
scale_fill_hcl(palette = 'Peach', reverse = T) +
theme_few() +
theme(
panel.spacing.y=unit(0, "lines"),
strip.text.y = element_text(angle = 0),
legend.position = 'none',
axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank()
+
) scale_x_continuous(
name = 'Time',
breaks=seq(from = 3, to = 27, by = 3),
labels = function(x) {sprintf("%02d:00", as.integer(x %% 24))}) +
ggtitle('Peak time of day for sports and leisure')
COVID-19 cases were recorded in waves across the world (source: https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide). A simple horizon plot is useful for detecting how many waves there have been and when these waves have happened:
::data(COVID)
utils
%>%
COVID ggplot() +
geom_horizon(aes(date_mine,
origin = 'min', horizonscale = 4) +
y), scale_fill_hcl(palette = 'BluGrn', reverse = T) +
facet_grid(countriesAndTerritories~.) +
theme_few() +
theme(
panel.spacing.y=unit(0, "lines"),
strip.text.y = element_text(size = 7, angle = 0, hjust = 0),
legend.position = 'none',
axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank()
+
) scale_x_date(expand=c(0,0), date_breaks = "1 month", date_labels = "%b") +
ggtitle('Cumulative number for 14 days of COVID-19 cases per 100,000',
'in Asia, 2020') +
xlab('Date')
Horizon plots are also useful to plot genomic data, such as simple repeat content along the human genome (source: https://genome.ucsc.edu/cgi-bin/hgTrackUi?g=rmsk):
::data(rmsk)
utils
<- rmsk %>%
cutpoint_tab ungroup() %>%
mutate(
outlier = between(
p_repeat, quantile(p_repeat, 0.25, na.rm=T)-1.5*IQR(p_repeat, na.rm=T),
quantile(p_repeat, 0.75, na.rm=T)+1.5*IQR(p_repeat, na.rm=T))) %>%
filter(outlier)
<- sum(range(cutpoint_tab$p_repeat, na.rm = T))/2
ori <- seq(range(cutpoint_tab$p_repeat)[1],
sca range(cutpoint_tab$p_repeat)[2],
length.out = 6)
%>%
rmsk ggplot() +
geom_horizon(aes(x = bin, xend=bin_2, y = p_repeat, fill = ..Cutpoints..),
origin = ori, horizonscale = sca) +
facet_grid(genoName~., switch = 'y') +
theme_few() +
theme(
panel.spacing.y=unit(0, "lines"),
strip.text.y.left = element_text(size = 7, angle = 0, hjust = 1),
legend.position = c(0.85, 0.4),
axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank()
+
) scale_x_continuous(expand=c(0,0)) +
scale_fill_hcl() +
ggtitle('Simple repeat content along the human genome',
'in 100 kb windows') +
xlab('Position') +
guides(fill=guide_legend(title="% of repeats"))
Changes in the average temperature across several geographic locations can also be condensed in horizon plots. For example, the average temperature during 2000 for major US cities (source: https://www.kaggle.com/sudalairajkumar/daily-temperature-of-major-cities; order of the states:https://benschmidt.org/2014/06/05/optimally-ordering-geographical-entities-in-linear-space/):
::data(climate_US)
utils
%>%
climate_US ggplot() +
geom_horizon(aes(date_mine,
rm.outliers = T) +
AvgTemperature), scale_fill_hcl(palette = 'RdBu', reverse = T) +
facet_grid(State+City~.) +
theme_bw() +
theme(
panel.spacing.y=unit(0, "lines"),
strip.text.y = element_text(size = 4, angle = 0),
legend.position = 'none',
axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank(),panel.grid = element_blank()
+
) scale_x_date(expand=c(0,0), date_breaks = "1 month", date_labels = "%b") +
xlab('Date') +
ggtitle('Average daily temperature of major US cities',
'for the year 2000')
The average daily temperature can also be plotted by year. For example, in Copenhagen:
::data(climate_CPH)
utils
<- climate_CPH %>%
cutpoints mutate(
outlier = between(
AvgTemperature, quantile(AvgTemperature, 0.25, na.rm=T)-1.5*IQR(AvgTemperature, na.rm=T),
quantile(AvgTemperature, 0.75, na.rm=T)+1.5*IQR(AvgTemperature, na.rm=T))) %>%
filter(outlier)
<- sum(range(cutpoints$AvgTemperature))/2
ori <- seq(range(cutpoints$AvgTemperature)[1], range(cutpoints$AvgTemperature)[2], length.out = 7)[-4]
sca
%>% ggplot() +
climate_CPH geom_horizon(aes(date_mine,
AvgTemperature,fill = ..Cutpoints..),
origin = ori, horizonscale = sca) +
scale_fill_hcl(palette = 'RdBu', reverse = T) +
facet_grid(Year~.) +
theme_few() +
theme(
panel.spacing.y=unit(0, "lines"),
strip.text.y = element_text(size = 7, angle = 0, hjust = 0),
axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank()
+
) scale_x_date(expand=c(0,0),
date_breaks = "1 month",
date_labels = "%b") +
xlab('Date') +
ggtitle('Average daily temperature in Copenhagen',
'from 1995 to 2019')