The most fundamental function in the `runner` package is `runner()`. With
`runner::runner()` one can apply any R function on running windows. This
tutorial presents a set of examples explaining how to tackle some tasks.
Some of the examples refer to the original question on Stack Overflow.
library(runner)
# Example: number of distinct values inside a 7-day running window.
# `x` holds 20 random letters; `date` is an increasing, unequally spaced
# sequence of dates (gaps of 1 to 5 days) used as the window index.
x <- sample(letters, 20, replace = TRUE)
# unequally spaced time series
date <- Sys.Date() + cumsum(sample(1:5, 20, replace = TRUE))

runner(
  x,
  k = "7 days", # window size given as a time span, matched against `idx`
  idx = date,
  f = function(x) length(unique(x))
)
library(runner)
# Example: 5%-trimmed mean inside a one-week running window.
# `x` is a random walk; `date` is an increasing, unequally spaced sequence of
# dates (gaps of 1 to 5 days) used as the window index.
x <- cumsum(rnorm(20))
# unequally spaced time series
date <- Sys.Date() + cumsum(sample(1:5, 20, replace = TRUE))

runner(
  x,
  k = "week",
  idx = date,
  f = function(x) mean(x, trim = 0.05)
)
library(runner)
# sample data
# `x` is a random walk and `y` depends linearly on it (slope 3) plus noise.
x <- cumsum(rnorm(20))
data <- data.frame(
  # unequally spaced time series
  date = Sys.Date() + cumsum(sample(1:3, 20, replace = TRUE)),
  y = 3 * x + rnorm(20),
  # Store the predictor that actually generated `y`; drawing a fresh random
  # walk here would leave the `x` column unrelated to `y`.
  x = x
)
# solution
# For every row, fit `lm(y ~ x)` on a two-week window of `data` that is lagged
# by one day relative to the row's date, and keep the fitted value of the last
# row of that window.
data$pred <- runner(
  data,
  lag = "1 days",
  k = "2 weeks",
  idx = data$date,
  f = function(window) {
    fitted_values <- predict(lm(y ~ x, data = window))
    # fitted value for the most recent observation in the window
    fitted_values[nrow(window)]
  }
)
# Draw `y` against `date` as a red line, then overlay the `pred` column
# in blue on the same axes.
plot(data$date, data$y, type = "l", col = "red")
lines(data$date, data$pred, col = "blue")
library(runner)
library(dplyr)
# Sample data: two users (27 and 11), each observed on the same seven
# unequally spaced dates, with a normally distributed `value`
# (mean 15, sd 5, rounded to one decimal). The seed makes `value` reproducible.
set.seed(3737)
df <- data.frame(
  user_id = c(rep(27, 7), rep(11, 7)),
  date = as.Date(rep(c(
    "2016-01-01", "2016-01-03", "2016-01-05", "2016-01-07",
    "2016-01-10", "2016-01-14", "2016-01-16"
  ), 2)),
  value = round(rnorm(14, 15, 5), 1)
)
# Per user, sum `value` over 7-day and 14-day running windows indexed by
# `date` (windows are measured in days because `idx` is a Date column).
df %>%
  group_by(user_id) %>%
  mutate(
    v_minus7 = sum_run(value, k = 7, idx = date),
    v_minus14 = sum_run(value, k = 14, idx = date)
  )
dplyr
library(runner)
library(dplyr)
# Sample data: two users (27 and 11), each with a fruit `category` recorded on
# the same seven unequally spaced dates. `date` is read as text and converted
# with `as.Date()` where it is used.
df <- read.table(text = "  user_id       date category
       27 2016-01-01    apple
       27 2016-01-03    apple
       27 2016-01-05     pear
       27 2016-01-07     plum
       27 2016-01-10    apple
       27 2016-01-14     pear
       27 2016-01-16     plum
       11 2016-01-01    apple
       11 2016-01-03     pear
       11 2016-01-05     pear
       11 2016-01-07     pear
       11 2016-01-10    apple
       11 2016-01-14    apple
       11 2016-01-16    apple", header = TRUE)
# Per user, count the distinct categories seen inside 7-day and 14-day
# running windows indexed by the (converted) `date` column.
df %>%
  group_by(user_id) %>%
  mutate(
    distinct_7 = runner(
      category,
      k = "7 days",
      idx = as.Date(date),
      f = function(x) length(unique(x))
    ),
    distinct_14 = runner(
      category,
      k = "14 days",
      idx = as.Date(date),
      f = function(x) length(unique(x))
    )
  )
library(dplyr)
# Sample data for the grouped running regression: a random-walk `x`, a
# response `y` linear in `x` (slope 3) plus noise, increasing unequally spaced
# dates, and two groups of ten consecutive observations each.
x <- cumsum(rnorm(20))
y <- 3 * x + rnorm(20)
# unequally spaced time series
date <- Sys.Date() + cumsum(sample(1:3, 20, replace = TRUE))
group <- rep(c("a", "b"), each = 10)
# Per group, regress `x` on `y` inside a 5-day running window and keep both
# coefficients. `run_by()` sets the window index and size once, so each
# `runner()` call only needs the data (`.`) and the function to apply.
data.frame(date, group, y, x) %>%
  group_by(group) %>%
  run_by(idx = "date", k = "5 days") %>%
  mutate(
    # intercept of the windowed fit
    alpha_5 = runner(
      x = .,
      f = function(window) {
        coefficients(lm(x ~ y, data = window))[1]
      }
    ),
    # slope of the windowed fit: the second coefficient, not the first
    beta_5 = runner(
      x = .,
      f = function(window) {
        coefficients(lm(x ~ y, data = window))[2]
      }
    )
  )
grouped_df
library(runner)
library(dplyr)
# Simulated market data: one row per company per calendar day from 2014-01-01
# to 2019-12-31. Both companies share the same `market_return` series and get
# independent random `stock_return` values.
Date <- seq(
  from = as.Date("2014-01-01"),
  to = as.Date("2019-12-31"),
  by = "day"
)
# one draw per day; sized from `Date` so it follows any change of date range
market_return <- rnorm(length(Date))

AAPL <- data.frame(
  Company.name = "AAPL",
  Date = Date,
  market_return = market_return
)

MSFT <- data.frame(
  Company.name = "MSFT",
  Date = Date,
  market_return = market_return
)

df <- rbind(AAPL, MSFT)
df$stock_return <- rnorm(nrow(df))
# order by date so that each company's rows are in chronological order
df <- df[order(df$Date), ]
# Event table: 450 events per company, each on a distinct day sampled without
# replacement from 2015-01-01 to 2019-12-31.
df2 <- data.frame(
  Company.name2 = rep(c("AAPL", "MSFT"), each = 450),
  Event_date = sample(
    seq(
      as.Date("2015/01/01"),
      as.Date("2019/12/31"),
      by = "day"
    ),
    size = 900
  )
)
# For every event, regress `stock_return` on `market_return` using the
# company's rows from `df` in a 180-day window lagged by 5 days relative to
# the event date, and store the intercept and the slope of that fit.
# Inside each group `Company.name2[1]` is the group's company, which selects
# the matching rows (and their dates) from `df`; `at = Event_date` evaluates
# the window only at the event dates instead of at every row of `df`.
df2 %>%
  group_by(Company.name2) %>%
  mutate(
    intercept = runner(
      x = df[df$Company.name == Company.name2[1], ],
      k = "180 days",
      lag = "5 days",
      idx = df$Date[df$Company.name == Company.name2[1]],
      at = Event_date,
      f = function(x) {
        coef(
          lm(stock_return ~ market_return, data = x)
        )[1]
      }
    ),
    slope = runner(
      x = df[df$Company.name == Company.name2[1], ],
      k = "180 days",
      lag = "5 days",
      idx = df$Date[df$Company.name == Company.name2[1]],
      at = Event_date,
      f = function(x) {
        coef(
          lm(stock_return ~ market_return, data = x)
        )[2]
      }
    )
  )