The most fundamental function in runner
package is runner
. With runner::runner
one can apply any R function on running windows. This tutorial presents set of examples explaining how to tackle some tasks. Some of the examples are referenced to original topic on stack-overflow.
library(runner)
x <- sample(letters, 20, replace = TRUE)
date <- Sys.Date() + cumsum(sample(1:5, 20, replace = TRUE)) # unequaly spaced time series
runner(
x,
k = "7 days",
idx = date,
f = function(x) length(unique(x))
)
## [1] 1 1 2 3 2 2 3 2 2 2 2 2 3 2 1 2 3 3 4 4
library(runner)
x <- cumsum(rnorm(20))
date <- Sys.Date() + cumsum(sample(1:5, 20, replace = TRUE)) # unequaly spaced time series
runner(
x,
k = "week",
idx = date,
f = function(x) mean(x, trim = 0.05)
)
## [1] -0.45570427 0.05141348 0.49139631 1.36488554 1.95825964 1.39570190
## [7] 1.13721199 0.55797114 0.49053289 0.40135966 -0.02990304 -1.12243140
## [13] -1.69157529 -1.76535337 -2.32295965 -1.98330961 -2.12996001 -2.28638815
## [19] -3.28535707 -2.97083821
library(runner)
x <- cumsum(rnorm(20))
y <- 3 * x + rnorm(20)
date <- Sys.Date() + cumsum(sample(1:3, 20, replace = TRUE)) # unequaly spaced time series
data <- data.frame(date, y, x)
data$pred <- runner(
data,
lag = "1 day",
k = "2 weeks",
idx = data$date,
f = function(data) {
predict(
lm(y ~ x, data = data)
)[nrow(data)]
}
)
plot(data$date, data$y, type = "l", col = "red")
lines(data$date, data$pred, col = "blue")
library(runner)
library(dplyr)
set.seed(3737)
df <- data.frame(
user_id = c(rep(27, 7), rep(11, 7)),
date = as.Date(rep(c('2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07',
'2016-01-10', '2016-01-14', '2016-01-16'), 2)),
value = round(rnorm(14, 15, 5), 1))
df %>%
group_by(user_id) %>%
mutate(
v_minus7 = sum_run(value, 7, idx = date),
v_minus14 = sum_run(value, 14, idx = date)
)
## # A tibble: 14 x 5
## # Groups: user_id [2]
## user_id date value v_minus7 v_minus14
## <dbl> <date> <dbl> <dbl> <dbl>
## 1 27 2016-01-01 15 15 15
## 2 27 2016-01-03 22.4 37.4 37.4
## 3 27 2016-01-05 13.3 50.7 50.7
## 4 27 2016-01-07 21.9 72.6 72.6
## 5 27 2016-01-10 20.6 55.8 93.2
## 6 27 2016-01-14 18.6 39.2 112.
## 7 27 2016-01-16 16.4 55.6 113.
## 8 11 2016-01-01 6.8 6.8 6.8
## 9 11 2016-01-03 21.3 28.1 28.1
## 10 11 2016-01-05 19.8 47.9 47.9
## 11 11 2016-01-07 22 69.9 69.9
## 12 11 2016-01-10 19.4 61.2 89.3
## 13 11 2016-01-14 17.5 36.9 107.
## 14 11 2016-01-16 19.3 56.2 119.
library(runner)
library(dplyr)
df <- read.table(text = " user_id date category
27 2016-01-01 apple
27 2016-01-03 apple
27 2016-01-05 pear
27 2016-01-07 plum
27 2016-01-10 apple
27 2016-01-14 pear
27 2016-01-16 plum
11 2016-01-01 apple
11 2016-01-03 pear
11 2016-01-05 pear
11 2016-01-07 pear
11 2016-01-10 apple
11 2016-01-14 apple
11 2016-01-16 apple", header = TRUE)
df %>%
group_by(user_id) %>%
mutate(
distinct_7 = runner(category,
k = "7 days",
idx = as.Date(date),
f = function(x) length(unique(x))),
distinct_14 = runner(category,
k = "14 days",
idx = as.Date(date),
f = function(x) length(unique(x)))
)
## # A tibble: 14 x 5
## # Groups: user_id [2]
## user_id date category distinct_7 distinct_14
## <int> <fct> <fct> <int> <int>
## 1 27 2016-01-01 apple 1 1
## 2 27 2016-01-03 apple 1 1
## 3 27 2016-01-05 pear 2 2
## 4 27 2016-01-07 plum 3 3
## 5 27 2016-01-10 apple 3 3
## 6 27 2016-01-14 pear 2 3
## 7 27 2016-01-16 plum 3 3
## 8 11 2016-01-01 apple 1 1
## 9 11 2016-01-03 pear 2 2
## 10 11 2016-01-05 pear 2 2
## 11 11 2016-01-07 pear 2 2
## 12 11 2016-01-10 apple 2 2
## 13 11 2016-01-14 apple 1 2
## 14 11 2016-01-16 apple 1 2
grouped_df
library(runner)
library(dplyr)
Date <- seq(from = as.Date("2014-01-01"),
to = as.Date("2019-12-31"),
by = 'day')
market_return <- c(rnorm(2191))
AAPL <- data.frame(
Company.name = "AAPL",
Date = Date,
market_return = market_return
)
MSFT <- data.frame(
Company.name = "MSFT",
Date = Date,
market_return = market_return
)
df <- rbind(AAPL, MSFT)
df$stock_return <- c(rnorm(4382))
df <- df[order(df$Date),]
df2 <- data.frame(
Company.name2 = c(replicate(450, "AAPL"), replicate(450, "MSFT")),
Event_date = sample(
seq(as.Date('2015/01/01'),
as.Date('2019/12/31'),
by = "day"),
size = 900)
)
df2 %>%
group_by(Company.name2) %>%
mutate(
intercept = runner(
x = df[df$Company.name == Company.name2[1], ],
k = "180 days",
lag = "5 days",
idx = df$Date[df$Company.name == Company.name2[1]],
at = Event_date,
f = function(x) {
coef(
lm(stock_return ~ market_return, data = x)
)[1]
}
),
slope = runner(
x = df[df$Company.name == Company.name2[1], ],
k = "180 days",
lag = "5 days",
idx = df$Date[df$Company.name == Company.name2[1]],
at = Event_date,
f = function(x) {
coef(
lm(stock_return ~ market_return, data = x)
)[2]
}
)
)
## # A tibble: 900 x 4
## # Groups: Company.name2 [2]
## Company.name2 Event_date intercept slope
## <fct> <date> <dbl> <dbl>
## 1 AAPL 2017-10-29 -0.0291 0.0669
## 2 AAPL 2019-04-08 -0.0701 0.0157
## 3 AAPL 2017-08-05 0.0688 -0.00656
## 4 AAPL 2018-07-24 0.00725 -0.0304
## 5 AAPL 2015-01-15 0.00982 -0.0998
## 6 AAPL 2017-07-25 0.0686 -0.00609
## 7 AAPL 2015-11-23 -0.0482 -0.0269
## 8 AAPL 2019-09-18 -0.0658 0.146
## 9 AAPL 2016-07-04 0.00890 -0.00803
## 10 AAPL 2017-07-15 0.0691 0.00376
## # … with 890 more rows