Runner examples

The most fundamental function in runner package is runner. With runner::runner one can apply any R function on running windows. This tutorial presents set of examples explaining how to tackle some tasks. Some of the examples are referenced to original topic on stack-overflow.

Number of unique elements in 7 days window

library(runner)

x <- sample(letters, 20, replace = TRUE)
date <- Sys.Date() + cumsum(sample(1:5, 20, replace = TRUE)) # unequaly spaced time series

runner(
  x, 
  k = "7 days", 
  idx = date, 
  f = function(x) length(unique(x))
)
##  [1] 1 1 2 3 2 2 3 2 2 2 2 2 3 2 1 2 3 3 4 4

weekly trimmed mean

library(runner)

x <- cumsum(rnorm(20))
date <- Sys.Date() + cumsum(sample(1:5, 20, replace = TRUE)) # unequaly spaced time series

runner(
  x, 
  k = "week", 
  idx = date, 
  f = function(x) mean(x, trim = 0.05)
)
##  [1] -0.45570427  0.05141348  0.49139631  1.36488554  1.95825964  1.39570190
##  [7]  1.13721199  0.55797114  0.49053289  0.40135966 -0.02990304 -1.12243140
## [13] -1.69157529 -1.76535337 -2.32295965 -1.98330961 -2.12996001 -2.28638815
## [19] -3.28535707 -2.97083821

Prediction on current day based on preceding 2-weeks regression

library(runner)

x <- cumsum(rnorm(20))
y <- 3 * x + rnorm(20)
date <- Sys.Date() + cumsum(sample(1:3, 20, replace = TRUE)) # unequaly spaced time series
data <- data.frame(date, y, x)


data$pred <- runner(
  data,
  lag = "1 day",
  k = "2 weeks",
  idx = data$date,
  f = function(data) {
      predict(
        lm(y ~ x, data = data)
      )[nrow(data)]
  }
)


plot(data$date, data$y, type = "l", col = "red")
lines(data$date, data$pred, col = "blue")

Rolling sums for groups with uneven time gaps

SO discussion

library(runner)
library(dplyr)

set.seed(3737)
df <- data.frame(
  user_id = c(rep(27, 7), rep(11, 7)),
  date = as.Date(rep(c('2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07', 
                       '2016-01-10', '2016-01-14', '2016-01-16'), 2)),
  value = round(rnorm(14, 15, 5), 1))

df %>%
  group_by(user_id) %>%
  mutate(
    v_minus7  = sum_run(value, 7, idx = date),
    v_minus14 = sum_run(value, 14, idx = date)
  )
## # A tibble: 14 x 5
## # Groups:   user_id [2]
##    user_id date       value v_minus7 v_minus14
##      <dbl> <date>     <dbl>    <dbl>     <dbl>
##  1      27 2016-01-01  15       15        15  
##  2      27 2016-01-03  22.4     37.4      37.4
##  3      27 2016-01-05  13.3     50.7      50.7
##  4      27 2016-01-07  21.9     72.6      72.6
##  5      27 2016-01-10  20.6     55.8      93.2
##  6      27 2016-01-14  18.6     39.2     112. 
##  7      27 2016-01-16  16.4     55.6     113. 
##  8      11 2016-01-01   6.8      6.8       6.8
##  9      11 2016-01-03  21.3     28.1      28.1
## 10      11 2016-01-05  19.8     47.9      47.9
## 11      11 2016-01-07  22       69.9      69.9
## 12      11 2016-01-10  19.4     61.2      89.3
## 13      11 2016-01-14  17.5     36.9     107. 
## 14      11 2016-01-16  19.3     56.2     119.

Unique for specified time frame

SO discussion

library(runner)
library(dplyr)

df <- read.table(text = "  user_id       date category
       27 2016-01-01    apple
       27 2016-01-03    apple
       27 2016-01-05     pear
       27 2016-01-07     plum
       27 2016-01-10    apple
       27 2016-01-14     pear
       27 2016-01-16     plum
       11 2016-01-01    apple
       11 2016-01-03     pear
       11 2016-01-05     pear
       11 2016-01-07     pear
       11 2016-01-10    apple
       11 2016-01-14    apple
       11 2016-01-16    apple", header = TRUE)

df %>%
  group_by(user_id) %>%
  mutate(
    distinct_7  = runner(category, 
                         k = "7 days", 
                         idx = as.Date(date), 
                         f = function(x) length(unique(x))),
    distinct_14 = runner(category, 
                         k = "14 days", 
                         idx = as.Date(date), 
                         f = function(x) length(unique(x)))
  )
## # A tibble: 14 x 5
## # Groups:   user_id [2]
##    user_id date       category distinct_7 distinct_14
##      <int> <fct>      <fct>         <int>       <int>
##  1      27 2016-01-01 apple             1           1
##  2      27 2016-01-03 apple             1           1
##  3      27 2016-01-05 pear              2           2
##  4      27 2016-01-07 plum              3           3
##  5      27 2016-01-10 apple             3           3
##  6      27 2016-01-14 pear              2           3
##  7      27 2016-01-16 plum              3           3
##  8      11 2016-01-01 apple             1           1
##  9      11 2016-01-03 pear              2           2
## 10      11 2016-01-05 pear              2           2
## 11      11 2016-01-07 pear              2           2
## 12      11 2016-01-10 apple             2           2
## 13      11 2016-01-14 apple             1           2
## 14      11 2016-01-16 apple             1           2

Aggregating values from another data.frame in grouped_df

SO Discussion

library(runner)
library(dplyr)

Date <- seq(from = as.Date("2014-01-01"), 
            to = as.Date("2019-12-31"), 
            by = 'day')
market_return <- c(rnorm(2191))

AAPL <- data.frame(
  Company.name = "AAPL", 
  Date =  Date,
  market_return = market_return
)

MSFT <- data.frame(
  Company.name = "MSFT", 
  Date = Date,
  market_return = market_return
)

df <- rbind(AAPL, MSFT)
df$stock_return <- c(rnorm(4382))
df <- df[order(df$Date),]

df2 <- data.frame(
  Company.name2 = c(replicate(450, "AAPL"), replicate(450, "MSFT")), 
  Event_date = sample(
    seq(as.Date('2015/01/01'), 
        as.Date('2019/12/31'), 
        by = "day"),
    size =  900)
)


df2 %>%
  group_by(Company.name2) %>%
  mutate(
    intercept = runner(
      x = df[df$Company.name ==  Company.name2[1], ], 
      k = "180 days", 
      lag = "5 days",
      idx = df$Date[df$Company.name == Company.name2[1]],
      at = Event_date,
      f = function(x) {
        coef(
          lm(stock_return ~ market_return, data = x)
        )[1]
      }
    ),
    slope = runner(
      x = df[df$Company.name == Company.name2[1], ],  
      k = "180 days", 
      lag = "5 days",
      idx = df$Date[df$Company.name == Company.name2[1]],
      at = Event_date,
      f = function(x) {
        coef(
          lm(stock_return ~ market_return, data = x)
        )[2]
      }
    )
  )
## # A tibble: 900 x 4
## # Groups:   Company.name2 [2]
##    Company.name2 Event_date intercept    slope
##    <fct>         <date>         <dbl>    <dbl>
##  1 AAPL          2017-10-29  -0.0291   0.0669 
##  2 AAPL          2019-04-08  -0.0701   0.0157 
##  3 AAPL          2017-08-05   0.0688  -0.00656
##  4 AAPL          2018-07-24   0.00725 -0.0304 
##  5 AAPL          2015-01-15   0.00982 -0.0998 
##  6 AAPL          2017-07-25   0.0686  -0.00609
##  7 AAPL          2015-11-23  -0.0482  -0.0269 
##  8 AAPL          2019-09-18  -0.0658   0.146  
##  9 AAPL          2016-07-04   0.00890 -0.00803
## 10 AAPL          2017-07-15   0.0691   0.00376
## # … with 890 more rows