rollify returns a rolling version of the input function, with a rolling window specified by the user.

rollify(.f, window = 1, unlist = TRUE, na_value = NULL)

Arguments

.f

A function to modify, specified in one of the following ways:

  • A named function, e.g. mean.

  • An anonymous function, e.g. \(x) x + 1 or function(x) x + 1.

  • A formula, e.g. ~ .x + 1. Only recommended if you require backward compatibility with older versions of R.

window

The window size to roll over

unlist

If the function returns a single value each time it is called, use unlist = TRUE. If the function returns more than one value, or a more complicated object (like a linear model), use unlist = FALSE to create a list-column of the rolling results.

na_value

A default value for the NA values at the beginning of the roll.

Details

The intended use of rollify is to turn a function into a rolling version of itself for use inside of a call to dplyr::mutate(); however, it works equally well when called from purrr::map().

Because of its intended use with dplyr::mutate(), rollify creates a function that always returns output with the same length as the input, aligned right, and filled with NA unless otherwise specified by na_value.

The form of the .f argument is the same as the form that can be passed to purrr::map(). Use .x or . to refer to the first object to roll over, and .y to refer to the second object if required. The examples explain this further.

If optional arguments to the function are required, specify them in the call to rollify, and not in the call to the rolling version of the function. See the examples for more details.

Examples


# Rolling mean --------------------------------------------------------------

data(FB)

# Turn the normal mean function into a rolling mean with a 5 row window
mean_roll_5 <- rollify(mean, window = 5)

dplyr::mutate(FB,
       normal_mean  = mean(adjusted),
       rolling_mean = mean_roll_5(adjusted))
#> # A tibble: 1,008 × 10
#>    symbol date        open  high   low close    volume adjusted normal_mean
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>       <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28    69846400     28          77.5
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8        77.5
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8        77.5
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4        77.5
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1        77.5
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6        77.5
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3        77.5
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7        77.5
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0        77.5
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1        77.5
#> # ℹ 998 more rows
#> # ℹ 1 more variable: rolling_mean <dbl>

# There's nothing stopping you from combining multiple rolling functions with
# different window sizes in the same mutate call
mean_roll_10 <- rollify(mean, window = 10)

dplyr::mutate(FB,
       rolling_mean_5  = mean_roll_5(adjusted),
       rolling_mean_10 = mean_roll_10(adjusted))
#> # A tibble: 1,008 × 10
#>    symbol date        open  high   low close    volume adjusted rolling_mean_5
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>          <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28    69846400     28             NA  
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8           NA  
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8           NA  
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4           NA  
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1           28.6
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6           29.1
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3           29.8
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7           30.4
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0           30.7
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1           30.9
#> # ℹ 998 more rows
#> # ℹ 1 more variable: rolling_mean_10 <dbl>

# Functions with multiple args and optional args ----------------------------

# With 2 args, use the purrr syntax of ~ and .x, .y
# Rolling correlation example
cor_roll <- rollify(~cor(.x, .y), window = 5)

dplyr::mutate(FB, running_cor = cor_roll(adjusted, open))
#> # A tibble: 1,008 × 9
#>    symbol date        open  high   low close    volume adjusted running_cor
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>       <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28    69846400     28        NA    
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8      NA    
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8      NA    
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4      NA    
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1       0.749
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6       0.805
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3       0.859
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7       0.884
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0       0.667
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1       0.379
#> # ℹ 998 more rows

# With >2 args, create an anonymous function with >2 args or use
# the purrr convention of ..1, ..2, ..3 to refer to the arguments
avg_of_avgs <- rollify(function(x, y, z) {
                         (mean(x) + mean(y) + mean(z)) / 3
                       },
                       window = 10)

# Or
avg_of_avgs <- rollify(~(mean(..1) + mean(..2) + mean(..3)) / 3,
                       window = 10)

dplyr::mutate(FB, avg_of_avgs = avg_of_avgs(open, high, low))
#> # A tibble: 1,008 × 9
#>    symbol date        open  high   low close    volume adjusted avg_of_avgs
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>       <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28    69846400     28          NA  
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8        NA  
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8        NA  
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4        NA  
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1        NA  
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6        NA  
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3        NA  
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7        NA  
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0        NA  
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1        29.7
#> # ℹ 998 more rows

# Optional arguments MUST be passed at the creation of the rolling function
# Only data arguments that are "rolled over" are allowed when calling the
# rolling version of the function
FB$adjusted[1] <- NA

roll_mean_na_rm <- rollify(~mean(.x, na.rm = TRUE), window = 5)

dplyr::mutate(FB, roll_mean = roll_mean_na_rm(adjusted))
#> # A tibble: 1,008 × 9
#>    symbol date        open  high   low close    volume adjusted roll_mean
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>     <dbl>
#>  1 FB     2013-01-02  27.4  28.2  27.4  28    69846400     NA        NA  
#>  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8      NA  
#>  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8      NA  
#>  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4      NA  
#>  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1      28.8
#>  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6      29.1
#>  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3      29.8
#>  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7      30.4
#>  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0      30.7
#> 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1      30.9
#> # ℹ 998 more rows

# Returning multiple values -------------------------------------------------

data(FB)

# Wrapper around summary() that drops the class attribute of the result,
# leaving the underlying named values for use in a list-column.
summary2 <- function(x) {
  stats <- summary(x)
  unclass(stats)
}

# If the function returns >1 value, set the `unlist = FALSE` argument
# Running 5 number summary
summary_roll <- rollify(summary2, window = 5, unlist = FALSE)

FB_summarised <- dplyr::mutate(FB, summary_roll = summary_roll(adjusted))
FB_summarised$summary_roll[[5]]
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#> 27.7700 28.5125 28.9100 28.7525 29.1500 29.4200  1.0000 

# dplyr::bind_rows() is often helpful in these cases to get
# meaningful output

summary_roll <- rollify(~dplyr::bind_rows(summary2(.)), window = 5, unlist = FALSE)
FB_summarised <- dplyr::mutate(FB, summary_roll = summary_roll(adjusted))
FB_summarised %>%
  dplyr::filter(!is.na(summary_roll)) %>%
  tidyr::unnest(summary_roll)
#> # A tibble: 1,004 × 15
#>    symbol date        open  high   low close    volume adjusted  Min. `1st Qu.`
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl> <dbl>     <dbl>
#>  1 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1  27.8      28.5
#>  2 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6  27.8      28.8
#>  3 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3  28.8      29.1
#>  4 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7  29.1      29.4
#>  5 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0  29.1      30.6
#>  6 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1  30.1      30.6
#>  7 FB     2013-01-16  30.2  30.4  29.5  29.8  75332700     29.8  29.8      30.1
#>  8 FB     2013-01-17  30.1  30.4  30.0  30.1  40256700     30.1  29.8      30.1
#>  9 FB     2013-01-18  30.3  30.4  29.3  29.7  49631500     29.7  29.7      29.8
#> 10 FB     2013-01-22  29.8  30.9  29.7  30.7  55243300     30.7  29.7      29.8
#> # ℹ 994 more rows
#> # ℹ 5 more variables: Median <dbl>, Mean <dbl>, `3rd Qu.` <dbl>, Max. <dbl>,
#> #   `NA's` <dbl>

# Rolling regressions -------------------------------------------------------

# Extending an example from "R for Data Science" on "Many Models".
# For each country in the gapminder data, calculate a linear regression
# every 5 periods of lifeExp ~ year
library(gapminder)

# Rolling regressions are easy to implement
lm_roll <- rollify(~lm(.x ~ .y), window = 5, unlist = FALSE)

gapminder %>%
  dplyr::group_by(country) %>%
  dplyr::mutate(rolling_lm = lm_roll(lifeExp, year))
#> # A tibble: 1,704 × 7
#> # Groups:   country [142]
#>    country     continent  year lifeExp      pop gdpPercap rolling_lm
#>    <fct>       <fct>     <int>   <dbl>    <int>     <dbl> <list>    
#>  1 Afghanistan Asia       1952    28.8  8425333      779. <lgl [1]> 
#>  2 Afghanistan Asia       1957    30.3  9240934      821. <lgl [1]> 
#>  3 Afghanistan Asia       1962    32.0 10267083      853. <lgl [1]> 
#>  4 Afghanistan Asia       1967    34.0 11537966      836. <lgl [1]> 
#>  5 Afghanistan Asia       1972    36.1 13079460      740. <lm>      
#>  6 Afghanistan Asia       1977    38.4 14880372      786. <lm>      
#>  7 Afghanistan Asia       1982    39.9 12881816      978. <lm>      
#>  8 Afghanistan Asia       1987    40.8 13867957      852. <lm>      
#>  9 Afghanistan Asia       1992    41.7 16317921      649. <lm>      
#> 10 Afghanistan Asia       1997    41.8 22227415      635. <lm>      
#> # ℹ 1,694 more rows

# Rolling with groups -------------------------------------------------------

# One of the most powerful things about this is that it works with
# groups since `mutate` is being used
data(FANG)
FANG <- FANG %>%
  dplyr::group_by(symbol)

mean_roll_3 <- rollify(mean, window = 3)

FANG %>%
  dplyr::mutate(mean_roll = mean_roll_3(adjusted)) %>%
  dplyr::slice(1:5)
#> # A tibble: 20 × 9
#> # Groups:   symbol [4]
#>    symbol date        open  high   low close   volume adjusted mean_roll
#>    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>     <dbl>
#>  1 AMZN   2013-01-02 256.  258.  253.  257.   3271000    257.       NA  
#>  2 AMZN   2013-01-03 257.  261.  256.  258.   2750900    258.       NA  
#>  3 AMZN   2013-01-04 258.  260.  257.  259.   1874200    259.      258. 
#>  4 AMZN   2013-01-07 263.  270.  263.  268.   4910000    268.      262. 
#>  5 AMZN   2013-01-08 267.  269.  264.  266.   3010700    266.      265. 
#>  6 FB     2013-01-02  27.4  28.2  27.4  28   69846400     28        NA  
#>  7 FB     2013-01-03  27.9  28.5  27.6  27.8 63140600     27.8      NA  
#>  8 FB     2013-01-04  28.0  28.9  27.8  28.8 72715400     28.8      28.2
#>  9 FB     2013-01-07  28.7  29.8  28.6  29.4 83781800     29.4      28.6
#> 10 FB     2013-01-08  29.5  29.6  28.9  29.1 45871300     29.1      29.1
#> 11 GOOG   2013-01-02 719.  727.  717.  723.   5101500    361.       NA  
#> 12 GOOG   2013-01-03 725.  732.  721.  724.   4653700    361.       NA  
#> 13 GOOG   2013-01-04 729.  741.  728.  738.   5547600    369.      364. 
#> 14 GOOG   2013-01-07 735.  739.  731.  735.   3323800    367.      366. 
#> 15 GOOG   2013-01-08 736.  736.  724.  733.   3364700    366.      367. 
#> 16 NFLX   2013-01-02  95.2  95.8  90.7  92.0 19431300     13.1      NA  
#> 17 NFLX   2013-01-03  92.0  97.9  91.5  96.6 27912500     13.8      NA  
#> 18 NFLX   2013-01-04  96.5  97.7  95.5  96.0 17761100     13.7      13.6
#> 19 NFLX   2013-01-07  96.4 102.   96.1  99.2 45550400     14.2      13.9
#> 20 NFLX   2013-01-08 100.  101.   96.8  97.2 24714900     13.9      13.9