rollify
returns a rolling version of the input function, with a
rolling window
specified by the user.
rollify(.f, window = 1, unlist = TRUE, na_value = NULL)
A function to modify, specified in one of the following ways:
A named function, e.g. mean
.
An anonymous function, e.g. \(x) x + 1
or function(x) x + 1
.
A formula, e.g. ~ .x + 1
. Only recommended if you require backward
compatibility with older versions of R.
The window size to roll over
If the function returns a single value each time it is called,
use unlist = TRUE
. If the function returns more than one value, or a more
complicated object (like a linear model), use unlist = FALSE
to create
a list-column of the rolling results.
A default value for the NA
values at the beginning of the
roll.
The intended use of rollify
is to turn a function into a rolling version
of itself for use inside of a call to dplyr::mutate()
; however, it works
equally well when called from purrr::map()
.
Because of its intended use with dplyr::mutate()
, rollify
creates a function that always returns output with the same length as the
input, aligned right, and filled with NA
unless otherwise specified
by na_value
.
The form of the .f
argument is the same as the form that can be passed
to purrr::map()
. Use .x
or .
to refer to the first object to roll over,
and .y
to refer to the second object if required. The examples explain this
further.
If optional arguments to the function are required, specify them in the
call to rollify
, and not in the call to the rolling version of the
function. See the examples for more details.
# Rolling mean --------------------------------------------------------------
data(FB)
# Turn the normal mean function into a rolling mean with a 5 row window
mean_roll_5 <- rollify(mean, window = 5)
dplyr::mutate(FB,
normal_mean = mean(adjusted),
rolling_mean = mean_roll_5(adjusted))
#> # A tibble: 1,008 × 10
#> symbol date open high low close volume adjusted normal…¹ rolli…²
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28 69846400 28 77.5 NA
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8 77.5 NA
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8 77.5 NA
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4 77.5 NA
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 77.5 28.6
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 77.5 29.1
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3 77.5 29.8
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7 77.5 30.4
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0 77.5 30.7
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1 77.5 30.9
#> # … with 998 more rows, and abbreviated variable names ¹normal_mean,
#> # ²rolling_mean
# There's nothing stopping you from combining multiple rolling functions with
# different window sizes in the same mutate call
mean_roll_10 <- rollify(mean, window = 10)
dplyr::mutate(FB,
rolling_mean_5 = mean_roll_5(adjusted),
rolling_mean_10 = mean_roll_10(adjusted))
#> # A tibble: 1,008 × 10
#> symbol date open high low close volume adjusted rollin…¹ rolli…²
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28 69846400 28 NA NA
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8 NA NA
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8 NA NA
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4 NA NA
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 28.6 NA
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 29.1 NA
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3 29.8 NA
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7 30.4 NA
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0 30.7 NA
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1 30.9 29.8
#> # … with 998 more rows, and abbreviated variable names ¹rolling_mean_5,
#> # ²rolling_mean_10
# Functions with multiple args and optional args ----------------------------
# With 2 args, use the purrr syntax of ~ and .x, .y
# Rolling correlation example
cor_roll <- rollify(~cor(.x, .y), window = 5)
dplyr::mutate(FB, running_cor = cor_roll(adjusted, open))
#> # A tibble: 1,008 × 9
#> symbol date open high low close volume adjusted running_cor
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28 69846400 28 NA
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8 NA
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8 NA
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4 NA
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 0.749
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 0.805
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3 0.859
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7 0.884
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0 0.667
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1 0.379
#> # … with 998 more rows
# With >2 args, create an anonymous function with >2 args or use
# the purrr convention of ..1, ..2, ..3 to refer to the arguments
avg_of_avgs <- rollify(function(x, y, z) {
(mean(x) + mean(y) + mean(z)) / 3
},
window = 10)
# Or
avg_of_avgs <- rollify(~(mean(..1) + mean(..2) + mean(..3)) / 3,
window = 10)
dplyr::mutate(FB, avg_of_avgs = avg_of_avgs(open, high, low))
#> # A tibble: 1,008 × 9
#> symbol date open high low close volume adjusted avg_of_avgs
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28 69846400 28 NA
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8 NA
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8 NA
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4 NA
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 NA
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 NA
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3 NA
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7 NA
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0 NA
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1 29.7
#> # … with 998 more rows
# Optional arguments MUST be passed at the creation of the rolling function
# Only data arguments that are "rolled over" are allowed when calling the
# rolling version of the function
FB$adjusted[1] <- NA
roll_mean_na_rm <- rollify(~mean(.x, na.rm = TRUE), window = 5)
dplyr::mutate(FB, roll_mean = roll_mean_na_rm(adjusted))
#> # A tibble: 1,008 × 9
#> symbol date open high low close volume adjusted roll_mean
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-02 27.4 28.2 27.4 28 69846400 NA NA
#> 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8 NA
#> 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8 NA
#> 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4 NA
#> 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 28.8
#> 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 29.1
#> 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3 29.8
#> 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7 30.4
#> 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0 30.7
#> 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1 30.9
#> # … with 998 more rows
# Returning multiple values -------------------------------------------------
data(FB)
summary2 <- function(x) {
unclass(summary(x))
}
# If the function returns >1 value, set the `unlist = FALSE` argument
# Running 5 number summary
summary_roll <- rollify(summary2, window = 5, unlist = FALSE)
FB_summarised <- dplyr::mutate(FB, summary_roll = summary_roll(adjusted))
FB_summarised$summary_roll[[5]]
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 27.7700 28.5125 28.9100 28.7525 29.1500 29.4200 1.0000
# dplyr::bind_rows() is often helpful in these cases to get
# meaningful output
summary_roll <- rollify(~dplyr::bind_rows(summary2(.)), window = 5, unlist = FALSE)
FB_summarised <- dplyr::mutate(FB, summary_roll = summary_roll(adjusted))
FB_summarised %>%
dplyr::filter(!is.na(summary_roll)) %>%
tidyr::unnest(summary_roll)
#> # A tibble: 1,004 × 15
#> symbol date open high low close volume adjus…¹ Min. 1st Q…² Median
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 FB 2013-01-08 29.5 29.6 28.9 29.1 4.59e7 29.1 27.8 28.5 28.9
#> 2 FB 2013-01-09 29.7 30.6 29.5 30.6 1.05e8 30.6 27.8 28.8 29.1
#> 3 FB 2013-01-10 30.6 31.5 30.3 31.3 9.53e7 31.3 28.8 29.1 29.4
#> 4 FB 2013-01-11 31.3 32.0 31.1 31.7 8.96e7 31.7 29.1 29.4 30.6
#> 5 FB 2013-01-14 32.1 32.2 30.6 31.0 9.89e7 31.0 29.1 30.6 31.0
#> 6 FB 2013-01-15 30.6 31.7 29.9 30.1 1.73e8 30.1 30.1 30.6 31.0
#> 7 FB 2013-01-16 30.2 30.4 29.5 29.8 7.53e7 29.8 29.8 30.1 31.0
#> 8 FB 2013-01-17 30.1 30.4 30.0 30.1 4.03e7 30.1 29.8 30.1 30.1
#> 9 FB 2013-01-18 30.3 30.4 29.3 29.7 4.96e7 29.7 29.7 29.8 30.1
#> 10 FB 2013-01-22 29.8 30.9 29.7 30.7 5.52e7 30.7 29.7 29.8 30.1
#> # … with 994 more rows, 4 more variables: Mean <dbl>, `3rd Qu.` <dbl>,
#> # Max. <dbl>, `NA's` <dbl>, and abbreviated variable names ¹adjusted,
#> # ²`1st Qu.`
# Rolling regressions -------------------------------------------------------
# Extending an example from R for Data Science on "Many Models".
# For each country in the gapminder data, calculate a linear regression
# every 5 periods of lifeExp ~ year
library(gapminder)
# Rolling regressions are easy to implement
lm_roll <- rollify(~lm(.x ~ .y), window = 5, unlist = FALSE)
gapminder %>%
dplyr::group_by(country) %>%
dplyr::mutate(rolling_lm = lm_roll(lifeExp, year))
#> # A tibble: 1,704 × 7
#> # Groups: country [142]
#> country continent year lifeExp pop gdpPercap rolling_lm
#> <fct> <fct> <int> <dbl> <int> <dbl> <list>
#> 1 Afghanistan Asia 1952 28.8 8425333 779. <lgl [1]>
#> 2 Afghanistan Asia 1957 30.3 9240934 821. <lgl [1]>
#> 3 Afghanistan Asia 1962 32.0 10267083 853. <lgl [1]>
#> 4 Afghanistan Asia 1967 34.0 11537966 836. <lgl [1]>
#> 5 Afghanistan Asia 1972 36.1 13079460 740. <lm>
#> 6 Afghanistan Asia 1977 38.4 14880372 786. <lm>
#> 7 Afghanistan Asia 1982 39.9 12881816 978. <lm>
#> 8 Afghanistan Asia 1987 40.8 13867957 852. <lm>
#> 9 Afghanistan Asia 1992 41.7 16317921 649. <lm>
#> 10 Afghanistan Asia 1997 41.8 22227415 635. <lm>
#> # … with 1,694 more rows
# Rolling with groups -------------------------------------------------------
# One of the most powerful things about this is that it works with
# groups since `mutate` is being used
data(FANG)
FANG <- FANG %>%
dplyr::group_by(symbol)
mean_roll_3 <- rollify(mean, window = 3)
FANG %>%
dplyr::mutate(mean_roll = mean_roll_3(adjusted)) %>%
dplyr::slice(1:5)
#> # A tibble: 20 × 9
#> # Groups: symbol [4]
#> symbol date open high low close volume adjusted mean_roll
#> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 AMZN 2013-01-02 256. 258. 253. 257. 3271000 257. NA
#> 2 AMZN 2013-01-03 257. 261. 256. 258. 2750900 258. NA
#> 3 AMZN 2013-01-04 258. 260. 257. 259. 1874200 259. 258.
#> 4 AMZN 2013-01-07 263. 270. 263. 268. 4910000 268. 262.
#> 5 AMZN 2013-01-08 267. 269. 264. 266. 3010700 266. 265.
#> 6 FB 2013-01-02 27.4 28.2 27.4 28 69846400 28 NA
#> 7 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8 NA
#> 8 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8 28.2
#> 9 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4 28.6
#> 10 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 29.1
#> 11 GOOG 2013-01-02 719. 727. 717. 723. 5101500 361. NA
#> 12 GOOG 2013-01-03 725. 732. 721. 724. 4653700 361. NA
#> 13 GOOG 2013-01-04 729. 741. 728. 738. 5547600 369. 364.
#> 14 GOOG 2013-01-07 735. 739. 731. 735. 3323800 367. 366.
#> 15 GOOG 2013-01-08 736. 736. 724. 733. 3364700 366. 367.
#> 16 NFLX 2013-01-02 95.2 95.8 90.7 92.0 19431300 13.1 NA
#> 17 NFLX 2013-01-03 92.0 97.9 91.5 96.6 27912500 13.8 NA
#> 18 NFLX 2013-01-04 96.5 97.7 95.5 96.0 17761100 13.7 13.6
#> 19 NFLX 2013-01-07 96.4 102. 96.1 99.2 45550400 14.2 13.9
#> 20 NFLX 2013-01-08 100. 101. 96.8 97.2 24714900 13.9 13.9