import pytimetk as tk
import pandas as pd
import numpy as np
df = tk.load_dataset("m4_daily", parse_dates = ['date'])
augment_rolling
augment_rolling(data, date_column, value_column, window_func='mean', window=2, min_periods=None, engine='pandas', center=False, threads=1, show_progress=True, reduce_memory=False, **kwargs)
Apply one or more Series-based rolling functions and window sizes to one or more columns of a DataFrame.
Parameters
Name | Type | Description | Default |
---|---|---|---|
data |
Union[pd.DataFrame, pd.core.groupby.generic.DataFrameGroupBy] | Input data to be processed. Can be a Pandas DataFrame or a GroupBy object. | required |
date_column |
str | Name of the datetime column. Data is sorted by this column within each group. | required |
value_column |
Union[str, list] | Column(s) to which the rolling window functions should be applied. Can be a single column name or a list. | required |
window_func |
Union[str, list, Tuple[str, Callable]] | Specifies the function(s) to be applied to the rolling windows of the value column(s). 1. For standard functions: - Provide a string with the name of the function (e.g., 'mean', 'sum'), or a list of such names. 2. For custom functions: - Provide a list of tuples. Each tuple should contain a custom name for the function and the function itself. - Each custom function should accept a Pandas Series as its input and operate on that series. Example: ("range", lambda x: x.max() - x.min()) (See more Examples below.) Note: If your function needs to operate on multiple columns (i.e., it requires access to a DataFrame rather than just a Series), consider using the augment_rolling_apply function in this library. |
'mean' |
window |
Union[int, tuple, list] | Specifies the size of the rolling windows. - An integer applies the same window size to all columns in value_column . - A tuple generates windows from the first to the second value (inclusive). - A list of integers designates multiple window sizes for each respective column. |
2 |
min_periods |
int | Minimum observations in the window to have a value. Defaults to the window size. If set, a value will be produced even if fewer observations are present than the window size. | None |
center |
bool | If True , the rolling window will be centered on the current value. For even-sized windows, the window will be left-biased. Otherwise, it uses a trailing window. |
False |
threads |
int | Number of threads to use for parallel processing. If threads is set to 1, parallel processing will be disabled. Set to -1 to use all available CPU cores. |
1 |
show_progress |
bool | If True , a progress bar will be displayed during parallel processing. |
True |
reduce_memory |
bool | The reduce_memory parameter is used to specify whether to reduce the memory usage of the DataFrame by converting int, float to smaller bytes and str to categorical data. This reduces memory for large data but may impact resolution of float and will change str to categorical. Default is False. |
False |
engine |
str | Specifies the backend computation library for augmenting rolling window functions. The options are: - 'pandas' (default): Uses the pandas library. - 'polars': Uses the polars library, which may offer performance benefits for larger datasets. |
'pandas' |
Returns
Type | Description |
---|---|
pd.DataFrame | The augment_rolling function returns a DataFrame with new columns for each applied function, window size, and value column. |
Notes
Performance
This function can use parallel processing to speed up computation for large datasets with many time series groups.
Parallel processing has overhead and may not be faster on small datasets.
To use parallel processing, set threads = -1 to use all available processors.
Examples
# Example 1 - Using multiple window sizes and multiple functions, pandas engine
# This example demonstrates the use of both string-named functions and lambda
# functions on a rolling window. We specify a list of window sizes: [2,7].
# As a result, the output will have computations for both window sizes 2 and 7.
# Note - It's preferred to use built-in or configurable functions instead of
# lambda functions for performance reasons.
rolled_df = (
    df
        .groupby('id')
        .augment_rolling(
            date_column = 'date',
            value_column = 'value',
            window = [2,7],  # Specifying multiple window sizes
            window_func = [
                'mean',  # Built-in mean function
                ('std', lambda x: x.std())  # Lambda function to compute standard deviation
            ],
            threads = 1,  # Disabling parallel processing
            engine = 'pandas'  # Using pandas engine
        )
)
display(rolled_df)
| | id | date | value | value_rolling_mean_win_2 | value_rolling_std_win_2 | value_rolling_mean_win_7 | value_rolling_std_win_7 |
---|---|---|---|---|---|---|---|
0 | D10 | 2014-07-03 | 2076.2 | NaN | NaN | NaN | NaN |
1 | D10 | 2014-07-04 | 2073.4 | 2074.80 | 1.40 | 2074.800000 | 1.400000 |
2 | D10 | 2014-07-05 | 2048.7 | 2061.05 | 12.35 | 2066.100000 | 12.356645 |
3 | D10 | 2014-07-06 | 2048.9 | 2048.80 | 0.10 | 2061.800000 | 13.037830 |
4 | D10 | 2014-07-07 | 2006.4 | 2027.65 | 21.25 | 2050.720000 | 25.041038 |
... | ... | ... | ... | ... | ... | ... | ... |
9738 | D500 | 2012-09-19 | 9418.8 | 9425.35 | 6.55 | 9382.071429 | 74.335988 |
9739 | D500 | 2012-09-20 | 9365.7 | 9392.25 | 26.55 | 9396.400000 | 58.431303 |
9740 | D500 | 2012-09-21 | 9445.9 | 9405.80 | 40.10 | 9419.114286 | 39.184451 |
9741 | D500 | 2012-09-22 | 9497.9 | 9471.90 | 26.00 | 9438.928571 | 38.945336 |
9742 | D500 | 2012-09-23 | 9545.3 | 9521.60 | 23.70 | 9449.028571 | 53.379416 |
9743 rows × 7 columns
# Example 2 - Multiple groups, pandas engine
# Example showcasing the use of string function names and lambda functions
# applied on rolling windows. The `window` tuple (1,3) will generate window
# sizes of 1, 2, and 3.
# Note - It's preferred to use built-in or configurable functions instead of
# lambda functions for performance reasons.
rolled_df = (
    df
        .groupby('id')
        .augment_rolling(
            date_column = 'date',
            value_column = 'value',
            window = (1,3),  # Specifying a range of window sizes
            window_func = [
                'mean',  # Using built-in mean function
                ('std', lambda x: x.std())  # Lambda function for standard deviation
            ],
            threads = 1,  # Disabling parallel processing
            engine = 'pandas'  # Using pandas engine
        )
)
display(rolled_df)
| | id | date | value | value_rolling_mean_win_1 | value_rolling_std_win_1 | value_rolling_mean_win_2 | value_rolling_std_win_2 | value_rolling_mean_win_3 | value_rolling_std_win_3 |
---|---|---|---|---|---|---|---|---|---|
0 | D10 | 2014-07-03 | 2076.2 | 2076.2 | 0.0 | 2076.20 | 0.00 | 2076.200000 | 0.000000 |
1 | D10 | 2014-07-04 | 2073.4 | 2073.4 | 0.0 | 2074.80 | 1.40 | 2074.800000 | 1.400000 |
2 | D10 | 2014-07-05 | 2048.7 | 2048.7 | 0.0 | 2061.05 | 12.35 | 2066.100000 | 12.356645 |
3 | D10 | 2014-07-06 | 2048.9 | 2048.9 | 0.0 | 2048.80 | 0.10 | 2057.000000 | 11.596839 |
4 | D10 | 2014-07-07 | 2006.4 | 2006.4 | 0.0 | 2027.65 | 21.25 | 2034.666667 | 19.987718 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9738 | D500 | 2012-09-19 | 9418.8 | 9418.8 | 0.0 | 9425.35 | 6.55 | 9429.466667 | 7.905413 |
9739 | D500 | 2012-09-20 | 9365.7 | 9365.7 | 0.0 | 9392.25 | 26.55 | 9405.466667 | 28.623339 |
9740 | D500 | 2012-09-21 | 9445.9 | 9445.9 | 0.0 | 9405.80 | 40.10 | 9410.133333 | 33.310092 |
9741 | D500 | 2012-09-22 | 9497.9 | 9497.9 | 0.0 | 9471.90 | 26.00 | 9436.500000 | 54.378182 |
9742 | D500 | 2012-09-23 | 9545.3 | 9545.3 | 0.0 | 9521.60 | 23.70 | 9496.366667 | 40.594362 |
9743 rows × 9 columns
# Example 3 - Multiple groups, polars engine
rolled_df = (
    df
        .groupby('id')
        .augment_rolling(
            date_column = 'date',
            value_column = 'value',
            window = (1,3),  # Specifying a range of window sizes
            window_func = [
                'mean',  # Using built-in mean function
                'std',  # Using built-in standard deviation function
            ],
            engine = 'polars'  # Using polars engine
        )
)
display(rolled_df)
| | id | date | value | value_rolling_mean_win_1 | value_rolling_std_win_1 | value_rolling_mean_win_2 | value_rolling_std_win_2 | value_rolling_mean_win_3 | value_rolling_std_win_3 |
---|---|---|---|---|---|---|---|---|---|
0 | D10 | 2014-07-03 | 2076.2 | 2076.2 | NaN | 2076.20 | NaN | 2076.200000 | NaN |
1 | D10 | 2014-07-04 | 2073.4 | 2073.4 | NaN | 2074.80 | 1.979899 | 2074.800000 | 1.979899 |
2 | D10 | 2014-07-05 | 2048.7 | 2048.7 | NaN | 2061.05 | 17.465537 | 2066.100000 | 15.133737 |
3 | D10 | 2014-07-06 | 2048.9 | 2048.9 | NaN | 2048.80 | 0.141421 | 2057.000000 | 14.203169 |
4 | D10 | 2014-07-07 | 2006.4 | 2006.4 | NaN | 2027.65 | 30.052038 | 2034.666667 | 24.479856 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9738 | D500 | 2012-09-19 | 9418.8 | 9418.8 | NaN | 9425.35 | 9.263099 | 9429.466667 | 9.682114 |
9739 | D500 | 2012-09-20 | 9365.7 | 9365.7 | NaN | 9392.25 | 37.547370 | 9405.466667 | 35.056288 |
9740 | D500 | 2012-09-21 | 9445.9 | 9445.9 | NaN | 9405.80 | 56.709964 | 9410.133333 | 40.796364 |
9741 | D500 | 2012-09-22 | 9497.9 | 9497.9 | NaN | 9471.90 | 36.769553 | 9436.500000 | 66.599399 |
9742 | D500 | 2012-09-23 | 9545.3 | 9545.3 | NaN | 9521.60 | 33.516861 | 9496.366667 | 49.717737 |
9743 rows × 9 columns