augment_spline

augment_spline(
    data,
    date_column,
    value_column,
    spline_type='bs',
    df=5,
    degree=3,
    knots=None,
    include_intercept=False,
    lower_bound=None,
    upper_bound=None,
    prefix=None,
    reduce_memory=False,
    engine='auto',
)

Add spline basis expansions for a numeric column.

Parameters

Name	Type	Description	Default
data	DataFrame or GroupBy (pandas or polars)	Input tabular data or grouped data.	required
date_column	str	Name of the datetime column used to order observations prior to building the spline basis.	required
value_column	str	Name of the numeric column to transform into spline basis features.	required
spline_type	str	Spline family. Supported values are `'bs'` (B-spline), `'natural'`/`'cr'` (natural cubic spline), and `'cyclic'`/`'cc'` (cyclic spline). Defaults to `'bs'`.	`'bs'`
df	int	Degrees of freedom passed to the spline constructor. Required unless `knots` are supplied. Defaults to 5.	`5`
degree	int	Degree of the polynomial pieces (B-spline only). Defaults to 3.	`3`
knots	Sequence[float]	Internal knot positions to use when constructing the spline basis.	`None`
include_intercept	bool	Whether to include the intercept column (B-spline only). Defaults to False.	`False`
lower_bound	float	Lower boundary for the spline. When omitted the minimum value of `value_column` is used.	`None`
upper_bound	float	Upper boundary for the spline. When omitted the maximum value of `value_column` is used.	`None`
prefix	str	Custom prefix for the generated column names. When omitted a name is derived from `value_column` and `spline_type`.	`None`
reduce_memory	bool	If True, attempt to downcast numeric columns to reduce memory usage.	`False`
engine	{'auto', 'pandas', 'polars'}	Execution engine. When set to `'auto'` (default), the backend is inferred from the input data type. Use `'pandas'` or `'polars'` to force a specific backend regardless of input type.	`'auto'`

Returns

Name	Type	Description
	DataFrame	DataFrame with spline basis columns appended. The result matches the input data backend (pandas or polars).

Examples

# Pandas Example
import pandas as pd
import polars as pl
import pytimetk as tk


df = tk.load_dataset('m4_daily', parse_dates=['date'])

df_spline = (
    df
        .query("id == 'D10'")
        .augment_spline(
            date_column='date',
            value_column='value',
            spline_type='bs',
            df=5,
            degree=3,
            prefix='value_bs'
        )
)

df_spline.head()

	id	date	value	value_bs_1	value_bs_2	value_bs_3	value_bs_4	value_bs_5
0	D10	2014-07-03	2076.2	0.341593	0.495854	0.111560	0.0	0.0
1	D10	2014-07-04	2073.4	0.347049	0.491041	0.108410	0.0	0.0
2	D10	2014-07-05	2048.7	0.392400	0.445193	0.083145	0.0	0.0
3	D10	2014-07-06	2048.9	0.392056	0.445586	0.083331	0.0	0.0
4	D10	2014-07-07	2006.4	0.453599	0.356309	0.049568	0.0	0.0

# Polars Example
pl_spline = (
    pl.from_pandas(df.query("id == 'D10'"))
    .tk.augment_spline(
        date_column='date',
        value_column='value',
        spline_type='bs',
        df=5,
        degree=3,
        prefix='value_bs'
    )
)

pl_spline.head()

shape: (5, 8)

id	date	value	value_bs_1	value_bs_2	value_bs_3	value_bs_4	value_bs_5
str	datetime[ns]	f64	f64	f64	f64	f64	f64
"D10"	2014-07-03 00:00:00	2076.2	0.341593	0.495854	0.11156	0.0	0.0
"D10"	2014-07-04 00:00:00	2073.4	0.347049	0.491041	0.10841	0.0	0.0
"D10"	2014-07-05 00:00:00	2048.7	0.3924	0.445193	0.083145	0.0	0.0
"D10"	2014-07-06 00:00:00	2048.9	0.392056	0.445586	0.083331	0.0	0.0
"D10"	2014-07-07 00:00:00	2006.4	0.453599	0.356309	0.049568	0.0	0.0