import pandas as pd
import pytimetk as tk
dates = pd.date_range(start = '2019-01', end = '2019-03', freq = 'D')
get_timeseries_signature
get_timeseries_signature(idx, reduce_memory=False, engine='pandas')
Convert a timestamp to a set of 29 time series features.
The function get_timeseries_signature engineers 29 different date- and time-based features from a single datetime index idx.
Parameters
Name | Type | Description | Default |
---|---|---|---|
idx |
pd.Series or pd.DatetimeIndex | The idx parameter is a pandas Series or DatetimeIndex of datetime values. |
required |
reduce_memory |
bool | The reduce_memory parameter specifies whether to reduce the memory usage of the DataFrame by converting int and float columns to smaller types and str columns to categorical data. This reduces memory for large data but may reduce the resolution of float values and will change str columns to categorical. Default is False. |
False |
engine |
str | The engine parameter specifies the engine used to generate the datetime features. It can be either “pandas” or “polars”. The default value is “pandas”. When “polars” is selected, the function internally uses the polars library for feature generation, which is generally faster than “pandas” for large datasets. |
'pandas' |
Returns
Type | Description |
---|---|
pd.DataFrame | A Pandas DataFrame with 29 datetime features added to it. |
- _index_num: An int64 feature that captures the entire datetime as a numeric value to the second | |
- _year: The year of the datetime | |
- _year_iso: The iso year of the datetime | |
- _yearstart: Logical (0,1) indicating if first day of year (defined by frequency) | |
- _yearend: Logical (0,1) indicating if last day of year (defined by frequency) | |
- _leapyear: Logical (0,1) indicating if the date belongs to a leap year | |
- _half: Half year of the date: Jan-Jun = 1, Jul-Dec = 2 | |
- _quarter: Quarter of the date: Jan-Mar = 1, Apr-Jun = 2, Jul-Sep = 3, Oct-Dec = 4 | |
- _quarteryear: Quarter of the date + relative year | |
- _quarterstart: Logical (0,1) indicating if first day of quarter (defined by frequency) | |
- _quarterend: Logical (0,1) indicating if last day of quarter (defined by frequency) | |
- _month: The month of the datetime | |
- _month_lbl: The month label of the datetime | |
- _monthstart: Logical (0,1) indicating if first day of month (defined by frequency) | |
- _monthend: Logical (0,1) indicating if last day of month (defined by frequency) | |
- _yweek: The week ordinal of the year | |
- _mweek: The week ordinal of the month | |
- _wday: The number of the day of the week with Monday=1, Sunday=7 | |
- _wday_lbl: The day of the week label | |
- _mday: The day of the datetime | |
- _qday: The days of the relative quarter | |
- _yday: The ordinal day of year | |
- _weekend: Logical (0,1) indicating if the day is a weekend | |
- _hour: The hour of the datetime | |
- _minute: The minutes of the datetime | |
- _second: The seconds of the datetime | |
- _msecond: The microseconds of the datetime | |
- _nsecond: The nanoseconds of the datetime | |
- _am_pm: Half of the day, AM = ante meridiem, PM = post meridiem |
Examples
# Makes 29 new time series features from the dates
tk.get_timeseries_signature(dates, engine='pandas').glimpse()
<class 'pandas.core.frame.DataFrame'>: 60 rows of 30 columns
idx: datetime64[ns] [Timestamp('2019-01-01 00:00:00'), ...
idx_index_num: int64 [1546300800, 1546387200, 1546473600 ...
idx_year: int64 [2019, 2019, 2019, 2019, 2019, 2019 ...
idx_year_iso: UInt32 [2019, 2019, 2019, 2019, 2019, 2019 ...
idx_yearstart: uint8 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_yearend: uint8 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_leapyear: uint8 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_half: int64 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ...
idx_quarter: int64 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ...
idx_quarteryear: object ['2019Q1', '2019Q1', '2019Q1', '201 ...
idx_quarterstart: uint8 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_quarterend: uint8 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_month: int64 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ...
idx_month_lbl: object ['January', 'January', 'January', ' ...
idx_monthstart: uint8 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_monthend: uint8 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_yweek: UInt32 [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2 ...
idx_mweek: int64 [1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 ...
idx_wday: int64 [2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6 ...
idx_wday_lbl: object ['Tuesday', 'Wednesday', 'Thursday' ...
idx_mday: int64 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...
idx_qday: int64 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...
idx_yday: int64 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...
idx_weekend: int64 [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 ...
idx_hour: int64 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_minute: int64 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_second: int64 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_msecond: int64 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_nsecond: int64 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_am_pm: object ['am', 'am', 'am', 'am', 'am', 'am' ...
tk.get_timeseries_signature(dates, engine='polars').glimpse()
<class 'pandas.core.frame.DataFrame'>: 60 rows of 30 columns
idx: datetime64[ns] [Timestamp('2019-01-01 00:00:00'), ...
idx_index_num: float64 [1546300800.0, 1546387200.0, 154647 ...
idx_year: int32 [2019, 2019, 2019, 2019, 2019, 2019 ...
idx_year_iso: int32 [2019, 2019, 2019, 2019, 2019, 2019 ...
idx_yearstart: int32 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_yearend: int32 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_leapyear: int8 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_half: int32 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ...
idx_quarter: uint32 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ...
idx_quarteryear: object ['2019Q1', '2019Q1', '2019Q1', '201 ...
idx_quarterstart: int32 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_quarterend: int32 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_month: uint32 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ...
idx_month_lbl: object ['January', 'January', 'January', ' ...
idx_monthstart: int32 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_monthend: int32 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_yweek: uint32 [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2 ...
idx_mweek: uint32 [1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2 ...
idx_wday: uint32 [2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6 ...
idx_wday_lbl: object ['Tuesday', 'Wednesday', 'Thursday' ...
idx_mday: uint32 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...
idx_qday: int64 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...
idx_yday: uint32 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ...
idx_weekend: int32 [0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 ...
idx_hour: uint32 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_minute: uint32 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_second: uint32 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_msecond: uint32 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_nsecond: uint32 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ...
idx_am_pm: object ['am', 'am', 'am', 'am', 'am', 'am' ...
pd.Series(dates, name = "date").get_timeseries_signature(engine='pandas').glimpse()
<class 'pandas.core.frame.DataFrame'>: 60 rows of 30 columns
date: datetime64[ns] [Timestamp('2019-01-01 00:00:00'), ...
date_index_num: int64 [1546300800, 1546387200, 154647360 ...
date_year: int64 [2019, 2019, 2019, 2019, 2019, 201 ...
date_year_iso: UInt32 [2019, 2019, 2019, 2019, 2019, 201 ...
date_yearstart: uint8 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_yearend: uint8 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_leapyear: uint8 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_half: int64 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
date_quarter: int64 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
date_quarteryear: object ['2019Q1', '2019Q1', '2019Q1', '20 ...
date_quarterstart: uint8 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_quarterend: uint8 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_month: int64 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
date_month_lbl: object ['January', 'January', 'January', ...
date_monthstart: uint8 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_monthend: uint8 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_yweek: UInt32 [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, ...
date_mweek: int64 [1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, ...
date_wday: int64 [2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, ...
date_wday_lbl: object ['Tuesday', 'Wednesday', 'Thursday ...
date_mday: int64 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ...
date_qday: int64 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ...
date_yday: int64 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ...
date_weekend: int64 [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...
date_hour: int64 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_minute: int64 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_second: int64 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_msecond: int64 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_nsecond: int64 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_am_pm: object ['am', 'am', 'am', 'am', 'am', 'am ...
pd.Series(dates, name = "date").get_timeseries_signature(engine='polars').glimpse()
<class 'pandas.core.frame.DataFrame'>: 60 rows of 30 columns
date: datetime64[ns] [Timestamp('2019-01-01 00:00:00'), ...
date_index_num: float64 [1546300800.0, 1546387200.0, 15464 ...
date_year: int32 [2019, 2019, 2019, 2019, 2019, 201 ...
date_year_iso: int32 [2019, 2019, 2019, 2019, 2019, 201 ...
date_yearstart: int32 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_yearend: int32 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_leapyear: int8 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_half: int32 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
date_quarter: uint32 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
date_quarteryear: object ['2019Q1', '2019Q1', '2019Q1', '20 ...
date_quarterstart: int32 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_quarterend: int32 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_month: uint32 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
date_month_lbl: object ['January', 'January', 'January', ...
date_monthstart: int32 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_monthend: int32 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_yweek: uint32 [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, ...
date_mweek: uint32 [1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, ...
date_wday: uint32 [2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, ...
date_wday_lbl: object ['Tuesday', 'Wednesday', 'Thursday ...
date_mday: uint32 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ...
date_qday: int64 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ...
date_yday: uint32 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ...
date_weekend: int32 [0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, ...
date_hour: uint32 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_minute: uint32 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_second: uint32 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_msecond: uint32 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_nsecond: uint32 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
date_am_pm: object ['am', 'am', 'am', 'am', 'am', 'am ...