Get date features from a time-series index
Details
tk_get_timeseries_signature
decomposes the timeseries into commonly
needed features such as
numeric value, differences,
year, month, day, day of week, day of month,
day of year, hour, minute, second.
tk_get_timeseries_summary
returns the
start, end, units, scale, and a "summary" of the timeseries differences
in seconds including
the minimum, 1st quartile, median, mean, 3rd quartile, and maximum frequency.
The timeseries
differences give the user a better picture of the index frequency
so the user can understand the level of regularity or irregularity.
A perfectly regular time series will have equal values in seconds for each metric.
However, this is not often the case.
Important Note: These functions only work with time-based indexes in datetime, date, yearmon, and yearqtr values. Regularized dates cannot be decomposed.
Examples
library(dplyr)
library(lubridate)
library(zoo)
#>
#> Attaching package: ‘zoo’
#> The following objects are masked from ‘package:base’:
#>
#> as.Date, as.Date.numeric
# Works with time-based tibbles
FB_tbl <- FANG %>% dplyr::filter(symbol == "FB")
FB_idx <- tk_index(FB_tbl)
tk_get_timeseries_signature(FB_idx)
#> # A tibble: 1,008 × 29
#> index index.num diff year year.iso half quarter month month.xts
#> <date> <dbl> <dbl> <int> <int> <int> <int> <int> <int>
#> 1 2013-01-02 1357084800 NA 2013 2013 1 1 1 0
#> 2 2013-01-03 1357171200 86400 2013 2013 1 1 1 0
#> 3 2013-01-04 1357257600 86400 2013 2013 1 1 1 0
#> 4 2013-01-07 1357516800 259200 2013 2013 1 1 1 0
#> 5 2013-01-08 1357603200 86400 2013 2013 1 1 1 0
#> 6 2013-01-09 1357689600 86400 2013 2013 1 1 1 0
#> 7 2013-01-10 1357776000 86400 2013 2013 1 1 1 0
#> 8 2013-01-11 1357862400 86400 2013 2013 1 1 1 0
#> 9 2013-01-14 1358121600 259200 2013 2013 1 1 1 0
#> 10 2013-01-15 1358208000 86400 2013 2013 1 1 1 0
#> # ℹ 998 more rows
#> # ℹ 20 more variables: month.lbl <ord>, day <int>, hour <int>, minute <int>,
#> # second <int>, hour12 <int>, am.pm <int>, wday <int>, wday.xts <int>,
#> # wday.lbl <ord>, mday <int>, qday <int>, yday <int>, mweek <int>,
#> # week <int>, week.iso <int>, week2 <int>, week3 <int>, week4 <int>,
#> # mday7 <int>
tk_get_timeseries_summary(FB_idx)
#> # A tibble: 1 × 12
#> n.obs start end units scale tzone diff.minimum diff.q1 diff.median
#> <int> <date> <date> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 1008 2013-01-02 2016-12-30 days day UTC 86400 86400 86400
#> # ℹ 3 more variables: diff.mean <dbl>, diff.q3 <dbl>, diff.maximum <dbl>
# Works with dates in any periodicity
idx_weekly <- seq.Date(from = lubridate::ymd("2016-01-01"), by = 'week', length.out = 6)
tk_get_timeseries_signature(idx_weekly)
#> # A tibble: 6 × 29
#> index index.num diff year year.iso half quarter month month.xts
#> <date> <dbl> <dbl> <int> <int> <int> <int> <int> <int>
#> 1 2016-01-01 1451606400 NA 2016 2015 1 1 1 0
#> 2 2016-01-08 1452211200 604800 2016 2016 1 1 1 0
#> 3 2016-01-15 1452816000 604800 2016 2016 1 1 1 0
#> 4 2016-01-22 1453420800 604800 2016 2016 1 1 1 0
#> 5 2016-01-29 1454025600 604800 2016 2016 1 1 1 0
#> 6 2016-02-05 1454630400 604800 2016 2016 1 1 2 1
#> # ℹ 20 more variables: month.lbl <ord>, day <int>, hour <int>, minute <int>,
#> # second <int>, hour12 <int>, am.pm <int>, wday <int>, wday.xts <int>,
#> # wday.lbl <ord>, mday <int>, qday <int>, yday <int>, mweek <int>,
#> # week <int>, week.iso <int>, week2 <int>, week3 <int>, week4 <int>,
#> # mday7 <int>
tk_get_timeseries_summary(idx_weekly)
#> # A tibble: 1 × 12
#> n.obs start end units scale tzone diff.minimum diff.q1 diff.median
#> <int> <date> <date> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 6 2016-01-01 2016-02-05 days week UTC 604800 604800 604800
#> # ℹ 3 more variables: diff.mean <dbl>, diff.q3 <dbl>, diff.maximum <dbl>
# Works with zoo yearmon and yearqtr classes
idx_yearmon <- seq.Date(from = lubridate::ymd("2016-01-01"),
by = "month",
length.out = 12) %>%
zoo::as.yearmon()
tk_get_timeseries_signature(idx_yearmon)
#> # A tibble: 12 × 29
#> index index.num diff year year.iso half quarter month month.xts
#> <yearmon> <dbl> <dbl> <int> <int> <int> <int> <int> <int>
#> 1 Jan 2016 1451606400 NA 2016 2015 1 1 1 0
#> 2 Feb 2016 1454284800 2678400 2016 2016 1 1 2 1
#> 3 Mar 2016 1456790400 2505600 2016 2016 1 1 3 2
#> 4 Apr 2016 1459468800 2678400 2016 2016 1 2 4 3
#> 5 May 2016 1462060800 2592000 2016 2016 1 2 5 4
#> 6 Jun 2016 1464739200 2678400 2016 2016 1 2 6 5
#> 7 Jul 2016 1467331200 2592000 2016 2016 2 3 7 6
#> 8 Aug 2016 1470009600 2678400 2016 2016 2 3 8 7
#> 9 Sep 2016 1472688000 2678400 2016 2016 2 3 9 8
#> 10 Oct 2016 1475280000 2592000 2016 2016 2 4 10 9
#> 11 Nov 2016 1477958400 2678400 2016 2016 2 4 11 10
#> 12 Dec 2016 1480550400 2592000 2016 2016 2 4 12 11
#> # ℹ 20 more variables: month.lbl <ord>, day <int>, hour <int>, minute <int>,
#> # second <int>, hour12 <int>, am.pm <int>, wday <int>, wday.xts <int>,
#> # wday.lbl <ord>, mday <int>, qday <int>, yday <int>, mweek <int>,
#> # week <int>, week.iso <int>, week2 <int>, week3 <int>, week4 <int>,
#> # mday7 <int>
tk_get_timeseries_summary(idx_yearmon)
#> # A tibble: 1 × 12
#> n.obs start end units scale tzone diff.minimum diff.q1 diff.median diff.mean
#> <int> <yea> <yea> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 12 Jan … Dec … days month UTC 2505600 2592000 2678400 2631273.
#> # ℹ 2 more variables: diff.q3 <dbl>, diff.maximum <dbl>