Methods that power anomalize()
Usage
iqr(x, alpha = 0.05, max_anoms = 0.2, verbose = FALSE)
gesd(x, alpha = 0.05, max_anoms = 0.2, verbose = FALSE)
Arguments
- x
A vector of numeric data.
- alpha
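Controls the width of the "normal" range. Lower values are more conservative: they widen the "normal" range, so fewer observations are flagged as anomalies. Higher values narrow the range, so more observations are flagged and "normal" observations are more likely to be misclassified as anomalies.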
- max_anoms
The maximum proportion of observations permitted to be identified as anomalies, expressed as a fraction (e.g. 0.2 means at most 20% of the data).
- verbose
A boolean. If TRUE, returns a list containing useful information about the anomalies. If FALSE, just returns a vector of "Yes" / "No" values.
References
The GESD method is used in Twitter's AnomalyDetection package and is also available as a function in @raunakms's GESD method.
Examples
set.seed(100)
x <- rnorm(100)
idx_outliers <- sample(100, size = 5)
x[idx_outliers] <- x[idx_outliers] + 10
iqr(x, alpha = 0.05, max_anoms = 0.2)
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "Yes" "No" "No" "Yes" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "Yes" "No" "No" "No"
iqr(x, alpha = 0.05, max_anoms = 0.2, verbose = TRUE)
#> $outlier
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "Yes" "No" "No" "Yes" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "Yes" "No" "No" "No"
#>
#> $outlier_idx
#> [1] 74 71 30 82 97
#>
#> $outlier_vals
#> [1] 11.648522 10.448903 10.247076 9.950004 9.167504
#>
#> $outlier_direction
#> [1] "Up" "Up" "Up" "Up" "Up"
#>
#> $critical_limits
#> limit_lower.25% limit_upper.75%
#> -4.552347 4.755455
#>
#> $outlier_report
#> # A tibble: 20 × 7
#> rank index value limit_lower limit_upper outlier direction
#> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 1 74 11.6 -4.55 4.76 Yes Up
#> 2 2 71 10.4 -4.55 4.76 Yes Up
#> 3 3 30 10.2 -4.55 4.76 Yes Up
#> 4 4 82 9.95 -4.55 4.76 Yes Up
#> 5 5 97 9.17 -4.55 4.76 Yes Up
#> 6 6 64 2.58 -4.55 4.76 No NA
#> 7 7 55 -2.27 -4.55 4.76 No NA
#> 8 8 96 2.45 -4.55 4.76 No NA
#> 9 9 20 2.31 -4.55 4.76 No NA
#> 10 10 80 -2.07 -4.55 4.76 No NA
#> 11 11 75 -2.06 -4.55 4.76 No NA
#> 12 12 84 -1.93 -4.55 4.76 No NA
#> 13 13 50 -1.88 -4.55 4.76 No NA
#> 14 14 43 -1.78 -4.55 4.76 No NA
#> 15 15 52 -1.74 -4.55 4.76 No NA
#> 16 16 54 1.90 -4.55 4.76 No NA
#> 17 17 58 1.82 -4.55 4.76 No NA
#> 18 18 32 1.76 -4.55 4.76 No NA
#> 19 19 89 1.73 -4.55 4.76 No NA
#> 20 20 57 -1.40 -4.55 4.76 No NA
#>
gesd(x, alpha = 0.05, max_anoms = 0.2)
#> [1] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [13] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [25] "No" "No" "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No"
#> [37] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [49] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [61] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "Yes" "No"
#> [73] "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "Yes" "No" "No"
#> [85] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [97] "Yes" "No" "No" "No"
gesd(x, alpha = 0.05, max_anoms = 0.2, verbose = TRUE)
#> $outlier
#> [1] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [13] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [25] "No" "No" "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No"
#> [37] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [49] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [61] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "Yes" "No"
#> [73] "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "Yes" "No" "No"
#> [85] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [97] "Yes" "No" "No" "No"
#>
#> $outlier_idx
#> [1] 74 71 30 82 97
#>
#> $outlier_vals
#> [1] 11.648522 10.448903 10.247076 9.950004 9.167504
#>
#> $outlier_direction
#> [1] "Up" "Up" "Up" "Up" "Up"
#>
#> $critical_limits
#> limit_lower limit_upper
#> -3.315690 3.175856
#>
#> $outlier_report
#> # A tibble: 20 × 7
#> rank index value limit_lower limit_upper outlier direction
#> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 1 74 11.6 -3.60 3.58 Yes Up
#> 2 2 71 10.4 -3.49 3.43 Yes Up
#> 3 3 30 10.2 -3.45 3.35 Yes Up
#> 4 4 82 9.95 -3.53 3.39 Yes Up
#> 5 5 97 9.17 -3.42 3.29 Yes Up
#> 6 6 64 2.58 -3.32 3.18 No NA
#> 7 7 96 2.45 -3.28 3.13 No NA
#> 8 8 20 2.31 -3.24 3.08 No NA
#> 9 9 55 -2.27 -3.15 2.98 No NA
#> 10 10 80 -2.07 -3.12 2.96 No NA
#> 11 11 75 -2.06 -3.05 2.91 No NA
#> 12 12 54 1.90 -2.95 2.81 No NA
#> 13 13 58 1.82 -2.78 2.63 No NA
#> 14 14 84 -1.93 -2.57 2.41 No NA
#> 15 15 32 1.76 -2.54 2.39 No NA
#> 16 16 89 1.73 -2.53 2.37 No NA
#> 17 17 50 -1.88 -2.54 2.37 No NA
#> 18 18 43 -1.78 -2.50 2.34 No NA
#> 19 19 52 -1.74 -2.46 2.31 No NA
#> 20 20 92 1.43 -2.44 2.30 No NA
#>