Methods that power anomalize()
iqr(x, alpha = 0.05, max_anoms = 0.2, verbose = FALSE)
gesd(x, alpha = 0.05, max_anoms = 0.2, verbose = FALSE)
A vector of numeric data.
Controls the width of the "normal" range. Lower values are more conservative while higher values are less prone to incorrectly classifying "normal" observations.
The maximum proportion of observations permitted to be identified as anomalies (e.g., 0.2 allows up to 20% of the data).
A boolean. If TRUE, will return a list containing useful information about the anomalies. If FALSE, just returns a vector of "Yes" / "No" values.
Returns a character vector or a list, depending on the value of verbose.
The GESD method is used in Twitter's AnomalyDetection package and is also available as a function in @raunakms's GESD method.
set.seed(100)
x <- rnorm(100)
idx_outliers <- sample(100, size = 5)
x[idx_outliers] <- x[idx_outliers] + 10
iqr(x, alpha = 0.05, max_anoms = 0.2)
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "Yes" "No" "No" "Yes" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "Yes" "No" "No" "No"
iqr(x, alpha = 0.05, max_anoms = 0.2, verbose = TRUE)
#> $outlier
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "Yes" "No" "No" "Yes" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> 25% 25% 25% 25% 25% 25% 25% 25% 25%
#> "No" "No" "No" "No" "No" "Yes" "No" "No" "No"
#>
#> $outlier_idx
#> [1] 74 71 30 82 97
#>
#> $outlier_vals
#> [1] 11.648522 10.448903 10.247076 9.950004 9.167504
#>
#> $outlier_direction
#> [1] "Up" "Up" "Up" "Up" "Up"
#>
#> $critical_limits
#> limit_lower.25% limit_upper.75%
#> -4.552347 4.755455
#>
#> $outlier_report
#> # A tibble: 20 × 7
#> rank index value limit_lower limit_upper outlier direction
#> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 1 74 11.6 -4.55 4.76 Yes Up
#> 2 2 71 10.4 -4.55 4.76 Yes Up
#> 3 3 30 10.2 -4.55 4.76 Yes Up
#> 4 4 82 9.95 -4.55 4.76 Yes Up
#> 5 5 97 9.17 -4.55 4.76 Yes Up
#> 6 6 64 2.58 -4.55 4.76 No NA
#> 7 7 55 -2.27 -4.55 4.76 No NA
#> 8 8 96 2.45 -4.55 4.76 No NA
#> 9 9 20 2.31 -4.55 4.76 No NA
#> 10 10 80 -2.07 -4.55 4.76 No NA
#> 11 11 75 -2.06 -4.55 4.76 No NA
#> 12 12 84 -1.93 -4.55 4.76 No NA
#> 13 13 50 -1.88 -4.55 4.76 No NA
#> 14 14 43 -1.78 -4.55 4.76 No NA
#> 15 15 52 -1.74 -4.55 4.76 No NA
#> 16 16 54 1.90 -4.55 4.76 No NA
#> 17 17 58 1.82 -4.55 4.76 No NA
#> 18 18 32 1.76 -4.55 4.76 No NA
#> 19 19 89 1.73 -4.55 4.76 No NA
#> 20 20 57 -1.40 -4.55 4.76 No NA
#>
gesd(x, alpha = 0.05, max_anoms = 0.2)
#> [1] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [13] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [25] "No" "No" "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No"
#> [37] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [49] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [61] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "Yes" "No"
#> [73] "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "Yes" "No" "No"
#> [85] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [97] "Yes" "No" "No" "No"
gesd(x, alpha = 0.05, max_anoms = 0.2, verbose = TRUE)
#> $outlier
#> [1] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [13] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [25] "No" "No" "No" "No" "No" "Yes" "No" "No" "No" "No" "No" "No"
#> [37] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [49] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [61] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "Yes" "No"
#> [73] "No" "Yes" "No" "No" "No" "No" "No" "No" "No" "Yes" "No" "No"
#> [85] "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No" "No"
#> [97] "Yes" "No" "No" "No"
#>
#> $outlier_idx
#> [1] 74 71 30 82 97
#>
#> $outlier_vals
#> [1] 11.648522 10.448903 10.247076 9.950004 9.167504
#>
#> $outlier_direction
#> [1] "Up" "Up" "Up" "Up" "Up"
#>
#> $critical_limits
#> limit_lower limit_upper
#> -3.315690 3.175856
#>
#> $outlier_report
#> # A tibble: 20 × 7
#> rank index value limit_lower limit_upper outlier direction
#> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 1 74 11.6 -3.60 3.58 Yes Up
#> 2 2 71 10.4 -3.49 3.43 Yes Up
#> 3 3 30 10.2 -3.45 3.35 Yes Up
#> 4 4 82 9.95 -3.53 3.39 Yes Up
#> 5 5 97 9.17 -3.42 3.29 Yes Up
#> 6 6 64 2.58 -3.32 3.18 No NA
#> 7 7 96 2.45 -3.28 3.13 No NA
#> 8 8 20 2.31 -3.24 3.08 No NA
#> 9 9 55 -2.27 -3.15 2.98 No NA
#> 10 10 80 -2.07 -3.12 2.96 No NA
#> 11 11 75 -2.06 -3.05 2.91 No NA
#> 12 12 54 1.90 -2.95 2.81 No NA
#> 13 13 58 1.82 -2.78 2.63 No NA
#> 14 14 84 -1.93 -2.57 2.41 No NA
#> 15 15 32 1.76 -2.54 2.39 No NA
#> 16 16 89 1.73 -2.53 2.37 No NA
#> 17 17 50 -1.88 -2.54 2.37 No NA
#> 18 18 43 -1.78 -2.50 2.34 No NA
#> 19 19 52 -1.74 -2.46 2.31 No NA
#> 20 20 92 1.43 -2.44 2.30 No NA
#>