---
title: "outlierensembles"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{outlierensembles}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
# Chunk defaults for the whole vignette: collapse each chunk's source and
# output into a single block and prefix printed output with "#>".
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)
```

```{r setup}
library(outlierensembles)
library(ggplot2)
library(dbscan)
```

Let us generate points on a ring and add a few anomalies inside it.
```{r dat}
# Simulate 803 points scattered around a ring, then overwrite the last three
# rows with points drawn close to the origin so that they lie inside the ring.
set.seed(1)
r1 <- runif(803)            # uniform draws, turned into angles below
r2 <- rnorm(803, mean = 5)  # normal draws, turned into radial distances below
theta <- 2 * pi * r1
R1 <- 2
R2 <- 2
dist <- r2 + R2
x <- dist * cos(theta)
y <- dist * sin(theta)

X <- data.frame(x1 = x, x2 = y)

# 0 for the first 800 rows, 1 for the three rows that are replaced below.
labs <- c(rep(0, 800), rep(1, 3))
# NOTE(review): R1, labs, nn and the *_auc vectors are not referenced by the
# other chunks visible in this vignette -- confirm before removing them.
nn <- nrow(X)
knn_auc <- lof_auc <- cof_auc <- numeric(10)

# Rows 801-803 become draws with mean (mu, 0) and standard deviation 0.2.
mu <- 0
z <- cbind(rnorm(3, mean = mu, sd = 0.2), rnorm(3, mean = 0, sd = 0.2))
X[801:803, c("x1", "x2")] <- z
# Scatter plot of the simulated points.
ggplot(data = X, mapping = aes(x = x1, y = x2)) +
  geom_point()
```

Let us find outliers using the R package dbscan and use the IRT ensemble to construct an ensemble score.

```{r outliers1}
# Local outlier factor scores for three different values of minPts.
y1 <- dbscan::lof(X, minPts = 5)
y2 <- dbscan::lof(X, minPts = 10)
y3 <- dbscan::lof(X, minPts = 20)

# Nearest-neighbour search with k = 20; columns 5, 10 and 20 of the returned
# distance matrix serve as three further anomaly scores.
knnobj <- dbscan::kNN(X, k = 20)
y4 <- knnobj$dist[, 5]
y5 <- knnobj$dist[, 10]
y6 <- knnobj$dist[, 20]

# Dense points are less anomalous and points in sparse areas are more
# anomalous, hence 1 - pointdensity is used as the score.
y7 <- 1 - dbscan::pointdensity(X, eps = 1, type = "gaussian")
y8 <- 1 - dbscan::pointdensity(X, eps = 2, type = "gaussian")
y9 <- 1 - dbscan::pointdensity(X, eps = 0.5, type = "gaussian")

# One column per detector, combined with the IRT ensemble.
Y <- data.frame(y1, y2, y3, y4, y5, y6, y7, y8, y9, check.names = FALSE)
ens1 <- irt_ensemble(Y)

# Attach the ensemble score to the coordinates and colour the points by it.
df <- data.frame(X, ens1$scores, check.names = FALSE)
names(df)[3] <- "IRT"
ggplot(df, aes(x = x1, y = x2)) +
  geom_point(aes(colour = IRT)) +
  scale_colour_gradient(low = "yellow", high = "red")
```

Then we do the greedy ensemble. 
```{r example2}
# Greedy ensemble of the detector scores in Y, shown as point colour.
ens2 <- greedy_ensemble(Y)
df <- data.frame(X, ens2$scores, check.names = FALSE)
names(df)[3] <- "Greedy"
ggplot(df, aes(x = x1, y = x2)) +
  geom_point(aes(colour = Greedy)) +
  scale_colour_gradient(low = "yellow", high = "red")
```


We do the ICWA ensemble next.
```{r example3}
# ICWA ensemble of the detector scores in Y, shown as point colour.
ens3 <- icwa_ensemble(Y)
df <- data.frame(X, ens3, check.names = FALSE)
names(df)[3] <- "ICWA"
ggplot(df, aes(x = x1, y = x2)) +
  geom_point(aes(colour = ICWA)) +
  scale_colour_gradient(low = "yellow", high = "red")
```

Next, we use the maximum scores to build the ensemble.
```{r example4}
# Max ensemble of the detector scores in Y, shown as point colour.
ens4 <- max_ensemble(Y)
df <- data.frame(X, ens4, check.names = FALSE)
names(df)[3] <- "Max"
ggplot(df, aes(x = x1, y = x2)) +
  geom_point(aes(colour = Max)) +
  scale_colour_gradient(low = "yellow", high = "red")
```

Then, we use a threshold sum to construct the ensemble.
```{r example5}
# Threshold ensemble of the detector scores in Y, shown as point colour.
ens5 <- threshold_ensemble(Y)
df <- data.frame(X, ens5, check.names = FALSE)
names(df)[3] <- "Threshold"
ggplot(df, aes(x = x1, y = x2)) +
  geom_point(aes(colour = Threshold)) +
  scale_colour_gradient(low = "yellow", high = "red")
```

Finally, we use the mean values as the ensemble score. 
```{r example6}
# Average ensemble of the detector scores in Y, shown as point colour.
ens6 <- average_ensemble(Y)
df <- data.frame(X, ens6, check.names = FALSE)
names(df)[3] <- "Average"
ggplot(df, aes(x = x1, y = x2)) +
  geom_point(aes(colour = Average)) +
  scale_colour_gradient(low = "yellow", high = "red")
```