Classification modeling

To demonstrate the SSLR classification models, we will use the Wine dataset with 20% of the training data labeled:

library(SSLR)
library(tidymodels)
library(caret)
data(wine)

set.seed(1)

# Train and test data: 70/30 partition on the class column
train.index <- createDataPartition(wine$Wine, p = .7, list = FALSE)
train <- wine[ train.index,]
test  <- wine[-train.index,]

# Column position of the class label
cls <- which(colnames(wine) == "Wine")

# 20 % LABELED
# The labeled rows are sampled from `train` itself. Indices drawn from the
# full `wine` data are row positions in `wine`, not in `train`, so using them
# to subset `train` would label the wrong rows and would not keep 20 % of the
# training set labeled.
# NOTE: printed results elsewhere in this document were produced with a
# different labeled subset and may differ slightly when re-run.
labeled.index <- createDataPartition(train$Wine, p = .2, list = FALSE)
# Every training row outside the labeled sample loses its class label
train[-labeled.index,cls] <- NA

SSLR provides multiple models for solving semi-supervised classification problems; see the Model List section for the full list.

For example, we train a Decision Tree:

# Fit the semi-supervised decision tree on the partially labeled training set.
# min_samples_split is a quarter of the number of labeled indices, rounded.
m <- fit(
  SSLRDecisionTree(
    min_samples_split = round(0.25 * length(labeled.index)),
    w = 0.3
  ),
  Wine ~ .,
  data = train
)

Now we predict the classes. `predict()` returns a tibble both for class predictions (the default) and for probability predictions (`"prob"`):

# Pair the true test labels with the classes predicted by the fitted model.
test_results <- as_tibble(select(test, Wine)) %>%
  mutate(dt_class = predict(m, test)[[".pred_class"]])

test_results
#> # A tibble: 52 x 2
#>    Wine  dt_class
#>    <fct> <fct>   
#>  1 1     1       
#>  2 1     2       
#>  3 1     1       
#>  4 1     1       
#>  5 1     1       
#>  6 1     1       
#>  7 1     1       
#>  8 1     1       
#>  9 1     2       
#> 10 1     1       
#> # ... with 42 more rows

Now we can use metrics from the yardstick package:

# Accuracy of the predicted classes against the true labels
accuracy(test_results, truth = Wine, estimate = dt_class)
#> # A tibble: 1 x 3
#>   .metric  .estimator .estimate
#>   <chr>    <chr>          <dbl>
#> 1 accuracy multiclass     0.865

# Confusion matrix: predictions in rows, true classes in columns
conf_mat(test_results, truth = Wine, estimate = dt_class)
#>           Truth
#> Prediction  1  2  3
#>          1 14  1  0
#>          2  2 17  0
#>          3  1  3 14

# Using multiple metrics: bundle several yardstick metrics into one function
multi_metric <- metric_set(accuracy, kap, sens, spec, f_meas)

multi_metric(test_results, truth = Wine, estimate = dt_class)
#> # A tibble: 5 x 3
#>   .metric  .estimator .estimate
#>   <chr>    <chr>          <dbl>
#> 1 accuracy multiclass     0.865
#> 2 kap      multiclass     0.798
#> 3 sens     macro          0.878
#> 4 spec     macro          0.934
#> 5 f_meas   macro          0.867

For classification models we can use the "raw" prediction type to get the predicted labels as a factor:

# "raw" predictions: the predicted labels as a plain factor
predict(m, test, "raw")
#>  [1] 1 2 1 1 1 1 1 1 2 1 1 3 1 1 1 1 1 3 2 2 3 2 3 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2
#> [39] 3 3 3 3 3 3 3 3 3 3 3 3 3 3
#> Levels: 1 2 3

We can even use probability predictions in the Decision Tree model:

# "prob" predictions: a tibble with one probability column per class
predict(m, test, "prob")
#> # A tibble: 52 x 3
#>    .pred_1 .pred_2 .pred_3
#>      <dbl>   <dbl>   <dbl>
#>  1       1       0       0
#>  2       0       1       0
#>  3       1       0       0
#>  4       1       0       0
#>  5       1       0       0
#>  6       1       0       0
#>  7       1       0       0
#>  8       1       0       0
#>  9       0       1       0
#> 10       1       0       0
#> # ... with 42 more rows