[R] error with caretEnsmble of different training datasets of SVM - need help

Zahyah Alharbi (CMP) Z.Alharbi at uea.ac.uk
Fri Nov 25 15:47:44 CET 2016


Hi,
The following is a reproducible example , what basically I am trying to do , is creating five imputed datasets then apply SVM to each imputed dataset using the train function in caret, then ensemble the resulted training model using caretEnsemble. Lastly, I am predicting each test set using the ensemble model.

 However, I have this error (Error: { .... is not TRUE) occur with caretEnsemble although I converted the list of the resulted model to caretlist.

Any help is truly appreciated.

        library(mice)
        library(e1071)
        library(caret)
        library("caretEnsemble")

    data <- iris
    #Generate 10% missing values at Random
    iris.mis <- prodNA(iris, noNA = 0.1)
    #remove categorical variables
    iris.mis <- subset(iris.mis, select = -c(Species))

    # 5 Imputation using mice pmm

    imp <- mice(iris.mis, m=5, maxit = 10, method = 'pmm', seed = 500)

    # save 5 imputed dataset.
    x1 <- complete(imp, action = 1, include = FALSE)
    x2 <- complete(imp, action = 2, include = FALSE)
    x3 <- complete(imp, action = 3, include = FALSE)
    x4 <- complete(imp, action = 4, include = FALSE)
    x5 <- complete(imp, action = 5, include = FALSE)

    ## Apply the following method with 10 fold across validation for each imputed set and Compute rmse for each imputed set
    avg.rmse <- NULL
    avg.foldrmse <- matrix(data = NA,nrow=5, ncol=1)
    SDofMean.rmse <- NULL
    form <- iris$Sepal.Width # target coloumn
    fold <- 10  # number of fold for cross validation
    n <- nrow(x1)  # since all data sample are the same length
    prop <- n%/%fold
    set.seed(7)
    newseq <- rank(runif(n))
    k <- as.factor((newseq - 1)%/%prop + 1)
    y <- unlist(strsplit(as.character(form), " "))[2]
    vec.error <- vector(length = fold)

    for (i in seq(fold))
    {
      avg.foldrmse <- NULL
      # Perfrom SVM method on each imputed dataset
      fit1 <- train(Sepal.Width ~., data = x1[k != i, ],method='svmLinear2')
      fit2 <- train(Sepal.Width ~., data = x2[k != i, ],method='svmLinear2')
      fit3 <- train(Sepal.Width ~., data = x3[k != i, ],method='svmLinear2')
      fit4 <- train(Sepal.Width ~., data = x4[k != i, ],method='svmLinear2')
      fit5 <- train(Sepal.Width ~., data = x5[k != i, ],method='svmLinear2')


      #combine in the created model to a list
      svm.fit <- list(svmLinear1 = fit1, svmLinear2 = fit2, svmLinear3 = fit3, svmLinear4 = fit4, svmLinear5 = fit5)

      # convert the list to cartlist
      class(svm.fit) <- "caretList"

      #create the ensemble where the error occur.
      svm.all <- caretEnsemble(svm.fit,method='svmLinear2')


      # predict the 5 test set using the ensemble model and compute the RMSE
      fcast1 <- predict(svm.all, newdata = x1[k == i, ])
      rmse1 <-  sqrt(mean((x1[k == i, ]$Sepal.Width - fcast1)^2))
      avg.foldrmse[1] <- rmse1
      # predict using test set of the Second imputed dataset
      fcast2 <- predict(svm.all, newdata = x2[k == i, ])
      rmse2 <-  sqrt(mean((x2[k == i, ]$Sepal.Width- fcast2)^2))
      avg.foldrmse[2] <- rmse2
      # predict using test set of the Third imputed dataset
      fcast3 <- predict(svm.all, newdata = x3[k == i, ])
      rmse3 <-  sqrt(mean((x3[k == i, ]$Sepal.Width- fcast3)^2))
      avg.foldrmse[3] <- rmse3
      # predict using test set of the fourth imputed dataset
      fcast4 <- predict(svm.all, newdata = x4[k == i, ])
      rmse4 <-  sqrt(mean((x4[k == i, ]$Sepal.Width - fcast4)^2))
      avg.foldrmse[4] <- rmse4
      # predict using test set of the fifth imputed dataset
      fcast5 <- predict(svm.all, newdata = x5[k == i, ])
      rmse5 <-  sqrt(mean((x5[k == i, ]$Sepal.Width - fcast5)^2))
      avg.foldrmse[5] <- rmse5

    }# end loop



Regards,
Zawahy

	[[alternative HTML version deleted]]



More information about the R-help mailing list