[R] RandomForest tuning the parameters

varin sacha v@r|n@@ch@ @end|ng |rom y@hoo@|r
Mon May 8 22:10:25 CEST 2023


Dear R-experts,

Here below a toy example with some error messages, especially at the end of the code (Tuning the parameters). Your help to correct my R code would be highly appreciated.


#######################################
#libraries
library(lattice)
library(ggplot2)
library(caret)
library(randomForest)
  
#Data
y=c(23,34,32,12,24,35,45,56,76,87,54,34,23,45,41,13,16,98,35,65,56,67,78,89,87,64,53,31,14,34,45,46,57,69,90,80,70,65,50,45,60,56,87,79,64,34,25,47,61,24,10,13,12,15,46,58,76,89,90,98)
x1=c(4,5,6,7,1,10,19,20,21,14,23,6,5,32,15,12,16,14,2,3,4,5,3,2,1,2,6,7,5,4,3,2,1,3,4,6,7,9,5,4,3,7,10,11,12,13,10,3,2,5,6,9,8,7,4,12,15,16,2,3)
x2=c(0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1)
 
y=as.numeric(y)
x1=as.numeric(x1)
x2=as.factor(x2)
 
X=data.frame(x1,x2)
y=y
 
#Split data into training and test sets
index=createDataPartition(y, p=0.75, list=FALSE)
X_train = X[index, ]
X_test = X[-index, ]
y_train= y[index ]
y_test = y[-index ]
 
#Train de model
regr=randomForest (x=X_train, y=y_train, maxnodes=10, ntree=10)

regr<-randomForest(y~x1+x2, data=X_train, proximity=TRUE)
regr
 
#Make prediction
predictions= predict(regr, X_test)
 
result= X_test
result['y'] = y_test
result['prediction'] = predictions
result
 
# Import library for Metrics
library(Metrics)
 
print(paste0('MAE: ' , mae(y_test,predictions) ))
print(paste0('MSE: ' ,caret::postResample(predictions , y_test)['RMSE']^2 ))
print(paste0('R2: ' ,caret::postResample(predictions , y_test)['Rsquared'] ))

 
#Tuning the parameters
N=500 #length(X_train)
X_train_ = X_train[1:N , ]
y_train_ = y_train[1:N]

seed <-7
metric<-'RMSE'

customRF <- list(type = "Regression", library = "randomForest", loop = NULL)

customRF$parameters <- data.frame(parameter = c("maxnodes", "ntree"), class = rep("numeric", 2), label = c("maxnodes", "ntree")) 

customRF$grid <- function(x, y, len = NULL, search = "grid") {}

customRF$fit <- function(x, y, wts, param, lev, last, weights, classProbs, ...) {

 randomForest(x, y, maxnodes = param$maxnodes, ntree=param$ntree, ...)

}

customRF$predict <- function(modelFit, newdata, preProc = NULL, submodels = NULL)

predict(modelFit, newdata)

customRF$prob <- function(modelFit, newdata, preProc = NULL, submodels = NULL)

  predict(modelFit, newdata, type = "prob")

customRF$sort <- function(x) x[order(x[,1]),]

customRF$levels <- function(x) x$classes

 
# Set grid search parameters
control <- trainControl(method="repeatedcv", number=10, repeats=3, search='grid')
 
# Outline the grid of parameters
tunegrid <- expand.grid(.maxnodes=c(10,20,30,50), .ntree=c(100, 200, 300))
set.seed(seed)
 
# Train the model
rf_gridsearch <- train(x=X_train_, y=y_train_, method=customRF, metric=metric, tuneGrid=tunegrid, trControl=control)
 
plot(rf_gridsearch)

rf_gridsearch$bestTune

#################################################



More information about the R-help mailing list