Dear Sir/Madam,
I am Iut, a graduate student at Bogor Agricultural Institute.
I read a book on ensemble methods in data mining by Seni and Elder and found
R code for bagging in it.
I am confused about how to call these functions and how to aggregate their
predictions by majority vote.
I think some code is missing here. Also, what would need to change if the
tree-fitting function were replaced with an SVM?
Example:
# Simulate N observations of 5 correlated predictors.
#
# Each predictor has a standard Normal marginal distribution and every pair
# of predictors has correlation 0.95.
#
# Args:
#   seed: value passed to set.seed() before drawing, for reproducibility.
#   N:    number of observations (rows) to generate.
#
# Returns:
#   A numeric N x 5 matrix with column names "x1" ... "x5".
genPredictors <- function(seed = 123, N = 30) {
  # mnormt provides rmnorm(), the multivariate normal random generator
  library(mnormt)

  n_features <- 5

  # Correlation matrix: 1 on the diagonal, 0.95 everywhere else
  Rho <- matrix(0.95, nrow = n_features, ncol = n_features)
  diag(Rho) <- 1
  mu <- rep(0, n_features)

  set.seed(seed)
  # rmnorm() drops its result to a plain vector when N == 1, which would make
  # the colnames<- call below fail; force the N x 5 shape explicitly.
  x <- matrix(rmnorm(N, mu, Rho), nrow = N, ncol = n_features)
  colnames(x) <- paste0("x", seq_len(n_features))
  return(x)
}
# Simulate a binary response that depends only on the first predictor.
#
# The response is generated according to:
#   Pr(Y = 1 | x1 <= 0.5) = 0.2
#   Pr(Y = 1 | x1 >  0.5) = 0.8
#
# Args:
#   x:    matrix (or data frame) of predictors; only column 1 is used.
#   N:    number of leading rows of x to generate a response for.
#   seed: value passed to set.seed() before drawing, for reproducibility.
#
# Returns:
#   A numeric vector of length N containing 0s and 1s (numeric(0) if N is 0).
genTarget <- function(x, N, seed = 123) {
  set.seed(seed)
  rows <- seq_len(N)  # safe for N == 0, unlike 1:N
  x1 <- x[rows, 1]

  # One uniform draw per row, in row order; this consumes the RNG stream
  # exactly as a row-by-row loop calling runif(1) would.
  u <- runif(N)

  # Probability of Y = 1 for each row, chosen by which side of 0.5 x1 lies on
  prob_one <- ifelse(x1 <= 0.5, 0.2, 0.8)

  y <- as.numeric(u <= prob_one)
  return(y)
}
# Generate row-index vectors for a set of bootstrap samples.
#
# Args:
#   seed: value passed to set.seed() before drawing, for reproducibility.
#   N:    number of bootstrap samples to generate.
#   Size: number of rows in the data set being resampled; each bootstrap
#         sample draws Size indices from 1..Size with replacement.
#
# Returns:
#   A list of length N; element i is an integer vector of Size row indices.
genBStrapSamp <- function(seed = 123, N = 200, Size = 30) {
  set.seed(seed)
  # seq_len() keeps N == 0 safe (1:0 would iterate over c(1, 0)), and
  # sample.int() keeps Size == 0 safe (1:0 would be sampled as c(1, 0)).
  sampleList <- lapply(seq_len(N), function(i) {
    sample.int(Size, replace = TRUE)
  })
  return(sampleList)
}
# Fit one classification tree per bootstrap sample.
#
# Args:
#   data:       data set to resample by row; it is passed on to
#               fitClassTree(), which models a column named y.
#   sampleList: list of row-index vectors, one per bootstrap sample.
#   N:          number of trees to fit; the first N elements of sampleList
#               are used.
#
# Returns:
#   A list of length N holding the fitted trees (an empty list if N is 0).
fitBStrapTrees <- function(data, sampleList, N) {
  # Tree-growing settings are the same for every tree, so build them once
  tree.params <- list(minsplit = 4, minbucket = 2, maxdepth = 7)

  # seq_len() keeps N == 0 safe; drop = FALSE keeps the resampled rows
  # two-dimensional even if data has a single column.
  treeList <- lapply(seq_len(N), function(i) {
    fitClassTree(data[sampleList[[i]], , drop = FALSE], tree.params)
  })
  return(treeList)
}
# Fit a single, unpruned (cp = 0) rpart classification tree of y on all
# other columns.
#
# Args:
#   x:      data frame containing the response column y and the predictors.
#   params: list with elements minsplit, minbucket and maxdepth, which are
#           forwarded to rpart's control settings.
#   w:      optional case weights (NULL for unweighted).
#   seed:   value passed to set.seed() before fitting, for reproducibility.
#
# Returns:
#   The fitted rpart object.
fitClassTree <- function(x, params, w = NULL,
                         seed = 123) {
  library(rpart)

  # Fail early with a clear message if a required setting is missing
  required <- c("minsplit", "minbucket", "maxdepth")
  missing_params <- setdiff(required, names(params))
  if (length(missing_params) > 0) {
    stop("params is missing: ", paste(missing_params, collapse = ", "),
         call. = FALSE)
  }

  set.seed(seed)
  # List elements must be read with [[ ]] (or $); names such as
  # params.minsplit would be looked up as separate, undefined variables.
  tree <- rpart(y ~ ., method = "class",
                data = x, weights = w, cp = 0,
                minsplit = params[["minsplit"]],
                minbucket = params[["minbucket"]],
                maxdepth = params[["maxdepth"]])
  return(tree)
}
Thank you very much.
Best regards,
Iut
[[alternative HTML version deleted]]