[R] Issues with lapply and for loop Compared to Running Function
Kevin Egan
kev|neg@n31 @end|ng |rom gm@||@com
Wed Sep 23 16:32:30 CEST 2020
Hello,
I’d like to apologise as I understand that this is a significant amount of code, but I am struggling to understand why my code develops an error when running. I have been able to obtain results for the list of matrices named xdot and ydot but am struggling with zdot as I keep getting the error "Error in seq.default(sbeta) : 'from' must be a finite number “ when trying to run either a for loop or lapply through the list of matrices. However, when I run the the bootstrap function on one of the many matrices it provides results. Is there a way to fix this issue? I’m confused as to why it works normally but not when running lapply or a for loop
set.seed(123) # seed for reproducibility
library(boot)
library(np) # used for b.star
library(dplyr) # Used for Centering and Standardizing
library(deSolve) # ODE
library(lars)
# linear 3D system
Linear3D_derivative <- function(n, eta, polyorder){
n <- round(n, 0)
# n = number of time points rounded to nearest integer
# noise = noise to be added
# polyorder = degree of polynomial
times.3D <- seq(0, ((n)-1)*0.01, by = 0.01)
A.linear3D <- matrix(c(-0.1, -2, 0,
2, -0.1, 0,
0, 0, -0.3), 3, 3)
state.linear3D <- c(X = 2, Y = 0, Z = 1)
linear3D <- function(t, A, b) {
with(as.list(c(A, b)), {
dX <- A %*% b
list(c(dX))
})
}
out.linear3D <- ode(y = state.linear3D, times = times.3D,
func = linear3D, parms = A.linear3D)
out.linear3D <- out.linear3D[,-1]
out.linear3D.sorted <- data.frame(out.linear3D)[-1]
out.linear3D.sorted <- out.linear3D[,c(3,2,1)] # Rearrange for Theta matrix: X0.0.1 = x, X0.1.0 = y, X1.0.0 = z
# Polynomial Expansion
expanded.theta <- polym(as.matrix(out.linear3D.sorted), degree = polyorder, raw = T)
# Order by degree using as.numeric_version
# numeric_version allows to convert names of variables and expand without limit
ordered.results <- order(attr(expanded.theta, "degree"),
as.numeric_version(colnames(expanded.theta)))
# Sort Theta Matrix
sorted.theta <- expanded.theta[,ordered.results]
sorted.theta <- data.frame(sorted.theta)
# Change Variable Names
s <- strsplit(substring(colnames(sorted.theta), 2), "\\.")
colnames(sorted.theta) <- sapply(s, function(x){
vec <- c("x", "y", "z")[seq_along(x)]
x <- as.integer(x)
y <- rep(vec, rev(x))
paste(y, collapse = "")
})
# Add ones column to theta matrix
sorted.theta <- data.frame(1, sorted.theta)
# That lost the attributes, so put them back
attr(sorted.theta, "degree") <- c(0, attr(expanded.theta, "degree")[ordered.results])
sorted.theta <- sorted.theta[,order(attr(sorted.theta, "degree"), colnames(sorted.theta))]
# That lost the attributes again, so put them back
attr(sorted.theta, "degree") <- c(0, attr(expanded.theta, "degree")[ordered.results])
# Create Derivative matrix
dx <- matrix(NA, nrow = nrow(out.linear3D.sorted), ncol = ncol(out.linear3D.sorted))
for (i in 1:nrow(out.linear3D.sorted)){
# lorenz returns a list with one element. To assign to dx you have extract the list element using [[1]]
dx[i,] <- linear3D(0, A.linear3D, out.linear3D[i,])[[1]]
}
# Add Noise
length <- nrow(dx) * ncol(dx)
dx <- dx + eta*matrix(rnorm(length, mean = 0, sd = 1), nrow(dx))
# Derivative Variables
xdot <- dx[,1]
ydot <- dx[,2]
zdot <- dx[,3]
# Combine Matrices
xdot.df <- data.frame(cbind(xdot, sorted.theta))
ydot.df <- data.frame(cbind(ydot, sorted.theta))
zdot.df <- data.frame(cbind(zdot, sorted.theta))
# Center y, X will be standardized in the modelling function
y.xdot <- xdot.df %>% dplyr::select(xdot) %>% scale(center = TRUE, scale = FALSE) %>% as.matrix()
X.xdot <- xdot.df %>% dplyr::select(-xdot) %>% as.matrix()
xdot.matrix <- as.matrix(cbind(y.xdot,X.xdot))
colnames(xdot.matrix)[which(colnames(xdot.matrix) == "X1")] <- "1"
y.ydot <- ydot.df %>% dplyr::select(ydot) %>% scale(center = TRUE, scale = FALSE) %>% as.matrix()
X.ydot <- ydot.df %>% dplyr::select(-ydot) %>% as.matrix()
ydot.matrix <- as.matrix(cbind(y.ydot,X.ydot))
colnames(ydot.matrix)[which(colnames(ydot.matrix) == "X1")] <- "1"
y.zdot <- zdot.df %>% dplyr::select(zdot) %>% scale(center = TRUE, scale = FALSE) %>% as.matrix()
X.zdot <- zdot.df %>% dplyr::select(-zdot) %>% as.matrix()
zdot.matrix <- as.matrix(cbind(y.zdot,X.zdot))
colnames(zdot.matrix)[which(colnames(zdot.matrix) == "X1")] <- "1"
return(list(xdot.matrix=xdot.matrix, ydot.matrix = ydot.matrix, zdot.matrix = zdot.matrix, col.names = colnames(X.xdot),
degree.theta = attr(sorted.theta, "degree")))
}
# lars alasso step BIC with OLS weights
lars_alasso_OLSweights_fn <- function(data,index){ #index is the bootstrap sample index
x <- data[index,-1]
y <- data[index,1]
m <-ncol(x)
n <-nrow(x)
x <- as.matrix(x)
# standardize variables like lars does
one <- rep(1, n)
meanx <- drop(one %*% x)/n
xc <- scale(x, meanx, FALSE) # first subtracts mean
xc[,1] <- 1
normx <- sqrt(drop(one %*% (xc^2)))
names(normx) <- NULL
xs <- scale(xc, FALSE, normx)
xs[,1] <- 1
# Perform OLS for weights vector
out.ls<-lm(y~xs) # ols fit on standardized
beta.ols<-out.ls$coeff[2:(m+1)] # ols except for intercept
# Some Coefficients may be NA so make them 0
beta.ols[is.na(beta.ols)] <- 0
w<-abs(beta.ols)
# Scale x using weights vector
xs2 <- scale(xs,center=FALSE,scale=1/w)
xs2[,1] <- 1
object <- lars(xs2,y,type="lasso",normalize=FALSE)
bic <- ((log(n)/(n))*object$df)+(as.vector(object$RSS)/(n)) # Tibshirani On the degrees of freedom
step.bic <- which.min(bic)
#bic <- log(n)*object$df+n*log(as.vector(object$RSS)/n)
coeff <- as.vector(coef(object, s = step.bic, mode= "step"))
# coeff <- predict.lars(object,xs2,s=step.bic,type="coef",mode="step")$coefficients
# get back in right scale by multiplying by weights vector
coeff <- coeff*w/normx
coeff[is.na(coeff)] <- 0
coef <- as.vector(coeff)
return(coef)
}
# lars alasso step BIC with OLS weights plus OLS post selection
lars_alassoOLS_OLSweights_fn <- function(data,index){ #index is the bootstrap sample index
x <- data[index,-1]
y <- data[index,1]
m <-ncol(x)
n <-nrow(x)
x <- as.matrix(x)
# standardize variables like lars does
one <- rep(1, n)
meanx <- drop(one %*% x)/n
xc <- scale(x, meanx, FALSE) # first subtracts mean
xc[,1] <- 1
normx <- sqrt(drop(one %*% (xc^2)))
names(normx) <- NULL
xs <- scale(xc, FALSE, normx)
xs[,1] <- 1
# Perform OLS for weights vector
out.ls<-lm(y~xs) # ols fit on standardized
beta.ols<-out.ls$coeff[2:(m+1)] # ols except for intercept
# Some Coefficients may be NA so make them 0
beta.ols[is.na(beta.ols)] <- 0
w <- abs(beta.ols)
# Scale xs using weights vector
xs2 <- scale(xs,center=FALSE,scale=1/w)
xs2[,1] <- 1
object <- lars(xs2,y,type="lasso",normalize=FALSE)
bic <- ((log(n)/(n))*object$df)+(as.vector(object$RSS)/(n)) # Tibshirani On the degrees of freedom
# bic <- log(n)*object$df+n*log(as.vector(object$RSS)/n)
step.bic <- which.min(bic)
coeff <- as.vector(coef(object,s=step.bic,mode="step"))
# coeff <- predict.lars(object,xs2,s=step.bic,type="coef",mode="step")$coefficients
coeff <- coeff*w/normx # get back in right scale by multiplying by weights vector
coeff[is.na(coeff)] <- 0
coef_nonzero <- as.vector(coeff) != 0
if(all(coef_nonzero == F)){
ls_coef <- coeff
} else{
ls.obj <- glm(y~x[, coef_nonzero, drop = FALSE])
ls_coef <- (ls.obj$coefficients)[-1]
}
vect_coef <- rep(0,length(coef_nonzero))
vect_coef[coef_nonzero] <- ls_coef
return(vect_coef)
}
### Non-parametric bootstrap
# Determine block length using b.star function
bootstrap_function <- function(data, alpha, polynomial.orders){
# polynomial.orders <- attr(expanded.theta, "degree")[ordered.results] degree of variables
bstar <- b.star(data[,1], round = TRUE) # Block length for derivative variable
blocklength <- bstar[,2] # Select Block Length of cirular block result
# tsboot Function on Original Dataframe
# Run tsboot on function using blocklength of derivative variable
alasso.boot.ts <- tsboot(data,lars_alasso_OLSweights_fn, R=1000,
sim = "fixed", l = blocklength)
alasso.boot.t0<- alasso.boot.ts$t0 # the estimator from original data set
alasso.boot.t <- alasso.boot.ts$t # Matrix of coefficients from bootstrap samples
# Determine number of variables with polynomial degree less than or equal to
## largest non-zero column polynomial degree
alasso.boot.t.nonzero <- apply(alasso.boot.t, 2, function(c)sum(c!=0))
alasso.boot.t.nz.max <- max(which(alasso.boot.t.nonzero!=0))
#add 1 for ones column
new.theta.order <- sum(polynomial.orders<=polynomial.orders[alasso.boot.t.nz.max])
# Rerun Bootstrap with Truncated Matrix
post.boot.matrix <- data[,0:new.theta.order+1] # adding 1 to include derivative variable
alassoOLS.boot.ts <- tsboot(post.boot.matrix,lars_alassoOLS_OLSweights_fn,
R=1000, sim = "fixed", l = blocklength)
alassoOLS.boot.t0<- alassoOLS.boot.ts$t0 # the estimator from iterative data set
alassoOLS.boot.t <- alassoOLS.boot.ts$t
variables <- length(alassoOLS.boot.t0) # Number of Variables considered
coef.nonzero <- length(which(alassoOLS.boot.t0 != 0)) # variables selected
q <- (alpha*coef.nonzero)/variables # q = alpha
B <- alassoOLS.boot.ts$R # Number of bootstraps
q1 <- (B*q)/2 # Lower bound
q2 <- B-q1+1 # Upper bound
# Sort and determine value of lower and upper bound
bound.percentile <- apply(alassoOLS.boot.t,2, function(u){CI = sort(u)[c(round(q1,0),round(q2,0))]})
# See if variables are within confidence intervals
within.ci.check <- (alassoOLS.boot.t0 >= bound.percentile[1,] & alassoOLS.boot.t0 <= bound.percentile[2,])
iterative.df.colnames <- colnames(post.boot.matrix)
iterative.theta.colnames <- iterative.df.colnames[-c(1)]
return(list(ci=bound.percentile, point.estimates = alassoOLS.boot.t0, within.ci.check = within.ci.check, point.estimate.colnames = iterative.theta.colnames))
}
s <- seq(4.77, 5, by = 0.01)
k <- 10^s
systems <- lapply(k, eta = 0, polyorder = 3, Linear3D_derivative)
xdot <- lapply(systems, `[[`, 1)
ydot <- lapply(systems, `[[`, 2)
zdot <- lapply(systems, `[[`, 3)
zdot.results <- lapply(zdot, alpha = 0.05, polynomial.orders = systems[[1]]$degree.theta, bootstrap_function)
I’ve also tried to use mclapply to speed up the process.
Thanks
More information about the R-help
mailing list