[R-SIG-Finance] Efficient CVaR Scenario Optimization for a Large number of Scenarios

Wed Feb 15 23:11:57 CET 2012

Hi Bob,

There is a way to work with nonlinear solvers to solve this type of 
problem (without resorting to expensive global optimization approaches), 
once you work out the derivatives and decide on how to deal with the 
min/max type functions so that the problem is 'made convex'.

A complete example follows:

# We work with a smooth approximation to the max function:
epsilon = 1e-8

.max.smooth = function(x)
{
	(sqrt(x*x + epsilon ) + x) / 2
}

# the objective function (CVaR)
func.cvar = function(w, Data, alpha, mu, rmin)
{
	# we reverse the sign since we are minimizing
	-w[1] + mean( .max.smooth(w[1] - Data %*% w[-1]) )/alpha
}

# the gradient of the objective function
grad.cvar = function(w, Data, alpha, mu, rmin)
{
	epsilon2 = epsilon^2
	m = length(w)
	N = dim(Data)[1]
	g = vector(mode = "numeric", length = m)
	port = Data %*% w[-1]
	# first the derivative wrt to VaR
	g[1] = ( sum( ( port - w[1] )/( 2*N * sqrt( epsilon2 + ( port - w[1] 
)^2 ) ) ) - 0.5 )/alpha + 1
	for(i in 2:m){
		g[i] =  ( sum(Data[,i-1]/(2*N)) - sum( (Data[,i-1] * ( port - w[1]) 
)/( ( 2*N ) * (epsilon2 + ( port - w[1] )^2 )^0.5 ) ) )/alpha
	}
	# reverse the sign since we are minimizing
	return(-1 * g)
}

# the budget constraint:
func.eq = function(w, Data, alpha, mu, rmin){
	sum(w[-1]) - 1
}
# gradient of equality
grad.eq = function(w, Data, alpha, mu, rmin){
	m = length(w)-1
	return( matrix(c(0, rep(1, m)), ncol = m+1, nrow = 1) )
}

# Your inequality (weighted forecast >= rmin)
# but the solver we will use, sets: g(x) <= 0
# so we set: (rmin - weighted forecast) <= 0
func.ineq = function(w, Data, alpha, mu, rmin){
	wM = sum( mu * w[-1] )
	return( rmin - wM )
}
# gradient inequality
grad.ineq = function(w, Data, alpha, mu, rmin){
	m = length(w)
	Mu = matrix( c(0, mu), ncol = m )
	return(-Mu)
}

## Main Program Example (replicating your data)
library(mvtnorm)
mu <- c(.04, .06, .1)
sig <- c(.04, .09, .2)
cor.mat <- rbind(c( 1, .6, .2),
                  c(.6, 1, .1),
                  c(.2, .1, 1)
                  )
v.cov <- sig%o%sig*cor.mat
set.seed(100)
n <- 10000
samps <- rmvnorm(n, mean = mu, sigma = v.cov)
rmin = .08
alpha = 0.05
wmin = 0
wmax = 1

# I am going to use the nloptr solver (get it from CRAN)
library(nloptr)

.slsqp.ctrl = function(control = list())
{
	ctrl = list()
	ctrl$algorithm = "NLOPT_LD_SLSQP"
	if( is.null(control$ftol_rel) ) ctrl$ftol_rel = 1e-12 else 
ctrl$ftol_rel = control$ftol_rel
	if( is.null(control$xtol_rel) ) ctrl$xtol_rel = 1e-10 else 
ctrl$xtol_rel = control$xtol_rel	
	if( is.null(control$maxeval) ) ctrl$maxeval = 5000 else  ctrl$maxeval = 
as.integer( control$maxeval )
	if( is.null(control$maxtime) ) ctrl$maxtime = 10000 else  ctrl$maxtime 
= control$maxtime
	if( is.null(control$print_level) ) ctrl$print_level =0 else 
ctrl$print_level = as.integer( control$print_level )
	if( is.null(control$local_opts$algorithm) ) ctrl$local_opts$algorithm = 
"NLOPT_LD_MMA"
	if( is.null(control$local_opts$ftol_rel) ) ctrl$local_opts$ftol_rel = 
1e-12 else  ctrl$local_opts$ftol_rel = control$local_opts$ftol_rel
	if( is.null(control$local_opts$xtol_rel) ) ctrl$local_opts$xtol_rel = 
1e-10 else  ctrl$local_opts$xtol_rel = control$local_opts$xtol_rel	
	if( is.null(control$local_opts$print_level) ) 
ctrl$local_opts$print_level = 0 else  ctrl$local_opts$print_level = 
as.integer( control$local_opts$print_level )
	return(ctrl)
}

# set control parameters and subsolvers (use SQP)
ctrl = .slsqp.ctrl(control = list())

# set starting values
q1 = quantile(samps %*% rep(1/3,3), alpha)
# we are estimating [VaR AND the weights]
sol = nloptr(
x0 = c(q1,rep(1/3,3)),
eval_f = func.cvar,
eval_grad_f = grad.cvar,
lb = c(-1, rep( wmin, 3) ),
ub = c( 0, rep( wmax, 3) ),
opts = ctrl,
eval_g_ineq = func.ineq,
eval_jac_g_ineq = grad.ineq,
eval_g_eq = func.eq,
eval_jac_g_eq = grad.eq,
Data = samps, alpha = 0.05, mu = colMeans(samps), rmin = 0.08)

# [VaR w1 w2 w3]
# nloptr[-0.1125099 1.104523e-14 0.480181 0.519819]
# glpk[0.1125283 0.000000 0.480181 0.519819]
# end example

HTH.

Regards,

Alexios

On 15/02/2012 21:05, Robert Harlow wrote:
> Hi,
>     I am running into a memory issue when I try to run a mean-CVaR
> optimization on a large number of scenarios.  I am running R 32-bit on
> windows (see sessionInfo() output below).  I have attached code to display
> the problem below (using a sample from a random multi-variate normal
> distribution as an example, though that is not what I am actually doing.)
> I am using the methodology proposed by Uryasev (thanks to  Guy Yollin for
> the cvarOpt function) to formulate the problem as an LP.  When I have a
> small number of asset classes, I can arrive at good (though sometimes non
> "optimal" for a given set of scenarios) solutions using different
> optimizers (DEoptim, BBoptim, Rsolnp) where I don't run into the scenario
> size constraint.  However, when I increase the number of asset classes, the
> non-LP solvers do a worse and worse job of finding a stable "global"
> optimum, with the exception of DEoptim which will arrive at a good
> solution, but will take a very long time for, say, 25 asset classes and
> 10,000 scenarios.  The LP, on the other hand, seems much more sensitive to
> number of scenarios than to the number of asset classes.
> Thanks in advance,
> -Bob
>
> ## Code
> library(Rglpk)
> library(mvtnorm)
> mu<- c(.04, .06, .1)
> sig<- c(.04, .09, .2)
> cor.mat<- rbind(c( 1, .6, .2),
>                   c(.6, 1, .1),
>                   c(.2, .1, 1)
>                   )
> v.cov<- sig%o%sig*cor.mat
> set.seed(100)
> n<- 1000 ##works
> n<- 10000##doesn't work due to memory error
> samps<- rmvnorm(n, mean = mu, sigma = v.cov)
> cvarOpt<- function(rmat, alpha = .05, rmin = .05, wmin = 0, wmax = 1,
> weight.sum =1){
>      nAss = ncol(rmat) # number of assets
>      s = nrow(rmat) # number of scenarios i.e. periods
>      averet = colMeans(rmat)
>      ##create objective vector, constraint matrix, constraint rhs
>      Amat =
> rbind(cbind(rbind(1,averet),matrix(data=0,nrow=2,ncol=s+1)),cbind(rmat,diag(s),1))
>
>      objL = c(rep(0,nAss), rep(-1/(alpha*s), s), -1)
>      bvec = c(weight.sum,rmin,rep(0,s))
>      ##direction vector
>      dir.vec = c("==",">=",rep(">=",s))
>      ##bounds on weights
>      bounds = list(lower = list(ind = 1:nAss, val = rep(wmin,nAss)),upper =
> list(ind = 1:nAss, val = rep(wmax,nAss)))
>      res = Rglpk_solve_LP(obj=objL, mat=Amat, dir=dir.vec, rhs=bvec,
> types=rep("C",length(objL)), max=T, bounds=bounds)
>      w = as.numeric(res$solution[1:nAss])
>      return(list(w=w,status=res$status))
> }
> ans.cvar<- cvarOpt(rmat = samps, rmin = .08)
>
> ## session info
>
> R version 2.13.2 (2011-09-30)
> Platform: i386-pc-mingw32/i386 (32-bit)
>
> locale:
> [1] LC_COLLATE=English_United States.1252
> [2] LC_CTYPE=English_United States.1252
> [3] LC_MONETARY=English_United States.1252
> [4] LC_NUMERIC=C
> [5] LC_TIME=English_United States.1252
>
> attached base packages:
> [1] stats     graphics  grDevices utils     datasets  methods   base
>
> other attached packages:
> [1] mvtnorm_0.9-9991 Rglpk_0.3-5      slam_0.1-22
>
> loaded via a namespace (and not attached):
> [1] tools_2.13.2
>
> 	[[alternative HTML version deleted]]
>
> _______________________________________________
> R-SIG-Finance at r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-sig-finance
> -- Subscriber-posting only. If you want to post, subscribe first.
> -- Also note that this is not the r-help list where general R questions should go.
>