## ----'preamble', include=FALSE, warning=FALSE, message=FALSE, cache=FALSE---- library(knitr) # set the knitr options ... for everyone! # if you unset this, then vignette build bonks. oh, joy. #opts_knit$set(progress=TRUE) opts_knit$set(eval.after='fig.cap') # for a package vignette, you do want to echo. # opts_chunk$set(echo=FALSE,warning=FALSE,message=FALSE) opts_chunk$set(warning=FALSE,message=FALSE) #opts_chunk$set(results="asis") opts_chunk$set(cache=TRUE,cache.path="cache/MarkowitzR") #opts_chunk$set(fig.path="figure/",dev=c("pdf","cairo_ps")) opts_chunk$set(fig.path="figure/MarkowitzR",dev=c("pdf")) opts_chunk$set(fig.width=4.5,fig.height=4,dpi=300) # doing this means that png files are made of figures; # the savings is small, and it looks like shit: #opts_chunk$set(fig.path="figure/",dev=c("png","pdf","cairo_ps")) #opts_chunk$set(fig.width=4,fig.height=4) # for figures? this is sweave-specific? #opts_knit$set(eps=TRUE) # this would be for figures: #opts_chunk$set(out.width='.8\\textwidth') # for text wrapping: options(width=64,digits=2) opts_chunk$set(size="small") opts_chunk$set(tidy=TRUE,tidy.opts=list(width.cutoff=50,keep.blank.line=TRUE)) compile.time <- Sys.time() # from the environment # only recompute if FORCE_RECOMPUTE=True w/out case match. FORCE_RECOMPUTE <- (toupper(Sys.getenv('FORCE_RECOMPUTE',unset='False')) == "TRUE") # compiler flags! # not used yet LONG.FORM <- FALSE mc.resolution <- ifelse(LONG.FORM,1000,200) mc.resolution <- max(mc.resolution,100) library(MarkowitzR) gen_norm <- rnorm lseq <- function(from,to,length.out) { exp(seq(log(from),log(to),length.out = length.out)) } ## ----'the_proc',echo=TRUE,cache=FALSE------------------------- require(gtools) # set the colnames of X appropriately set.coln <- defmacro(X,expr={ if (is.null(colnames(X))) { colnames(X) <- paste0(deparse(substitute(X),nlines=1),1:(dim(X)[2])) }}) # compute Wald scores via this trick wrap.itheta <- function(rets,ret.what=c('wald','mp'),...) { set.coln(rets) ret.what <- match.arg(ret.what) # 'flipping the sign on returns is idiomatic' asymv <- itheta_vcov(- as.matrix(rets),...) qidx <- 2:asymv$pp mp <- asymv$mu[qidx] stat <- switch(ret.what, mp = mp, wald = mp / sqrt(diag(asymv$Ohat[2:asymv$pp,2:asymv$pp]))) names(stat) <- colnames(rets) return(stat) } wrap.mp <- function(rets,ret.what=c('wald','mp'),...) { set.coln(rets) ret.what <- match.arg(ret.what) asymv <- mp_vcov(as.matrix(rets),...) mp <- t(asymv$W) stat <- switch(ret.what, mp = mp, wald = mp / sqrt(diag(asymv$What))) return(stat) } # t-stat via Britten-Jones procedure bjones.ts <- function(rets) { set.coln(rets) ones.vec <- matrix(1,nrow=dim(rets)[1],ncol=1) rets <- as.matrix(rets) bjones.mod <- lm(ones.vec ~ rets - 1) bjones.sum <- summary(bjones.mod) retval <- bjones.sum$coefficients[,3] names(retval) <- colnames(rets) return(retval) } # compare the procedures do.both <- function(rets,...) { set.coln(rets) retval <- rbind(bjones.ts(rets),wrap.itheta(rets,...)) rownames(retval) <- c("Britten Jones t-stat","via itheta_vcov") return(retval) } do.all <- function(rets,...) { set.coln(rets) retval <- do.both(rets,...) retval <- rbind(retval,wrap.mp(rets,...)) rownames(retval)[dim(retval)[1]] <- "via mp_vcov" return(retval) } n.day <- 1000 n.stock <- 5 ## ----'me_vs_bjones',echo=TRUE--------------------------------- # under the null: all returns are zero mean; set.seed(as.integer(charToRaw("7af85b0b-e521-4059-bebe-55ad9a9a0456"))) rets <- matrix(rnorm(n.day * n.stock),nrow=n.day) # compare them: print(do.all(rets)) # returns under the alternative set.seed(as.integer(charToRaw("464dcbea-375b-49d0-8f38-b48b5a33c7ea"))) rets <- matrix(rnorm(n.day * n.stock,mean=1e-1),nrow=n.day) print(do.all(rets)) ## ----'get_mp',echo=TRUE--------------------------------------- # returns under the alternative set.seed(as.integer(charToRaw("464dcbea-375b-49d0-8f38-b48b5a33c7ea"))) rets <- matrix(rnorm(n.day * n.stock,mean=1e-1),nrow=n.day) print(wrap.itheta(rets,ret.what='mp')) ## ----'multiple_correlation',echo=TRUE,cache=FALSE------------- # multiple correlation coefficients of portfolio # error to precision errors. mult.cor <- function(rets,...) { set.coln(rets) # 'flipping the sign on returns is idiomatic' asymv <- itheta_vcov(- as.matrix(rets),...) Ro <- cov2cor(asymv$Ohat) prec.idx <- (asymv$p+1):(dim(asymv$Ohat)[1]) prec.Ro <- Ro[prec.idx,prec.idx] xcor <- Ro[2:asymv$p,prec.idx] R.sq <- diag(xcor %*% (solve(prec.Ro,t(xcor)))) } set.seed(as.integer(charToRaw("464dcbea-375b-49d0-8f38-b48b5a33c7ea"))) rets <- matrix(rnorm(n.day * n.stock,mean=8e-2),nrow=n.day) print(signif(100 * mult.cor(rets),digits=2)) ## ----'multiple_correlation2',echo=TRUE,cache=FALSE------------ set.seed(as.integer(charToRaw("464dcbea-375b-49d0-8f38-b48b5a33c7ea"))) rets <- matrix(rnorm(n.day * n.stock,mean=1.6e-1),nrow=n.day) print(signif(100 * mult.cor(rets),digits=2)) ## ----'conditional',echo=TRUE---------------------------------- # generate data with given W, Sigma Xgen <- function(W,Sigma,Feat) { Btrue <- Sigma %*% W Xmean <- Feat %*% t(Btrue) Shalf <- chol(Sigma) X <- Xmean + matrix(rnorm(prod(dim(Xmean))),ncol=dim(Xmean)[2]) %*% Shalf } n.feat <- 4 n.ret <- 8 n.obs <- 2000 set.seed(12321) Feat <- matrix(rnorm(n.obs * n.feat),ncol=n.feat) Wtrue <- 10 * matrix(rnorm(n.feat * n.ret),ncol=n.feat) Sigma <- cov(matrix(rnorm(100*n.ret),ncol=n.ret)) Sigma <- Sigma + diag(seq(from=1,to=3,length.out=n.ret)) X <- Xgen(Wtrue,Sigma,Feat) ism <- mp_vcov(X,feat=Feat,fit.intercept=TRUE) # a bit of legerdemain b/c there's an intercept term # fit Wcomp <- cbind(0,Wtrue) ## ----'scatfit',echo=TRUE,cache=FALSE,fig.cap=paste("Scatter of the fit value against the true value of the Markowitz Coefficien is shown, for",n.ret,"assets, and",n.feat,"predictive features, given",n.obs,"days of observations of Gaussian returns. The $y=x$ line is plotted in green.")---- # scatter them against each other w.true <- Wcomp w.fit <- ism$W dim(w.true) <- c(length(w.true),1) dim(w.fit) <- c(length(w.fit),1) plot(w.true, w.fit, main="Markowitz coefficient", xlab="True Value ", ylab="Fit Value ", pch=1) #abline(lm(w.fit ~ w.true), col="red") abline(a=0,b=1, col="green") ## ----'check_normality',echo=TRUE,cache=FALSE,fig.cap=paste("Sample quantiles of the error of the \\txtMC, transformed to approximate unit covariance using the estimated covariance, are plotted against those of the normal.")---- n.feat <- 4 n.ret <- 8 n.obs <- 2000 set.seed(12321) Feat <- matrix(rnorm(n.obs * n.feat),ncol=n.feat) Wtrue <- 10 * matrix(rnorm(n.feat * n.ret),ncol=n.feat) Sigma <- cov(matrix(rnorm(100*n.ret),ncol=n.ret)) Sigma <- Sigma + diag(seq(from=1,to=3,length.out=n.ret)) X <- Xgen(Wtrue,Sigma,Feat) ism <- mp_vcov(X,feat=Feat,fit.intercept=TRUE) Wcomp <- cbind(0,Wtrue) # compute the errors errs <- ism$W - Wcomp dim(errs) <- c(length(errs),1) # transform them to approximately identity covariance Zerr <- solve(t(chol(ism$What)),errs) # did it work? qqnorm(Zerr) qqline(Zerr,col=2) ## ----'cons_zero',echo=TRUE,cache=FALSE------------------------ # first for the case where the real Markowitz Portfolio is actually # just 'the market': equal dollar long in each stock. set.seed(as.integer(charToRaw("dbeebe3f-da7e-4d11-b014-feac88a1d6cb"))) rets <- matrix(rnorm(n.day * n.stock,mean=1e-1),nrow=n.day) Jmat <- matrix(c(1,1,1,-1),nrow=2,ncol=2*n.stock) # overbuilt Jmat <- Jmat[,1:n.stock] # fixed. print(Jmat) # first, unconstrained: print(wrap.mp(rets)) # now with subspace constraint: print(wrap.mp(rets,Jmat=Jmat)) # and print the portfolio too: print(wrap.mp(rets,ret.what='mp',Jmat=Jmat)) # now for the case where the real Markowitz portfolio is not just the market. set.seed(as.integer(charToRaw("420f1dfd-b19b-4175-83b3-b96548264bf8"))) rets <- matrix(rnorm(n.day * n.stock,mean=0),nrow=n.day) mu.off <- t(matrix(seq(from=-0.15,to=0.15,length.out=n.stock),nrow=n.stock,ncol=n.day)) rets <- rets + mu.off print(wrap.mp(rets,Jmat=Jmat)) # and print the portfolio too: print(wrap.mp(rets,ret.what='mp',Jmat=Jmat)) ## ----'cons_one',echo=TRUE,cache=FALSE------------------------- # first for the case where the real Markowitz Portfolio is actually # equal dollar long in each stock. set.seed(as.integer(charToRaw("0bda3ab6-53a7-4f5a-aa6a-0fe21edbaa20"))) rets <- matrix(rnorm(n.day * n.stock,mean=1e-1),nrow=n.day) Gmat <- matrix(1,nrow=1,ncol=n.stock) print(wrap.mp(rets,Gmat=Gmat)) # and print the portfolio too: print(wrap.mp(rets,ret.what='mp',Gmat=Gmat)) # and in the case where it is not. set.seed(as.integer(charToRaw("420f1dfd-b19b-4175-83b3-b96548264bf8"))) rets <- matrix(rnorm(n.day * n.stock,mean=0),nrow=n.day) mu.off <- t(matrix(seq(from=-0.8,to=0.8,length.out=n.stock),nrow=n.stock,ncol=n.day)) rets <- rets + mu.off Gmat <- matrix(1,nrow=1,ncol=n.stock) print(wrap.mp(rets,Gmat=Gmat)) # and print the portfolio too: print(wrap.mp(rets,ret.what='mp',Gmat=Gmat)) ## ----'rao_giri',echo=TRUE,eval=TRUE--------------------------- rao.giri <- function(rets,Gmat,...) { set.coln(rets) asymv <- mp_vcov(as.matrix(rets),Gmat=Gmat,...) stat <- asymv$mu[1] a.var <- asymv$Ohat[1,1] return(list(stat=stat,a.var=a.var,wald=stat/sqrt(a.var))) } # here we hedge out G, the rows of which span the true # Markowitz portfolio set.seed(as.integer(charToRaw("4618fc2e-9c58-4ea7-81f7-6ed771ace500"))) rets <- matrix(rnorm(n.day * n.stock,mean=1e-1),nrow=n.day) Gmat <- matrix(1,nrow=1,ncol=n.stock) print(rao.giri(rets,Gmat)$wald) # here we hedge out a random G, the rows of which do not # span the true Markowitz portfolio set.seed(as.integer(charToRaw("4abaccd9-8cac-4149-b30d-f3b4d32b44df"))) rets <- matrix(rnorm(n.day * n.stock,mean=1e-1),nrow=n.day) Gmat <- matrix(rnorm(n.stock),nrow=1,ncol=n.stock) print(rao.giri(rets,Gmat)$wald) ## ----'ff_load',eval=FALSE,echo=TRUE--------------------------- # ff.data <- read.csv(paste0('http://www.quandl.com/api/v1/datasets/', # 'KFRENCH/FACTORS_M.csv?&trim_start=1926-07-31&trim_end=2013-10-31', # '&sort_order=asc'), colClasses=c('Month'='Date')) # # rownames(ff.data) <- ff.data$Month # ff.data <- ff.data[,! (colnames(ff.data) %in% c("Month"))] # # will not matter, but convert pcts: # ff.data <- 1e-2 * ff.data ## ----'ff_load_sneaky',eval=TRUE,echo=FALSE-------------------- # sleight of hand to load precomputed data instead. fname <- system.file('extdata','ff_data.rda',package='MarkowitzR') if (fname == "") { fname <- 'ff_data.rda' } # poofs ff.data here load(fname) ## ----'ff_process',echo=TRUE,cachc=TRUE------------------------ rfr <- ff.data[,'RF'] ff.ret <- cbind(ff.data[,'Mkt.RF'], ff.data[,c('HML','SMB')] - rep(rfr,2)) colnames(ff.ret)[1] <- "MKT" # try both procedures: print(do.both(ff.ret)) # try with robust s.e. if (require(sandwich)) { print(do.both(ff.ret,vcov.func=sandwich::vcovHAC)) }