DBpower Tutorial

Worked Example

Suppose we would like to reproduce one of the power calculations from Figure 2 of Sun, Shi, & Lin (submitted). Let us focus on the calulations for Signal Location 1 in panel 1A. In this setting, we have five genotypes that we would like to test for association with a single outcome. In the true model, the effect sizes of these genotypes are (0.25, 0, 0, 0, 0). The variance of the outcome is 1. The minor allele frequencies of the genotypes is 0.3, there are 400 subjects, and we do not fit any other covariates in the model except for the genotypes. The first three genotypes are correlated at \(\rho_{1} = 0.3\), the last two genotypes are correlated at \(\rho_{2} = 0.3\), and the correlation between the two blocks is \(\rho_{3} = 0.1\) We test at \(\alpha = 0.01\).

First we need to calculate the rejection region for this setting:

library(DBpower)
library(magrittr)
set.seed(0)

# make the correlation matrix of the genotypes
corMat <- matrix(data=NA, nrow=5, ncol=5)
corMat[1:2, 1:2] <- 0.3
corMat[3:5, 3:5] <- 0.3
corMat[1:2, 3:5] <- 0.1
corMat[3:5, 1:2] <- 0.1
diag(corMat) <- 1

# calculate rejection region for this setting.
# the iBJ bounds are the same as the BJ bounds, because iBJ decorrelates
# the test statistics first and then applies the standard BJ assuming independence.
bjBounds <- set_BJ_bounds(alpha = 0.01, J=5)
bjBounds

## [1] 1.791455 1.791455 1.791455 2.347593 3.352179

gbjBounds <- set_GBJ_bounds(alpha = 0.01, J=5, sig_vec = corMat[lower.tri(corMat)])
gbjBounds

## [1] 1.913604 1.913604 1.913604 2.466072 3.338019

Next we need to calculate the distribution of the test statistics under the alternative:

# eigendecomposition of the correlation matrix
eVals <- eigen(corMat)$values
eVecs <- eigen(corMat)$vectors

# calculate the distributions of the test statistics under the alternative
effectSizes <- c(0.25, 0, 0, 0, 0)
MAF <- 0.3
n <- 400
sigSqY <- 1
Wg <- diag(rep(sqrt(2 * MAF * (1 - MAF)), 5))

# these are the approximate means of the generalized and innovated test statistics
genMean <- as.numeric( sqrt(n / sigSqY) * corMat %*% Wg %*% effectSizes )
genMean

## [1] 3.2403703 0.9721111 0.3240370 0.3240370 0.3240370

innMean <- as.numeric( sqrt(n / sigSqY) * diag(sqrt(eVals)) %*% t(eVecs) %*% Wg %*% effectSizes )
innMean

## [1] -1.476050  2.155522  0.000000 -1.369555  1.341387

And now we can calculate lower and upper bounds on power:

# upper and lower bounds on power for iBJ
innBounds <- calcb2(lower = TRUE, upper = TRUE, muVec = innMean, sigMat = diag(rep(1, 5)), bounds = bjBounds)
c(innBounds$lowerProb, innBounds$upperProb)

## [1] 0.3301290 0.4088431

# upper and lower bounds on power for GBJ
genBounds <- calcb2(lower = TRUE, upper = TRUE, muVec = genMean, sigMat = corMat, bounds = gbjBounds)
# clearly we should use the GBJ in this situation
c(genBounds$lowerProb, genBounds$upperProb)

## [1] 0.4928451 0.5032327

Additionally, we can also calculate the exact power when the size of the set is five elements or less (otherwise it will be too computationally expensive):

# note that the exact power does indeed fall within the bounds
innPower <- calc_exact_power(bounds = bjBounds, sig_mat = diag(rep(1, 5)), muVec = innMean)
innPower$power

## [1] 0.408814

genPower <- calc_exact_power(bounds = gbjBounds, sig_mat = corMat, muVec = genMean)
genPower$power

## [1] 0.5032047

We can also perform simulations to confirm that our calculations are correct:

# these functions simulate the probability of falling in the simplified rejection regions
# used to calculate the lower and upper bounds - note how they match the lower and upper bounds
# calculated above.
simBoundsInn <- sim_b2(lower=TRUE, upper=TRUE, n = 30000, muVec = innMean, sigMat = diag(rep(1, 5)), 
                             bounds = bjBounds)
simBoundsInn

## $lowerBound
## [1] 0.3274667
## 
## $upperBound
## [1] 0.4042667

simBoundsGen <- sim_b2(lower=TRUE, upper=TRUE, n = 30000, muVec = genMean, sigMat = corMat, bounds = gbjBounds)
simBoundsGen

## $lowerBound
## [1] 0.4940333
## 
## $upperBound
## [1] 0.5044

# these functions simulate the exact power - note how they match the exact power calculations
simPowerInn <- sim_power_mvn(n = 30000, muVec = innMean, sigMat = diag(rep(1, 5)), bounds=bjBounds, test=NULL, alpha = alpha)
simPowerInn$boundsPower

## [1] 0.4121

simPowerGen <- sim_power_mvn(n = 30000, muVec = genMean, sigMat = corMat, bounds=gbjBounds, test=NULL, alpha = alpha)
simPowerGen$boundsPower

## [1] 0.4992333

We can also simulate to confirm that we calculated the correct distribution of test statistics under the alternative:

# simulate test statistics
gMat <- bindata::rmvbin(n=n, margprob = rep(MAF, 5), bincorr = corMat) +
        bindata::rmvbin(n=n, margprob = rep(MAF, 5), bincorr = corMat)
xMat <- matrix(data=1, nrow=n, ncol=1)

# note the good correspondence
simStatsOutput <- sim_stats_mef(B=10000, sigSq = sigSqY, xMat = xMat, decompTrue = eigen(corMat),
                                gMat = gMat, alphaVec = c(0), betaVec = effectSizes, checkpoint = FALSE)
apply(simStatsOutput$zMat, 2 , mean)

## [1] 3.1086862 1.1812643 0.3346979 0.3148885 0.4615489

genMean

## [1] 3.2403703 0.9721111 0.3240370 0.3240370 0.3240370

apply(simStatsOutput$iMat, 2 , mean)

## [1] -1.5494919  2.1634601  0.1308509 -1.1855516  1.1175763

innMean

## [1] -1.476050  2.155522  0.000000 -1.369555  1.341387

If we want to test another SNP set (e.g. another gene) or try a different specification of the alternative, just input those new parameters and calculate the bounds again.

# new correlation matrix 
corMatNew <- matrix(data=NA, nrow=5, ncol=5)
corMatNew[1:2, 1:2] <- 0.7
corMatNew[3:5, 3:5] <- 0.7
corMatNew[1:2, 3:5] <- 0.5
corMatNew[3:5, 1:2] <- 0.5
diag(corMatNew) <- 1

# new effect sizes
effectSizesNew <- c(0, 0.25, 0, 0, 0)

# new rejection regions
bjBoundsNew <- set_BJ_bounds(alpha = 0.01, J=5)
bjBoundsNew

## [1] 1.791455 1.791455 1.791455 2.347593 3.352179

gbjBoundsNew <- set_GBJ_bounds(alpha = 0.01, J=5, sig_vec = corMatNew[lower.tri(corMatNew)])
gbjBoundsNew

## [1] 2.415717 2.415717 2.415717 2.804292 3.273987

# eigendecomposition of the correlation matrix
eValsNew <- eigen(corMatNew)$values
eVecsNew <- eigen(corMatNew)$vectors
  
# these are the approximate means of the generalized and innovated test statistics
genMeanNew <- as.numeric(sqrt(n / sigSqY) * corMatNew %*% Wg %*% effectSizesNew)
innMeanNew <- as.numeric(sqrt(n / sigSqY) * diag(sqrt(eValsNew)) %*% t(eVecsNew) %*% Wg %*% effectSizesNew)

# calculate lower and upper bounds on power
innBoundsNew <- calcb2(lower = TRUE, upper = TRUE, muVec = innMeanNew, sigMat = diag(rep(1, 5)), bounds = bjBoundsNew)
c(innBoundsNew$lowerProb, innBoundsNew$upperProb)

## [1] 0.3731720 0.4236479

genBoundsNew <- calcb2(lower = TRUE, upper = TRUE, muVec = genMeanNew, sigMat = corMatNew, bounds = gbjBoundsNew)
c(genBoundsNew$lowerProb, genBoundsNew$upperProb)

## [1] 0.5500517 0.5669133

Questions or novel applications? Please let me know! Contact information can be found in the package description.

DBpower Tutorial

Ryan Sun

2022-02-09

Introduction

Set-based testing

Use

Worked Example