We provide R code for Zero-inflated Poisson-Gamma Model (ZIPG) with an application to longitudinal microbiome count data.
You can install the development version of ZIPG like so:
::install_github("roulan2000/ZIPG") devtools
Load dietary data. Complete Dietary data can be found in “Daily sampling reveals personalized diet-microbiome associations in humans.” (Johnson et al. 2019)
library(ZIPG)
library(ggplot2)
data("Dietary")
= Dietary
dat = 100
taxa_num $taxa_name[taxa_num] # taxa name
dat#> OTU100
#> "Burkholderiales bacterium 1_1_47"
= dat$OTU[,taxa_num] # taxa count
W = dat$M # sequencing depth
M ggplot(NULL)+
geom_boxplot(aes(
x = as.factor(dat$COV$ALC01),y=log((W+0.5)/M)))+
labs(title = dat$taxa_name[taxa_num],
x = 'ALC',y='Relative Abundance')+
theme_bw()
Use function ZIPG_main()
to run our ZIPG model.
Input :
W
: Observed taxa count data.
M
: Sequencing depth, ZIPG use log(M) as offset by
default.
X
, X_star
: Covariates of interesting of
differential abundance and differential varibility, input as
formula.
Output list:
ZIPG_res$init
: pscl results, used as
initialization.
ZIPG_res$res
: ZIPG output evaluated at last EM
iteration.
ZIPG_res$res$par
: ZIPG estimation for \(\Omega = (\beta,\beta^*,\gamma)\).
ZIPG_res$wald_test
: ZIPG Wald test
ZIPG_res$logli
: ZIPG log-likelihood
<- ZIPG_main(data = dat$COV,
ZIPG_res X = ~ALC01+nutrPC1+nutrPC2, X_star = ~ ALC01,
W = W, M = M)
= ZIPG_summary(ZIPG_res)
res #> ZIPG Wald
#> Estimation SE pval
#> beta0 -7.371 0.1512 0.00e+00 ***
#> beta1 0.121 0.1985 5.41e-01
#> beta2 0.106 0.0188 1.41e-08 ***
#> beta3 -0.118 0.0287 4.17e-05 ***
#> beta0* 0.525 0.1199 1.20e-05 ***
#> beta1* 0.606 0.1406 1.63e-05 ***
#> gamma -2.080 0.1460 4.93e-46 ***
Set the bootstrap replicates B
in
bWald_list
to conduct ZIPG-bWald, results and covariance
matrix can be find in ZIPG_res$bWald
.
set.seed(123)
# Set bootstrap replicates B
= list(B = 100)
bWald_list # Wait for a wile
= ZIPG_main(
ZIPG_res1 data = dat$COV,
X = ~ALC01+nutrPC1+nutrPC2, X_star = ~ ALC01,
W = W, M = M,
bWald_list = bWald_list)
#> Running non-parametric bootstrap wald test
#> Finish
= ZIPG_summary(ZIPG_res1,type = 'bWald')
res #> ZIPG bWald
#> Estimation SE pval
#> beta0 -7.371 0.1896 0.00e+00 ***
#> beta1 0.121 0.2323 6.01e-01
#> beta2 0.106 0.0209 3.51e-07 ***
#> beta3 -0.118 0.0342 5.95e-04 ***
#> beta0* 0.525 0.1283 4.30e-05 ***
#> beta1* 0.606 0.1740 4.97e-04 ***
#> gamma -2.080 0.3698 1.86e-08 ***
= ZIPG_CI(ZIPG_res1,type='bWald',alpha = 0.05)
res #> ZIPG Wald Confidence interval
#> Estimation lb ub
#> beta0 -7.371 -7.7421 -6.9990
#> beta1 0.121 -0.3338 0.5768
#> beta2 0.106 0.0655 0.1474
#> beta3 -0.118 -0.1847 -0.0505
#> beta0* 0.525 0.2734 0.7765
#> beta1* 0.606 0.2649 0.9470
#> gamma -2.080 -2.8046 -1.3551
To test more complicated hypothesis, you may use the covariance matirx driven from bootstrap.
round(ZIPG_res1$bWald$vcov,3)
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7]
#> [1,] 0.036 -0.040 0.001 0.003 0.005 -0.009 0.013
#> [2,] -0.040 0.054 -0.001 -0.004 -0.006 0.009 -0.008
#> [3,] 0.001 -0.001 0.000 0.000 0.000 0.001 -0.001
#> [4,] 0.003 -0.004 0.000 0.001 0.001 -0.002 0.003
#> [5,] 0.005 -0.006 0.000 0.001 0.016 -0.014 -0.012
#> [6,] -0.009 0.009 0.001 -0.002 -0.014 0.030 -0.025
#> [7,] 0.013 -0.008 -0.001 0.003 -0.012 -0.025 0.137
Set bootstrap replicates B
and the null hypothesis by
formula X0
and X_star0
in
pbWald_list
to conduct ZIPG-pbWald, results can be find in
ZIPG_res$pbWald
# test beta1star, the 6th parameter
#
= list(
pbWald_list X0 = ~ALC01 + nutrPC1+nutrPC2,
X_star0 = ~ 1,
B = 100
)
= ZIPG_main(
ZIPG_res2 data = dat$COV,
X = ~ALC01+nutrPC1+nutrPC2, X_star = ~ ALC01,
W = W, M = M,
pbWald_list= pbWald_list)
#> Running parametric bootstrap wald test
#> Finish
= ZIPG_summary(ZIPG_res2,type ='pbWald')
res #> ZIPG pbWald
#> H0: beta1* = 0
#> pvalue = 0.0099