## ----setup-------------------------------------------------------------------- library(ApplyPolygenicScore) ## ----import-vcf--------------------------------------------------------------- vcf.file <- system.file( 'extdata', 'HG001_GIAB.vcf.gz', package = 'ApplyPolygenicScore', mustWork = TRUE ) vcf.data <- import.vcf( vcf.path = vcf.file, info.fields = NULL, format.fields = NULL, verbose = TRUE ) str(vcf.data) ## ----import-pgs--------------------------------------------------------------- pgs.file <- system.file( 'extdata', 'PGS003378_hmPOS_GRCh38.txt.gz', package = 'ApplyPolygenicScore', mustWork = TRUE ) pgs.data <- import.pgs.weight.file( pgs.weight.path = pgs.file, use.harmonized.data = TRUE ) str(pgs.data) ## ----import-phenotype--------------------------------------------------------- # Isolating the individual IDs from the VCF data vcf.individuals <- unique(vcf.data$dat$Indiv) # Simulating phenotype data set.seed(123) phenotype.data <- data.frame( Indiv = vcf.individuals, continuous.phenotype = rnorm(length(vcf.individuals)), binary.phenotype = rbinom(length(vcf.individuals), 1, 0.5) ) head(phenotype.data) ## ----convert-pgs-to-bed------------------------------------------------------- pgs.coordinate.info <- pgs.data$pgs.weight.data pgs.bed.format <- convert.pgs.to.bed( pgs.weight.data = pgs.coordinate.info, chr.prefix = TRUE, numeric.sex.chr = FALSE, slop = 10 ) head(pgs.bed.format) ## ----merge-pgs-bed------------------------------------------------------------ # convert your PGS weight files with no added slop pgs.bed1 <- convert.pgs.to.bed(pgs.coordinate.info, slop = 0) # simulating a second PGS with all coordinates shifted by 20 base pairs. pgs.bed2 <- pgs.bed1 pgs.bed2$start <- pgs.bed2$start + 20 pgs.bed2$end <- pgs.bed2$end + 20 # Input must be a named list pgs.bed.list <- list(PGS1 = pgs.bed1, PGS2 = pgs.bed2) merged.pgs.bed <- combine.pgs.bed( pgs.bed.list = pgs.bed.list, add.annotation.data = TRUE, annotation.column.index = which(colnames(pgs.bed1) == 'rsID') # keep information from the rsID column during merge ) str(merged.pgs.bed) ## ----check-pgs-columns-------------------------------------------------------- apply.polygenic.score( vcf.data = vcf.data$dat, pgs.weight.data = pgs.data$pgs.weight.data, phenotype.data = phenotype.data, validate.inputs.only = TRUE ) ## ----apply-pgs---------------------------------------------------------------- pgs.results <- apply.polygenic.score( vcf.data = vcf.data$dat, pgs.weight.data = pgs.data$pgs.weight.data, correct.strand.flips = FALSE, # no strand flip check to avoid warnings missing.genotype.method = 'none' ) str(pgs.results) ## ----merge-vcf-with-pgs------------------------------------------------------- merged.vcf.pgs.data <- combine.vcf.with.pgs( vcf.data = vcf.data$dat, pgs.weight.data = pgs.data$pgs.weight.data ) names(merged.vcf.pgs.data) head(merged.vcf.pgs.data$missing.snp.data)[, 1:6] ## ----allele-matching, eval = FALSE-------------------------------------------- # # # Most strict allele match handling # strict.allele.match.result <- apply.polygenic.score( # vcf.data = vcf.data$dat, # pgs.weight.data = pgs.data$pgs.weight.data, # missing.genotype.method = 'none', # correct.strand.flips = TRUE, # remove.ambiguous.allele.matches = TRUE, # remove.mismatched.indels = TRUE # ); # # # Less strict allele match handling # less.strict.allele.match.result <- apply.polygenic.score( # vcf.data = vcf.data$dat, # pgs.weight.data = pgs.data$pgs.weight.data, # missing.genotype.method = 'none', # correct.strand.flips = TRUE, # remove.ambiguous.allele.matches = FALSE, # remove.mismatched.indels = FALSE # ); # ## ----missing-genotype-methods------------------------------------------------- all.missing.methods.pgs.results <- apply.polygenic.score( vcf.data = vcf.data$dat, pgs.weight.data = pgs.data$pgs.weight.data, correct.strand.flips = FALSE, # no strand flip check to avoid warnings missing.genotype.method = c('none', 'normalize', 'mean.dosage') ) head(all.missing.methods.pgs.results$pgs.output) ## ----custom-percentiles------------------------------------------------------- custom.percentiles.pgs.results <- apply.polygenic.score( vcf.data = vcf.data$dat, pgs.weight.data = pgs.data$pgs.weight.data, correct.strand.flips = FALSE, # no strand flip check to avoid warnings n.percentiles = 5 ) head(custom.percentiles.pgs.results$pgs.output) ## ----load-large-vcf----------------------------------------------------------- phenotype.test.data.path <- system.file( 'extdata', 'phenotype.test.data.Rda', package = 'ApplyPolygenicScore', mustWork = TRUE ) load(phenotype.test.data.path) str(phenotype.test.data) ## ----phenotype-merge---------------------------------------------------------- pgs.results.with.phenotype <- apply.polygenic.score( vcf.data = phenotype.test.data$vcf.data, pgs.weight.data = phenotype.test.data$pgs.weight.data, phenotype.data = phenotype.test.data$phenotype.data ) head(pgs.results.with.phenotype$pgs.output) ## ----phenotype-analysis------------------------------------------------------- pgs.results.with.phenotype.analysis <- apply.polygenic.score( vcf.data = phenotype.test.data$vcf.data, pgs.weight.data = phenotype.test.data$pgs.weight.data, phenotype.data = phenotype.test.data$phenotype.data, phenotype.analysis.columns = c('continuous.phenotype', 'binary.phenotype') ) head(pgs.results.with.phenotype.analysis$regression.output) ## ----plotting-dir, echo = FALSE----------------------------------------------- temp.dir <- tempdir(); basic.density.filename <- ApplyPolygenicScore:::generate.filename( project.stem = 'vignette-example-basic', file.core = 'pgs-density', extension = 'png' ); phenotype.density.filename <- ApplyPolygenicScore:::generate.filename( project.stem = 'vignette-example-phenotype', file.core = 'pgs-density', extension = 'png' ); correlation.filename <- ApplyPolygenicScore:::generate.filename( project.stem = 'vignette-example-correlation', file.core = 'pgs-scatter', extension = 'png' ); basic.rank.filename <- ApplyPolygenicScore:::generate.filename( project.stem = 'vignette-example-basic', file.core = 'pgs-rank-plot', extension = 'png' ); phenotype.rank.filename <- ApplyPolygenicScore:::generate.filename( project.stem = 'vignette-example-phenotype', file.core = 'pgs-rank-plot', extension = 'png' ); ## ----pgs-density, eval = TRUE------------------------------------------------- create.pgs.density.plot( pgs.data = pgs.results.with.phenotype.analysis$pgs.output, output.dir = temp.dir, filename.prefix = 'vignette-example-basic', file.extension = 'png' ) ## ----out.width = '50%', echo = FALSE------------------------------------------ knitr::include_graphics(file.path(temp.dir, basic.density.filename)); ## ----pgs-density-phenotype, eval = TRUE--------------------------------------- create.pgs.density.plot( pgs.data = pgs.results.with.phenotype.analysis$pgs.output, output.dir = temp.dir, filename.prefix = 'vignette-example-phenotype', file.extension = 'png', tidy.titles = TRUE, phenotype.columns = c('binary.factor.phenotype', 'categorical.phenotype', 'continuous.phenotype') ) ## ----out.width = '50%', echo = FALSE------------------------------------------ knitr::include_graphics(file.path(temp.dir, phenotype.density.filename)); ## ----pgs-correlation, eval = TRUE--------------------------------------------- create.pgs.with.continuous.phenotype.plot( pgs.data = pgs.results.with.phenotype.analysis$pgs.output, output.dir = temp.dir, filename.prefix = 'vignette-example-correlation', file.extension = 'png', tidy.titles = TRUE, phenotype.columns = c('continuous.phenotype', 'categorical.phenotype') ) ## ----out.width = '70%', echo = FALSE------------------------------------------ knitr::include_graphics(file.path(temp.dir, correlation.filename)); ## ----pgs-rank, eval = TRUE---------------------------------------------------- # Introducing some missing genotypes to demonstrate the missing genotype barplot pgs.results.with.phenotype.analysis$pgs.output$n.missing.genotypes <- rep(c(0, 1, 0, 2, 1), 2) create.pgs.rank.plot( pgs.data = pgs.results.with.phenotype.analysis$pgs.output, output.dir = temp.dir, filename.prefix = 'vignette-example-basic', file.extension = 'png' ) ## ----out.width = '50%', echo = FALSE------------------------------------------ knitr::include_graphics(file.path(temp.dir, basic.rank.filename)); ## ----pgs-rank-phenotype, eval = TRUE------------------------------------------ create.pgs.rank.plot( pgs.data = pgs.results.with.phenotype.analysis$pgs.output, output.dir = temp.dir, filename.prefix = 'vignette-example-phenotype', file.extension = 'png', phenotype.columns = c('binary.factor.phenotype', 'binary.phenotype', 'categorical.phenotype', 'continuous.phenotype') ) ## ----out.width = '50%', echo = FALSE------------------------------------------ knitr::include_graphics(file.path(temp.dir, phenotype.rank.filename));