# This codes is partly based on the following book: # Højsgaard, S., Edwards, D. and Lauritzen, S. (2012). # Graphical Models with R. Springer. # For more R-packages, see # http://cran.r-project.org/web/views/gR.html # you have to install graph and Rgaphviz from Bioconductor # source("http://bioconductor.org/biocLite.R") # biocLite("graph") # biocLite("Rgraphviz") library(gRain) library(gRbase) library(RBGL) library(pcalg) library(vcd) ### A very simple example for probabilistic reasoning # define possible outcomes: (yn <- c("yes","no")) # define conditional probability tables (cptable): # marginal probability of disease: (d <- cptable(~disease, values=c(1,999), levels=yn)) # conditional probability of positive test given disease (y/n): (t.d <- cptable(~test+disease, values=c(99,1,1,99), levels=yn)) # put cpt's together: plist <- compileCPT(list(d, t.d)) # build GRAphical Independence Network: pn <- grain(plist) plot(pn) # compile/fit Bayesian network: pnc <- compile(pn, propagate=TRUE) pnc$cpt # one can now get all kinds of conditional/marginal/joint # probabilities out: querygrain(pnc, nodes=c("test", "disease"), type="conditional") #test|disease querygrain(pnc, nodes=c("disease","test"), type="conditional") #disease|test # alternatively: (pnc.ev <- setEvidence(pnc, "test", "yes")) # what is the probability of disease? querygrain(pnc.ev, nodes="disease") ### a slightly more complex example for probabilistic reasoning: # create dag directly (without specifying cond. distributions): g <- list(~asia, ~tub|asia, ~smoke, ~lung|smoke, ~bronc|smoke, ~either|lung:tub, ~xray|either, ~dysp|bronc:either) chestdag <- dagList(g) plot(chestdag) # check d-separation (conditional independence) relationships: dsep("lung","bronc","smoke", chestdag) dsep("tub", "lung", "xray", chestdag) dsep("asia","smoke", c("either","lung"), chestdag) # look at data simulated from this graph data(chestSim500, package='gRbase') ?chestSim500 head(chestSim500) names(chestSim500) # fit data to DAG simdagchest <- grain(chestdag, data=chestSim500) simdagchest <- compile(simdagchest, propagate=T, smooth=.1) # now we can get out all kinds of conditional/marginal/joint # probability estimates: querygrain(simdagchest, nodes=c("lung","bronc"),type="marginal") # check with data: summary(chestSim500) querygrain(simdagchest, nodes=c("lung","bronc"), type="joint") # check with data: (tab <- margin.table(table(chestSim500), c(4,5))) tab/500 # suppose we see someone who has been in asia, # has bad xray and smokes ev <- setEvidence(simdagchest, c("asia","xray","smoke"), c("yes","yes","yes")) getFinding(ev) # what is the probability of lung cancer? querygrain(ev, nodes="lung") # compare to marginal probability of lung cancer: querygrain(simdagchest, "lung") ### Estimating causal effects when a DAG is given # example with a linear Gaussian SEM n <- 1000 eps1 <- rnorm(n,0,1) eps2 <- rnorm(n,0,2) eps3 <- rnorm(n,0,1) eps4 <- rnorm(n,0,2) x2 <- eps2 x1 <- 3*x2 + eps1 x3 <- 4*x1 + 2*x2 + eps3 x4 <- 5*x3 + eps4 data <- cbind(x1,x2,x3,x4) # path method: # estimate edge weights: lm(x3 ~ x1+x2)$coef (a1 <- lm(x3 ~ x1+x2)$coef[2]) lm(x4 ~ x3)$coef (a2 <- lm(x4 ~ x3)$coef[2]) a1*a2 # regressions: lm(x4 ~ x1)$coef lm(x4 ~ x1+x2)$coef lm(x4 ~ x1+x3)$coef lm(x4 ~ x1+x2+x3)$coef # do-operator # do(x1=1) eps1 <- rnorm(n,0,1) eps2 <- rnorm(n,0,2) eps3 <- rnorm(n,0,1) eps4 <- rnorm(n,0,2) x2 <- eps2 x1 <- rep(1,n) x3 <- 4*x1 + 2*x2 + eps3 x4 <- 5*x3 + eps4 (do1 <- mean(x4)) # do(x1=2) eps1 <- rnorm(n,0,1) eps2 <- rnorm(n,0,2) eps3 <- rnorm(n,0,1) eps4 <- rnorm(n,0,2) x2 <- eps2 x1 <- rep(2,n) x3 <- 4*x1 + 2*x2 + eps3 x4 <- 5*x3 + eps4 (do2 <- mean(x4)) do2-do1 ### Some intuition for restricted structural equation models: eps1 <- runif(n,-1,1) eps2 <- runif(n,-1,1) x1 <- eps1 x2 <- x1 + eps2 # consider x1 -> x2: plot(x1,x2, xlim=c(-1.5,1.5),ylim=c(-2.5,2.5),xlab="x1",ylab="x2") fit1 <- lm(x2 ~ x1) abline(fit1) # noise is independent of x1 # consider x2 -> x1 plot(x2,x1,ylim=c(-1.5,1.5),xlab="x2",ylab="x1",xlim=c(-2.5,2.5)) fit2 <- lm(x1 ~ x2) abline(fit2) # noise is not independent of x1 # => we can conclude that x1 -> x2 # in the Gaussian setting, we get a football shape, # and we cannot distinguish x1 -> x2 and x2 -> x1