## Handling missing values, illustrated on the "boys" data set:
## default lm() behaviour, complete case analysis by hand, and
## single imputation with missForest.

library(mice) ## for md.pattern and for data set "boys"
library(missForest) ## for missForest (single imputation)

## data
?boys
str(boys)
head(boys)

##################################################
## Look at missing values
##################################################
md.pattern(boys)
## 223 rows are complete
223 / 748 ## only 30% of rows are complete
1622 / (748 * 9) ## 24% missing values in total

## Just for illustrative purposes:
## We want to explain the head circumference (hc) by age, hgt and wgt

##################################################
## Default of lm if missing values are present: na.omit
##################################################
fmDefault <- lm(hc ~ age + hgt + wgt, data = boys)
summary(fmDefault)
?lm
?na.omit

## is the same as:
fmDefault <- lm(hc ~ age + hgt + wgt, data = boys, na.action = na.omit)
summary(fmDefault)

## set to other default: na.fail signals an error as soon as the model
## frame contains an NA. The call is wrapped in tryCatch so that the error
## message is shown without aborting the script when it is source()d.
tryCatch(
  lm(hc ~ age + hgt + wgt, data = boys, na.action = na.fail),
  error = function(e) {
    message("lm() with na.action = na.fail failed: ", conditionMessage(e))
  }
)

##################################################
## complete case analysis by hand
##################################################
## select variables by name (robust against changes in column order)
dfBoys <- boys[, c("age", "hgt", "wgt", "hc")]
?complete.cases
complete.cases(dfBoys)

## keep the rows of boys that are complete in the four model variables
datCC <- boys[complete.cases(dfBoys), ]
dim(boys)
dim(datCC)

## gives the same result as with na.omit
fmCC <- lm(hc ~ age + hgt + wgt, data = datCC)
summary(fmCC)

##################################################
## Single Imputation (Conditional Mean imputation using missForest)
##################################################
str(boys)

set.seed(32) ## fix the seed so that the imputation is reproducible
impMF <- missForest(boys)
impMF$OOBerror ## out-of-bag estimate of the imputation error
datImp <- impMF$ximp ## the imputed data set
str(datImp)

fmMF <- lm(hc ~ age + hgt + wgt, data = datImp)
summary(fmMF)