# In 1951, all british doctors were sent a questionnaire, # asking if they smoked tabacco. # We have data on the nr of deaths from coronary heart disease 10 years # after this survey. # It also shows the total number of person years of observation at the time # of the analysis. # Questions: # Is the death rate higher for smokers than for non smokers? # If so, by how much? # And is the effect related to age? # age groups: 35-44, 45-54, 55-64, 65-74, 75-84 data <- cbind(c(1,2,3,4,5), c(32,104,206,186,102), c(52407, 43248, 28612, 12663, 5317), c(2,12,28,28,31), c(18790, 10673, 5710, 2585, 1462)) data <- data.frame(data) data <- cbind(data, data[,2]/data[,3]*100000, data[,4]/data[,5]*100000) names(data) <- c("age.group", "deaths.sm", "person.yr.sm", "deaths.nonsm", "person.yr.nonsm", "rate.sm", "rate.nonsm") attach(data) # Plot data: plot(age.group, rate.sm, pch=1, ylim=c(0,2200), ylab="deaths per 100,000 person years", xlab="age group") points(age.group, rate.nonsm, pch=2) legend(1,2200, c("smokers","non-smokers"), pch=c(1,2)) # Reformat the data: data.new <- data.frame(cbind(c(deaths.sm, deaths.nonsm), c(age.group, age.group), c(person.yr.sm, person.yr.nonsm), c(rep(1,5), rep(0,5)))) names(data.new) <- c("deaths", "age.group", "person.years", "smoke") data.new m <- glm(deaths ~ smoke + age.group + I(age.group^2) + age.group*smoke, family=poisson(), offset=log(person.years), data=data.new) summary(m)