[R] How to formulate an (effect-modifying) interaction with matching variable in a conditional logistic regression?
Fredrik Nilsson
laf.nilsson at gmail.com
Mon Jun 13 12:49:51 CEST 2011
Hi,
I would like to see if a matching variable is an effect-modifier in a
conditional logistic regression. Naturally, the matching variable
can't enter directly in the model but as an interaction with terms
that are in.
However, I have problems in formulating the correct model the term
that's already in the model is a factor. I am using treatment
contrasts and the problem is that if I write:
update(model, . ~ . + factorX:matchingvariableY)
then I get one extra level for the level that is contained in the
intercept which is conditioned away, and I get numerical problems. I
show an example below using data from Hosmer and Lemeshow's Applied
Logistic Regression (2nd ed. p.230 onwards)
I can solve the problem by introducing new variables that contains the
interaction I want, but I'd like to avoid this unwieldy solution. Can
you suggest a better one?
Thank you for your help!
Fredrik Nilsson
# Example
# Code Sheet for the Variables in the 1-1 matched Low Birth Weight Study
# Described in Section 7.3 Page 230
#
# Name Description Codes/Values
# pair Pair 1 - 56
# low Low Birth Weight 1 = BWT<=2500g,
# 0 = BWT>2500g
# age Age of Mother Years
# lwt Weight of Mother at Pounds
# Last Menstrual Period
# race Race 1 = White
# 2 = Black
# 3 = Other
# smoke Smoking Status 0 = No,1 = Yes
# During Pregnancy
# ptd History of Premature Labor 0 = None,1 = Yes
# ht History of Hypertension 0 = No, 1 = Yes
# ui Presence of Uterine 0 = No, 1 = Yes
# Irritability
pair<-rep(1:56, each=2)
low<-rep(c(0,1), 56)
age<-c(14,15,16,17,17,17,17,17,18,18,19,19,19,20,20,20,20,20,
20,20,20,21,21,21,21,21,22,22,23,23,23,23,23,24,24,24,
24,24,25,25,25,25,25,25,26,26,26,26,27,28,28,29,30,31,32,35)
age<-rep(age,each=2)
# actually the last one should be 34 not 35?
age[112]<-34
lwt<-c(135,101,98,115,95,130,103,130,122,110,113,120,113,120,119,142,
100,148,90,110,150,91,115,102,235,112,120,150,103,125,169,120,
141,80,121,109,127,121,120,122,158,105,108,165,124,200,185,103,
160,100,115,130,95,130,158,130,130,97,128,187,119,120,115,110,
190,94,90,128,115,132,110,155,115,138,110,105,118,105,120,85,
155,115,125,92,140,89,241,105,113,117,168,96,133,154,160,190,
124,130,120,120,130,95,135,130,95,142,215,102,121,105,170,187)
race<-c(0,2,1,2,2,2,2,2,0,0,1,0,1,1,2,1,0,2,0,1,2,0,2,0,0,0,2,0,2,2,2,
1,0,2,1,2,2,0,2,1,0,2,0,0,2,1,1,2,0,2,0,0,2,0,1,0,1,2,2,1,2,2,
2,0,0,2,0,1,0,2,2,0,2,0,2,1,0,2,2,2,0,2,1,0,0,2,1,2,0,0,1,2,2,
2,2,0,0,1,2,2,2,0,0,0,0,0,0,0,2,0,0,1)
race<-as.factor(race);levels(race)<-c("white","black","other")
smoke<-c(0,1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,0,
1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0,
1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,
0,0,1,1,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1)
smoke<-as.factor(smoke);levels(smoke)<-c("no","yes")
ptd<-c(0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,
1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1,
0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,
0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0)
ptd<-as.factor(ptd); levels(ptd)<-c("no","yes")
ht<-c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,1)
ht<-as.factor(ht);levels(ht)<-c("no","yes")
ui<-c(0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,
1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
1,0,0,0,1,0,0,0,0,0,0,0,0)
dataset<-data.frame(pair,low,age,lwt,race,smoke,ptd,ht,ui)
library(survival)
# model with all covariates
mult.cl<-clogit(low~lwt+race+smoke+ptd+ht+ui+strata(pair),data=dataset)
summary(mult.cl)
# H&L drop race
mult2.cl<-update(mult.cl,.~.-race)
summary(mult2.cl)
######################
# check interactions #
######################
multi1.cl<-update(mult2.cl,.~.+age:lwt)
summary(multi1.cl)
anova(mult2.cl,multi1.cl)
# then comes the interaction with smoke
# no good! Here's the problem
multi2.cl<-update(mult2.cl,.~.+age:smoke)
summary(multi2.cl)
anova(mult2.cl,multi2.cl)
# has to define my own variable?
dataset$ageNsmoke<-as.numeric(dataset$smoke=="yes")*dataset$age
multi2a.cl<-update(mult2.cl,.~.+ageNsmoke)
summary(multi2a.cl)
anova(mult2.cl,multi2a.cl)
# but suppose I had a model with race, then I'd have to
# define two variables
dataset$ageNblack<-as.numeric(dataset$race=="black")*dataset$age
dataset$ageNother<-as.numeric(dataset$race=="other")*dataset$age
multi3.cl<-update(mult.cl,.~.+race:age)
summary(multi3.cl)
anova(multi3.cl, mult.cl)
multi3.cl<-update(mult.cl,.~.+ageNblack+ageNother)
summary(multi3.cl)
anova(multi3.cl, mult.cl)
# even more unwieldy with factors as matching variables
# I fake a second matching variable "type" with three levels
dataset$type<-c(rep(1,36),rep(2,38),rep(3,38))
dataset$type<-as.factor(dataset$type)
levels(dataset$type)<-c("type1","type2","type3")
dataset$smokeNtype1<-as.numeric(dataset$smoke=="yes")*as.numeric(dataset$type=="type1")
dataset$smokeNtype2<-as.numeric(dataset$smoke=="yes")*as.numeric(dataset$type=="type2")
multi4.cl<-update(mult.cl,.~.+smokeNtype1+smokeNtype2)
More information about the R-help
mailing list