[R] doubt with Odds ratio - URGENT HELP NEEDED
Rosa Oliveira
rosita21 at gmail.com
Wed Sep 23 17:06:22 CEST 2015
Dear Michael,
I found some of the errors, but others I wasn’t able to.
And my huge huge problem concerns OR and OR confidence interval :(
New Corrected code:
casedata <-read.spss("tas_05112008.sav")
tas.data<-data.frame(casedata)
#Delete patients that were not discharged
tas.data <- tas.data[ tas.data$hosp!="si ",]
tas.data$resultado.hosp <- ifelse(tas.data$hosp=="l", 0, 1)
tas.data$tas_d2 <- log(ifelse(tas.data$tas_d2==88888|tas.data$tas_d2==99999, NA, tas.data$tas_d2))
tas.data$tas_d3 <- log(ifelse(tas.data$tas_d3==88888|tas.data$tas_d3==99999, NA, tas.data$tas_d3))
tas.data$tas_d4 <- log(ifelse(tas.data$tas_d4==88888|tas.data$tas_d4==99999, NA, tas.data$tas_d4))
tas.data$tas_d5 <- log(ifelse(tas.data$tas_d5==88888|tas.data$tas_d5==99999, NA, tas.data$tas_d5))
tas.data$tas_d6 <- log(ifelse(tas.data$tas_d6==88888|tas.data$tas_d6==99999, NA, tas.data$tas_d6))
tas.data$age <- ifelse(tas.data$age==88888|tas.data$age==99999, NA, tas.data$age)
tas.data <- data.frame(tas1 = tas.data$tas_d2, tas2 = tas.data$tas_d3,
tas3 = tas.data$tas_d4, tas4 = tas.data$tas_d5,
tas5 = tas.data$tas_d6, age = tas.data$age,
discharge = tas.data$resultado.hosp, id.pat=tas.data$id)
# tas.data$discharge <- factor( tas.data$discharge , levels=c(0,1), labels = c("dead", "alive"))
#select only cases that have more than 3 tas
tas.data <- tas.data[apply(tas.data[,-8:-6], 1, function(x) sum(!is.na(x)))>2,]
nsample <- n.obs <- dim(tas.data)[1] #nr of patients with more than 2 tas measurements
tas.data.long <- data.frame( tas=c(unlist(tas.data[,-8:-6])), time=rep(c(0:4), each=n.obs), age=rep(tas.data$age, 5), discharge=rep(tas.data$discharge, 5),
id=rep(c(1:n.obs), 5))
tas.data.long <- tas.data.long [order(tas.data.long$id),]
age=tas.data$age
##################################################################################################
#PLOT EMPIRICAL MEANS OF CRP FOR ALIVE DEATh
##################################################################################################
mean.alive <- apply(tas.data[tas.data$discharge==0, -8:-6], 2, mean, na.rm=T)
mean.dead <- apply(tas.data[tas.data$discharge==1, -8:-6], 2, mean, na.rm=T)
stderr <- function(x) sqrt(var(x,na.rm=TRUE)/length(na.omit(x)))
se.alive <- apply(tas.data[tas.data$discharge==0, -8:-6], 2, stderr)
se.dead <- apply(tas.data[tas.data$discharge==1, -8:-6], 2, stderr)
summarytas <- data.frame (media = c(mean.dead, mean.alive),
standarderror = c( se.dead, se.alive), discharge = c(rep("dead",5), rep("alive", 5)))
ggplot(summarytas, aes(x=c(c(1:5), c(1:5)), y=mean, colour=discharge)) +
geom_errorbar(aes(ymin=mean-2* standarderror, ymax=media+2* standarderror), width=.1) +
scale_color_manual(values=c("blue", "red")) +
theme(legend.text=element_text(size=20), axis.text=element_text(size=16), axis.title=element_text(size=20,face="bold")) +
scale_x_continuous(name="Days") +
scale_y_continuous(name="log tas") +
geom_line() +
geom_point()
library(verification)
prev <- summary(glm(tas.data[,7]~tas.data[,4],family=binomial(link=probit)))
answer = c(prev$coefficients[,1:2])
roc.plot(tas.data[,7], prev, show.thres = FALSE, legen=F )
modelo<-function (dataainit)
{
#dataa<-tas.data
dataa<-dataainit
dataa$ident<-seq(1:90)
tas.6days<-cbind(id=rep(dataa$id,5),tas=c(dataa$tas_d2, dataa$tas_d3,
dataa$tas_d4, dataa$tas_d5, dataa$tas_d6),
time=rep(c(2:6)-2, each=90), out.come=rep(dataa$hosp,5), ident=rep(dataa$ident,5))
tas.6days<-data.frame(tas.6days[order(tas.6days[,1]),])
tas.6days$tas[tas.6days$tas==88888|tas.6days$tas==99999 ]<-NA
#mixed model for the longitudinal tas
lme.1 <- lme(tas~ time+1, random = ~ time+1 |ident, data=tas.6days, na.action=na.exclude )
#Random intercept and slopes
pred.lme<-predict(lme.1)
lme.intercept<-lme.1$coef$random$ident[,1]+lme.1$coef$fixed[1]
lme.slope<- lme.1$coef$random$ident[,2]+lme.1$coef$fixed[2]
selector<-as.numeric(names(lme.intercept)) #to select not NA rows. Apply to the vector in the dataset
test(dataa$intercept[resultado.hosp==1], dataa$intercept[resultado.hosp==0])
print(summary(model.surv1))
return(model.surv1$coef)
}
CONSOLE RESULT: (errors in red)
> casedata <-read.spss("tas_05112008.sav")
Warning message:
In read.spss("tas_05112008.sav") :
tas_05112008.sav: Unrecognized record type 7, subtype 18 encountered in system file
> tas.data<-data.frame(casedata)
>
> #Delete patients that were not discharged
> tas.data <- tas.data[ tas.data$hosp!="si ",]
> tas.data$resultado.hosp <- ifelse(tas.data$hosp=="l", 0, 1)
>
> tas.data$tas_d2 <- log(ifelse(tas.data$tas_d2==88888|tas.data$tas_d2==99999, NA, tas.data$tas_d2))
> tas.data$tas_d3 <- log(ifelse(tas.data$tas_d3==88888|tas.data$tas_d3==99999, NA, tas.data$tas_d3))
> tas.data$tas_d4 <- log(ifelse(tas.data$tas_d4==88888|tas.data$tas_d4==99999, NA, tas.data$tas_d4))
> tas.data$tas_d5 <- log(ifelse(tas.data$tas_d5==88888|tas.data$tas_d5==99999, NA, tas.data$tas_d5))
> tas.data$tas_d6 <- log(ifelse(tas.data$tas_d6==88888|tas.data$tas_d6==99999, NA, tas.data$tas_d6))
>
> tas.data$age <- ifelse(tas.data$age==88888|tas.data$age==99999, NA, tas.data$age)
>
>
> tas.data <- data.frame(tas1 = tas.data$tas_d2, tas2 = tas.data$tas_d3,
+ tas3 = tas.data$tas_d4, tas4 = tas.data$tas_d5,
+ tas5 = tas.data$tas_d6, age = tas.data$age,
+ discharge = tas.data$resultado.hosp, id.pat=tas.data$id)
>
> # tas.data$discharge <- factor( tas.data$discharge , levels=c(0,1), labels = c("dead", "alive"))
>
> #select only cases that have more than 3 tas
> tas.data <- tas.data[apply(tas.data[,-8:-6], 1, function(x) sum(!is.na(x)))>2,]
>
>
>
> nsample <- n.obs <- dim(tas.data)[1] #nr of patients with more than 2 tas measurements
>
> tas.data.long <- data.frame( tas=c(unlist(tas.data[,-8:-6])), time=rep(c(0:4), each=n.obs), age=rep(tas.data$age, 5), discharge=rep(tas.data$discharge, 5),
+ id=rep(c(1:n.obs), 5))
> tas.data.long <- tas.data.long [order(tas.data.long$id),]
>
> age=tas.data$age
>
> ##################################################################################################
> #PLOT EMPIRICAL MEANS OF CRP FOR ALIVE DEATh
> ##################################################################################################
> mean.alive <- apply(tas.data[tas.data$discharge==0, -8:-6], 2, mean, na.rm=T)
> mean.dead <- apply(tas.data[tas.data$discharge==1, -8:-6], 2, mean, na.rm=T)
> stderr <- function(x) sqrt(var(x,na.rm=TRUE)/length(na.omit(x)))
> se.alive <- apply(tas.data[tas.data$discharge==0, -8:-6], 2, stderr)
> se.dead <- apply(tas.data[tas.data$discharge==1, -8:-6], 2, stderr)
> summarytas <- data.frame (media = c(mean.dead, mean.alive),
+ standarderror = c( se.dead, se.alive), discharge = c(rep("dead",5), rep("alive", 5)))
>
>
> ggplot(summarytas, aes(x=c(c(1:5), c(1:5)), y=mean, colour=discharge)) +
+ geom_errorbar(aes(ymin=mean-2* standarderror, ymax=media+2* standarderror), width=.1) +
+ scale_color_manual(values=c("blue", "red")) +
+ theme(legend.text=element_text(size=20), axis.text=element_text(size=16), axis.title=element_text(size=20,face="bold")) +
+ scale_x_continuous(name="Days") +
+ scale_y_continuous(name="log tas") +
+ geom_line() +
+ geom_point()
Error in mean - 2 * standarderror :
non-numeric argument to binary operator
>
>
> library(verification)
> prev <- summary(glm(tas.data[,7]~tas.data[,4],family=binomial(link=probit)))
> answer = c(prev$coefficients[,1:2])
>
>
> roc.plot(tas.data[,7], prev, show.thres = FALSE, legen=F )
Error in is.finite(x) : default method not implemented for type 'list'
>
>
>
> modelo<-function (dataainit)
+
+ {
+
+ #dataa<-tas.data
+ dataa<-dataainit
+
+ dataa$ident<-seq(1:90)
+ tas.6days<-cbind(id=rep(dataa$id,5),tas=c(dataa$tas_d2, dataa$tas_d3,
+ dataa$tas_d4, dataa$tas_d5, dataa$tas_d6),
+ time=rep(c(2:6)-2, each=90), out.come=rep(dataa$hosp,5), ident=rep(dataa$ident,5))
+
+ tas.6days<-data.frame(tas.6days[order(tas.6days[,1]),])
+ tas.6days$tas[tas.6days$tas==88888|tas.6days$tas==99999 ]<-NA
+
+ #mixed model for the longitudinal tas
+ lme.1 <- lme(tas~ time+1, random = ~ time+1 |ident, data=tas.6days, na.action=na.exclude )
+
+ #Random intercept and slopes
+ pred.lme<-predict(lme.1)
+ lme.intercept<-lme.1$coef$random$ident[,1]+lme.1$coef$fixed[1]
+ lme.slope<- lme.1$coef$random$ident[,2]+lme.1$coef$fixed[2]
+ selector<-as.numeric(names(lme.intercept)) #to select not NA rows. Apply to the vector in the dataset
+
+ test(dataa$intercept[resultado.hosp==1], dataa$intercept[resultado.hosp==0])
+
+ print(summary(model.surv1))
+ return(model.surv1$coef)
+
+ }
>
I can’t get the OR and OR CI :(
DATA:
Best,
RO
Atenciosamente,
Rosa Oliveira
--
____________________________________________________________________________
Rosa Celeste dos Santos Oliveira,
E-mail: rosita21 at gmail.com
Tlm: +351 939355143
Linkedin: https://pt.linkedin.com/in/rosacsoliveira
____________________________________________________________________________
"Many admire, few know"
Hippocrates
> On 23 Sep 2015, at 12:02, Michael Dewey <lists at dewey.myzen.co.uk> wrote:
>
> Dear Rosa
>
> It would help if you posted the error messages where they occur so that we can see which of your commands caused which error. However see comment inline below.
>
> On 22/09/2015 22:17, Rosa Oliveira wrote:
>> Dear all,
>>
>>
>> I’m trying to compute Odds ratio and OR confidence interval.
>>
>> I’m really naive, sorry for that.
>>
>>
>> I attach my data and my code.
>>
>> I’m having lots of errors:
>>
>> 1. Error in data.frame(tas1 = tas.data$tas_d2, tas2 = tas.data$tas_d3, tas3 = tas.data$tas_d4, :
>> arguments imply differing number of rows: 90, 0
>>
>
> At least one of tas_d2, tas_d3, tas_d4 does not exist
>
> I suggest fixing that one and hoping the rest go away.
>
>> 2. Error in data.frame(tas = c(unlist(tas.data[, -8:-6])), time = rep(c(0:4), :
>> arguments imply differing number of rows: 630, 450, 0
>>
>> 3. Error: object 'tas.data.long' not found
>>
>> 4. Error in data.frame(media = c(mean.dead, mean.alive), standarderror = c(se.dead, :
>> arguments imply differing number of rows: 14, 10
>>
>> 5. Error in ggplot(summarytas, aes(x = c(c(1:5), c(1:5)), y = mean, colour = discharge)) :
>> object 'summarytas' not found
>>
>> 6. Error in summary(glm(tas.data[, 6] ~ tas.data[, 4], family = binomial(link = probit))) :
>> error in evaluating the argument 'object' in selecting a method for function 'summary': Error in eval(expr, envir, enclos) : y values must be 0 <= y <= 1
>>
>> 7. Error in wilcox.test.default(pred[obs == 1], pred[obs == 0], alternative = "great") :
>> not enough (finite) 'x' observations
>> In addition: Warning message:
>> In is.finite(x) & apply(pred, 1, f) :
>> longer object length is not a multiple of shorter object length
>>
>>
>> and off course I’m not getting OR.
>>
>> Nonetheless all this errors, I think I have not been able to compute de code to get OR and OR confidence interval.
>>
>>
>> Can anyone help me please. It’s really urgent.
>>
>> PLEASE
>>
>> THE CODE:
>>
>> the hospital outcome is discharge.
>>
>> require(gdata)
>> library(foreign)
>> library(nlme)
>> library(lme4)
>> library(boot)
>> library(MASS)
>> library(Hmisc)
>> library(plotrix)
>> library(verification)
>> library(mvtnorm)
>> library(statmod)
>> library(epiR)
>>
>> #########################################################################################
>> # Data preparation #
>> #########################################################################################
>>
>> setwd("/Users/RO/Desktop")
>>
>> casedata <-read.spss("tas_05112008.sav")
>> tas.data<-data.frame(casedata)
>>
>> #Delete patients that were not discharged
>> tas.data <- tas.data[ tas.data$hosp!="si ",]
>> tas.data$resultado.hosp <- ifelse(tas.data$hosp=="l", 0, 1)
>>
>> tas.data$tas_d2 <- log(ifelse(tas.data$tas_d2==88888|tas.data$tas_d2==99999, NA, tas.data$tas_d2))
>> tas.data$tas_d3 <- log(ifelse(tas.data$tas_d3==88888|tas.data$tas_d3==99999, NA, tas.data$tas_d3))
>> tas.data$tas_d4 <- log(ifelse(tas.data$tas_d4==88888|tas.data$tas_d4==99999, NA, tas.data$tas_d4))
>> tas.data$tas_d5 <- log(ifelse(tas.data$tas_d5==88888|tas.data$tas_d5==99999, NA, tas.data$tas_d5))
>> tas.data$tas_d6 <- log(ifelse(tas.data$tas_d6==88888|tas.data$tas_d6==99999, NA, tas.data$tas_d6))
>>
>> tas.data$age <- ifelse(tas.data$age==88888|tas.data$age==99999, NA, tas.data$age)
>>
>>
>> tas.data <- data.frame(tas1 = tas.data$tas_d2, tas2 = tas.data$tas_d3,
>> tas3 = tas.data$tas_d4, tas4 = tas.data$tas_d5,
>> tas5 = tas.data$tas_d6, age = tas.data$age,
>> discharge = tas.data$resultado.hosp, id.pat=tas.data$ID)
>>
>> # tas.data$discharge <- factor( tas.data$discharge , levels=c(0,1), labels = c("dead", "alive"))
>>
>> #select only cases that have more than 3 tas
>> tas.data <- tas.data[apply(tas.data[,-8:-6], 1, function(x) sum(!is.na(x)))>2,]
>>
>>
>>
>> nsample <- n.obs <- dim(tas.data)[1] #nr of patients with more than 2 tas measurements
>>
>> tas.data.long <- data.frame( tas=c(unlist(tas.data[,-8:-6])), time=rep(c(0:4), each=n.obs), age=rep(tas.data$age, 5), discharge=rep(tas.data$discharge, 5),
>> id=rep(c(1:n.obs), 5))
>> tas.data.long <- tas.data.long [order(tas.data.long$id),]
>>
>> age=tas.data$age
>>
>> ##################################################################################################
>> #PLOT EMPIRICAL MEANS OF CRP FOR ALIVE DEATh
>> ##################################################################################################
>> mean.alive <- apply(tas.data[tas.data$discharge==0, -8:-6], 2, mean, na.rm=T)
>> mean.dead <- apply(tas.data[tas.data$discharge==1, -8:-6], 2, mean, na.rm=T)
>> stderr <- function(x) sqrt(var(x,na.rm=TRUE)/length(na.omit(x)))
>> se.alive <- apply(tas.data[tas.data$discharge==0, -8:-6], 2, stderr)
>> se.dead <- apply(tas.data[tas.data$discharge==1, -8:-6], 2, stderr)
>> summarytas <- data.frame (media = c(mean.dead, mean.alive),
>> standarderror = c( se.dead, se.alive), discharge = c(rep("dead",5), rep("alive", 5)))
>>
>>
>> ggplot(summarytas, aes(x=c(c(1:5), c(1:5)), y=mean, colour=discharge)) +
>> geom_errorbar(aes(ymin=mean-2* standarderror, ymax=media+2* standarderror), width=.1) +
>> scale_color_manual(values=c("blue", "red")) +
>> theme(legend.text=element_text(size=20), axis.text=element_text(size=16), axis.title=element_text(size=20,face="bold")) +
>> scale_x_continuous(name="Days") +
>> scale_y_continuous(name="log tas") +
>> geom_line() +
>> geom_point()
>>
>>
>> library(verification)
>> prev <- summary(glm(tas.data[,6]~tas.data[,4],family=binomial(link=probit)))
>> answer = c(prev$coefficients[,1:2])
>>
>>
>> roc.plot(tas.data[,6], prev, show.thres = FALSE, legen=F )
>>
>>
>> modelo<-function (dataainit)
>>
>> {
>>
>> #dataa<-tas.data
>> dataa<-dataainit
>>
>> dataa$ident<-seq(1:90)
>> tas.6days<-cbind(id=rep(dataa$id,5),tas=c(dataa$tas_d2, dataa$tas_d3,
>> dataa$tas_d4, dataa$tas_d5, dataa$tas_d6),
>> time=rep(c(2:6)-2, each=90), out.come=rep(dataa$hosp,5), ident=rep(dataa$ident,5))
>>
>> tas.6days<-data.frame(tas.6days[order(tas.6days[,1]),])
>> tas.6days$tas[tas.6days$tas==88888|tas.6days$tas==99999 ]<-NA
>>
>> #mixed model for the longitudinal tas
>> lme.1 <- lme(tas~ time+1, random = ~ time+1 |ident, data=tas.6days, na.action=na.exclude )
>>
>> #Random intercept and slopes
>> pred.lme<-predict(lme.1)
>> lme.intercept<-lme.1$coef$random$ident[,1]+lme.1$coef$fixed[1]
>> lme.slope<- lme.1$coef$random$ident[,2]+lme.1$coef$fixed[2]
>> selector<-as.numeric(names(lme.intercept)) #to select not NA rows. Apply to the vector in the dataset
>>
>> test(dataa$intercept[resultado.hosp==1], dataa$intercept[resultado.hosp==0])
>>
>> print(summary(model.surv1))
>> return(model.surv1$coef)
>>
>> }
>>
>>
>>
>>
>> Best,
>> RO
>>
>>
>>
>> ______________________________________________
>> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>> https://stat.ethz.ch/mailman/listinfo/r-help
>> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
>> and provide commented, minimal, self-contained, reproducible code.
>>
>
> --
> Michael
> http://www.dewey.myzen.co.uk/home.html
More information about the R-help
mailing list