################################################################################### ## Fit Regression Model To Signal ## ################################################################################### ## function: - ################################################################################### ## input: phase-unfolded text file signal ## ## ## ## output: regression coefficients ## ## ## ################################################################################### ##.................... function FitRegModel ........................## FitRegModel <- function (filname,InDir,OutDir,signum) { ##set returned flag if(!file.exists(filname)) { cat("\n\n *** File: ",flname," not found! ! Exit current Script ! *** \n\n") cat ("\n FitRegModel: success = ",success,"\n") return(NULL) } ##read pure file xx <- read.table(filname,skip = 1, sep = " ") names(xx) <- c("samptime","sampamp","samphase","cycle","patient") ##predicted variable y <- xx[,"sampamp"] ##generate regressors t1 <- xx[,"samptime"] t2 <- I(t1^2) t3 <- I(t1^3) t4 <- I(t1^4) t5 <- I(t1^5) t6 <- I(t1^6) t7 <- I(t1^7) t8 <- I(t1^8) t9 <- I(t1^9) t10 <- I(t1^10) p1 <- xx[,"samphase"] p2 <- I(p1^2) p3 <- I(p1^3) p4 <- I(p1^4) p5 <- I(p1^5) p6 <- I(p1^6) p7 <- I(p1^7) p8 <- I(p1^8) p9 <- I(p1^9) p10 <- I(p1^10) ##place the data into a data frame my.df <- data.frame(y,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10, p1,p2,p3,p4,p5,p6,p7,p8,p9,p10 ) ##fit the full model big.lm <- lm(y ~ ., data = my.df) summary(big.lm) ##step wise procedure step.lm <- step(big.lm) step.lm.sum <- summary(step.lm) ##get number of terms still not significant excluding intercept step.lm.sum$coefficients <- step.lm.sum$coefficients[-1,] #drop the intercept nleft <- which(step.lm.sum$coefficients[,"Pr(>|t|)"] >= 0.05) ##if no unsignificant terms present return step-output coefficients if(length(nleft) == 0) { cat("\n\n All Step Output Coefficients Significant for Signal: ",signum,"\n\n") return (step.lm.sum$coefficients) } ##if not significant terms still present create a new data frame with only the variables selected by step selected.variables <- names(step.lm$coefficients)[-1] selected.variables my.df.smaller <- my.df[,c("y",selected.variables)] ## leaps ## Based on Faraway, Linear Models with R, page 127 et seq. library(leaps) b <- regsubsets(y ~ . ,nvmax = length(selected.variables), data = my.df.smaller) summary(b) rs <- summary(b) no.parameters <- seq_along(rs$cp) + 1 ##set plot window to accommodate 3 graphs (1 row, 3 columns) # x11(width = 14, height = 10) # par(cex.axis=2, cex.lab=2,mfrow = c(1,3)) ##plot adjusted R^2 #plot(no.parameters, rs$adjr2, xlab = "Number of parameters", ylab = "Adjusted R-square",font.lab=2,font.axis=2) ##work with Cp (unlike Faraway's example) ##we desire models with Cp around or less than the number of parameters # plot(no.parameters , rs$cp, xlab = "Number of parameters", ylab = "Cp Statistics",font.lab=2,font.axis=2) # abline(0, 1) # plot(no.parameters , rs$cp - no.parameters, xlab = "Number of parameters p", ylab = "Cp Statistics - p",font.lab=2,font.axis=2) # abline(h = 0) ##wait for mouse input (consent) to remove drawing # locator() # graphics.off() ##find the smallest model with Cp <= number-parameters chosen.model <- min(which(rs$cp <= no.parameters)) chosen.model ##see which variables are included reduced.variables <- rs$which[chosen.model, ] which.variables <- which(reduced.variables) ##remove intercept which.variables <- which.variables[which.variables > 1] which.variables ##now reduce the data frame keeping the y variable (which is in column 1) reduced.df <- my.df.smaller[,c(1,which.variables)] ## check-point names(my.df.smaller) #previous data frame names(reduced.df) #new reduced data frame ##refit using the reduced data frame reduced.lm <- lm(y ~ ., data = reduced.df) reduced.lm.sum <- summary(reduced.lm) ##get number of terms still not significant excluding intercept reduced.lm.sum$coefficients <- reduced.lm.sum$coefficients[-1,] #drop the intercept nleft <- which(reduced.lm.sum$coefficients[,"Pr(>|t|)"] >= 0.05) ##if no unsignificant terms present return step-output coefficients if(length(nleft) == 0) { cat("\n\n All Regsubsets Output Coefficients Significant for Signal: ",signum,"\n\n") return (reduced.lm.sum$coefficients) } ##remove unsignificant coefficients while (length(nleft) > 0) { names(reduced.df) nn <- nleft[[1]] +1 # account for the "y" in position 1 reduced.df <- reduced.df[,-nn] names(reduced.df) reduced.lm <- lm(y ~ ., data = reduced.df) reduced.lm.sum <- summary(reduced.lm) reduced.lm.sum$coefficients <- reduced.lm.sum$coefficients[-1,] #drop the intercept nleft <- which(reduced.lm.sum$coefficients[,"Pr(>|t|)"] >= 0.05) } ##return coefficients return (reduced.lm.sum$coefficients) } ##............................. Main .................................## OutDir <- "/Users/mauede/Breathing-Curves-Dir/Modeling-Dir/Regression-Models-Dir" InDir <- "/Users/mauede/Breathing-Curves-Dir/Modeling-Dir/Pure-Signals-Dir/Mixed" setwd(InDir) cat("\n") print(list.files(pattern = "[0-1,a-z,A-Z, ,_]*.txt")) cat("\n\n") nms <- list.files(pattern="[0-1,a-z,A-Z, ,_]*.txt") if(is.null(nms)) { stop("\n\n *** Pure SIgnalss list empty. Exit current process. *** \n\n") } ##set up matrix to save regression model coefficients: ##columns(1:10) contain the coefficients of the time powers ##columns(11:20) contain the coefficients of the phase powers ##column(21) contain the patient' s ID rg <- matrix(nrow=length(nms),ncol=21) colnames(rg) = c("t1","t2","t3","t4","t5","t6","t7","t8","t9","t10","p1","p2","p3","p4","p5","p6","p7","p8","p9","p10","Id") ##initialize regression matrix to zero for (m in 1:ncol(rg)) { rg[,m] <- rep(0,nrow(rg)) } ns <- 0 #initialize index for(i in 1:length(nms)) { cat("\n i:",i, " nms[i]:",nms[i], "\n") insig <- NULL insig = strsplit(nms[i]," ")[[1]][1] result <- FitRegModel(nms[i],InDir,OutDir,insig) cat("\n result: ",result,"\n\n") if(length(result) == 0) { cat("\n\n Signal: ", insig, " not processed ! \n\n") next } cat("\n\n Signal: ", insig, " SUCCESS ! \n\n") ns <- ns +1 for (k in 1: nrow(result)) { for (j in 1:ncol(rg)) { if (rownames(result)[k] == colnames(rg)[j]) { rg[ns,j] <- result[k,"Estimate"] } } } rg [ns,"Id"] <- insig } ##save regression matrix setwd(OutDir) write.table(rg,file="RegressionCoeffMatrix",quote=FALSE,sep = " ",row.names=FALSE,col.names=colnames(rg)) ##create distance matrix between regression coefficients (ignore patient's ID) rgdist <- dist(rg[,1:20]) ##generate clusters from distance matrix h <- hclust(rgdist) plot(h)