#### Funktionen geschrieben von Valentin Rousson #### Minimal changes/adaptions, Martin Maechler, Sep.2012 TITLE <- function(txt) title(sub = txt, col.sub = "firebrick", font.sub = 4) f.compare.ise <- function(n = 50, sd = 0.1, nof = 1, b = 7, dpol = 3, N = 10, waitClick = (N <= 10), delay = .1) { stopifnot( require("lokern") ) ## compare ise of linear, quadratic, polynomial and nonparametric fit ## sd: standard deviation of noise ## nof=1: bimodal regression function on [0,b]; ## nof=2: exponential regression function on [0,b]; ## nof=3: logistic regression function on [-b,b] ## b: right bound of the regression domain ## dpol: degree of the polynomial model used ## N: number of samples simulated stopifnot(nof %in% 1:3, dpol >= 1) switch(nof, { ## 1: f <- function(x) { dnorm(x,3*b/7,0.5)+dnorm(x,5*b/7,0.2) } a <- 0 legx <- a labf <- "Bimodal function" }, { ## 2 : f <- function(x) { exp(-x) } a <- 0 legx <- (2/3)*b labf <- "Exponential function" }, { ## 3 : f <- function(x) { exp(x)/(1+exp(x)) } a <- -b legx <- a labf <- "Logistic function" }) grid <- seq(a,b, 0.01) scale <- (b-a)/(length(grid)-1) ztrue <- f(grid) ylim <- c(min(ztrue)- 4*sd, max(ztrue)+ 4*sd) x <- seq(a,b,len = n) X2 <- cbind(x,x^2) Xd <- NULL for(i in 1:dpol) Xd <- cbind(Xd,x^i) ltyest <- c(1,1,1,1,2) colest <- c(1,2,3,4,1) labest <- c("linear","quadratic",paste("polynom",dpol),"nonpar","true") isels1 <- isels2 <- iselsd <- isenp <- numeric(N) op <- par(mfrow = c(1,1)); on.exit(par(op)) for(I in 1:N) { y <- f(x) + rnorm(n,sd = sd) ls1 <- lsfit( x,y)$coef ls2 <- lsfit(X2,y)$coef lsd <- lsfit(Xd,y)$coef np <- glkerns(x,y,x.out = grid) zls1 <- ls1[1]+grid*ls1[2] zls2 <- ls2[1]+grid*ls2[2]+grid^2*ls2[3] zlsd <- lsd[1] for(i in 1:dpol) zlsd <- zlsd+grid^i*lsd[i+1] znp <- np$est isels1[I] <- sum((zls1-ztrue)^2)*scale isels2[I] <- sum((zls2-ztrue)^2)*scale iselsd[I] <- sum((zlsd-ztrue)^2)*scale isenp[I] <- sum((znp-ztrue)^2)*scale legtxt <- paste(labest,c(rep(" ISE=",4),""), c(round(c(isels1[I],isels2[I],iselsd[I],isenp[I]),4),"")) plot(x,y,ylim = ylim) lines(grid,zls1, lty = ltyest[1],col = colest[1]) lines(grid,zls2, lty = ltyest[2],col = colest[2]) lines(grid,zlsd, lty = ltyest[3],col = colest[3]) lines(grid,znp, lty = ltyest[4],col = colest[4]) lines(grid,ztrue,lty = ltyest[5],col = colest[5]) legend(legx,ylim[2],legtxt, lty = ltyest, col = colest) title(paste("Sample",I,":",labf,"n=",n,"sd=",round(sd,2))) if(waitClick) { TITLE("Click on the plot to get the next sample!") locator(1) } else Sys.sleep(delay) } boxplot(isels1,isels2,iselsd,isenp, names = labest[1:4]) title(paste("Summary of ISE of",N,"samples:",labf,"n=",n,"sd=",round(sd,2))) list(isels1=isels1, isels2=isels2, iselsd=iselsd, isenp=isenp, N = N, labFunc = labf, labest=labest) } f.compare.bdw <- function(h = c(.2,.5,1), n = 50, sd = 0.2, nof = 1, b = 7, N = 10, oneplot = (N > 10), click = !oneplot) { ## compare ise of nonparametric fits with three different bandwidth given ## by the user, as well as with the optimal bandwidth calculated by glkerns ## h: vector of 3 different bandwidths to be compared (or also length 1 or 2) ## n: sample size ## sd: standard deviation of noise ## nof=1: bimodal regression function on [0,b]; ## nof=2: exponential regression function on [0,b]; ## nof=3: logistic regression function on [-b,b] ## b: right bound of the regression domain ## N: number of samples simulated ## oneplot: the estimates for different bandwidths are shown ## on the same plot (oneplot = TRUE) or different plots (oneplot=FALSE) stopifnot(require("lokern"), nof %in% 1:3, length(h) >= 1, h > 0) switch(nof, { ## 1: f <- function(x) { dnorm(x,3*b/7,0.5)+dnorm(x,5*b/7,0.2) } a <- 0 legx <- a labf <- "Bimodal function" }, { ## 2 : f <- function(x) { exp(-x) } a <- 0 legx <- (1/3)*b labf <- "Exponential function" }, { ## 3 : f <- function(x) { exp(x)/(1+exp(x)) } a <- -b legx <- a labf <- "Logistic function" }) if(length(h) == 1) h <- c(.5*h,h,2*h) else if(length(h) == 2) h <- sort(h[1],h[2],max(h)*2) else if(length(h) >= 3) h <- sort(h[1:3]) grid <- seq(a,b,0.01) scale <- (b-a)/(length(grid)-1) ztrue <- f(grid) ylim <- c(min(ztrue)- 4*sd, max(ztrue)+ 4*sd) x <- seq(a,b, len = n) ltyest <- c(1,1,1,1,2) colest <- c(1,2,3,4,1) lab.h <- c(paste("h=", round(h,2)),"hopt") iseh <- matrix(0, nrow = N, ncol = 4, dimnames=list(NULL, lab.h)) zh <- array(dim = c(N,4,length(grid))) op <- if(oneplot) par(mfrow = c(1,1)) else par(mfrow = c(2,2)) on.exit(par(op)) for(I in 1:N) { y <- f(x) + rnorm(n,sd = sd) for(i in 1:4) { if(i <= 3) zh[I,i,] <- glkerns(x,y,x.out = grid,bandwidth = h[i])$est else { ## last one: take automatic h = "hopt" tmp <- glkerns(x,y,x.out = grid) zh[I,i,] <- tmp$est hopt <- tmp$bandwidth } iseh[I,i] <- sum((zh[I,i,] - ztrue)^2)*scale } legtxt <- c(paste(lab.h, " ISE=", signif(iseh[I,], digits=5)), "true") if(oneplot) { plot(x,y,ylim = ylim) for(i in 1:4) lines(grid,zh[I,i,],lty = ltyest[i],col = colest[i]) lines(grid,ztrue,lty = ltyest[5],col = colest[5]) legend(legx,ylim[2],legtxt,lty = ltyest,col = colest) title(paste("Sample",I,":",labf,"n=",n, "sd=",round(sd,2),"hopt=",round(hopt,2))) if(click) { TITLE("Click on the plot to get the next sample!") locator(1) } } else { ## (!oneplot) for(i in 1:4) { plot(x,y,ylim = ylim) lines(grid,zh[I,i,],lty = ltyest[i],col = colest[i]) lines(grid,ztrue,lty = ltyest[5],col = colest[5]) legend(legx,ylim[2],legtxt[c(i,5)],lty = ltyest[c(i,5)], col = colest[c(i,5)]) title(paste("Sample",I,":",labf,"n=",n,"sd=",round(sd,2), if(i < 4) paste("h=",round(h[i],2)) else paste("hopt=",round(hopt,2)))) if(click) TITLE("Click on the plot to get the next sample!") } if(click) locator(1) } }## for( 1:N ) meanzh <- apply(zh,2:3,"mean") biash <- varh <- mseh <- numeric(4) for(i in 1:4) { biash[i] <- sum((meanzh[i,]-ztrue)^2)*scale mseh[i] <- mean(iseh[,i]) varh[i] <- mseh[i]-biash[i] } par(mfrow = c(1,1)) boxplot(iseh, ylim = c(0,max(iseh))) abline(h=0, col="gray", lty=3) text(1:4,biash,"Bias",col = 2) text(1:4,varh, "Var", col = 3) text(1:4,mseh, "MSE", col = 4) title(paste("Summary of ISE of",N,"samples:",labf,"n=",n,"sd=",round(sd,2))) list(iseh=iseh, biash=biash, varh=varh, mseh=mseh) } ##' This function plots distance curves and velocity curves of nboys boys and ##' ngirls girls (randomly selected from a population of 120 boys and 112 girls) ##' It also allows to extract features characterizing the ##' puberty from the velocity curves ##' @param nboys ##' @param ngirls ##' @author Valentin Rousson & Martin Maechler f.compare.veloc <- function(nboys = 12, ngirls = nboys, maxvel = 18, age.grid = seq(0, 20, 1/64), notch=TRUE, ...) { stopifnot( require("lokern"), nboys >= 1, ngirls >= 1) growthLab <- "growth velocity [cm / yr]" draw.and.select <- function(dat, nSel, col.velo) { name <- deparse(substitute(dat)) stopifnot(is.list(dat), is.list(dat[[1]]), nSel >= 1, nSel == as.integer(nSel)) if(grepl("s$", name)) ## drop plural "s": name <- sub("s$", "", name) N <- length(dat) nSel <- min(N, nSel) i. <- sample(1:N, nSel) tpub <- peak <- numeric(nSel) for(i in 1:nSel) { ## for each of the selected child x <- dat[[i.[i]]]$age y <- dat[[i.[i]]]$height no <- dat[[i.[i]]]$nos dist <- glkerns(x,y,x.out = age.grid)$est dx <- x[-1]-diff(x)/2 rx <- range(x, dx, age.grid) ## MM thinks this should be labeled 'height' rather than 'distance' plot(x,y, xlim=rx, ylim= range(y,dist), xlab = "age", ylab = "distance") lines(age.grid,dist) title(paste("Distance curve for",name,"no",no)) dy <- diff(y)/diff(x) veloc0 <- glkerns(dx,dy, x.out = age.grid)$est hopt <- glkerns(x,y, x.out = age.grid, deriv = 1)$bandwidth tmp <- interpol.xy(x,y) newx <- tmp$x newy <- tmp$y veloc1 <- glkerns(newx,newy,x.out = age.grid, deriv = 1, bandwidth = hopt)$est plot(dx,dy*(dy <= maxvel)+maxvel*(dy > maxvel), xlim=rx, ylim = c(0,maxvel), xlab = "age", ylab = growthLab) lines(age.grid, veloc1, col = col.velo, lwd=2) lines(age.grid, veloc0, col = 1) legend("top", c("Estimate of derivative", "Smoothing the differences"), lwd = c(2,1), lty = c(1,1), col = c(col.velo, 1), bty="n") title(paste("Velocity curve for",name,"no",no)) TITLE("Click on the puberty peak") tmp <- locator(1) tpub[i] <- tmp$x peak[i] <- tmp$y } list(tpub = tpub, peak = peak) } ## draw.and.select() r <- getGrData() boys <- r$boys girls <- r$girls op <- par(mfrow = c(2,1), mgp = c(1.5, 0.6, 0), mar = .1+c(7,5,4,1)/2) on.exit(par(op)) L.boys <- draw.and.select(boys , nboys, col.velo = "skyblue3") L.girl <- draw.and.select(girls, ngirls,col.velo = "pink4") par(mfrow = c(1,2)) boxplot(L.boys$tpub, L.girl$tpub, names = c("boys","girls"), notch=notch, ...) title(paste("\"High\" puberty age for", nboys, "boys and", ngirls,"girls"), cex.main = 0.8, ylab = "age [years]") boxplot(L.boys$peak, L.girl$peak, names = c("boys","girls"), notch=notch, ...) title(paste("\"High\" puberty peak for", nboys, "boys and", ngirls,"girls"), cex.main = 0.8, ylab = growthLab) invisible(list(boys = L.boys, girls = L.girl)) } ##---------------------------------------------------------------------------- ##' called only from f.compare.veloc() getGrData <- function(dir = "http://stat.ethz.ch/Teaching/Datasets/WBL") { age <- matrix(scan(paste(dir,"zeiten.dat", sep="/")), byrow = TRUE, ncol = 38) height <- matrix(scan(paste(dir,"laengen.dat", sep="/")), byrow = TRUE, ncol = 38) nos <- age[,1] sex <- age[,2] get.hgt.age <- function(sel) { stopifnot(is.logical(sel), length(sel) == length(nos)) nn <- length(ind <- which(sel)) r <- vector("list", nn) for(i in seq_len(nn)) { h.i <- abs(height[ind[i], -(1:2)]) a.i <- age[ind[i], -(1:2)] r[[i]] <- list(nos = nos[ind[i]], height = h.i[a.i > 0], age = a.i[a.i > 0]) } r } boys <- get.hgt.age(sex == 9) girls <- get.hgt.age(sex == 8) list(boys=boys, girls=girls) } interpol.xy <- function(x,y, f = 1.25) { n <- length(x) stopifnot(n >= 2, length(y) == n, is.numeric(f), length(f)==1, f >= 1) m <- round(f*n) newx <- seq(min(x),max(x),len = m) newy <- c(y[1],rep(0,m-2),y[n]) for(i in 2:(m-1)) { i0 <- max(which(x < newx[i])) i1 <- min(which(x > newx[i])) b <- (y[i1]-y[i0])/(x[i1]-x[i0]) a <- y[i1] - b*x[i1] newy[i] <- a + b*newx[i] } list(x=newx, y=newy) }