#### Funktionen geschrieben von Valentin Rousson 
#### Minimal changes/adaptions, Martin Maechler, Sep.2012

TITLE <- function(txt) title(sub = txt, col.sub = "firebrick", font.sub = 4)

f.compare.ise <- function(n = 50, sd = 0.1, nof = 1, b = 7, dpol = 3, N = 10,
			  waitClick = (N <= 10), delay = .1)
{
    stopifnot( require("lokern") )
    ## compare ise of linear, quadratic, polynomial and nonparametric fit

    ## sd: standard deviation of noise
    ## nof=1: bimodal regression function on [0,b]; 
    ## nof=2: exponential regression function on [0,b]; 
    ## nof=3: logistic regression function on [-b,b]
    ## b: right bound of the regression domain
    ## dpol: degree of the polynomial model used 
    ## N: number of samples simulated 

    stopifnot(nof %in% 1:3, dpol >= 1)
    switch(nof, { ## 1:
	f <- function(x) { dnorm(x,3*b/7,0.5)+dnorm(x,5*b/7,0.2) }
	a <- 0
	legx <- a
	labf <- "Bimodal function"
    }, { ## 2 :
	f <- function(x) { exp(-x) }
	a <- 0
	legx <- (2/3)*b
	labf <- "Exponential function"
    }, { ## 3 :
	f <- function(x) { exp(x)/(1+exp(x)) }
	a <- -b
	legx <- a
	labf <- "Logistic function"
    })

    grid <- seq(a,b, 0.01)
    scale <- (b-a)/(length(grid)-1)
    ztrue <- f(grid)
    ylim  <- c(min(ztrue)- 4*sd,
	       max(ztrue)+ 4*sd)
    x <- seq(a,b,len = n)
    X2 <- cbind(x,x^2)
    Xd <- NULL
    for(i in 1:dpol) Xd <- cbind(Xd,x^i)

    ltyest <- c(1,1,1,1,2)
    colest <- c(1,2,3,4,1)
    labest <- c("linear","quadratic",paste("polynom",dpol),"nonpar","true")

    isels1 <- isels2 <- iselsd <- isenp <- numeric(N)

    op <- par(mfrow = c(1,1)); on.exit(par(op))

    for(I in 1:N) {
	y <- f(x) + rnorm(n,sd = sd)

	ls1 <- lsfit( x,y)$coef
	ls2 <- lsfit(X2,y)$coef
	lsd <- lsfit(Xd,y)$coef
	np <- glkerns(x,y,x.out = grid)

	zls1 <- ls1[1]+grid*ls1[2]
	zls2 <- ls2[1]+grid*ls2[2]+grid^2*ls2[3]
	zlsd <- lsd[1]
	for(i in 1:dpol) zlsd <- zlsd+grid^i*lsd[i+1]
	znp <- np$est

	isels1[I] <- sum((zls1-ztrue)^2)*scale
	isels2[I] <- sum((zls2-ztrue)^2)*scale
	iselsd[I] <- sum((zlsd-ztrue)^2)*scale
	isenp[I] <- sum((znp-ztrue)^2)*scale
	legtxt <- paste(labest,c(rep("	ISE=",4),""),
			c(round(c(isels1[I],isels2[I],iselsd[I],isenp[I]),4),""))

	plot(x,y,ylim = ylim)
	lines(grid,zls1, lty = ltyest[1],col = colest[1])
	lines(grid,zls2, lty = ltyest[2],col = colest[2])
	lines(grid,zlsd, lty = ltyest[3],col = colest[3])
	lines(grid,znp,	 lty = ltyest[4],col = colest[4])
	lines(grid,ztrue,lty = ltyest[5],col = colest[5])
	legend(legx,ylim[2],legtxt, lty = ltyest, col = colest)
	title(paste("Sample",I,":",labf,"n=",n,"sd=",round(sd,2)))
	if(waitClick) {
	    TITLE("Click on the plot to get the next sample!")
	    locator(1)
	} else Sys.sleep(delay)
    }

    boxplot(isels1,isels2,iselsd,isenp,
	    names = labest[1:4])
    title(paste("Summary of ISE of",N,"samples:",labf,"n=",n,"sd=",round(sd,2)))

    list(isels1=isels1, isels2=isels2, iselsd=iselsd, isenp=isenp,
	 N = N, labFunc = labf, labest=labest)
}


f.compare.bdw <- function(h = c(.2,.5,1), n = 50, sd = 0.2,
			  nof = 1, b = 7, N = 10,
			  oneplot = (N > 10), click = !oneplot)
{
    ## compare ise of nonparametric fits with three different bandwidth given
    ## by the user, as well as with the optimal bandwidth calculated by glkerns

    ## h: vector of 3 different bandwidths to be compared (or also length 1 or 2)
    ## n: sample size
    ## sd: standard deviation of noise
    ## nof=1: bimodal regression function     on  [0,b];
    ## nof=2: exponential regression function on  [0,b];
    ## nof=3: logistic regression function    on [-b,b]
    ## b: right bound of the regression domain
    ## N: number of samples simulated
    ## oneplot: the estimates for different bandwidths are shown
    ##	  on the same plot (oneplot = TRUE) or different plots (oneplot=FALSE)

    stopifnot(require("lokern"),
	      nof %in% 1:3, length(h) >= 1, h > 0)
    switch(nof, { ## 1:
	f <- function(x) { dnorm(x,3*b/7,0.5)+dnorm(x,5*b/7,0.2) }
	a <- 0
	legx <- a
	labf <- "Bimodal function"
    }, { ## 2 :
	f <- function(x) { exp(-x) }
	a <- 0
	legx <- (1/3)*b
	labf <- "Exponential function"
    }, { ## 3 :
	f <- function(x) { exp(x)/(1+exp(x)) }
	a <- -b
	legx <- a
	labf <- "Logistic function"
    })

    if(length(h) == 1) h <- c(.5*h,h,2*h)
    else if(length(h) == 2) h <- sort(h[1],h[2],max(h)*2)
    else if(length(h) >= 3) h <- sort(h[1:3])

    grid <- seq(a,b,0.01)
    scale <- (b-a)/(length(grid)-1)
    ztrue <- f(grid)
    ylim  <- c(min(ztrue)- 4*sd,
	       max(ztrue)+ 4*sd)
    x <- seq(a,b, len = n)

    ltyest <- c(1,1,1,1,2)
    colest <- c(1,2,3,4,1)
    lab.h <- c(paste("h=", round(h,2)),"hopt")
    iseh <- matrix(0, nrow = N, ncol = 4, dimnames=list(NULL, lab.h))
    zh <- array(dim = c(N,4,length(grid)))

    op <- if(oneplot) par(mfrow = c(1,1)) else par(mfrow = c(2,2))
    on.exit(par(op))

    for(I in 1:N) {
	y <- f(x) + rnorm(n,sd = sd)
	for(i in 1:4) {
	    if(i <= 3)
                zh[I,i,] <- glkerns(x,y,x.out = grid,bandwidth = h[i])$est
	    else { ## last one: take automatic h = "hopt"
		tmp <- glkerns(x,y,x.out = grid)
		zh[I,i,] <- tmp$est
		hopt <- tmp$bandwidth
	    }
	    iseh[I,i] <- sum((zh[I,i,] - ztrue)^2)*scale
	}
	legtxt <- c(paste(lab.h, " ISE=", signif(iseh[I,], digits=5)),
                    "true")
	if(oneplot) {
	    plot(x,y,ylim = ylim)
	    for(i in 1:4)
		lines(grid,zh[I,i,],lty = ltyest[i],col = colest[i])
	    lines(grid,ztrue,lty = ltyest[5],col = colest[5])
	    legend(legx,ylim[2],legtxt,lty = ltyest,col = colest)
	    title(paste("Sample",I,":",labf,"n=",n,
			"sd=",round(sd,2),"hopt=",round(hopt,2)))
	    if(click) {
		TITLE("Click on the plot to get the next sample!")
		locator(1)
	    }
	} else { ## (!oneplot)
	    for(i in 1:4) {
		plot(x,y,ylim = ylim)
		lines(grid,zh[I,i,],lty = ltyest[i],col = colest[i])
		lines(grid,ztrue,lty = ltyest[5],col = colest[5])
		legend(legx,ylim[2],legtxt[c(i,5)],lty = ltyest[c(i,5)],
		       col = colest[c(i,5)])
		title(paste("Sample",I,":",labf,"n=",n,"sd=",round(sd,2),
			    if(i < 4) paste("h=",round(h[i],2))
			    else paste("hopt=",round(hopt,2))))
		if(click)
		    TITLE("Click on the plot to get the next sample!")
	    }
	    if(click) locator(1)
	}
    }## for( 1:N )

    meanzh <- apply(zh,2:3,"mean")
    biash <- varh <- mseh <- numeric(4)
    for(i in 1:4) {
	biash[i] <- sum((meanzh[i,]-ztrue)^2)*scale
	mseh[i] <- mean(iseh[,i])
	varh[i] <- mseh[i]-biash[i]
    }
    par(mfrow = c(1,1))
    boxplot(iseh, ylim = c(0,max(iseh)))
    abline(h=0, col="gray", lty=3)
    text(1:4,biash,"Bias",col = 2)
    text(1:4,varh, "Var", col = 3)
    text(1:4,mseh, "MSE", col = 4)
    title(paste("Summary of ISE of",N,"samples:",labf,"n=",n,"sd=",round(sd,2)))

    list(iseh=iseh, biash=biash, varh=varh, mseh=mseh)
}


##' This function plots distance curves and velocity curves of nboys boys and
##' ngirls girls (randomly selected from a population of 120 boys and 112 girls)
##' It also allows to extract features characterizing the
##' puberty from the velocity curves
##' @param nboys 
##' @param ngirls 
##' @author Valentin Rousson & Martin Maechler
f.compare.veloc <- function(nboys = 12, ngirls = nboys,
                            maxvel = 18, age.grid = seq(0, 20, 1/64),
                            notch=TRUE, ...)
{
    stopifnot( require("lokern"), nboys >= 1, ngirls >= 1)
    growthLab <- "growth velocity [cm / yr]"
    draw.and.select <- function(dat, nSel, col.velo)
    {
	name <- deparse(substitute(dat))
	stopifnot(is.list(dat), is.list(dat[[1]]),
		  nSel >= 1, nSel == as.integer(nSel))
	if(grepl("s$", name)) ## drop plural "s":
	    name <- sub("s$", "", name)
	N <- length(dat)
	nSel <- min(N, nSel)
	i. <- sample(1:N, nSel)
	tpub <- peak <- numeric(nSel)
	for(i in 1:nSel) { ## for each of the selected child
	    x  <- dat[[i.[i]]]$age
	    y  <- dat[[i.[i]]]$height
	    no <- dat[[i.[i]]]$nos
	    dist <- glkerns(x,y,x.out = age.grid)$est
	    dx <- x[-1]-diff(x)/2
            rx <- range(x, dx, age.grid)
            ## MM thinks this should be labeled  'height' rather than 'distance'
	    plot(x,y, xlim=rx, ylim= range(y,dist), xlab = "age", ylab = "distance")
	    lines(age.grid,dist)
	    title(paste("Distance curve for",name,"no",no))
	    
	    dy <- diff(y)/diff(x)
	    veloc0 <- glkerns(dx,dy, x.out = age.grid)$est
	    hopt <- glkerns(x,y, x.out = age.grid, deriv = 1)$bandwidth
	    tmp <- interpol.xy(x,y)
	    newx <- tmp$x
	    newy <- tmp$y
	    veloc1 <- glkerns(newx,newy,x.out = age.grid, deriv = 1, bandwidth = hopt)$est
	    plot(dx,dy*(dy <= maxvel)+maxvel*(dy > maxvel),
                 xlim=rx, ylim = c(0,maxvel), xlab = "age", ylab = growthLab)
	    lines(age.grid, veloc1, col = col.velo, lwd=2)
	    lines(age.grid, veloc0, col = 1)
	    legend("top", c("Estimate of derivative", "Smoothing the differences"),
		   lwd = c(2,1), lty = c(1,1), col = c(col.velo, 1), bty="n")
	    title(paste("Velocity curve for",name,"no",no))
	    TITLE("Click on the puberty peak")
	    tmp <- locator(1)
	    tpub[i] <- tmp$x
	    peak[i] <- tmp$y
	}
	list(tpub = tpub, peak = peak)
    } ## draw.and.select()

    r <- getGrData()
    boys  <- r$boys
    girls <- r$girls

    op <- par(mfrow = c(2,1), mgp = c(1.5, 0.6, 0), mar = .1+c(7,5,4,1)/2)
    on.exit(par(op))

    L.boys <- draw.and.select(boys , nboys, col.velo = "skyblue3")
    L.girl <- draw.and.select(girls, ngirls,col.velo = "pink4")
    
    par(mfrow = c(1,2))
    boxplot(L.boys$tpub, L.girl$tpub, names = c("boys","girls"), notch=notch, ...)
    title(paste("\"High\" puberty age for", nboys,
		"boys and", ngirls,"girls"), cex.main = 0.8,
          ylab = "age [years]")
    boxplot(L.boys$peak, L.girl$peak, names = c("boys","girls"), notch=notch, ...)
    title(paste("\"High\" puberty peak for", nboys,
		"boys and", ngirls,"girls"), cex.main = 0.8, ylab = growthLab)
    invisible(list(boys = L.boys, girls = L.girl))
}
##----------------------------------------------------------------------------

##' called only from f.compare.veloc()
getGrData <- function(dir = "http://stat.ethz.ch/Teaching/Datasets/WBL")
{
    age	   <- matrix(scan(paste(dir,"zeiten.dat", sep="/")),
		     byrow = TRUE, ncol = 38)
    height <- matrix(scan(paste(dir,"laengen.dat", sep="/")),
		     byrow = TRUE, ncol = 38)
    nos <- age[,1]
    sex <- age[,2]
    get.hgt.age <- function(sel) {
        stopifnot(is.logical(sel), length(sel) == length(nos))
        nn <- length(ind <- which(sel))
        r <- vector("list", nn)
        for(i in seq_len(nn)) {
            h.i <- abs(height[ind[i], -(1:2)])
            a.i <-        age[ind[i], -(1:2)]
            r[[i]] <-
                list(nos = nos[ind[i]],
                     height = h.i[a.i > 0],
                     age    = a.i[a.i > 0])
        }
        r
    }
    boys  <- get.hgt.age(sex == 9)
    girls <- get.hgt.age(sex == 8)

    list(boys=boys, girls=girls)
}

interpol.xy <- function(x,y, f = 1.25) {
    n <- length(x)
    stopifnot(n >= 2, length(y) == n,
	      is.numeric(f), length(f)==1, f >= 1)
    m <- round(f*n)
    newx <- seq(min(x),max(x),len = m)
    newy <- c(y[1],rep(0,m-2),y[n])
    for(i in 2:(m-1)) {
	i0 <- max(which(x < newx[i]))
	i1 <- min(which(x > newx[i]))
	b <- (y[i1]-y[i0])/(x[i1]-x[i0])
	a <- y[i1] - b*x[i1]
	newy[i] <- a + b*newx[i]
    }
    list(x=newx, y=newy)
}