[R] mixture distributions

Tue Sep 4 02:32:56 CEST 2001

here are some functions I wrote and tested in Splus; originally
based on some else's version for two components.  I haven't tried
them in R yet...  I expect there is plenty of room for
improvement.

albyn
----------------------------------------------------------------------
"Mixture.em" <- 
function(y, mu, v, p=1/length(mu), eps = .00001, max.iter=25, prnt=F,probs=F)
{
	p <- p/sum(p)  # make sure probabilities add to 1
	iter <- 1
	n <- length(y)
	k <- length(mu)
	done <- 0
	loglik0 <- -(2^1000)
        while(!done && iter <= max.iter) {
		#
		# E step: guess probability y[i] is from group j
		#
		L <- Norm.mixd(y,p,mu,v)
		Lsum <- apply(L,1,"sum")
		loglik <- sum(log(Lsum))
                bayes <- sweep(L,1,Lsum,"/")
		#  
		# M step: estimate component parameters given 
		#         group assignment probabilities
		#
		N <- apply(bayes,2,sum)
                mu <- as.numeric(y %*% bayes)/N
		Y <- matrix(y, nrow=n, ncol=k)
		M <- matrix(mu, nrow=n, ncol=k, byrow=T)
		SQ <- bayes*(Y-M)^2
		v <- apply(SQ,2,sum)/N
		p <- N/sum(N)
		if(abs((loglik-loglik0)/loglik) < eps) done <- 1
		if(prnt){
			cat("step:",k,"\n")
			cat("means:",mu,"\n \n")
			}
		loglik0 <- loglik
		iter <- iter + 1
        }
if( !probs)        list(mu=mu,v=v,p=p,loglik=loglik)
else		list(mu=mu,v=v,p=p,loglik=loglik,P=bayes)
}

Norm.mixd <- function(y,p,m,v)
{
# evaluates the components of the normal mixture density determined
# by the vector of means mu, variances v, and proportions p, for the
# vector of data y.
# Pi is assumed to be predefined
# returns a matrix with rows corresponding to obs.
# and columns corresponding to mixture components, ie.
# d(i,j)= f(y[i] | m[j],v[j],p[j])
        k<-length(p)
        n<-length(y)
        a<-matrix(y,nrow=n,ncol=k)
        b<-matrix(m,nrow=n,ncol=k,byrow=T)
        d<-sweep((a-b)^2,2,v,"/")/2
        b<-matrix(p,nrow=n,ncol=k,byrow=T) 
        b<-sweep(b,2,sqrt(2*Pi*v),"/")
        d<- b*exp(-d)
        d
}

"norm.mix.plot"<-
function(p, m, v)
{
# creates data structure containing $x, $y for plotting the
# mixture density specified by the parameters in params
	s <- sqrt(v)
	if(length(s) == 1)
		s <- rep(s, length(p))
	if(length(p) != length(m))
		fatal("invalid parameter specification")
	k <- length(m)
	xlim <- c(min(m - 3 * s), max(m + 3 * s))
	x <- seq(xlim[1], xlim[2], length = 400)
	y <- matrix(0, nrow = k, ncol = length(x))
	for(i in 1:k) {
		y[i,  ] <- p[i] * dnorm(x, m[i], s[i])
		ifelse(is.na(y[i,  ]), 0, y[i,  ])
	}
	y <- apply(y, 2, "sum")
	z <- list(x = x, y = y)
	z
}
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To: r-help-request at stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._