[R] mixture distributions
Albyn Jones
jones at reed.edu
Tue Sep 4 02:32:56 CEST 2001
here are some functions I wrote and tested in Splus; originally
based on some else's version for two components. I haven't tried
them in R yet... I expect there is plenty of room for
improvement.
albyn
----------------------------------------------------------------------
"Mixture.em" <-
function(y, mu, v, p=1/length(mu), eps = .00001, max.iter=25, prnt=F,probs=F)
{
p <- p/sum(p) # make sure probabilities add to 1
iter <- 1
n <- length(y)
k <- length(mu)
done <- 0
loglik0 <- -(2^1000)
while(!done && iter <= max.iter) {
#
# E step: guess probability y[i] is from group j
#
L <- Norm.mixd(y,p,mu,v)
Lsum <- apply(L,1,"sum")
loglik <- sum(log(Lsum))
bayes <- sweep(L,1,Lsum,"/")
#
# M step: estimate component parameters given
# group assignment probabilities
#
N <- apply(bayes,2,sum)
mu <- as.numeric(y %*% bayes)/N
Y <- matrix(y, nrow=n, ncol=k)
M <- matrix(mu, nrow=n, ncol=k, byrow=T)
SQ <- bayes*(Y-M)^2
v <- apply(SQ,2,sum)/N
p <- N/sum(N)
if(abs((loglik-loglik0)/loglik) < eps) done <- 1
if(prnt){
cat("step:",k,"\n")
cat("means:",mu,"\n \n")
}
loglik0 <- loglik
iter <- iter + 1
}
if( !probs) list(mu=mu,v=v,p=p,loglik=loglik)
else list(mu=mu,v=v,p=p,loglik=loglik,P=bayes)
}
Norm.mixd <- function(y,p,m,v)
{
# evaluates the components of the normal mixture density determined
# by the vector of means mu, variances v, and proportions p, for the
# vector of data y.
# Pi is assumed to be predefined
# returns a matrix with rows corresponding to obs.
# and columns corresponding to mixture components, ie.
# d(i,j)= f(y[i] | m[j],v[j],p[j])
k<-length(p)
n<-length(y)
a<-matrix(y,nrow=n,ncol=k)
b<-matrix(m,nrow=n,ncol=k,byrow=T)
d<-sweep((a-b)^2,2,v,"/")/2
b<-matrix(p,nrow=n,ncol=k,byrow=T)
b<-sweep(b,2,sqrt(2*Pi*v),"/")
d<- b*exp(-d)
d
}
"norm.mix.plot"<-
function(p, m, v)
{
# creates data structure containing $x, $y for plotting the
# mixture density specified by the parameters in params
s <- sqrt(v)
if(length(s) == 1)
s <- rep(s, length(p))
if(length(p) != length(m))
fatal("invalid parameter specification")
k <- length(m)
xlim <- c(min(m - 3 * s), max(m + 3 * s))
x <- seq(xlim[1], xlim[2], length = 400)
y <- matrix(0, nrow = k, ncol = length(x))
for(i in 1:k) {
y[i, ] <- p[i] * dnorm(x, m[i], s[i])
ifelse(is.na(y[i, ]), 0, y[i, ])
}
y <- apply(y, 2, "sum")
z <- list(x = x, y = y)
z
}
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !) To: r-help-request at stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._
More information about the R-help
mailing list