[R] Re: Gaussian Mixtures Models

Noel Yvonnick noel at univ-lille3.fr
Thu Oct 24 10:10:11 CEST 2002


I join below a piece of code I have written for GMM in the special case of 
spherical noise. Note that the end condition is a fixed number of iterations. 
you may want to replace it by some criterion on the change in loglikelihood 
or something like that.

You may want to test it by typing:

# generate some bivariate data
t <- seq(.15,3.05,length=100)
X <- cbind(t,t+1.25*sin(2*t)) + matrix(rnorm(200,0,.1),100,2)

# GMM with 20 classes (or "points")
gm <- gmm(X,npts=20)

You may also want to free the variance parameters (var.equal=FALSE) and the 
prior probabilities (prior.probs=TRUE), though it is not recommended to free 
both of them.

Hope this helps,

-- 
Yvonnick Noel
Universite de Lille 3
UFR de psychologie


#####################################################################################
gmm <- function(X,npts,tmax=40,var.equal=TRUE,prior.prob=FALSE,plot.true=T)
{    

    # Needed packages
    require(mva)
    require(scatterplot3d)
    #require(MASS)
 
    X <- as.matrix(X)
    n <- nrow(X)
    p <- ncol(X)
    priors <- matrix(1,n,1) %*% matrix(1/npts,1,npts)
    
    # For drawing distribution width in the 2D case
    theta <- seq(0,2*pi,length=360)
    circle <- cbind(cos(theta),sin(theta))
    
    # Starting model: sample of observed data
    model <- X[sample(npts),]
    
    # Initialize inverse variance parameter
    d2 <- dist2(X,model)
    beta <- rep((n*p*npts)/sum(d2),npts)
  
    for(t in 1:tmax)
    {
        # Probabilities
        R <- exp(-.5 * (d2 %*% diag(beta))) * priors
                  
        # Responsibilities
        R <- R / apply(R,1,"sum")
        G <- apply(R,2,"sum")
        model <- (t(R)/G) %*% X
        
        # Plotting (each 5 iterations)
	if((plot.true)&&(((t%/%5)-(t/5))==0))
        {
            if(p==2)
            { 
                plot(X,pch=19,cex=.5,col="red",xlab="",ylab="",main="")
                points(model,col="blue",pch=19,cex=.7)
                for(l in 1:npts)
                {
	            icirc <- circle * 1.96 * sqrt(1/beta[l])
                    lines((matrix(1,360,1) %*% model[l,]) + 
icirc,col="darkgreen")
                }
            }
            else if(p==3)
            {
        
                gph <- 
scatterplot3d(X[,c(1,3,2)],highlight.3d=TRUE,pch=19,xlab="",ylab="",zlab="",main="")
                gph$points3d(model[,c(1,3,2)],col="blue",pch=19)

            }
            else if(p>3)
            {
                pc<-princomp(model)
                gph <- 
scatterplot3d(predict(pc,X)[,c(1,3,2)],highlight.3d=TRUE,pch=19,xlab="",ylab="",zlab="",main="")
                gph$points3d(pc$scores[,c(1,3,2)],col="blue",pch=19)

            }
        
        } 

        d2 <- dist2(X,model)

        # Constrain to equal variances
	if(var.equal)  { beta <- rep((n*p)/sum(R*d2),npts) }
        
        # In case of unequal variances
        else           { beta <- (p*G)/apply(R*d2,2,"sum") }

        # Take prior probabilities into account...
        if(prior.prob) { priors <- matrix(1,n,1) %*% (G / n) }
        
    }
    list(model=model,beta=beta,d2=d2,priors=priors)
}

############################################################################################
dist2 <- function(X,Y)
{
	if(missing(Y)) Y <- X
	
	if(!is.matrix(X)) X <- as.matrix(X)
	if(!is.matrix(Y)) Y <- as.matrix(Y)

	dimx<-dim(X)
	dimy<-dim(Y)
 
	sqrx<-diag(X%*%t(X))
	sqry<-diag(Y%*%t(Y))
 
	(sqrx %*% matrix(1,1,dimy[1])) + (matrix(1,dimx[1],1) %*% sqry) - 
(2*X%*%t(Y))
}    
###########################################################################################


-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To: r-help-request at stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._



More information about the R-help mailing list