prcomp and princomp

Paul Gilbert pgilbert@bank-banque-canada.ca
Mon, 24 Aug 1998 11:08:38 -0400


Below are mva functions prcomp and princomp as well as my own version
prcomponents. Also included are contents for prcomp.Rd and princomp.Rd.
Functions prcomp and princomp are intended to replicate Splus results. (I think
this princomp is already in the R distribution, but the version of prcomp
already in the R distribution does not reproduce Splus results.)

My version, prcomponent, is preliminary and as yet undocumented, but I would
appreciate comments. It uses the preferred svd as in prcomp, not eigen as in
princomp,  but returns some extra information as in princomp. Following Bill
Venables suggestion, it also allows the user to specify the effective scaling
factor (e.g. N or N-1).

Also, I have just noticed that there is some very small duplication
(print.princomp, summary.princomp, plot.princomp) between the functions  below
and the file princomp-add.R already in the R distribution. If someone could pick
the best versions I would appreciate it. I am about to get buried with some
other work and would like to get this in the R distribution before it gets lost
on my desk (again).

Paul Gilbert

############## prcomp ##############

prcomp <- function(x, retx=TRUE, center=TRUE, scale=FALSE) {
# s <- svd(scale(x, center=center, scale=scale),nu=0)
# above produces warning since scale is both a function and a variable so
 s <- svd(get("scale",envir=.GlobalEnv)
   (x, center=center, scale=scale),nu=0)
# rank <- sum(s$d > 0)
 rank <- sum(s$d > (s$d[1]*sqrt(.Machine$double.eps)))
 if (rank < ncol(x)) s$v <- s$v[,1:rank, drop=F]
 s$d <- s$d/sqrt(max(1,nrow(x) -1))
 if(retx) r <- list(sdev=s$d, rotation=s$v, x=as.matrix(x) %*% s$v)
 else     r <- list(sdev=s$d, rotation=s$v)
 class(r) <- "prcomp"
 r
}

print.prcomp <- function(x) {
 cat("Standard deviations:\n")
 print(x$sdev)
 cat("\nRotation:\n")
 print(x$rotation)
 if (!is.null(x$x))
  {cat("\nRotated variables:\n")
   print(x$x)
  }
 invisible(x)
}

plot.prcomp <- function(obj, x=1, y=2, main="Scree Plot",
  xlab=paste("Principle component", x),
  ylab=paste("Principle component", y), ...) {
    if (is.null(obj$x))
 stop("Rotated x has not be calculated by prcomp. Use retx=T in prcomp.")
    plot(obj$x[,c(x,y)], main=main, xlab=xlab, ylab=ylab, ...)
}

############### prcomp.Rd ###############

\name{prcomp}
\title{Principal Components Analysis}
\usage{
prcomp(x, retx=TRUE, center=TRUE, scale=FALSE)

}
\arguments{
\item{x}{a matrix (or data frame) which provides the data
for the principal components analysis.}
\item{retx}{a logical value indicating whether the rotated variables should
be returned.}
\item{center}{a logical value indicating whether the variables should
be shifted to be zero centered.}
\item{scale}{a logical value indicating whether the variables should
be scaled to have unit variance before the analysis takes place. The default
is F for consistency with S, but in general scaling is advisable.}
}
\description{
This function performs a principal components analysis on
the given data matrix and returns the results as a
\code{prcomp} object. The calculation is done with svd on the data matrix, not
by using eigen on the covariance matrix. This is generally the preferred method
for numerical accuracy.  The print method for the these
objects prints the results in a nice format and the
plot method produces a scree plot.
}
\value{
\code{prcomp} returns an list with class \code{"prcomp"}
containing the following components:
\item{sdev}{the standard deviation of the principal components
(i.e. the eigenvalues of the cov matrix - though the calculation is actually
done with the singular values of the data matrix.)}
\item{rotation}{the matrix of variable loadings (i.e. a matrix
whose columns contain the eigenvectors). The function princomp returns this
in the element \code{loadings}.}
\item{x}{if retx is true the value of the rotated data (the data multiplied by
the \code{rotation} matrix) is returned.}
}
\references{
Mardia, K. V., J. T. Kent, J and M. Bibby (1979),
\emph{Multivariate Analysis}, London: Academic Press.

Venerables, W. N. and B. D. Ripley (1994).
\emph{Modern Applied Statistics with S-Plus}, Springer-Verlag.
}

\seealso{
\code{\link{princomp}}, \code{\link{prcomponents}}, \code{\link{cor}},
\code{\link{cov}},
\code{\link{svd}}, \code{\link{eigen}}.
}
\examples{
# the variances of the variables in the
# crimes data vary by orders of magnitude
data(crimes)
prcomp(crimes)
prcomp(crimes,scale=TRUE)
}




############## princomp ##############

princomp <- function(x, cor=FALSE, scores=TRUE,
  subset=rep(TRUE, nrow(as.matrix(x)))) {
 # it is tempting to add  use="all.obs" which could be passed to cov or
 # cor but then the calculation of N is complicated.
 z<- as.matrix(x)[subset,, drop=F]
 N <- nrow(z)
 if(cor) cv <- get("cor",envir=.GlobalEnv)(z)
 else    cv <- cov(z)
#  (svd can be used but gives different signs for some vectors)
 edc <- eigen(cv)
 cn <- paste("Comp.", 1:ncol(cv), sep="")
 names(edc$values) <- cn
 dimnames(edc$vectors) <- list(dimnames(x)[[2]], cn)
 scr<- NULL
 if (cor)
   {sdev <- sqrt(edc$values)
    sc <- (apply(z,2,var)*(N-1)/N)^0.5
    if (scores)
        scr<-(scale(z,center=T,scale=T) %*% edc$vectors)*sqrt(N/(N-1))
   }
 else
   {sdev <- sqrt(edc$values*(N-1)/N)
    sc <- rep(1, ncol(z))
    if (scores)
        scr<- (scale(z,center=T,scale=F) %*% edc$vectors)
   }
 names(sc) <- dimnames(x)[[2]]
 edc <-list(sdev=sdev, loadings=edc$vectors,
     center=apply(z,2,mean), scale=sc, n.obs=N, scores=scr)
# The Splus function also return list elements factor.sdev, correlations
# and coef, but these are not documented in the help. coef seems to equal
# load. The Splus function also return list elements call and terms which
# are not supported here.
 class(edc) <- "princomp"
 edc
}

print.princomp <- function(x) {
 cat("Standard deviations:\n")
 print(x$sdev)
 cat(length(x$scale), " variables and ", x$n.obs, "observations.\n")
 cat("Scale:\n")
 print(x$scale)
 invisible(x)
}

summary.princomp <- function(x) {
 per.var <- (x$sdev^2)/sum(x$sdev^2)
 r <- list(sdev=x$sdev, per.var= per.var, cum.var=cumsum(per.var))
 class(r) <- "summary.princomp"
 r
}

print.summary.princomp <- function(x) {
 cat("                        ",names(x$sdev),"\n")
 cat("Standard deviations   : ", x$sdev,      "\n")
 cat("Proportion of variance: ", x$per.var,   "\n")
 cat("Cumulative proportion : ", x$cum.var,   "\n")
 invisible(x)
}

plot.princomp <- function(obj, x=1, y=2, main="Scree Plot",
  xlab=paste("Principle component", x),
  ylab=paste("Principle component", y), ...) {
    plot(obj$scores[,c(x,y)], main=main, xlab=xlab, ylab=ylab, ...)
}

############### princomp.Rd ###############

\name{princomp}
\title{Principal Components Analysis}
\usage{
princomp(x, cor=FALSE, scores=TRUE, subset=rep(TRUE, nrow(as.matrix(x))))
print.princomp(obj)
summary.princomp(obj)
plot.princomp(obj)
}
\alias{print.princomp}
\alias{plot.princomp}
\arguments{
\item{x}{a matrix (or data frame) which provides the data
for the principal components analysis.}
\item{cor}{a logical value indicating whether the calculation should use the
correlation matrix or the covariance matrix.}
\item{score}{a logical value indicating whether the score on each principal
component should be calculated.}
\item{subset}{a vector used to select rows (observations) of the
data matrix \code{x}.}
}
\description{
This function performs a principal components analysis on
the given data matrix and returns the results as a \code{princomp} object.
The calculation is done using \code{eigen} on the correlation or
covariance matrix, as determined by \code{cor}. This is done for compatibility
with the Splus result (even though alternate forms for \code{x} - e.g. a
covariance matrix - are not supported as they are in Splus). A preferred method
of calculation is to use svd on \code{x}, as is done in \code{prcomp}.

Note that the scaling of results is affected by the setting of \code{cor}.
If \code{cor} is T then the divisor in the calculation of the sdev is N-1,
otherwise it is N. This has the effect that the result is slightly different
depending on whether scaling is done first on the data and cor set to F, or
done automatically in \code{princomp} with cor=T.

The print method for the these
objects prints the results in a nice format and the
plot method produces a scree plot.
}
\value{
\code{princomp} returns an list with class \code{"princomp"}
containing the following components:
\item{var}{the variances of the principal components
(i.e. the eigenvalues)}
\item{load}{the matrix of variable loadings (i.e. a matrix
whose columns contain the eigenvectors).}
\item{scale}{the value of the \code{scale} argument.}
}
\references{
Mardia, K. V., J. T. Kent and J. M. Bibby (1979).
\emph{Multivariate Analysis}, London: Academic Press.

Venerables, W. N. and B. D. Ripley (1994).
\emph{Modern Applied Statistics with S-Plus}, Springer-Verlag.
}
\seealso{
\code{\link{prcomp}}, \code{\link{prcomponents}}, \code{\link{cor}},
\code{\link{cov}}, \code{\link{eigen}}.
}
\examples{
# the variances of the variables in the
# crimes data vary by orders of magnitude
data(crimes)
princomp(crimes)
princomp(crimes,cor=TRUE)
princomp(scale(crimes, scale=T, center=T), cor=F)
}


############## prcomponents ##############

prcomponents <- function(x, center=TRUE, scale=TRUE, N=nrow(x)-1)
   {if (center) center <- apply(x,2,mean)
    else        center <- rep(0, ncol(x))
    if (scale)  scale  <- sqrt(apply(x,2,var))
    else        scale  <- rep(1, ncol(x))
    s <- svd(sweep(sweep(as.matrix(x),2, center),2, scale, FUN="/"))
    # remove anything corresponding to effectively zero singular values.
    rank <- sum(s$d > (s$d[1]*sqrt(.Machine$double.eps)))
    if (rank < ncol(x)) s$v <- s$v[,1:rank, drop=FALSE]
    s$d <- s$d/sqrt(N)

#   r <- list(sdev=s$d, proj=s$v,x=x %*% s$v, center=center, scale=scale)
    r <- list(sdev=s$d, proj=s$v, center=center, scale=scale)
    r
}



-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To: r-devel-request@stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._