# [R] Writing a function to fit ALSOS models. problem with normalization?

David Hugh-Jones davidhughjones at gmail.com
Thu Mar 23 19:14:14 CET 2006

```Dear all,

Below is my attempt at a function to fit Alternate Least Squares
Optimal Scaling models, as described in Young (1981) "Quantitative
Analysis of Qualitative Data" and Jacoby (1999) "Levels of Measurement
and Political Research: An Optimistic View".

I would welcome any comments on coding style, tips & tricks etc.

I also have a specific problem: the output tends to give very small
coefficients, and very large fitted values for specific factor levels.
Am I doing the normalization right?

Cheers
David

library(car) # for "recode"

alsos.fit = function (y, x, tol = 1e-7) {
# y is a vector or factor or ordered factor
# x is a data frame of vectors/factors/ordereds

# we treat the y's as the first column of the matrix
x = cbind(y, x)
x = x[complete.cases(x),]
ox = x
x.facs = sapply(x, is.factor)
x.ords = sapply(x, is.ordered)

x = sapply(x, as.numeric)

old.SSE = Inf
while(T) {
# least squares regression with an intercept
ols = lm.fit(cbind(rep(1,nrow(x)), x[,-1]) , x[,1])
b = ols\$coefficients
SSE = drop(ols\$residuals %*% ols\$residuals)
if (old.SSE-SSE<tol) {
factor.scores=list()
for (i in (1:ncol(x))[x.facs]) {
nm = colnames(x)[i]
factor.scores[[nm]] = tapply(x[,i], ox[,i],
function (foo) {return(foo)})
names(factor.scores[[nm]]) = levels(ox[,i])
}

return(list(
factor.scores=factor.scores,
scaled.y=x[,1], scaled.x=x[,-1],
coefficients=b, SSE=SSE,
))
}
old.SSE=SSE

mx = nx = x
mx[] = 0
nx[] = 0
for (i in (1:ncol(x))[x.facs]) {

# optimal scaling
if (i==1) nx[,i] = ols\$fitted.values
else nx[,i] = (x[,1] - cbind(rep(1,nrow(x)), x[,c(-1,-i)]) %*% b[-i])/b[i]

# create within-category means
tmpfac = factor(ox[,i], labels=1:nlevels(ox[,i]))
catmeans = tapply(nx[,i], tmpfac, mean)

# ensure ordinal values are correctly ordered
if (x.ords[i]) {
tmp = kruskal.ordering(nx[,i], tmpfac)
tmpfac = tmp\$tmpfac
catmeans = tmp\$catmeans
}

# set values to within-category means
mx[,i] = catmeans[tmpfac]

# normalize according to Young (1981)
mx[,i] = mx[,i] * (nx[,i] %*% nx[,i]) / (mx[,i] %*% mx[,i])

}
x[,x.facs] = mx[,x.facs]
}
}

# as described in Kruskal (1964)
kruskal.ordering = function(numeric.data, tmpfac) {
j = 1
upact = T
while(T) {
catmeans = tapply(numeric.data, tmpfac, mean) # vector w as many
items as tmpfac cats
# have we finished?
if (j>nlevels(tmpfac)) return (list(catmeans=catmeans,tmpfac=tmpfac))
if ((j==nlevels(tmpfac) || catmeans[j] <= catmeans[j+1]) &&
(j==1 || catmeans[j] >= catmeans[j-1])) {
j=j+1
upact=T
next
}
if (upact) {
if (j < nlevels(tmpfac) && catmeans[j] > catmeans[j+1]) {
tmpfac = recode(tmpfac, paste(j, ":", j+1,"='", j+1, "'", sep=""))
levels(tmpfac) = 1:nlevels(tmpfac)
}
upact=F
}
else {
if (j > 1 && catmeans[j] < catmeans[j-1]) {
tmpfac = recode(tmpfac, paste(j-1, ":", j, "='", j, "'", sep=""))
levels(tmpfac) = 1:nlevels(tmpfac)
j=j-1
}
upact=T
}
}
}

```