######################################################################## # OBTdesign.R # # # # generates design matrix for simple and ordinal paired BT model # # (first column in output file is dependent variable) # # the designmatrix is stored in the dataframe "dm" and is written # # # to "outFile" (see below) # # # # preference responses are assumed to be coded # # 0,...,nrespcat-1, # # where 0 denotes (highest) preference for # # object i in comparison (object i, object j) # # # # the order of comparisons in the preferences data file is # # assumed to be # # (1,2) (1,3) (2,3) (1,4) (2,4) (3,4) (1,5) .... (J-1,J) # # # # if model with subject covariates: # # # # factors are assumed with levels # # 1,2,3,..,K # # for models with continuos subject covariates # # choose casewise==TRUE # # corresponding element of covlevels is NA (see below) # # # ######################################################################## ######################################################################## # BEGIN USER SPECIFICATIONS # ######################################################################## # names for objects: objnames<-c("reseach","career","education","title","transition") # name of preferences data file inputFile <- "motives_pc.dat" # number of rows in preferences data file nsubj=100 # number of response categories nrespcat<- 7 # name of output file (dep.variable + design matrix) outFile <- "motives_design.dat" # number of subject covariates ncov <- 3 # remaining specifications are ignored if ncov is zero: # for models with continuos subject covariates or # if nsubj < number of cells in subject factor crossclassification: # casewise <- TRUE # otherwise all covariates are crossclassified casewise <- FALSE # name of subject covariates file covFile <- "motives_covs.dat" # labels for covariates covnames<-c("gender","break","faculty") # number of factorlevels of covariates, NA for continuos covariates # (only if casewise==TRUE) covlevels <- c(2,2,2) ######################################################################## # END USER SPECIFICATIONS # ######################################################################## nobj<-length(objnames) ncomp <- nobj * (nobj-1) / 2 nrows <- ncomp * nrespcat # number of rows for 1 design matrix (1 subject or no covs) nrespcat <- nrespcat - 1 # response categories start with 0 if (ncov == 0 ) { totlev <- 1 ones.totlev <- 1 } else if (casewise){ totlev<-nsubj ones.totlev <- rep(1,nsubj) } else { totlev <- prod(covlevels) # total number of covariate levels ones.totlev<-rep(1,totlev) # vector for kronecker products } ones.ncomp<-rep(1,ncomp) # vector for kronecker products ones.nrows<-rep(1,nrows) # vector for kronecker products # # design matrix for objects # obj<-matrix(c(0:0),nrows,nobj) row<-1 for (j in 2:nobj) { for (i in 1:(j-1) ){ d <- nrespcat for (c in 0:nrespcat) { obj[row + c, i] <- d obj[row + c, j] <- -d d <- d-2 } row <- row + nrespcat + 1 } } obj<- ones.totlev %x% obj # stack object design matrix totlev times # # mu - factor for comparisons # mu<-rep.int(1:ncomp, rep.int((nrespcat+1),ncomp)) mu<-factor(rep(mu,totlev)) # stack mu totlev times #mu<-gl(ncomp,nrespcat+1,totlev*ncomp*(nrespcat+1)) # # design matrix for gammas # I <- diag(nrespcat+1) g <- ones.ncomp %x% I # stack gamma matrix ncomp times for 1 subject g <- ones.totlev %x% g # stack gammas totlev times gamnames<-paste("G", 0:nrespcat, sep = "") # # read data into responsevector # (count frequencies of preferences) # # read ascii data into datamatrix rawdata<-matrix(scan(inputFile, n = nsubj*ncomp), nsubj, ncomp, byrow = TRUE) # case: no subject covariates # if (ncov == 0 ) { y<-rep(0,nrows) for (i in 1:nsubj) { k<-1 for (j in 1:ncomp) { t<-rawdata[i,j] if (t >= 0 && t <= nrespcat) { y[k+t]=y[k+t]+1 } k <- k + nrespcat + 1 } } # case: categorical subject covariates # } else if (!casewise) { # read subject covariates data into datamatrix cov.case<-matrix(scan(covFile, n = nsubj*ncov), nr=nsubj, byrow = TRUE) # # transform response data into responsevector y # according to subject covariates # (count frequencies of preferences) # indx <- ncov:1 if (ncov == 1) { baslev <- nrows } else { baslev <- c(covlevels[2:ncov],nrows) } levmult <- rev(cumprod(baslev[indx])) y<-rep(0,totlev * nrows) for (i in 1:nsubj) { y.address <- sum((cov.case[i,]-1)*levmult) for (j in 1:ncomp) { t<-rawdata[i,j] if (t >= 0 && t <= nrespcat) { y[y.address+t+1]=y[y.address+t+1]+1 } y.address <- y.address + nrespcat+1 } } # # transform subject covariates data into covariate vectors # cov<-NULL for (j in 1:ncov) { scov<-gl(covlevels[j],levmult[j],totlev*nrows) cov<-cbind(cov,scov) } # case: metric (and categorical) subject covariates # } else { # read subject covariates data into datamatrix cov.case<-matrix(scan(covFile, n = nsubj*ncov), nr=nsubj, byrow = TRUE) cov<-cov.case %x% ones.nrows case<-rep.int(1:nsubj, rep.int(nrows,nsubj)) case<-factor(case) # factor for cases cov<-cbind(cov,case) covnames<-c(covnames,"CASE") covlevels<-c(covlevels,nsubj) k<-1 y<-rep(0,nrows * totlev) for (i in 1:nsubj) { for (j in 1:ncomp) { t<-rawdata[i,j] if (t >= 0 && t <= nrespcat) { y[k+t]=y[k+t]+1 } k <- k + nrespcat + 1 } } } # # prepare dataframe and export # if (ncov == 0) { dm<-data.frame(y,mu,g,obj) varnames<-c("mu",gamnames,objnames) } else { dm<-data.frame(y,mu,g,obj,cov) varnames<-c("mu",gamnames,objnames,covnames) } names(dm)<-c("!y",varnames) write.table(dm,outFile,quote=F,row.names=F) ################################################################