########################################################################
# OBTdesign.R                                                          #
#                                                                      #
# generates design matrix for simple and ordinal paired BT model       #
# (first column in output file is dependent variable)                  #
# the designmatrix is stored in the dataframe "dm" and is written      #                                                              #
# to "outFile" (see below)                                             #
#                                                                      #
# preference responses are assumed to be coded                         #
#   0,...,nrespcat-1,                                                  #
#   where 0 denotes (highest) preference for                           #
#   object i in comparison (object i, object j)                        #
#                                                                      #
# the order of comparisons in the preferences data file is             #
# assumed to be                                                        #
#   (1,2) (1,3) (2,3) (1,4) (2,4) (3,4) (1,5) .... (J-1,J)             #
#                                                                      #
# if model with subject covariates:                                    #
#                                                                      #
# factors are assumed with levels                                      #
#   1,2,3,..,K                                                         #
# for models with continuos subject covariates                         #
#   choose casewise==TRUE                                              #
#   corresponding element of covlevels is NA (see below)               #
#                                                                      #
########################################################################

########################################################################
#  BEGIN USER SPECIFICATIONS                                           #
########################################################################

# names for objects:
       objnames<-c("reseach","career","education","title","transition")

# name of preferences data file
       inputFile <- "motives_pc.dat"

# number of rows in preferences data file
       nsubj=100

# number of response categories
       nrespcat<- 7

# name of output file (dep.variable + design matrix)
       outFile <- "motives_design.dat"

# number of subject covariates
       ncov <- 3

# remaining specifications are ignored if ncov is zero:

# for models with continuos subject covariates or
# if nsubj < number of cells in subject factor crossclassification:
#      casewise <- TRUE
# otherwise all covariates are crossclassified
       casewise <- FALSE

# name of subject covariates file
       covFile <- "motives_covs.dat"

# labels for covariates
       covnames<-c("gender","break","faculty")

# number of factorlevels of covariates, NA for continuos covariates
#                                       (only if casewise==TRUE)
       covlevels <- c(2,2,2)

########################################################################
#  END USER SPECIFICATIONS                                             #
########################################################################


nobj<-length(objnames)
ncomp <- nobj * (nobj-1) / 2
nrows <- ncomp * nrespcat        # number of rows for 1 design matrix (1 subject or no covs)
nrespcat <- nrespcat - 1         # response categories start with 0


if (ncov == 0 ) {
      totlev <- 1
      ones.totlev <- 1
} else if (casewise){
      totlev<-nsubj
      ones.totlev <- rep(1,nsubj)
} else {
      totlev <- prod(covlevels)  # total number of covariate levels
      ones.totlev<-rep(1,totlev) # vector for kronecker products
}

ones.ncomp<-rep(1,ncomp)         # vector for kronecker products
ones.nrows<-rep(1,nrows)         # vector for kronecker products

#
# design matrix for objects
#

obj<-matrix(c(0:0),nrows,nobj)
row<-1
for (j in 2:nobj) {
    for (i in 1:(j-1) ){
       d <- nrespcat
       for (c in 0:nrespcat) {
         obj[row + c, i]  <-   d
         obj[row + c, j]  <-  -d
         d <- d-2
       }
      row <- row + nrespcat + 1
    }
}
obj<- ones.totlev %x% obj            # stack object design matrix totlev times


#
# mu - factor for comparisons
#
mu<-rep.int(1:ncomp, rep.int((nrespcat+1),ncomp))
mu<-factor(rep(mu,totlev))           # stack mu totlev times

#mu<-gl(ncomp,nrespcat+1,totlev*ncomp*(nrespcat+1))


#
# design matrix for gammas
#

I <- diag(nrespcat+1)
g <- ones.ncomp %x% I               # stack gamma matrix ncomp times for 1 subject
g <- ones.totlev %x% g              # stack gammas totlev times

gamnames<-paste("G", 0:nrespcat, sep = "")


#
# read data into responsevector
#   (count frequencies of preferences)
#

# read ascii data into datamatrix
      rawdata<-matrix(scan(inputFile, n = nsubj*ncomp), nsubj, ncomp, byrow = TRUE)

# case: no subject covariates
#
if (ncov == 0 ) {

      y<-rep(0,nrows)
      for (i in 1:nsubj) {
        k<-1
        for (j in 1:ncomp) {
           t<-rawdata[i,j]
           if (t >= 0 && t <= nrespcat) {
              y[k+t]=y[k+t]+1
           }
           k <- k + nrespcat + 1
        }
      }


# case: categorical subject covariates
#
} else if (!casewise) {

# read subject covariates data into datamatrix
      cov.case<-matrix(scan(covFile, n = nsubj*ncov), nr=nsubj, byrow = TRUE)

#
# transform response data into responsevector y
# according to subject covariates
# (count frequencies of preferences)
#

      indx <- ncov:1
      if (ncov == 1) {
           baslev <- nrows
      } else {
           baslev <- c(covlevels[2:ncov],nrows)
      }

      levmult <- rev(cumprod(baslev[indx]))

      y<-rep(0,totlev * nrows)
      for (i in 1:nsubj) {
          y.address <- sum((cov.case[i,]-1)*levmult)
          for (j in 1:ncomp) {
             t<-rawdata[i,j]
             if (t >= 0 && t <= nrespcat) {
                y[y.address+t+1]=y[y.address+t+1]+1
             }
          y.address <- y.address + nrespcat+1
          }
      }

#
# transform subject covariates data into covariate vectors
#
      cov<-NULL
      for (j in 1:ncov) {
          scov<-gl(covlevels[j],levmult[j],totlev*nrows)
          cov<-cbind(cov,scov)
      }

# case: metric (and categorical) subject covariates
#
} else {

# read subject covariates data into datamatrix
      cov.case<-matrix(scan(covFile, n = nsubj*ncov), nr=nsubj, byrow = TRUE)
      cov<-cov.case %x% ones.nrows

      case<-rep.int(1:nsubj, rep.int(nrows,nsubj))
      case<-factor(case)                            # factor for cases
      cov<-cbind(cov,case)
      covnames<-c(covnames,"CASE")
      covlevels<-c(covlevels,nsubj)

      k<-1
      y<-rep(0,nrows * totlev)

      for (i in 1:nsubj) {
        for (j in 1:ncomp) {
           t<-rawdata[i,j]
           if (t >= 0 && t <= nrespcat) {
              y[k+t]=y[k+t]+1
           }
           k <- k + nrespcat + 1
        }
      }

}

#
# prepare dataframe and export
#

if (ncov == 0) {
      dm<-data.frame(y,mu,g,obj)
      varnames<-c("mu",gamnames,objnames)
} else {
      dm<-data.frame(y,mu,g,obj,cov)
      varnames<-c("mu",gamnames,objnames,covnames)
}
names(dm)<-c("!y",varnames)

write.table(dm,outFile,quote=F,row.names=F)

################################################################