[R] Dataframes in PLS package

Sun Mar 4 20:20:42 CET 2012

It's nice to cc the list for archival reasons -- it also usually gets
you a faster response as more folks can see how the thread develops.

The problem is that the colnames aren't actually depy and indx: they
are depy.w, depy.h, etc. If you want to model, you need to use those
as is: e.g., with your code

eqn <- structure(list(depy.w = c(63L, 145L, 104L, 109L, 221L, 110L,
194L, 120L, 210L, 243L, 163L, 93L, 167L, 232L, 112L, 185L, 103L,
202L, 203L, 207L, 239L, 109L, 112L, 176L, 126L, 145L, 125L, 191L,
110L, 92L), depy.h = c(55L, 52L, 32L, 69L, 61L, 40L, 41L, 76L,
61L, 101L, 62L, 55L, 61L, 65L, 52L, 52L, 43L, 87L, 57L, 37L,
74L, 44L, 45L, 52L, 54L, 51L, 66L, 53L, 43L, 36L), depy.d = c(1L,
1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L), depy.s = c(0L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), indx.a = c(44L,
33L, 68L, 94L, 72L, 48L, 85L, 19L, 41L, 57L, 64L, 27L, 64L, 32L,
31L, 88L, 80L, 70L, 68L, 58L, 42L, 87L, 69L, 52L, 45L, 25L, 66L,
80L, 17L, 70L), indx.i = c(37200L, 69300L, 56900L, 44300L, 79800L,
17600L, 58100L, 76700L, 37600L, 40800L, 400L, 33400L, 6000L,
7400L, 94000L, 84200L, 0L, 0L, 43300L, 0L, 68600L, 47300L, 16100L,
95900L, 69200L, 12200L, 7500L, 70600L, 11400L, 0L), indx.r = c(4L,
4L, 3L, 6L, 6L, 5L, 4L, 3L, 1L, 5L, 3L, 3L, 5L, 1L, 6L, 4L, 2L,
1L, 4L, 1L, 4L, 6L, 1L, 6L, 4L, 2L, 2L, 5L, 3L, 4L), indx.x = c(0L,
1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 2L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L)), .Names = c("depy.w",
"depy.h", "depy.d", "depy.s", "indx.a", "indx.i", "indx.r", "indx.x"
), row.names = c(NA, 30L), class = "data.frame")

library(pls)

apls <- plsr(depy.w + depy.h + depy.d + depy.s ~ ., data=eqn) # Works like a charm

I don't believe there's a way to do wildcard names in general
(something like depy.*, but I'd welcome correction), but you can save
some keystrokes by using the "." term to mean "everything else I
haven't already used".

Hope this helps,

Michael

On Sun, Mar 4, 2012 at 1:30 PM, Chris Westland <westland at uic.edu> wrote:
> Thanks Michael.  I had tried to drop the I(as.matrix(...)) conversions, and
> fiddled with a number of other permutations of code ... I still can't seem
> to get it right.
>
> The col names appear to be depy and indx ... here is the output (and the
> rows are just line numbers)
>
>
>
>> colnames(eqn)
>
> [1] "depy.w" "depy.h" "depy.d" "depy.s" "indx.a" "indx.i" "indx.r"
> "indx.x"
>
>
>
>> rownames(eqn)
>
>     [1]
> "1"     "2"     "3"     "4"     "5"     "6"     "7"     "8"     "9"     "10"    "11"    "12"    "13"    "14"    "15"    "16"    "17"    "18"    "19"    "20"    "21"    "22"    "23"    "24"    "25"    "26"    "27"    "28"
>
>    [29] "29"    "30"    "31"….etc.
>
>
>
>
>
>
> Here is the dput(eqn)  and showData for the file 'eqn':
>
>
>
>> dput(head(eqn, 30))
>
> structure(list(depy.w = c(63L, 145L, 104L, 109L, 221L, 110L,
>
> 194L, 120L, 210L, 243L, 163L, 93L, 167L, 232L, 112L, 185L, 103L,
>
> 202L, 203L, 207L, 239L, 109L, 112L, 176L, 126L, 145L, 125L, 191L,
>
> 110L, 92L), depy.h = c(55L, 52L, 32L, 69L, 61L, 40L, 41L, 76L,
>
> 61L, 101L, 62L, 55L, 61L, 65L, 52L, 52L, 43L, 87L, 57L, 37L,
>
> 74L, 44L, 45L, 52L, 54L, 51L, 66L, 53L, 43L, 36L), depy.d = c(1L,
>
> 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L,
>
> 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L), depy.s = c(0L,
>
> 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L,
>
> 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), indx.a = c(44L,
>
> 33L, 68L, 94L, 72L, 48L, 85L, 19L, 41L, 57L, 64L, 27L, 64L, 32L,
>
> 31L, 88L, 80L, 70L, 68L, 58L, 42L, 87L, 69L, 52L, 45L, 25L, 66L,
>
> 80L, 17L, 70L), indx.i = c(37200L, 69300L, 56900L, 44300L, 79800L,
>
> 17600L, 58100L, 76700L, 37600L, 40800L, 400L, 33400L, 6000L,
>
> 7400L, 94000L, 84200L, 0L, 0L, 43300L, 0L, 68600L, 47300L, 16100L,
>
> 95900L, 69200L, 12200L, 7500L, 70600L, 11400L, 0L), indx.r = c(4L,
>
> 4L, 3L, 6L, 6L, 5L, 4L, 3L, 1L, 5L, 3L, 3L, 5L, 1L, 6L, 4L, 2L,
>
> 1L, 4L, 1L, 4L, 6L, 1L, 6L, 4L, 2L, 2L, 5L, 3L, 4L), indx.x = c(0L,
>
> 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 2L, 0L, 0L, 1L, 0L,
>
> 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L)), .Names = c("depy.w",
>
> "depy.h", "depy.d", "depy.s", "indx.a", "indx.i", "indx.r", "indx.x"
>
> ), row.names = c(NA, 30L), class = "data.frame")
>
>
>
>
>
>
>
>> showData(eqn)
>
>
>
> depy.w depy.h depy.d depy.s indx.a indx.i indx.r indx.x
>
>   63     55      1      0     44  37200      4      0
>
>    145     52      1      1     33  69300      4      1
>
>    104     32      0      1     68  56900      3      1
>
>    109     69      1      1     94  44300      6      1
>
>    221     61      0      1     72  79800      6      0
>
>    110     40      1      1     48  17600      5      1
>
>    194     41      0      0     85  58100      4      0
>
>    120     76      1      1     19  76700      3      0
>
>    210     61      0      0     41  37600      1      0 ... etc.
>
>
>
>
> Initially, I had input a file 'pls' with the script:
>
>
> dep <- pls[,1:4]
>
> ind <- pls[,5:8]
>
> eqn <- data.frame(depy = dep, indx = ind)
>
> apls <- plsr(depy ~ indx, data=eqn)
>
>
> .... and this gives me   [7] ERROR:  object 'depy' not found
>
>
> Note that the original input comes from a matrix 'pls' and my intent is to
> convert this to data.frames that the plsr package can parse ...  a dput(pls)
> gives me ...
>
>
>
>   .... lots and lots of leading line information ...  0L, 0L, 1L, 2L, 0L,
> 0L, 1L, 1L, 0L, 0L, 0L, 2L, 1L, 0L, 1L,
>
>     2L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L,
>
>     0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L,
>
>     1L, 0L, 1L, 1L, 1L, 1L)), .Names = c("w", "h", "d", "s",
>
> "a", "i", "r", "x"), class = "data.frame", row.names = c(NA,
>
> -10000L))
>
>
>
> If you have any other suggestions concerning how I might fiddle the files to
> get them into a format that PLSR package would like, that would be great
>
>
> Chris Westland
>
>
>
> On Sat, Mar 3, 2012 at 8:17 PM, R. Michael Weylandt
> <michael.weylandt at gmail.com> wrote:
>>
>> Can you post dput(head(eqn, 30)) so we can take a look at your data?
>> It's something of a cryptic error and that would go a long way in
>> helping us help you.
>>
>> Without that though, I'm not sure you need the I(as.matrix(dep)) and
>> I(as.matrix(ind)), I would imagine (untested) that eqn <-
>> data.frame(depy = dep, indx = ind) would work (probably better as I()
>> changes things just a little).
>>
>> I have a hunch that the colnames of eqn are not actually depy and indx
>> and that's what ultimately leads to the error. Can you look at
>> colnames(eqn) and use those exactly in the formula to plsr? That might
>> fix it.
>>
>> Michael
>>
>>
>>
>> On Sat, Mar 3, 2012 at 5:01 PM, westland <westland at uic.edu> wrote:
>> > I am still/again having trouble getting PLSR to recognize the input data
>> > frames.   Here is what I have done:
>> >
>> >  I read in a 10000 x 8 table of data to 'pls'
>> >
>> > assign the first four columns to  matrix 'dep' and the second four to
>> > matrix
>> > 'ind' with the following commands:
>> >
>> > dep <- pls[,1:4]
>> > ind <- pls[,5:8]
>> >
>> > I create the data.frame 'eqn' :
>> >
>> > eqn <- data.frame(depy = I(as.matrix(dep)), indx = I(as.matrix(ind)))
>> >
>> > And run the PLSR package
>> >
>> > apls <- plsr(depy ~ indx, data=eqn)
>> >
>> > I seem to be getting either one of two error messages:
>> >
>> > [12] ERROR:
>> >  invalid type (list) for variable 'dep'
>> > [13] ERROR:
>> >  object of type 'closure' is not subsettable
>> >
>> > I'm sure now that this is a problem in my creation of data.frames, but
>> > can't
>> > seem to find anything that describes the problem
>> >
>> >
>> > -----
>> > J. Christopher Westland
>> > Professor, Information & Decision Sciences, University of Illinois -
>> > Chicago
>> > 601 S. Morgan Street (UH2400) Chicago, IL    60607-7124
>> > Telephone       +1.312.860.0587
>> > Google Voice  +1.209.757.8849
>> > westland at uic.edu
>> > http://uic.edu/~westland
>> > --
>> > View this message in context:
>> > http://r.789695.n4.nabble.com/Dataframes-in-PLS-package-tp4405798p4442436.html
>> > Sent from the R help mailing list archive at Nabble.com.
>> >
>> > ______________________________________________
>> > R-help at r-project.org mailing list
>> > https://stat.ethz.ch/mailman/listinfo/r-help
>> > PLEASE do read the posting guide
>> > http://www.R-project.org/posting-guide.html
>> > and provide commented, minimal, self-contained, reproducible code.
>
>
>
>
> --
> J. Christopher Westland
> Professor, Information & Decision Sciences, University of Illinois - Chicago
> 601 S. Morgan Street (UH2400) Chicago, IL    60607-7124
> Telephone       +1.312.860.0587
> Google Voice  +1.209.757.8849
> westland at uic.edu
> http://uic.edu/~westland
>
>