[R] Dataframes in PLS package
R. Michael Weylandt
michael.weylandt at gmail.com
Sun Mar 4 20:20:42 CET 2012
It's nice to cc the list for archival reasons -- it also usually gets
you a faster response as more folks can see how the thread develops.
The problem is that the colnames aren't actually depy and indx: they
are depy.w, depy.h, etc. If you want to fit the model, you need to use
those names as is: e.g., with your code
eqn <- structure(list(depy.w = c(63L, 145L, 104L, 109L, 221L, 110L,
194L, 120L, 210L, 243L, 163L, 93L, 167L, 232L, 112L, 185L, 103L,
202L, 203L, 207L, 239L, 109L, 112L, 176L, 126L, 145L, 125L, 191L,
110L, 92L), depy.h = c(55L, 52L, 32L, 69L, 61L, 40L, 41L, 76L,
61L, 101L, 62L, 55L, 61L, 65L, 52L, 52L, 43L, 87L, 57L, 37L,
74L, 44L, 45L, 52L, 54L, 51L, 66L, 53L, 43L, 36L), depy.d = c(1L,
1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L), depy.s = c(0L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L,
1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), indx.a = c(44L,
33L, 68L, 94L, 72L, 48L, 85L, 19L, 41L, 57L, 64L, 27L, 64L, 32L,
31L, 88L, 80L, 70L, 68L, 58L, 42L, 87L, 69L, 52L, 45L, 25L, 66L,
80L, 17L, 70L), indx.i = c(37200L, 69300L, 56900L, 44300L, 79800L,
17600L, 58100L, 76700L, 37600L, 40800L, 400L, 33400L, 6000L,
7400L, 94000L, 84200L, 0L, 0L, 43300L, 0L, 68600L, 47300L, 16100L,
95900L, 69200L, 12200L, 7500L, 70600L, 11400L, 0L), indx.r = c(4L,
4L, 3L, 6L, 6L, 5L, 4L, 3L, 1L, 5L, 3L, 3L, 5L, 1L, 6L, 4L, 2L,
1L, 4L, 1L, 4L, 6L, 1L, 6L, 4L, 2L, 2L, 5L, 3L, 4L), indx.x = c(0L,
1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 2L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L)), .Names = c("depy.w",
"depy.h", "depy.d", "depy.s", "indx.a", "indx.i", "indx.r", "indx.x"
), row.names = c(NA, 30L), class = "data.frame")
library(pls)
apls <- plsr(depy.w + depy.h + depy.d + depy.s ~ ., data=eqn) # Works like a charm
I don't believe there's a way to do wildcard names in general
(something like depy.*), though I'd welcome correction. You can,
however, save some keystrokes by using the "." term on the right-hand
side to mean "everything else I haven't already used".
Hope this helps,
Michael
On Sun, Mar 4, 2012 at 1:30 PM, Chris Westland <westland at uic.edu> wrote:
> Thanks Michael. I had tried to drop the I(as.matrix(...)) conversions, and
> fiddled with a number of other permutations of code ... I still can't seem
> to get it right.
>
> The col names appear to be depy and indx ... here is the output (and the
> rows are just line numbers)
>
>
>
>> colnames(eqn)
>
> [1] "depy.w" "depy.h" "depy.d" "depy.s" "indx.a" "indx.i" "indx.r"
> "indx.x"
>
>
>
>> rownames(eqn)
>
> [1]
> "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28"
>
> [29] "29" "30" "31"….etc.
>
>
>
>
>
>
> Here is the dput(eqn) and showData for the file 'eqn':
>
>
>
>> dput(head(eqn, 30))
>
> structure(list(depy.w = c(63L, 145L, 104L, 109L, 221L, 110L,
>
> 194L, 120L, 210L, 243L, 163L, 93L, 167L, 232L, 112L, 185L, 103L,
>
> 202L, 203L, 207L, 239L, 109L, 112L, 176L, 126L, 145L, 125L, 191L,
>
> 110L, 92L), depy.h = c(55L, 52L, 32L, 69L, 61L, 40L, 41L, 76L,
>
> 61L, 101L, 62L, 55L, 61L, 65L, 52L, 52L, 43L, 87L, 57L, 37L,
>
> 74L, 44L, 45L, 52L, 54L, 51L, 66L, 53L, 43L, 36L), depy.d = c(1L,
>
> 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L,
>
> 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L), depy.s = c(0L,
>
> 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 1L,
>
> 1L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), indx.a = c(44L,
>
> 33L, 68L, 94L, 72L, 48L, 85L, 19L, 41L, 57L, 64L, 27L, 64L, 32L,
>
> 31L, 88L, 80L, 70L, 68L, 58L, 42L, 87L, 69L, 52L, 45L, 25L, 66L,
>
> 80L, 17L, 70L), indx.i = c(37200L, 69300L, 56900L, 44300L, 79800L,
>
> 17600L, 58100L, 76700L, 37600L, 40800L, 400L, 33400L, 6000L,
>
> 7400L, 94000L, 84200L, 0L, 0L, 43300L, 0L, 68600L, 47300L, 16100L,
>
> 95900L, 69200L, 12200L, 7500L, 70600L, 11400L, 0L), indx.r = c(4L,
>
> 4L, 3L, 6L, 6L, 5L, 4L, 3L, 1L, 5L, 3L, 3L, 5L, 1L, 6L, 4L, 2L,
>
> 1L, 4L, 1L, 4L, 6L, 1L, 6L, 4L, 2L, 2L, 5L, 3L, 4L), indx.x = c(0L,
>
> 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 2L, 0L, 0L, 1L, 0L,
>
> 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L)), .Names = c("depy.w",
>
> "depy.h", "depy.d", "depy.s", "indx.a", "indx.i", "indx.r", "indx.x"
>
> ), row.names = c(NA, 30L), class = "data.frame")
>
>
>
>
>
>
>
>> showData(eqn)
>
>
>
> depy.w depy.h depy.d depy.s indx.a indx.i indx.r indx.x
>
> 63 55 1 0 44 37200 4 0
>
> 145 52 1 1 33 69300 4 1
>
> 104 32 0 1 68 56900 3 1
>
> 109 69 1 1 94 44300 6 1
>
> 221 61 0 1 72 79800 6 0
>
> 110 40 1 1 48 17600 5 1
>
> 194 41 0 0 85 58100 4 0
>
> 120 76 1 1 19 76700 3 0
>
> 210 61 0 0 41 37600 1 0 ... etc.
>
>
>
>
> Initially, I had input a file 'pls' with the script:
>
>
> dep <- pls[,1:4]
>
> ind <- pls[,5:8]
>
> eqn <- data.frame(depy = dep, indx = ind)
>
> apls <- plsr(depy ~ indx, data=eqn)
>
>
> .... and this gives me [7] ERROR: object 'depy' not found
>
>
> Note that the original input comes from a matrix 'pls' and my intent is to
> convert this to data.frames that the plsr package can parse ... a dput(pls)
> gives me ...
>
>
>
> .... lots and lots of leading line information ... 0L, 0L, 1L, 2L, 0L,
> 0L, 1L, 1L, 0L, 0L, 0L, 2L, 1L, 0L, 1L,
>
> 2L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L,
>
> 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L,
>
> 1L, 0L, 1L, 1L, 1L, 1L)), .Names = c("w", "h", "d", "s",
>
> "a", "i", "r", "x"), class = "data.frame", row.names = c(NA,
>
> -10000L))
>
>
>
> If you have any other suggestions concerning how I might fiddle the files to
> get them into a format that PLSR package would like, that would be great
>
>
> Chris Westland
>
>
>
> On Sat, Mar 3, 2012 at 8:17 PM, R. Michael Weylandt
> <michael.weylandt at gmail.com> wrote:
>>
>> Can you post dput(head(eqn, 30)) so we can take a look at your data?
>> It's something of a cryptic error and that would go a long way in
>> helping us help you.
>>
>> Without that though, I'm not sure you need the I(as.matrix(dep)) and
>> I(as.matrix(ind)), I would imagine (untested) that eqn <-
>> data.frame(depy = dep, indx = ind) would work (probably better as I()
>> changes things just a little).
>>
>> I have a hunch that the colnames of eqn are not actually depy and indx
>> and that's what ultimately leads to the error. Can you look at
>> colnames(eqn) and use those exactly in the formula to plsr? That might
>> fix it.
>>
>> Michael
>>
>>
>>
>> On Sat, Mar 3, 2012 at 5:01 PM, westland <westland at uic.edu> wrote:
>> > I am still/again having trouble getting PLSR to recognize the input data
>> > frames. Here is what I have done:
>> >
>> > I read in a 10000 x 8 table of data to 'pls'
>> >
>> > assign the first four columns to matrix 'dep' and the second four to
>> > matrix
>> > 'ind' with the following commands:
>> >
>> > dep <- pls[,1:4]
>> > ind <- pls[,5:8]
>> >
>> > I create the data.frame 'eqn' :
>> >
>> > eqn <- data.frame(depy = I(as.matrix(dep)), indx = I(as.matrix(ind)))
>> >
>> > And run the PLSR package
>> >
>> > apls <- plsr(depy ~ indx, data=eqn)
>> >
>> > I seem to be getting either one of two error messages:
>> >
>> > [12] ERROR:
>> > invalid type (list) for variable 'dep'
>> > [13] ERROR:
>> > object of type 'closure' is not subsettable
>> >
>> > I'm sure now that this is a problem in my creation of data.frames, but
>> > can't
>> > seem to find anything that describes the problem
>> >
>> >
>> > -----
>> > J. Christopher Westland
>> > Professor, Information & Decision Sciences, University of Illinois -
>> > Chicago
>> > 601 S. Morgan Street (UH2400) Chicago, IL 60607-7124
>> > Telephone +1.312.860.0587
>> > Google Voice +1.209.757.8849
>> > westland at uic.edu
>> > http://uic.edu/~westland
>> > --
>> > View this message in context:
>> > http://r.789695.n4.nabble.com/Dataframes-in-PLS-package-tp4405798p4442436.html
>> > Sent from the R help mailing list archive at Nabble.com.
>> >
>> > ______________________________________________
>> > R-help at r-project.org mailing list
>> > https://stat.ethz.ch/mailman/listinfo/r-help
>> > PLEASE do read the posting guide
>> > http://www.R-project.org/posting-guide.html
>> > and provide commented, minimal, self-contained, reproducible code.
>
>
>
>
> --
> J. Christopher Westland
> Professor, Information & Decision Sciences, University of Illinois - Chicago
> 601 S. Morgan Street (UH2400) Chicago, IL 60607-7124
> Telephone +1.312.860.0587
> Google Voice +1.209.757.8849
> westland at uic.edu
> http://uic.edu/~westland
>
>
More information about the R-help
mailing list