[R-sig-eco] manipulating species matrices

Vít Syrovátka syrovat at sci.muni.cz
Tue Oct 29 15:29:54 CET 2013


Dear Kendra,
I am not sure I understand your question exactly. Maybe the R code you 
already have would help.

Below is a sample of the data as returned by dput() so that anyone can 
follow or extend this reply.

First, I would create a variable distinguishing all levels, for which 
the mean is to be calculated:
env$site.horizon<- paste(env$site, env$horizon, sep= '.')

Now, aggregate(), as you write, may be used to calculate mean 
abundances for every site/horizon category (level). Note that only 
samples where om == 0 are used for the calculation:
Means<- aggregate(spe[env$om == 0,], by= list(env$site.horizon[env$om 
== 0]), FUN= mean)

The following just makes rownames from the first column, so that one 
may use them in a name subscript to select desired rows.
Means.dtf<- data.frame(Means, row.names=1)

This dataframe now contains the mean values for every species and every 
site/horizon combination,
and may be extended by the name subscript to the dimensions of the 
original species dataframe:
Means.extended<- Means.dtf[env$site.horizon,]

This is probably what you want to divide the species counts with:
spe.divided<- spe / Means.extended

NaN (0/0) and Inf (0/anything) values will emerge, these can be changed 
to something meaningful:

spe.div[is.na(spe.div)]<- 0 # or anything you want
spe.div[spe.div==Inf]<- 0 # or anything you want

Hope that helps,
Vit

... and the data:
env<- structure(list(site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = "A7", class = "factor"), om = c(2L, 2L, 3L, 3L, 1L,
1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 0L, 0L, 0L,
0L, 0L), horizon = c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L), site.horizon = 
c("A7.1",
"A7.2", "A7.1", "A7.2", "A7.1", "A7.2", "A7.1", "A7.2", "A7.1",
"A7.2", "A7.1", "A7.2", "A7.1", "A7.2", "A7.1", "A7.2", "A7.1",
"A7.2", "A7.2", "A7.1", "A7.2", "A7.1", "A7.2")), .Names = c("site",
"om", "horizon", "site.horizon"), row.names = c("A7001", "A7002",
"A7003", "A7004", "A7007", "A7008", "A7009", "A7010", "A7011",
"A7012", "A7015", "A7016", "A7019", "A7020", "A7021", "A7022",
"A7023", "A7024", "A7026", "A7027", "A7028", "A7029", "A7030"
), class = "data.frame")


spe<- structure(list(Otu00209 = c(4L, 0L, 8L, 1L, 9L, 1L, 31L, 3L,
3L, 6L, 13L, 1L, 7L, 3L, 13L, 0L, 23L, 4L, 0L, 0L, 2L, 17L, 13L
), Otu00358 = c(95L, 8L, 156L, 34L, 9L, 8L, 9L, 1L, 1L, 1L, 4L,
0L, 0L, 2L, 3L, 8L, 3L, 10L, 0L, 0L, 1L, 8L, 0L), Otu00359 = c(1L,
0L, 0L, 0L, 6L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 1L, 0L), Otu00388 = c(0L, 0L, 4L, 1L, 3L, 0L,
0L, 1L, 0L, 3L, 9L, 1L, 2L, 27L, 0L, 1L, 0L, 1L, 1L, 0L, 6L,
2L, 0L), Otu00389 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Otu00533 = c(7L,
0L, 3L, 0L, 21L, 3L, 0L, 1L, 6L, 13L, 0L, 1L, 2L, 2L, 1L, 0L,
2L, 2L, 5L, 0L, 0L, 2L, 1L), Otu00608 = c(1L, 1L, 3L, 4L, 0L,
0L, 9L, 1L, 2L, 0L, 0L, 32L, 13L, 1L, 58L, 3L, 27L, 6L, 0L, 0L,
0L, 3L, 0L), Otu00618 = c(0L, 0L, 0L, 91L, 0L, 10L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Otu00669 = 
c(1L,
0L, 0L, 0L, 8L, 0L, 4L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L), Otu00695 = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), Otu00696 = c(4L, 0L, 3L, 1L, 1L, 0L, 1L, 2L, 0L, 2L, 3L,
0L, 2L, 0L, 0L, 0L, 1L, 3L, 1L, 7L, 1L, 19L, 0L), Otu00706 = c(0L,
0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 2L, 0L), Otu00725 = c(0L, 0L, 0L, 3L, 1L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), Otu00761 = c(0L, 4L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L)), .Names = 
c("Otu00209",
"Otu00358", "Otu00359", "Otu00388", "Otu00389", "Otu00533", "Otu00608",
"Otu00618", "Otu00669", "Otu00695", "Otu00696", "Otu00706", "Otu00725",
"Otu00761"), class = "data.frame", row.names = c("A7001", "A7002",
"A7003", "A7004", "A7007", "A7008", "A7009", "A7010", "A7011",
"A7012", "A7015", "A7016", "A7019", "A7020", "A7021", "A7022",
"A7023", "A7024", "A7026", "A7027", "A7028", "A7029", "A7030"
))


Dne 2013-10-28 22:27, Mitchell, Kendra napsal:
> I have a large species matrix with counts that I want to express as
> ratio to the reference plots.  I want to use a heatmap to display
> change in species as a ratio to the reference
> 
> the layout is samples by rows and species by columns, it's currently
> a data.frame with row.name=sample name, header=species name, here's
> part of the table for one site (I have 36 sites).
> 
> 
>         Otu00209        Otu00358        Otu00359        Otu00388
>  Otu00389        Otu00533        Otu00608        Otu00618
> Otu00669        Otu00695        Otu00696        Otu00706
> Otu00725        Otu00761
> A7001   4       95      1       0       0       7       1       0
>  1       0       4       0       0       0
> A7002   0       8       0       0       0       0       1       0
>  0       0       0       0       0       4
> A7003   8       156     0       4       0       3       3       0
>  0       0       3       0       0       0
> A7004   1       34      0       1       0       0       4       91
>  0       0       1       0       3       1
> A7007   9       9       6       3       0       21      0       0
>  8       0       1       2       1       0
> A7008   1       8       0       0       0       3       0       10
>  0       0       0       0       0       1
> A7009   31      9       0       0       0       0       9       0
>  4       0       1       0       0       0
> A7010   3       1       1       1       0       1       1       0
>  0       0       2       0       0       0
> A7011   3       1       1       0       0       6       2       0
>  1       0       0       0       0       0
> A7012   6       1       0       3       0       13      0       0
>  0       0       2       0       0       0
> A7015   13      4       0       9       1       0       0       0
>  0       1       3       1       1       1
> A7016   1       0       0       1       0       1       32      0
>  0       0       0       0       0       0
> A7019   7       0       0       2       0       2       13      0
>  0       0       2       0       2       0
> A7020   3       2       0       27      0       2       1       0
>  0       0       0       0       0       0
> A7021   13      3       0       0       0       1       58      0
>  0       0       0       0       0       0
> A7022   0       8       0       1       0       0       3       0
>  0       0       0       0       0       0
> A7023   23      3       0       0       0       2       27      0
>  1       0       1       0       0       0
> A7024   4       10      0       1       0       2       6       0
>  0       0       3       0       0       0
> A7026   0       0       0       1       0       5       0       0
>  0       0       1       0       0       0
> A7027   0       0       1       0       0       0       0       0
>  0       0       7       0       0       0
> A7028   2       1       0       6       0       0       0       0
>  0       0       1       0       0       0
> A7029   17      8       1       2       0       2       3       0
>  0       0       19      2       0       0
> A7030   13      0       0       0       0       1       0       0
>  0       0       0       0       0       1
> 
> 
> Environmental data is in a separate dataframe, here is one site as an 
> example:
> 
> fung_sample     site    om      horizon
> A7001   A7      2       1
> A7002   A7      2       2
> A7003   A7      3       1
> A7004   A7      3       2
> A7007   A7      1       1
> A7008   A7      1       2
> A7009   A7      2       1
> A7010   A7      2       2
> A7011   A7      3       1
> A7012   A7      3       2
> A7015   A7      1       1
> A7016   A7      1       2
> A7019   A7      2       1
> A7020   A7      2       2
> A7021   A7      3       1
> A7022   A7      3       2
> A7023   A7      1       1
> A7024   A7      1       2
> A7026   A7      0       2
> A7027   A7      0       1
> A7028   A7      0       2
> A7029   A7      0       1
> A7030   A7      0       2
> 
> 
> So I need to accomplish a few of things,
>     1. average the species counts for each horizon for the reference
> samples (om=0)  ## I was able to do this by subsetting the reference
> into their own matrix then using reshape to get the mean per
> site/horizon, but don't know if a separate matrix is really what I
> want
>     2. add a constant to each species to prevent errors from dividing 
> by zero
>     3. divide species counts in horizon 1 by the average reference for 
> horizon 1
> 
> I've looked at tapply which seems to be the direction that I need to
> go, but I'm not getting how.
> 
> Thanks
> 
> Kendra
> 
> 	[[alternative HTML version deleted]]
> 
> _______________________________________________
> R-sig-ecology mailing list
> R-sig-ecology at r-project.org
> https://stat.ethz.ch/mailman/listinfo/r-sig-ecology

-- 
Vit Syrovatka
Department of Botany and Zoology
Masaryk University
Kotlarska 2
CZ-611 37 Brno, Czech Republic

Tel.: +420-532146342
E-mail: syrovat at sci.muni.cz



More information about the R-sig-ecology mailing list