[R-sig-eco] Species Scaling methods in PCoA or CAP using vegan::capscale

Pierre THIRIET pierre.d.thiriet at gmail.com
Thu Feb 7 12:39:58 CET 2013


Dear R user,

I want to study relations between sites categories and species 
abundances through PCoA or CAP using vegan::capscale. For doing so, I 
overlay species scores on my ordination. However, I am getting confused 
with the different scaling options and their interpretation. From what I 
understood:
With scaling=1, arrow shows the direction from the origin for which 
sites have larger abundances for this species.
With scaling=2, we want to analyze the correlations among species. 
Species that have a small angle between their arrows are expected to be 
strongly positively correlated.
With scaling=-2, const = sqrt(nrow(dune)-1), we get correlations between 
species and axes. This come from Jari Oksanen at 
https://stat.ethz.ch/pipermail/r-sig-ecology/2010-August/001448.html

I did compare the 3 different options (see codes below), differences 
seem to be only a matter of arrow length. Hence, by considering that the 
most important are the relative length of arrows (relative to each 
other), am I allowed to use scaling=-2 (species axes correlations) for 
both analyzing site-species associations and species-species correlations?
One more questions, is there a well admitted threshold in the value of 
species-axes correlations, below which we consider that species are not 
correlated (I mean species don't differ in abundance across the sites, 
excluding the cases of non linear relationships).
If yes, do this threshold change depending on the scaling method used 
and on either the ordination is constrained or not.
Morover, if I want to overlay a vector for a quantitative 
environnemental variable, may I use also scaling=-2, from which 
correlation threshold?

I thank you in advance,
Pierre

library(vegan)
library(ggplot2)
library(grid)
data(dune)
data(dune.env)

dune=sqrt(dune)
mcap=capscale(dune~1,dist="bray") #PCoA

#sites scores
dims=c(1,2)
site=scores(mcap,display="wa",choices=dims)
site.env=cbind(site,dune.env)

#spider for management levels
dev.new();plot(mcap);coord_spider=with(dune.env,ordispider(mcap,Management,col="blue",label=F,choices=dims));dev.off()
coord_spider=as.data.frame(cbind(coord_spider[,],site.env))
names(coord_spider)[1:4]=c("MDS1","MDS2","MDS1end","MDS2end")

#species scores
#scaling1
cor.min=0.6 #below this threshold, arrows will be not plotted because 
correlation is considered too much week
cor_sp=as.data.frame(scores(mcap, dis="sp", scaling=1,choices=dims))
cor_sp$cor=with(cor_sp,sqrt(MDS1^2+MDS2^2))
cor_sp$sup=FALSE;cor_sp$sup[cor_sp$cor>=cor.min]<-TRUE
cor_sp$lab=row.names(cor_sp)
cor_sp=cor_sp[cor_sp$sup==TRUE,]
cor_sp_s1=cor_sp

#scaling2
cor.min=0.6 #below this threshold, arrows will be not plotted because 
correlation is considered too much week
cor_sp=as.data.frame(scores(mcap, dis="sp", scaling=2,choices=dims))
cor_sp$cor=with(cor_sp,sqrt(MDS1^2+MDS2^2))
cor_sp$sup=FALSE;cor_sp$sup[cor_sp$cor>=cor.min]<-TRUE
cor_sp$lab=row.names(cor_sp)
cor_sp=cor_sp[cor_sp$sup==TRUE,]
cor_sp_s2=cor_sp

#scaling -2, correlations between species and axes
#from Jari Oksanen at 
https://stat.ethz.ch/pipermail/r-sig-ecology/2010-August/001448.html
cor.min=0.6 #below this threshold, arrows will be not plotted because 
correlation is considered too much week
cor_sp=as.data.frame(scores(mcap, dis="sp", scaling=-2, const = 
sqrt(nrow(dune)-1),choices=dims))
cor_sp$cor=with(cor_sp,sqrt(MDS1^2+MDS2^2))
cor_sp$sup=FALSE;cor_sp$sup[cor_sp$cor>=cor.min]<-TRUE
cor_sp$lab=row.names(cor_sp)
cor_sp=cor_sp[cor_sp$sup==TRUE,]
cor_sp_s3=cor_sp

#plot
mon.plot1=ggplot(data=site.env)+theme_bw()+
   geom_point(aes(x=MDS1,y=MDS2,color=Management))# les sites

#add spider
mon.plot2=mon.plot1+
geom_segment(data=coord_spider,aes(x=MDS1,y=MDS2,xend=MDS1end,yend=MDS2end,colour=Management),lwd=0.5,alpha=1/3)+
geom_point(data=coord_spider,aes(x=MDS1,y=MDS2,colour=Management),cex=3,shape=19)

#add species scores as vector
#scaling1
mon.plot_s1=mon.plot2+ggtitle("Scaling 1")+
geom_point(aes(x=0,y=0),shape=21,fill="black",color="black",size=3)+#central 
point
   geom_segment(data=cor_sp_s1,aes(x=0,y=0,xend=MDS1,yend=MDS2),arrow = 
arrow(length = unit(0.3,"cm")))+#arrows
   geom_text(data = cor_sp_s1, aes(x = MDS1, y = MDS2, label = lab), 
size = 3)#labels

#scaling2
mon.plot_s2=mon.plot2+ggtitle("Scaling 2")+
geom_point(aes(x=0,y=0),shape=21,fill="black",color="black",size=3)+#central 
point
   geom_segment(data=cor_sp_s2,aes(x=0,y=0,xend=MDS1,yend=MDS2),arrow = 
arrow(length = unit(0.3,"cm")))+#arrows
   geom_text(data = cor_sp_s2, aes(x = MDS1, y = MDS2, label = lab), 
size = 3)#labels

#scaling -2, correlations between species and axes
mon.plot_s3=mon.plot2+ggtitle("Scaling -2")+
geom_point(aes(x=0,y=0),shape=21,fill="black",color="black",size=3)+#central 
point
   geom_segment(data=cor_sp_s3,aes(x=0,y=0,xend=MDS1,yend=MDS2),arrow = 
arrow(length = unit(0.3,"cm")))+#arrows
   geom_text(data = cor_sp_s3, aes(x = MDS1, y = MDS2, label = lab), 
size = 3)#labels

#plot everything
grid.newpage()
pushViewport(viewport(layout = grid.layout(2, 2)))
print(mon.plot_s1,vp=viewport(layout.pos.row = 1, layout.pos.col = 1))
print(mon.plot_s2,vp=viewport(layout.pos.row = 2, layout.pos.col = 1))
print(mon.plot_s3,vp=viewport(layout.pos.row = 1, layout.pos.col = 2))



More information about the R-sig-ecology mailing list