# Name: Amanda Winegardner, Xavier Giroux-Bougard and Bérenger Bourgeois # Date: October 2014 # Description: Ordination # Dataset: File names are "DoubsSpe.csv", "DoubsEnv.csv" # Notes: Material in R script obtained from # Borcard D., Gillet F. et Legendre P., 2011. Numerical Ecology with R. Springer. #***************************************************************************************# # Loading the data #### rm(list=ls()) install.packages("vegan") install.packages("gclus") install.packages("ape") library(vegan) library(gclus) library(ape) source(file.choose()) # fichier coldiss.R # Species community data frame (fish abundance): "DoubsSpe.csv" spe<-read.csv(file.choose(), row.names=1) spe=spe[-8,] # site number 8 contains no species and must be removed # Environmental data frame: "DoubsEnv.csv" env<-read.csv(file.choose(), row.names=1) env=env[-8,] #***************************************************************************************# # 1.Explore the data ## 1.1.Species data names(spe) dim(spe) str(spe) head(spe) summary(spe) ### Species distibution, all species confounded (ab<-table(unlist(spe))) barplot(ab, las=1, xlab="Abundance class", ylab="Frequency", col=grey(5:0/5)) ### Number of absences sum(spe==0) ### Proportion of zeros in the community data set sum(spe==0)/(nrow(spe)*ncol(spe)) ### Species occurences spe.pres<-colSums(spe>0) # compute the number of sites where each species is present hist(spe.pres, main="Species occurence", las=1, xlab="Frequency of occurences", ylab="Number of species", breaks=seq(0,30, by=5), col="grey") ### Sites richness site.pres<-rowSums(spe>0) # compute the number of species at ecah site hist(site.pres, main="Species richness", las=1, xlab="Frequency of sites", ylab="Number of species", breaks=seq(0,30, by=5), col="grey") ### Compute diversity indices ?diversity (H<-diversity(spe, index="shannon")) # Shannon diversity index (N<-diversity(spe, index="simpson")) # Simpson diversity index ## 1.2.Environmental data names(env) dim(env) str(env) head(env) summary(env) pairs(env, main="Bivariate Plots of the Environmental Data" ) # Standardization of the 11 environmental data env.z<-decostand(env, method="standardize") apply(env.z, 2, mean) # the data are now centered (means~0) apply(env.z, 2, sd) # the data are now scaled (standard deviations=1) par(mfrow=c(1,2)) hist(env$oxy, main="Histogram of Oxygen Concentration", ylab="Frequency", ylim=c(0,10), col="grey") hist(env.z$oxy, main="Histogram of Standardized Oxygen Concentration", ylab="Frequency", ylim=c(0,10), col="grey") # Note: explanatory variables expressed in different units # must obligatory be standardized before computing distances measures and ordination analysis. #***************************************************************************************# # 2.Association measures ## 2.1. Distance measures ### Quantitative species data ?vegdist # this function compute dissimilarity indices useful for community decomposition data (see method options) (spe.db<-vegdist(spe, method="bray")) # Bray-Curtis dissimilarity (spe.dj<-vegdist(spe, method="jac")) # Jaccard dissimilarity (spe.dg<-vegdist(spe, method="gower")) # Gower dissimilarity ### Presence-Absence species data (spe.db.pa<-vegdist(spe, method="bray", binary=TRUE)) #Bray-Curtis dissimilarity (spe.dj.pa<-vegdist(spe, method="jac", binary=TRUE)) # Jaccard dissimilarity (spe.dg.pa<-vegdist(spe, method="gower", binary=TRUE)) # Gower dissimilarity ### Graphical display of association matrices: heat maps windows() coldiss(spe.db, byrank=FALSE, diag=TRUE) # Heat map of Bray-Curtis dissimilarity windows() coldiss(spe.dj, byrank=FALSE, diag=TRUE) # Heat map of Jaccard dissimilarity windows() coldiss(spe.dg, byrank=FALSE, diag=TRUE) # Heat map of Gower dissimilarity ### Quantitative environmental data #### Association between environmental data (Q mode) ?dist # this function also compute dissimilarity matrix env.de<-dist(env.z, method = "euclidean") # euclidean distance matrix of the standardized environmental variables windows() coldiss(env.de, diag=TRUE) ### Dependance between environmental data (R mode) (env.pearson<-cor(env)) # Pearson r linear correlation round(env.pearson, 2) (env.ken<-cor(env, method="kendall")) # Kendall tau rank correlation round(env.ken, 2) ### Mixed types of environmental variables #### Association between mixed types of environmental data (Q mode) ##### Create a fictious data frame var.g1<-rnorm(30, 0, 1) var.g2<-runif(30, 0, 5) var.g3<-gl(3, 10) var.g4<-gl(2, 5, 30) (dat2<-data.frame(var.g1, var.g2, var.g3, var.g4)) str(dat2) summary(dat2) ##### Compute the Gower dissimilarity matrix ?daisy (dat2.dg<-daisy(dat2, metric="gower")) coldiss(dat2.dg) # Challenge 1 #### # Calculate by hand the Bray-Curtis and the Gower dissimilarity of species abundance CHA, TRU and VAI for sites 1, 2 and 3 # (answer below) # # # # # # # # # # # # # # # # # Bray-Curtis dissimilarity : d[jk] = (sum abs(x[ij]-x[ik]))/(sum (x[ij]+x[ik])) (spe.challenge<-spe[1:3,1:3]) # Calculate the total species abundance by site (Abund.s1<-sum(spe.challenge[1,])) (Abund.s2<-sum(spe.challenge[2,])) (Abund.s3<-sum(spe.challenge[3,])) # Calculate the species abundance differences between pairs of sites for each species Spec.s1s2<-0 Spec.s1s3<-0 Spec.s2s3<-0 for (i in 1:3) { Spec.s1s2<-Spec.s1s2+abs(sum(spe.challenge[1,i]-spe.challenge[2,i])) Spec.s1s3<-Spec.s1s3+abs(sum(spe.challenge[1,i]-spe.challenge[3,i])) Spec.s2s3<-Spec.s2s3+abs(sum(spe.challenge[2,i]-spe.challenge[3,i])) } # Calculate the Bray-Curtis dissimilarity (db.s1s2<-Spec.s1s2/(Abund.s1+Abund.s2)) (db.s1s3<-Spec.s1s3/(Abund.s1+Abund.s3)) (db.s2s3<-Spec.s2s3/(Abund.s2+Abund.s3)) # Compare your results (spe.db.challenge<-vegdist(spe.challenge, method="bray")) # Gower dissimilarity : d[jk] = (1/M) sum(abs(x[ij]-x[ik])/(max(x[i])-min(x[i]))) # Calculate the number of columns in your dataset M<-ncol(spe.challenge) # Calculate the species abundance differences between pairs of sites for each species Spe1.s1s2<-abs(spe.challenge[1,1]-spe.challenge[2,1]) Spe2.s1s2<-abs(spe.challenge[1,2]-spe.challenge[2,2]) Spe3.s1s2<-abs(spe.challenge[1,3]-spe.challenge[2,3]) Spe1.s1s3<-abs(spe.challenge[1,1]-spe.challenge[3,1]) Spe2.s1s3<-abs(spe.challenge[1,2]-spe.challenge[3,2]) Spe3.s1s3<-abs(spe.challenge[1,3]-spe.challenge[3,3]) Spe1.s2s3<-abs(spe.challenge[2,1]-spe.challenge[3,1]) Spe2.s2s3<-abs(spe.challenge[2,2]-spe.challenge[3,2]) Spe3.s2s3<-abs(spe.challenge[2,3]-spe.challenge[3,3]) # Calculate the range of each species abundance between sites Range.spe1<-max(spe.challenge[,1]) - min (spe.challenge[,1]) Range.spe2<-max(spe.challenge[,2]) - min (spe.challenge[,2]) Range.spe3<-max(spe.challenge[,3]) - min (spe.challenge[,3]) # Calculate the Gower dissimilarity (dg.s1s2<-(1/M)*((Spe2.s1s2/Range.spe2)+(Spe3.s1s2/Range.spe3))) (dg.s1s3<-(1/M)*((Spe2.s1s3/Range.spe2)+(Spe3.s1s3/Range.spe3))) (dg.s2s3<-(1/M)*((Spe2.s2s3/Range.spe2)+(Spe3.s2s3/Range.spe3))) # Compare your results (spe.db.challenge<-vegdist(spe.challenge, method="gower")) ## 2.2. Transformations for community composition data ?decostand # this function provide some standardiztion methods for community domposition data (see method options) # Transform abundances to absence-presence (1-0) (spe.pa<-decostand(spe, method="pa")) # Hellinger transformation (spe.hel<-decostand(spe, method="hellinger")) # Chi.square transformation (spe.chi<-decostand(spe, method="chi.square")) # Challenge 2 #### # Calculate by hand the Hellinger and the Chi-square transformed abundances data # (answer below) # # # # # # # # # # # # # # # # # # Hellinger tranformation # First calculate the total species abundances by site (site.totals<-apply(spe, 1, sum)) # Scale species abundances by dividing them by site totals (scale.spe<-spe/site.totals) # Calculate de square root of scaled species abundances (sqrt.scale.spe<-sqrt(scale.spe)) # Compare the results sqrt.scale.spe spe.hel sqrt.scale.spe-spe.hel # or: sqrt.scale.spe/spe.hel # Chi-squre transformation # First calculate the total species abundances by site (site.totals<-apply(spe, 1, sum)) # Then calculate the square root of total species abundances (sqrt.spe.totals<-sqrt(apply(spe, 2, sum))) # Scale species abundances by dividing them by the site totals and the species totals scale.spe2<-spe for (i in 1:nrow(spe)) { for (j in 1:ncol(spe)) { (scale.spe2[i,j]<-scale.spe2[i,j]/(site.totals[i]*sqrt.spe.totals[j])) }} #Adjust the scale abundance species by multplying by the square root of the species matrix total (adjust.scale.spe2=scale.spe2*sqrt(sum(rowSums(spe)))) # Compare the results adjust.scale.spe2 spe.chi adjust.scale.spe2-spe.chi # or: adjust.scale.spe2/spe.chi #***************************************************************************************# # 3.Unconstrained Ordination ## 3.1. Principal Components Analysis (PCA) ### PCA on hellinger-transformed species data ?rda #### Run the PCA spe.h.pca<-rda(spe.hel) # Identify the significant axis using the Kaiser-Guttman criterion ev<-spe.h.pca$CA$eig ev[ev>mean(ev)] n<-length(ev) bsm<-data.frame(j=seq(1:n), p=0) bsm$p[1]=1/n for (i in 2:n) { bsm$p[i]=bsm$p[i-1]+(1/(n=1-i))} bsm$p=100*bsm$p/n bsm barplot(ev, main="Eigenvalues", col="grey", las=2) abline(h=mean(ev), col="red") legend("topright", "Average eigenvalues", lwd=1, col=2, bty="n") #### Extract the results summary(spe.h.pca) # overall results summary(spe.h.pca) summary(spe.h.pca, display=NULL) # only eigenvalues and their contribution to the variance eigen(cov(spe.hel)) # also compute the eigenvalues (spe.scores<-scores(spe.h.pca, display="species", choices=c(1,2))) # species scores on the first two PCA axes (sit.scores<-scores(spe.h.pca, display="sites", choices=c(1,2))) # sites scores on the first two PCA axes ### PCA on the environmental variables #### Run the PCA env.pca<-rda(env.z) # or rda(env, scale=TRUE) # Identify the significant axis using the Kaiser-Guttman criterion ev<-env.pca$CA$eig ev[ev>mean(ev)] n=length(ev) bsm=data.frame(j=seq(1:n), p=0) bsm$p[1]=1/n for (i in 2:n) { bsm$p[i]=bsm$p[i-1]+(1/(n=1-i))} bsm$p=100*bsm$p/n bsm barplot(ev, main="Eigenvalues", col="grey", las=2) abline(h=mean(ev), col="red") legend("topright", "Average eigenvalues", lwd=1, col=2, bty="n") #### Extract the results summary(env.pca) summary(env.pca, scaling=2) ### Construct a biplot #### Biplot of the PCA on transformed species data (scaling 1) windows() plot(spe.h.pca) windows() ?biplot biplot(spe.h.pca) windows() plot(spe.h.pca, scaling=1, type="none", # scaling 1 = distance biplot : # distances among abjects in the biplot approximate their Euclidean distances # but angles among descriptor vectors DO NOT reflect their correlation xlab=c("PC1 (%)", round((spe.h.pca$CA$eig[1]/sum(spe.h.pca$CA$eig))*100,2)), ylab=c("PC2 (%)", round((spe.h.pca$CA$eig[2]/sum(spe.h.pca$CA$eig))*100,2))) points(scores(spe.h.pca, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spe.h.pca, display="species", choices=c(1), scaling=1), scores(spe.h.pca, display="species", choices=c(2), scaling=1), labels=rownames(scores(spe.h.pca, display="species", scaling=1)), col="red", cex=0.8) #### Biplot on the environmental variables (scaling 2) windows() plot(env.pca) windows() plot(env.pca, scaling=2, type="none", # scaling 2 = correlation biplot : # distances among abjects in the biplot DO NOT approximate their Euclidean distances # but angles among descriptor vectors reflect their correlation xlab=c("PC1 (%)", round((env.pca$CA$eig[1]/sum(env.pca$CA$eig))*100,2)), ylab=c("PC2 (%)", round((env.pca$CA$eig[2]/sum(env.pca$CA$eig))*100,2)), xlim=c(-1,1), ylim=c(-1,1)) points(scores(env.pca, display="sites", choices=c(1,2), scaling=2), pch=21, col="black", bg="darkgreen", cex=1.2) text(scores(env.pca, display="species", choices=c(1), scaling=2), scores(env.pca, display="species", choices=c(2), scaling=2), labels=rownames(scores(env.pca, display="species", scaling=2)), col="red", cex=0.8) # Challenge 3 #### # Run the PCA of the mite species abundances # Which are the significant axes ? # Which group of sites can you identify ? # Which species are related to each group of sites mite.spe<-read.table(file.choose(), row.names=1, h=T) # (answer below) # # # # # # # # # # # # # # # # # # mite.spe.hel<-decostand(mite.spe, method="hellinger") mite.spe.h.pca<-rda(mite.spe.hel) ev<-mite.spe.h.pca$CA$eig ev[ev>mean(ev)] n=length(ev) bsm=data.frame(j=seq(1:n), p=0) bsm$p[1]=1/n for (i in 2:n) { bsm$p[i]=bsm$p[i-1]+(1/(n=1-i))} bsm$p=100*bsm$p/n bsm barplot(ev, main="Eigenvalues", col="grey", las=2) abline(h=mean(ev), col="red") legend("topright", "Average eigenvalues", lwd=1, col=2, bty="n") summary(mite.spe.h.pca, display=NULL) windows() plot(mite.spe.h.pca, scaling=1, type="none", xlab=c("PC1 (%)", round((mite.spe.h.pca$CA$eig[1]/sum(mite.spe.h.pca$CA$eig))*100,2)), ylab=c("PC2 (%)", round((mite.spe.h.pca$CA$eig[2]/sum(mite.spe.h.pca$CA$eig))*100,2))) points(scores(mite.spe.h.pca, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(mite.spe.h.pca, display="species", choices=c(1), scaling=1), scores(mite.spe.h.pca, display="species", choices=c(2), scaling=1), labels=rownames(scores(mite.spe.h.pca, display="species", scaling=1)), col="red", cex=0.8) ## 3.2. Principal Coordinate Anaysis (PCoA) ### Run the PCoA on hellinger transformed species abundances ?cmdscale cmdscale(dist(spe.hel), k=(nrow(spe)-1), eig=TRUE) ?pcoa spe.h.pcoa<-pcoa(dist(spe.hel)) ### Extract the results spe.h.pcoa ### Construct the biplot windows() biplot.pcoa(spe.h.pcoa, spe.hel, dir.axis2=-1) ### Run the PCoA on a Bray-Curtis dissimilarity matrix spe.db spe.bray.pcoa<-pcoa(spe.db) spe.bray.pcoa windows() biplot.pcoa(spe.bray.pcoa, spe.hel, dir.axis2=-1) # The distance measure choosen thus strongly influenced the PCoA results # Challenge 4 #### # Run the PCoA of the hellinger-transformed mite species abundances # Which are the significant axes ? # Which group of sites can you identify ? # Which species are related to each group of sites ? # How does the PCoA results compare with the PCA # (answer below) # # # # # # # # # # # # # # # # # # mite.spe.h.pcoa<-pcoa(dist(mite.spe.hel)) mite.spe.h.pcoa windows() biplot.pcoa(mite.spe.h.pcoa, mite.spe.hel, dir.axis2=-1) #***************************************************************************************# # 4. Canonical Analysis ## 4.1. Redundancy Analysis (RDA) ### Prepare the data env<-env[,-1] # remove the "distance from the source" variable pen2 <- rep("very_steep", nrow(env)) # recode the slope variable into a factor pen2[env$pen <= quantile(env$pen)[4]] = "steep" pen2[env$pen <= quantile(env$pen)[3]] = "moderate" pen2[env$pen <= quantile(env$pen)[2]] = "low" pen2 <- factor(pen2, levels=c("low", "moderate", "steep", "very_steep")) table(pen2) env2 <- env env2$pen <- pen2 ### Run the RDA of all exlanatory variables of env2 on species abundances ?rda spe.rda<-rda(spe.hel~., data=env2) ### Extract the results summary(spe.rda, display=NULL) ### Select the significant explanatory variables by forward selection ?ordiR2step ordiR2step(rda(spe.hel~1, data=env2), scope= formula(spe.rda), direction= "forward", R2scope=TRUE, pstep=1000) which(colnames(env2)=="alt") which(colnames(env2)=="oxy") (env2.signif=env2[,c(1,9)]) ### Re-run the RDA of significant explanatory variables on species abundances spe.rda.signif=rda(spe.hel~., data=env2.signif) ### Extract the results summary(spe.rda.signif, display=NULL) ### Calculate the adjusted R^2 of the RDA (R2adj <- RsquareAdj(spe.rda.signif)$adj.r.squared) ### Test the significance of the model and the significance of axis ?anova.cca anova.cca(spe.rda.signif, step=1000) anova.cca(spe.rda.signif, step=1000, by="axis") ### Plot the results #### scaling 1 windows(title="RDA scaling 1") plot(spe.rda.signif, scaling=1, main="Triplot RDA") windows(title="RDA scaling 1") plot(spe.rda.signif, scaling=1, main="Triplot RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(spe.rda.signif, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spe.rda.signif, display="species", choices=c(1), scaling=1), scores(spe.rda.signif, display="species", choices=c(2), scaling=1), labels=rownames(scores(spe.rda.signif, display="species", scaling=1)), col="grey", cex=0.8) arrows(0,0, scores(spe.rda.signif, display="bp", choices=c(1), scaling=1), scores(spe.rda.signif, display="bp", choices=c(2), scaling=1), col="red") text(scores(spe.rda.signif, display="bp", choices=c(1), scaling=1)+0.05, scores(spe.rda.signif, display="bp", choices=c(2), scaling=1)+0.05, labels=rownames(scores(spe.rda.signif, display="bp", choices=c(2), scaling=1)), col="red", cex=1) #### scaling 2 windows(title="RDA scaling 2") plot(spe.rda.signif, scaling=2, main="Triplot RDA - scaling 2", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(spe.rda.signif, display="sites", choices=c(1,2), scaling=2), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spe.rda.signif, display="species", choices=c(1), scaling=2), scores(spe.rda.signif, display="species", choices=c(2), scaling=2), labels=rownames(scores(spe.rda.signif, display="species", scaling=2)), col="grey", cex=0.8) arrows(0,0, scores(spe.rda.signif, display="bp", choices=c(1), scaling=2), scores(spe.rda.signif, display="bp", choices=c(2), scaling=2), col="red") text(scores(spe.rda.signif, display="bp", choices=c(1), scaling=2)+0.05, scores(spe.rda.signif, display="bp", choices=c(2), scaling=2)+0.05, labels=rownames(scores(spe.rda.signif, display="bp", choices=c(2), scaling=2)), col="red", cex=1) # Challenge 5 #### # Run the RDA of the mite environmental variables of the mite species abundances mite.env<-read.table(file.choose(), row.names=1, h=T) # Which are the significant explanatory variables ? # How much of the variation explain the significant explanatory variables ? # Which are the significant axis ? # Which group of sites can you identify ? # Which species are related to each group of sites ? # (answer below) # # # # # # # # # # # # # # # # # # mite.spe.rda<-rda(mite.spe.hel~., data=mite.env) ordiR2step(rda(mite.spe.hel~1, data=mite.env), scope= formula(mite.spe.rda), direction= "forward", R2scope=TRUE, pstep=1000) which(colnames(mite.env)=="WatrCont") which(colnames(mite.env)=="Shrub") which(colnames(mite.env)=="Substrate") which(colnames(mite.env)=="Topo") which(colnames(mite.env)=="SubsDens") (mite.env.signif=mite.env[,c(2,4,3,5, 1)]) mite.spe.rda.signif=rda(mite.spe~., data=mite.env.signif) (R2adj <- RsquareAdj(mite.spe.rda.signif)$adj.r.squared) anova.cca(mite.spe.rda.signif, step=1000) anova.cca(mite.spe.rda.signif, step=1000, by="axis") summary(mite.spe.rda.signif, display=NULL) windows(title="RDA scaling 1") plot(mite.spe.rda.signif, scaling=1, main="Triplot RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(mite.spe.rda.signif, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(mite.spe.rda.signif, display="species", choices=c(1), scaling=1), scores(mite.spe.rda.signif, display="species", choices=c(2), scaling=1), labels=rownames(scores(mite.spe.rda.signif, display="species", scaling=1)), col="grey", cex=0.8) arrows(0,0, scores(mite.spe.rda.signif, display="bp", choices=c(1), scaling=1), scores(mite.spe.rda.signif, display="bp", choices=c(2), scaling=1), col="red") text(scores(mite.spe.rda.signif, display="bp", choices=c(1), scaling=1)+0.05, scores(mite.spe.rda.signif, display="bp", choices=c(2), scaling=1)+0.05, labels=rownames(scores(mite.spe.rda.signif, display="bp", choices=c(2), scaling=1)), col="red", cex=1) ## 4.2 Partial RDA : effect of water chemistry on species abundances partialling out the physiography ### Divide the env2 data frame in two data frames envtopo <- env[, c(1:3)] # Physiography : explanatory dataset 1 names(envtopo) envchem <- env[, c(4:10)] # Water quality : explanatory dataset 2 names(envchem) ### Run the partial RDA spechem.physio<-rda(spe.hel, envchem, envtopo) # or : spechem.physio2<-rda(spe.hel ~ pH + dur + pho + nit + amm + oxy + dbo + Condition(alt + pen + deb), data=env) ### Extract the results summary(spechem.physio, display=NULL) ### Calculate adjusted the R^2 of the partial RDA (R2adj <- RsquareAdj(spechem.physio)$adj.r.squared) ### Test of the partial RDA and the significance of axis anova.cca(spechem.physio, step=1000) anova.cca(spechem.physio2, step=1000, by="axis") ### Construct the triplots #### Scaling 1 windows(title="Partial RDA scaling 1") plot(spechem.physio, scaling=1, main="Triplot partial RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(spechem.physio, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spechem.physio, display="species", choices=c(1), scaling=1), scores(spechem.physio, display="species", choices=c(2), scaling=1), labels=rownames(scores(spechem.physio, display="species", scaling=1)), col="grey", cex=0.8) arrows(0,0, scores(spechem.physio, display="bp", choices=c(1), scaling=1), scores(spechem.physio, display="bp", choices=c(2), scaling=1), col="red") text(scores(spechem.physio, display="bp", choices=c(1), scaling=1)+0.05, scores(spechem.physio, display="bp", choices=c(2), scaling=1)+0.05, labels=rownames(scores(spechem.physio, display="bp", choices=c(2), scaling=1)), col="red", cex=1) #### Scaling 2 windows(title="Partial RDA scaling 2") plot(spechem.physio, scaling=2, main="Triplot partial RDA - scaling 2", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(spechem.physio, display="sites", choices=c(1,2), scaling=2), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spechem.physio, display="species", choices=c(1), scaling=2), scores(spechem.physio, display="species", choices=c(2), scaling=2), labels=rownames(scores(spechem.physio, display="species", scaling=2)), col="grey", cex=0.8) arrows(0,0, scores(spechem.physio, display="bp", choices=c(1), scaling=2), scores(spechem.physio, display="bp", choices=c(2), scaling=2), col="red") text(scores(spechem.physio, display="bp", choices=c(1), scaling=2)+0.05, scores(spechem.physio, display="bp", choices=c(2), scaling=2)+0.05, labels=rownames(scores(spechem.physio, display="bp", choices=c(2), scaling=2)), col="red", cex=1) # Challenge 6 #### # Run the partial RDA of the mite environmental variables of the mite species abundances # partialling out for the substrate variables (SubsDens, WaterCont and Substrate) # Is the model significant ? # Which are the significant axis ? # Interpret the obtained triplot. # (answer below) # # # # # # # # # # # # # # # # # # mite.spe.subs<-rda(mite.spe.hel ~ Shrub + Topo + Condition(SubsDens + WatrCont + Substrate), data=mite.env) summary(mite.spe.subs, display=NULL) (R2adj <- RsquareAdj(mite.spe.subs)$adj.r.squared) anova.cca(mite.spe.subs, step=1000) anova.cca(mite.spe.subs, step=1000, by="axis") #### Triplot scaling 1 windows(title="Partial RDA scaling 1") plot(mite.spe.subs, scaling=1, main="Triplot partial RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(mite.spe.subs, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(mite.spe.subs, display="species", choices=c(1), scaling=1), scores(mite.spe.subs, display="species", choices=c(2), scaling=1), labels=rownames(scores(mite.spe.subs, display="species", scaling=1)), col="grey", cex=0.8) arrows(0,0, scores(mite.spe.subs, display="bp", choices=c(1), scaling=1), scores(mite.spe.subs, display="bp", choices=c(2), scaling=1), col="red") text(scores(mite.spe.subs, display="bp", choices=c(1), scaling=1)+0.05, scores(mite.spe.subs, display="bp", choices=c(2), scaling=1)+0.05, labels=rownames(scores(mite.spe.subs, display="bp", choices=c(2), scaling=1)), col="red", cex=1) #### Triplot scaling 2 windows(title="Partial RDA scaling 2") plot(mite.spe.subs, scaling=2, main="Triplot partial RDA - scaling 2", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(mite.spe.subs, display="sites", choices=c(1,2), scaling=2), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(mite.spe.subs, display="species", choices=c(1), scaling=2), scores(mite.spe.subs, display="species", choices=c(2), scaling=2), labels=rownames(scores(mite.spe.subs, display="species", scaling=2)), col="grey", cex=0.8) arrows(0,0, scores(mite.spe.subs, display="bp", choices=c(1), scaling=2), scores(mite.spe.subs, display="bp", choices=c(2), scaling=2), col="red") text(scores(mite.spe.subs, display="bp", choices=c(1), scaling=2)+0.05, scores(mite.spe.subs, display="bp", choices=c(2), scaling=2)+0.05, labels=rownames(scores(mite.spe.subs, display="bp", choices=c(2), scaling=2)), col="red", cex=1) # 4.3. Variation partitioning by partial RDA ?varpart vegandocs("partitioning.pdf") ## Variation partitioning with all explanatory variables spe.part.all <- varpart(spe.hel, envchem, envtopo) spe.part.all windows(title="Variation partitioning - all variables") plot(spe.part.all, digits=2) ## Variation partitioning with only significant variables ### Separate forward selection in each subset of environmental variables spe.chem <- rda(spe.hel~., data=envchem) R2a.all.chem <- RsquareAdj(spe.chem)$adj.r.squared ordiR2step(rda(spe.hel~1, data=envchem), scope= formula(spe.chem), direction= "forward", R2scope=TRUE, pstep=1000) names(envchem) (envchem.pars <- envchem[, c( 4, 6, 7 )]) spe.topo <- rda(spe.hel~., data=envtopo) R2a.all.topo <- RsquareAdj(spe.topo)$adj.r.squared ordiR2step(rda(spe.hel~1, data=envtopo), scope= formula(spe.topo), direction= "forward", R2scope=TRUE, pstep=1000) names(envtopo) envtopo.pars <- envtopo[, c(1,2)] ### Variation partitioning (spe.part <- varpart(spe.hel, envchem.pars, envtopo.pars)) windows(title="Variation partitioning - parsimonious subsets") plot(spe.part, digits=2) # Tests of all testable fractions anova.cca(rda(spe.hel, envchem.pars), step=1000) # Test of fractions [a+b] anova.cca(rda(spe.hel, envtopo.pars), step=1000) # Test of fractions [b+c] env.pars <- cbind(envchem.pars, envtopo.pars) anova.cca(rda(spe.hel, env.pars), step=1000) # Test of fractions [a+b+c] anova.cca(rda(spe.hel, envchem.pars, envtopo.pars), step=1000) # Test of fraction [a] anova.cca(rda(spe.hel, envtopo.pars, envchem.pars), step=1000) # Test of fraction [c] # Challenge 7 #### # Run the variation partitioning of the mite species abundances # with a first dataset for the significant substrate variables (SubsDens, WaterCont and Substrate) # and a second dataset for the significant other variables (Shrud and Topo) # Which proportion of the variation explain each dataset ? # Which are the significant fractions ? # (answer below) # # # # # # # # # # # # # # # # # # str(mite.env) (mite.subs<-mite.env[,c(1,2,3)]) (mite.other<-mite.env[,c(4,5)]) rda.mite.subs <- rda(mite.spe.hel~., data=mite.subs) R2a.all.chem <- RsquareAdj(rda.mite.subs)$adj.r.squared ordiR2step(rda(mite.spe.hel~1, data=mite.subs), scope= formula(rda.mite.subs), direction= "forward", R2scope=TRUE, pstep=1000) names(mite.subs) (mite.subs.pars <- mite.subs[, c(2, 3)]) rda.mite.other <- rda(mite.spe.hel~., data=mite.other) R2a.all.chem <- RsquareAdj(rda.mite.other)$adj.r.squared ordiR2step(rda(mite.spe.hel~1, data=mite.other), scope= formula(rda.mite.other), direction= "forward", R2scope=TRUE, pstep=1000) names(mite.other) (mite.other.pars <- mite.other[, c(1,2)]) ### Variation partitioning (mite.spe.part <- varpart(mite.spe.hel, ~WatrCont+Substrate, ~Shrub+Topo, data=mite.env)) windows(title="Variation partitioning - parsimonious subsets") plot(mite.spe.part, digits=2) # Tests of all testable fractions anova.cca(rda(mite.spe.hel~ WatrCont+Substrate, data=mite.env), step=1000) # Test of fractions [a+b] anova.cca(rda(mite.spe.hel~Shrub+Topo, data=mite.env), step=1000) # Test of fractions [b+c] (env.pars <- cbind(mite.env[,c(2,3,4,5)])) anova.cca(rda(mite.spe.hel~ WatrCont+Substrate+Shrub+Topo, data=env.pars), step=1000) # Test of fractions [a+b+c] anova.cca(rda(mite.spe.hel~WatrCont+Substrate + Condition(Shrub+Topo), data=env.pars), step=1000) # Test of fraction [a] anova.cca(rda(mite.spe.hel~Shrub+Topo+ Condition(WatrCont+Substrate ), data=env.pars), step=1000) # Test of fraction [c] ## 4.4. Principal Response Curves (PRC) ?prc # van den Brink, P.J. & ter Braak, C.J.F. (1999). # Principal response curves: Analysis of time-dependent multivariate responses of biological community to stress. # Environmental Toxicology and Chemistry, 18, 138-148 # Chlorpyrifos experiment and experimental design data(pyrifos) week <- gl(11, 12, labels=c(-4, -1, 0.1, 1, 2, 4, 8, 12, 15, 19, 24)) dose <- factor(rep(c(0.1, 0, 0, 0.9, 0, 44, 6, 0.1, 44, 0.9, 0, 6), 11)) # PRC mod <- prc(pyrifos, dose, week) mod # RDA summary(mod) # PRC logabu <- colSums(pyrifos) windows() plot(mod, select = logabu > 100) # Permutations should be done only within one week, and we only # are interested on the first axis anova(mod, strata = week, first=TRUE, perm.max = 100) # PRC can be useful in community ecolgy # to compare the temporal evolution of a communty in experimental sites (e.g. restored ecosystems) # to the temporal evolution of a communty in control sites (e.g. reference ecosystems) # Challenge 8 #### # Run the PRC on the Doubs fish species abundances with last site removed considering that: # the first 14 sites correpond to 4 reference ecosystems # the last 14 sites correpond to 4 restored ecosystems # the first seven lines correponds to successive sampling of the reference sites 1 during year 1,2,3,4,5,6 and 7 # the lines 8 to 14 correponds to successive sampling of the reference sites 2 during year 1,2,3,4,5,6 and 7 # etc... # Does restoration enables the recovery of reference communities ? # Which species are related to reference ecosystems, to restored ecosystems ? # (answer below) # # # # # # # # # # # # # # # # # # dim(spe.hel) spe.hel<-spe.hel[-29,] reference<-rep(c(0), 14) restored<-rep(c(1), 14) type.site<-as.factor(c(reference, restored)) year<-as.factor(rep(c(1,2,3,4,5,6,7), 4)) spe.hel.prc<-prc(spe.hel, type.site, year) spe.hel.prc summary(spe.hel.prc) plot(spe.hel.prc) #***************************************************************************************# # 5. Other useful ordination methods ?metaMDS # Nonmetric Multidimesional Scaling help(lda, package=MASS) # linear discriminant analysis ?CCorA # Canonical Correlation Analysis help(coinertia, package=ade4) # Coinertia Analysis help(mfa, package=ade4) # Multiple Factorial Analysis https://r-forge.r-project.org/R/?group_id=195 # packages AEM and PCNM contain functions to perform spatial analysis# Name: Amanda Winegardner, Xavier Giroux-Bougard and Bérenger Bourgeois #################### #################### #################### # Date: October 2014 # Description: Ordination # Dataset: File names are "DoubsSpe.csv", "DoubsEnv.csv" # Notes: Material in R script obtained from # Borcard D., Gillet F. et Legendre P., 2011. Numerical Ecology with R. Springer. #***************************************************************************************# # Loading the data #### rm(list=ls()) install.packages("vegan") install.packages("gclus") install.packages("ape") library(vegan) library(gclus) library(ape) source(file.choose()) # fichier coldiss.R # Species community data frame (fish abundance): "DoubsSpe.csv" spe<-read.csv(file.choose(), row.names=1) spe=spe[-8,] # site number 8 contains no species and must be removed # Environmental data frame: "DoubsEnv.csv" env<-read.csv(file.choose(), row.names=1) env=env[-8,] #***************************************************************************************# # 1.Explore the data ## 1.1.Species data names(spe) dim(spe) str(spe) head(spe) summary(spe) ### Species distibution, all species confounded (ab=table(unlist(spe))) barplot(ab, las=1, xlab="Abundance class", ylab="Frequency", col=grey(5:0/5)) ### Number of absences sum(spe==0) ### Proportion of zeros in the community data set sum(spe==0)/(nrow(spe)*ncol(spe)) ### Species occurences spe.pres=colSums(spe>0) # compute the number of sites where each species is present hist(spe.pres, main="Species occurence", las=1, xlab="Frequency of occurences", ylab="Number of species", breaks=seq(0,30, by=5), col="grey") ### Sites richness site.pres=rowSums(spe>0) # compute the number of species at ecah site hist(site.pres, main="Species richness", las=1, xlab="Frequency of sites", ylab="Number of species", breaks=seq(0,30, by=5), col="grey") ### Compute diversity indices ?diversity (H=diversity(spe, index="shannon")) # Shannon diversity index (N=diversity(spe, index="simpson")) # Simpson diversity index ## 1.2.Environmental data names(env) dim(env) str(env) head(env) summary(env) pairs(env, main="Bivariate Plots of the Environmental Data" ) # Standardization of the 11 environmental data env.z=decostand(env, method="standardize") apply(env.z, 2, mean) # the data are now centered (means~0) apply(env.z, 2, sd) # the data are now scaled (standard deviations=1) par(mfrow=c(1,2)) hist(env$oxy, main="Histogram of Oxygen Concentration", ylab="Frequency", ylim=c(0,10), col="grey") hist(env.z$oxy, main="Histogram of Standardized Oxygen Concentration", ylab="Frequency", ylim=c(0,10), col="grey") # Note: explanatory variables expressed in different units # must obligatory be standardized before computing distances measures and ordination analysis. #***************************************************************************************# # 2.Association measures ## 2.1. Distance measures ### Quantitative species data ?vegdist # this function compute dissimilarity indices useful for community decomposition data (see method options) (spe.db=vegdist(spe, method="bray")) # Bray-Curtis dissimilarity (spe.dj=vegdist(spe, method="jac")) # Jaccard dissimilarity (spe.dg=vegdist(spe, method="gower")) # Gower dissimilarity ### Presence-Absence species data (spe.db.pa=vegdist(spe, method="bray", binary=TRUE)) #Bray-Curtis dissimilarity (spe.dj.pa=vegdist(spe, method="jac", binary=TRUE)) # Jaccard dissimilarity (spe.dg.pa=vegdist(spe, method="gower", binary=TRUE)) # Gower dissimilarity ### Graphical display of association matrices: heat maps windows() coldiss(spe.db, byrank=FALSE, diag=TRUE) # Heat map of Bray-Curtis dissimilarity windows() coldiss(spe.dj, byrank=FALSE, diag=TRUE) # Heat map of Jaccard dissimilarity windows() coldiss(spe.dg, byrank=FALSE, diag=TRUE) # Heat map of Gower dissimilarity ### Quantitative environmental data #### Association between environmental data (Q mode) ?dist # this function also compute dissimilarity matrix env.de=dist(env.z, method = "euclidean") # euclidean distance matrix of the standardized environmental variables windows() coldiss(env.de, diag=TRUE) ### Dependance between environmental data (R mode) (env.pearson=cor(env)) # Pearson r linear correlation round(env.pearson, 2) (env.ken=cor(env, method="kendall")) # Kendall tau rank correlation round(env.ken, 2) ### Mixed types of environmental variables #### Association between mixed types of environmental data (Q mode) ##### Create a fictious data frame var.g1=rnorm(30, 0, 1) var.g2=runif(30, 0, 5) var.g3=gl(3, 10) var.g4=gl(2, 5, 30) (dat2=data.frame(var.g1, var.g2, var.g3, var.g4)) str(dat2) summary(dat2) ##### Compute the Gower dissimilarity matrix ?daisy (dat2.dg=daisy(dat2, metric="gower")) coldiss(dat2.dg) # Challenge 1 #### # Calculate by hand the Bray-Curtis and the Gower dissimilarity of species abundance CHA, TRU and VAI for sites 1, 2 and 3 # (answer below) # # # # # # # # # # # # # # # # # Bray-Curtis dissimilarity : d[jk] = (sum abs(x[ij]-x[ik]))/(sum (x[ij]+x[ik])) (spe.challenge=spe[1:3,1:3]) # Calculate the total species abundance by site (Abund.s1=sum(spe.challenge[1,])) (Abund.s2=sum(spe.challenge[2,])) (Abund.s3=sum(spe.challenge[3,])) # Calculate the species abundance differences between pairs of sites for each species Spec.s1s2=0 Spec.s1s3=0 Spec.s2s3=0 for (i in 1:3) { Spec.s1s2=Spec.s1s2+abs(sum(spe.challenge[1,i]-spe.challenge[2,i])) Spec.s1s3=Spec.s1s3+abs(sum(spe.challenge[1,i]-spe.challenge[3,i])) Spec.s2s3=Spec.s2s3+abs(sum(spe.challenge[2,i]-spe.challenge[3,i])) } # Calculate the Bray-Curtis dissimilarity (db.s1s2=Spec.s1s2/(Abund.s1+Abund.s2)) (db.s1s3=Spec.s1s3/(Abund.s1+Abund.s3)) (db.s2s3=Spec.s2s3/(Abund.s2+Abund.s3)) # Compare your results (spe.db.challenge=vegdist(spe.challenge, method="bray")) # Gower dissimilarity : d[jk] = (1/M) sum(abs(x[ij]-x[ik])/(max(x[i])-min(x[i]))) # Calculate the number of columns in your dataset M=ncol(spe.challenge) # Calculate the species abundance differences between pairs of sites for each species Spe1.s1s2=abs(spe.challenge[1,1]-spe.challenge[2,1]) Spe2.s1s2=abs(spe.challenge[1,2]-spe.challenge[2,2]) Spe3.s1s2=abs(spe.challenge[1,3]-spe.challenge[2,3]) Spe1.s1s3=abs(spe.challenge[1,1]-spe.challenge[3,1]) Spe2.s1s3=abs(spe.challenge[1,2]-spe.challenge[3,2]) Spe3.s1s3=abs(spe.challenge[1,3]-spe.challenge[3,3]) Spe1.s2s3=abs(spe.challenge[2,1]-spe.challenge[3,1]) Spe2.s2s3=abs(spe.challenge[2,2]-spe.challenge[3,2]) Spe3.s2s3=abs(spe.challenge[2,3]-spe.challenge[3,3]) # Calculate the range of each species abundance between sites Range.spe1=max(spe.challenge[,1]) - min (spe.challenge[,1]) Range.spe2=max(spe.challenge[,2]) - min (spe.challenge[,2]) Range.spe3=max(spe.challenge[,3]) - min (spe.challenge[,3]) # Calculate the Gower dissimilarity (dg.s1s2=(1/M)*((Spe2.s1s2/Range.spe2)+(Spe3.s1s2/Range.spe3))) (dg.s1s3=(1/M)*((Spe2.s1s3/Range.spe2)+(Spe3.s1s3/Range.spe3))) (dg.s2s3=(1/M)*((Spe2.s2s3/Range.spe2)+(Spe3.s2s3/Range.spe3))) # Compare your results (spe.db.challenge=vegdist(spe.challenge, method="gower")) ## 2.2. Transformations for community composition data ?decostand # this function provide some standardiztion methods for community domposition data (see method options) # Transform abundances to absence-presence (1-0) (spe.pa=decostand(spe, method="pa")) # Hellinger transformation (spe.hel=decostand(spe, method="hellinger")) # Chi.square transformation (spe.chi=decostand(spe, method="chi.square")) # Challenge 2 #### # Calculate by hand the Hellinger and the Chi-square transformed abundances data # (answer below) # # # # # # # # # # # # # # # # # # Hellinger tranformation # First calculate the total species abundances by site (site.totals=apply(spe, 1, sum)) # Scale species abundances by dividing them by site totals (scale.spe=spe/site.totals) # Calculate de square root of scaled species abundances (sqrt.scale.spe=sqrt(scale.spe)) # Compare the results sqrt.scale.spe spe.hel sqrt.scale.spe-spe.hel # or: sqrt.scale.spe/spe.hel # Chi-squre transformation # First calculate the total species abundances by site (site.totals=apply(spe, 1, sum)) # Then calculate the square root of total species abundances (sqrt.spe.totals=sqrt(apply(spe, 2, sum))) # Scale species abundances by dividing them by the site totals and the species totals scale.spe2=spe for (i in 1:nrow(spe)) { for (j in 1:ncol(spe)) { (scale.spe2[i,j]=scale.spe2[i,j]/(site.totals[i]*sqrt.spe.totals[j])) }} #Adjust the scale abundance species by multplying by the square root of the species matrix total (adjust.scale.spe2=scale.spe2*sqrt(sum(rowSums(spe)))) # Compare the results adjust.scale.spe2 spe.chi adjust.scale.spe2-spe.chi # or: adjust.scale.spe2/spe.chi #***************************************************************************************# # 3.Unconstrained Ordination ## 3.1. Principal Components Analysis (PCA) ### PCA on hellinger-transformed species data ?rda #### Run the PCA spe.h.pca=rda(spe.hel) # Identify the significant axis using the Kaiser-Guttman criterion ev=spe.h.pca$CA$eig ev[ev>mean(ev)] n=length(ev) bsm=data.frame(j=seq(1:n), p=0) bsm$p[1]=1/n for (i in 2:n) { bsm$p[i]=bsm$p[i-1]+(1/(n=1-i))} bsm$p=100*bsm$p/n bsm barplot(ev, main="Eigenvalues", col="grey", las=2) abline(h=mean(ev), col="red") legend("topright", "Average eigenvalues", lwd=1, col=2, bty="n") #### Extract the results summary(spe.h.pca) # overall results summary(spe.h.pca) summary(spe.h.pca, display=NULL) # only eigenvalues and their contribution to the variance eigen(cov(spe.hel)) # also compute the eigenvalues (spe.scores=scores(spe.h.pca, display="species", choices=c(1,2))) # species scores on the first two PCA axes (sit.scores=scores(spe.h.pca, display="sites", choices=c(1,2))) # sites scores on the first two PCA axes ### PCA on the environmental variables #### Run the PCA env.pca=rda(env.z) # or rda(env, scale=TRUE) # Identify the significant axis using the Kaiser-Guttman criterion ev=env.pca$CA$eig ev[ev>mean(ev)] n=length(ev) bsm=data.frame(j=seq(1:n), p=0) bsm$p[1]=1/n for (i in 2:n) { bsm$p[i]=bsm$p[i-1]+(1/(n=1-i))} bsm$p=100*bsm$p/n bsm barplot(ev, main="Eigenvalues", col="grey", las=2) abline(h=mean(ev), col="red") legend("topright", "Average eigenvalues", lwd=1, col=2, bty="n") #### Extract the results summary(env.pca) summary(env.pca, scaling=2) ### Construct a biplot #### Biplot of the PCA on transformed species data (scaling 1) windows() plot(spe.h.pca) windows() ?biplot biplot(spe.h.pca) windows() plot(spe.h.pca, scaling=1, type="none", # scaling 1 = distance biplot : # distances among abjects in the biplot approximate their Euclidean distances # but angles among descriptor vectors DO NOT reflect their correlation xlab=c("PC1 (%)", round((spe.h.pca$CA$eig[1]/sum(spe.h.pca$CA$eig))*100,2)), ylab=c("PC2 (%)", round((spe.h.pca$CA$eig[2]/sum(spe.h.pca$CA$eig))*100,2))) points(scores(spe.h.pca, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spe.h.pca, display="species", choices=c(1), scaling=1), scores(spe.h.pca, display="species", choices=c(2), scaling=1), labels=rownames(scores(spe.h.pca, display="species", scaling=1)), col="red", cex=0.8) #### Biplot on the environmental variables (scaling 2) windows() plot(env.pca) windows() plot(env.pca, scaling=2, type="none", # scaling 2 = correlation biplot : # distances among abjects in the biplot DO NOT approximate their Euclidean distances # but angles among descriptor vectors reflect their correlation xlab=c("PC1 (%)", round((env.pca$CA$eig[1]/sum(env.pca$CA$eig))*100,2)), ylab=c("PC2 (%)", round((env.pca$CA$eig[2]/sum(env.pca$CA$eig))*100,2)), xlim=c(-1,1), ylim=c(-1,1)) points(scores(env.pca, display="sites", choices=c(1,2), scaling=2), pch=21, col="black", bg="darkgreen", cex=1.2) text(scores(env.pca, display="species", choices=c(1), scaling=2), scores(env.pca, display="species", choices=c(2), scaling=2), labels=rownames(scores(env.pca, display="species", scaling=2)), col="red", cex=0.8) # Challenge 3 #### # Run the PCA of the mite species abundances # Which are the significant axes ? # Which group of sites can you identify ? # Which species are related to each group of sites mite.spe<-read.table(file.choose(), row.names=1, h=T) # (answer below) # # # # # # # # # # # # # # # # # # mite.spe.hel<-decostand(mite.spe, method="hellinger") mite.spe.h.pca<-rda(mite.spe.hel) ev<-mite.spe.h.pca$CA$eig ev[ev>mean(ev)] n<-length(ev) bsm<-data.frame(j=seq(1:n), p=0) bsm$p[1]=1/n for (i in 2:n) { bsm$p[i]=bsm$p[i-1]+(1/(n=1-i))} bsm$p=100*bsm$p/n bsm barplot(ev, main="Eigenvalues", col="grey", las=2) abline(h=mean(ev), col="red") legend("topright", "Average eigenvalues", lwd=1, col=2, bty="n") summary(mite.spe.h.pca, display=NULL) windows() plot(mite.spe.h.pca, scaling=1, type="none", xlab=c("PC1 (%)", round((mite.spe.h.pca$CA$eig[1]/sum(mite.spe.h.pca$CA$eig))*100,2)), ylab=c("PC2 (%)", round((mite.spe.h.pca$CA$eig[2]/sum(mite.spe.h.pca$CA$eig))*100,2))) points(scores(mite.spe.h.pca, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(mite.spe.h.pca, display="species", choices=c(1), scaling=1), scores(mite.spe.h.pca, display="species", choices=c(2), scaling=1), labels=rownames(scores(mite.spe.h.pca, display="species", scaling=1)), col="red", cex=0.8) ## 3.2. Principal Coordinate Anaysis (PCoA) ### Run the PCoA on hellinger transformed species abundances ?cmdscale cmdscale(dist(spe.hel), k=(nrow(spe)-1), eig=TRUE) ?pcoa spe.h.pcoa<-pcoa(dist(spe.hel)) ### Extract the results spe.h.pcoa ### Construct the biplot windows() biplot.pcoa(spe.h.pcoa, spe.hel, dir.axis2=-1) ### Run the PCoA on a Bray-Curtis dissimilarity matrix spe.db spe.bray.pcoa<-pcoa(spe.db) spe.bray.pcoa windows() biplot.pcoa(spe.bray.pcoa, spe.hel, dir.axis2=-1) # The distance measure choosen thus strongly influenced the PCoA results # Challenge 4 #### # Run the PCoA of the hellinger-transformed mite species abundances # Which are the significant axes ? # Which group of sites can you identify ? # Which species are related to each group of sites ? # How does the PCoA results compare with the PCA # (answer below) # # # # # # # # # # # # # # # # # # mite.spe.h.pcoa<-pcoa(dist(mite.spe.hel)) mite.spe.h.pcoa windows() biplot.pcoa(mite.spe.h.pcoa, mite.spe.hel, dir.axis2=-1) #***************************************************************************************# # 4. Canonical Analysis ## 4.1. Redundancy Analysis (RDA) ### Prepare the data env<-env[,-1] # remove the "distance from the source" variable pen2 <- rep("very_steep", nrow(env)) # recode the slope variable into a factor pen2[env$pen <= quantile(env$pen)[4]] = "steep" pen2[env$pen <= quantile(env$pen)[3]] = "moderate" pen2[env$pen <= quantile(env$pen)[2]] = "low" pen2 <- factor(pen2, levels=c("low", "moderate", "steep", "very_steep")) table(pen2) env2 <- env env2$pen <- pen2 ### Run the RDA of all exlanatory variables of env2 on species abundances ?rda spe.rda<-rda(spe.hel~., data=env2) ### Extract the results summary(spe.rda, display=NULL) ### Select the significant explanatory variables by forward selection ?ordiR2step ordiR2step(rda(spe.hel~1, data=env2), scope= formula(spe.rda), direction= "forward", R2scope=TRUE, pstep=1000) which(colnames(env2)=="alt") which(colnames(env2)=="oxy") (env2.signif=env2[,c(1,9)]) ### Re-run the RDA of significant explanatory variables on species abundances spe.rda.signif=rda(spe.hel~., data=env2.signif) ### Extract the results summary(spe.rda.signif, display=NULL) ### Calculate the adjusted R^2 of the RDA (R2adj <- RsquareAdj(spe.rda.signif)$adj.r.squared) ### Test the significance of the model and the significance of axis ?anova.cca anova.cca(spe.rda.signif, step=1000) anova.cca(spe.rda.signif, step=1000, by="axis") ### Plot the results #### scaling 1 windows(title="RDA scaling 1") plot(spe.rda.signif, scaling=1, main="Triplot RDA") windows(title="RDA scaling 1") plot(spe.rda.signif, scaling=1, main="Triplot RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(spe.rda.signif, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spe.rda.signif, display="species", choices=c(1), scaling=1), scores(spe.rda.signif, display="species", choices=c(2), scaling=1), labels=rownames(scores(spe.rda.signif, display="species", scaling=1)), col="grey", cex=0.8) arrows(0,0, scores(spe.rda.signif, display="bp", choices=c(1), scaling=1), scores(spe.rda.signif, display="bp", choices=c(2), scaling=1), col="red") text(scores(spe.rda.signif, display="bp", choices=c(1), scaling=1)+0.05, scores(spe.rda.signif, display="bp", choices=c(2), scaling=1)+0.05, labels=rownames(scores(spe.rda.signif, display="bp", choices=c(2), scaling=1)), col="red", cex=1) #### scaling 2 windows(title="RDA scaling 2") plot(spe.rda.signif, scaling=2, main="Triplot RDA - scaling 2", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(spe.rda.signif, display="sites", choices=c(1,2), scaling=2), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spe.rda.signif, display="species", choices=c(1), scaling=2), scores(spe.rda.signif, display="species", choices=c(2), scaling=2), labels=rownames(scores(spe.rda.signif, display="species", scaling=2)), col="grey", cex=0.8) arrows(0,0, scores(spe.rda.signif, display="bp", choices=c(1), scaling=2), scores(spe.rda.signif, display="bp", choices=c(2), scaling=2), col="red") text(scores(spe.rda.signif, display="bp", choices=c(1), scaling=2)+0.05, scores(spe.rda.signif, display="bp", choices=c(2), scaling=2)+0.05, labels=rownames(scores(spe.rda.signif, display="bp", choices=c(2), scaling=2)), col="red", cex=1) # Challenge 5 #### # Run the RDA of the mite environmental variables of the mite species abundances mite.env<-read.table(file.choose(), row.names=1, h=T) # Which are the significant explanatory variables ? # How much of the variation explain the significant explanatory variables ? # Which are the significant axis ? # Which group of sites can you identify ? # Which species are related to each group of sites ? # (answer below) # # # # # # # # # # # # # # # # # # mite.spe.rda<-rda(mite.spe.hel~., data=mite.env) ordiR2step(rda(mite.spe.hel~1, data=mite.env), scope= formula(mite.spe.rda), direction= "forward", R2scope=TRUE, pstep=1000) which(colnames(mite.env)=="WatrCont") which(colnames(mite.env)=="Shrub") which(colnames(mite.env)=="Substrate") which(colnames(mite.env)=="Topo") (mite.env.signif=mite.env[,c(2,4,3,5)]) mite.spe.rda.signif=rda(mite.spe~., data=mite.env.signif) (R2adj <- RsquareAdj(mite.spe.rda.signif)$adj.r.squared) anova.cca(mite.spe.rda.signif, step=1000) anova.cca(mite.spe.rda.signif, step=1000, by="axis") summary(mite.spe.rda.signif, display=NULL) windows(title="RDA scaling 1") plot(mite.spe.rda.signif, scaling=1, main="Triplot RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(mite.spe.rda.signif, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(mite.spe.rda.signif, display="species", choices=c(1), scaling=1), scores(mite.spe.rda.signif, display="species", choices=c(2), scaling=1), labels=rownames(scores(mite.spe.rda.signif, display="species", scaling=1)), col="grey", cex=0.8) arrows(0,0, scores(mite.spe.rda.signif, display="bp", choices=c(1), scaling=1), scores(mite.spe.rda.signif, display="bp", choices=c(2), scaling=1), col="red") text(scores(mite.spe.rda.signif, display="bp", choices=c(1), scaling=1)+0.05, scores(mite.spe.rda.signif, display="bp", choices=c(2), scaling=1)+0.05, labels=rownames(scores(mite.spe.rda.signif, display="bp", choices=c(2), scaling=1)), col="red", cex=1) ## 4.2 Partial RDA : effect of water chemistry on species abundances partialling out the physiography ### Divide the env2 data frame in two data frames envtopo <- env[, c(1:3)] # Physiography : explanatory dataset 1 names(envtopo) envchem <- env[, c(4:10)] # Water quality : explanatory dataset 2 names(envchem) ### Run the partial RDA spechem.physio<-rda(spe.hel, envchem, envtopo) # or : spechem.physio2<-rda(spe.hel ~ pH + dur + pho + nit + amm + oxy + dbo + Condition(alt + pen + deb), data=env) ### Extract the results summary(spechem.physio, display=NULL) ### Calculate adjusted the R^2 of the partial RDA (R2adj <- RsquareAdj(spechem.physio)$adj.r.squared) ### Test of the partial RDA and the significance of axis anova.cca(spechem.physio, step=1000) anova.cca(spechem.physio2, step=1000, by="axis") ### Construct the triplots #### Scaling 1 windows(title="Partial RDA scaling 1") plot(spechem.physio, scaling=1, main="Triplot partial RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(spechem.physio, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spechem.physio, display="species", choices=c(1), scaling=1), scores(spechem.physio, display="species", choices=c(2), scaling=1), labels=rownames(scores(spechem.physio, display="species", scaling=1)), col="grey", cex=0.8) arrows(0,0, scores(spechem.physio, display="bp", choices=c(1), scaling=1), scores(spechem.physio, display="bp", choices=c(2), scaling=1), col="red") text(scores(spechem.physio, display="bp", choices=c(1), scaling=1)+0.05, scores(spechem.physio, display="bp", choices=c(2), scaling=1)+0.05, labels=rownames(scores(spechem.physio, display="bp", choices=c(2), scaling=1)), col="red", cex=1) #### Scaling 2 windows(title="Partial RDA scaling 2") plot(spechem.physio, scaling=2, main="Triplot partial RDA - scaling 2", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(spechem.physio, display="sites", choices=c(1,2), scaling=2), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spechem.physio, display="species", choices=c(1), scaling=2), scores(spechem.physio, display="species", choices=c(2), scaling=2), labels=rownames(scores(spechem.physio, display="species", scaling=2)), col="grey", cex=0.8) arrows(0,0, scores(spechem.physio, display="bp", choices=c(1), scaling=2), scores(spechem.physio, display="bp", choices=c(2), scaling=2), col="red") text(scores(spechem.physio, display="bp", choices=c(1), scaling=2)+0.05, scores(spechem.physio, display="bp", choices=c(2), scaling=2)+0.05, labels=rownames(scores(spechem.physio, display="bp", choices=c(2), scaling=2)), col="red", cex=1) # Challenge 6 #### # Run the partial RDA of the mite environmental variables of the mite species abundances # partialling out for the substrate variables (SubsDens, WaterCont and Substrate) # Which are the significant explanatory variables ? # How much of the variation explain the significant explanatory variables ? # Which are the significant axis ? # Which group of sites can you identify ? # Which species are related to each group of sites ? # (answer below) # # # # # # # # # # # # # # # # # # mite.spe.subs<-rda(mite.spe.hel ~ Shrub + Topo + Condition(SubsDens + WatrCont + Substrate), data=mite.env) summary(mite.spe.subs, display=NULL) (R2adj <- RsquareAdj(mite.spe.subs)$adj.r.squared) anova.cca(mite.spe.subs, step=1000) anova.cca(mite.spe.subs, step=1000, by="axis") #### Triplot scaling 1 windows(title="Partial RDA scaling 1") plot(mite.spe.subs, scaling=1, main="Triplot partial RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(mite.spe.subs, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(mite.spe.subs, display="species", choices=c(1), scaling=1), scores(mite.spe.subs, display="species", choices=c(2), scaling=1), labels=rownames(scores(mite.spe.subs, display="species", scaling=1)), col="grey", cex=0.8) arrows(0,0, scores(mite.spe.subs, display="bp", choices=c(1), scaling=1), scores(mite.spe.subs, display="bp", choices=c(2), scaling=1), col="red") text(scores(mite.spe.subs, display="bp", choices=c(1), scaling=1)+0.05, scores(mite.spe.subs, display="bp", choices=c(2), scaling=1)+0.05, labels=rownames(scores(mite.spe.subs, display="bp", choices=c(2), scaling=1)), col="red", cex=1) #### Triplot scaling 2 windows(title="Partial RDA scaling 2") plot(mite.spe.subs, scaling=2, main="Triplot partial RDA - scaling 2", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(mite.spe.subs, display="sites", choices=c(1,2), scaling=2), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(mite.spe.subs, display="species", choices=c(1), scaling=2), scores(mite.spe.subs, display="species", choices=c(2), scaling=2), labels=rownames(scores(mite.spe.subs, display="species", scaling=2)), col="grey", cex=0.8) arrows(0,0, scores(mite.spe.subs, display="bp", choices=c(1), scaling=2), scores(mite.spe.subs, display="bp", choices=c(2), scaling=2), col="red") text(scores(mite.spe.subs, display="bp", choices=c(1), scaling=2)+0.05, scores(mite.spe.subs, display="bp", choices=c(2), scaling=2)+0.05, labels=rownames(scores(mite.spe.subs, display="bp", choices=c(2), scaling=2)), col="red", cex=1) # 4.4. Variation partitionning by partial RDA ?varpart vegandocs("partitioning.pdf") ## Variation partitioning with all explanatory variables spe.part.all <- varpart(spe.hel, envchem, envtopo) spe.part.all windows(title="Variation partitioning - all variables") plot(spe.part.all, digits=2) ## Variation partitioning with only significant variables ### Separate forward selection in each subset of environmental variables spe.chem <- rda(spe.hel~., data=envchem) R2a.all.chem <- RsquareAdj(spe.chem)$adj.r.squared ordiR2step(rda(spe.hel~1, data=envchem), scope= formula(spe.chem), direction= "forward", R2scope=TRUE, pstep=1000) names(envchem) (envchem.pars <- envchem[, c( 4, 6, 7 )]) spe.topo <- rda(spe.hel~., data=envtopo) R2a.all.topo <- RsquareAdj(spe.topo)$adj.r.squared ordiR2step(rda(spe.hel~1, data=envtopo), scope= formula(spe.topo), direction= "forward", R2scope=TRUE, pstep=1000) names(envtopo) envtopo.pars <- envtopo[, c(1,2)] ### Variation partitioning (spe.part <- varpart(spe.hel, envchem.pars, envtopo.pars)) windows(title="Variation partitioning - parsimonious subsets") plot(spe.part, digits=2) # Tests of all testable fractions anova.cca(rda(spe.hel, envchem.pars), step=1000) # Test of fractions [a+b] anova.cca(rda(spe.hel, envtopo.pars), step=1000) # Test of fractions [b+c] env.pars <- cbind(envchem.pars, envtopo.pars) anova.cca(rda(spe.hel, env.pars), step=1000) # Test of fractions [a+b+c] anova.cca(rda(spe.hel, envchem.pars, envtopo.pars), step=1000) # Test of fraction [a] anova.cca(rda(spe.hel, envtopo.pars, envchem.pars), step=1000) # Test of fraction [c] # Challenge 7 #### # Run the variation partitionning of the mite species abundances # with a first dataset for the significant substrate variables (SubsDens, WaterCont and Substrate) # and a second dataset for the significant other variables (Shrud and Topo) # Which proportion of the variation explain each dataset ? # Which are the significant fractions ? # (answer below) # # # # # # # # # # # # # # # # # # str(mite.env) (mite.subs<-mite.env[,c(1,2,3)]) (mite.other<-mite.env[,c(4,5)]) rda.mite.subs <- rda(mite.spe.hel~., data=mite.subs) R2a.all.chem <- RsquareAdj(rda.mite.subs)$adj.r.squared ordiR2step(rda(mite.spe.hel~1, data=mite.subs), scope= formula(rda.mite.subs), direction= "forward", R2scope=TRUE, pstep=1000) names(mite.subs) (mite.subs.pars <- mite.subs[, c(2, 3)]) rda.mite.other <- rda(mite.spe.hel~., data=mite.other) R2a.all.chem <- RsquareAdj(rda.mite.other)$adj.r.squared ordiR2step(rda(mite.spe.hel~1, data=mite.other), scope= formula(rda.mite.other), direction= "forward", R2scope=TRUE, pstep=1000) names(mite.other) (mite.other.pars <- mite.other[, c(1,2)]) ### Variation partitioning (mite.spe.part <- varpart(mite.spe.hel, ~WatrCont+Substrate, ~Shrub+Topo, data=mite.env)) windows(title="Variation partitioning - parsimonious subsets") plot(mite.spe.part, digits=2) # Tests of all testable fractions anova.cca(rda(mite.spe.hel~ WatrCont+Substrate, data=mite.env), step=1000) # Test of fractions [a+b] anova.cca(rda(mite.spe.hel~Shrub+Topo, data=mite.env), step=1000) # Test of fractions [b+c] (env.pars <- cbind(mite.env[,c(2,3,4,5)])) anova.cca(rda(mite.spe.hel~ WatrCont+Substrate+Shrub+Topo, data=env.pars), step=1000) # Test of fractions [a+b+c] anova.cca(rda(mite.spe.hel~WatrCont+Substrate + Condition(Shrub+Topo), data=env.pars), step=1000) # Test of fraction [a] anova.cca(rda(mite.spe.hel~Shrub+Topo+ Condition(WatrCont+Substrate ), data=env.pars), step=1000) # Test of fraction [c] ## 4.3. Principal Response Curves (PRC) ?prc # van den Brink, P.J. & ter Braak, C.J.F. (1999). # Principal response curves: Analysis of time-dependent multivariate responses of biological community to stress. # Environmental Toxicology and Chemistry, 18, 138-148 # Chlorpyrifos experiment and experimental design data(pyrifos) week <- gl(11, 12, labels=c(-4, -1, 0.1, 1, 2, 4, 8, 12, 15, 19, 24)) dose <- factor(rep(c(0.1, 0, 0, 0.9, 0, 44, 6, 0.1, 44, 0.9, 0, 6), 11)) # PRC mod <- prc(pyrifos, dose, week) mod # RDA summary(mod) # PRC logabu <- colSums(pyrifos) plot(mod, select = logabu > 100) # Permutations should be done only within one week, and we only # are interested on the first axis anova(mod, strata = week, first=TRUE, perm.max = 100) # PRC can be useful in community ecolgy # to compare the temporal evolution of a communty in experimental sites (e.g. restored ecosystems) # to the temporal evolution of a communty in control sites (e.g. reference ecosystems) # Challenge 8 #### # Run the PRC on the Doubs fish species abundances with last site removed considering that: # the first 14 sites correpond to 4 reference ecosystems # the last 14 sites correpond to 4 restored ecosystems # the first seven lines correponds to successive sampling of the reference sites 1 during year 1,2,3,4,5,6 and 7 # the lines 8 to 14 correponds to successive sampling of the reference sites 2 during year 1,2,3,4,5,6 and 7 # etc... # Does restoration enables the recovery of reference communities ? # Which species are related to reference ecosystems, to restored ecosystems ? # (answer below) # # # # # # # # # # # # # # # # # # dim(spe.hel) spe.hel<-spe.hel[-29,] reference<-rep(c(0), 14) restored<-rep(c(1), 14) type.site<-as.factor(c(reference, restored)) year<-as.factor(rep(c(1,2,3,4,5,6,7), 4)) spe.hel.prc<-prc(spe.hel, type.site, year) spe.hel.prc summary(spe.hel.prc) plot(spe.hel.prc) #***************************************************************************************# # 5. Other useful ordination methods ?metaMDS # Nonmetric Multidimesional Scaling help(lda, package=MASS) # linear discriminant analysis ?CCorA # Canonical Correlation Analysis help(coinertia, package=ade4) # Coinertia Analysis help(mfa, package=ade4) # Multiple Factorial Analysis https://r-forge.r-project.org/R/?group_id=195 # packages AEM and PCNM contain functions to perform spatial analysis# Name: Amanda Winegardner, Xavier Giroux-Bougard and Bérenger Bourgeois # Date: October 2014 # Description: Ordination # Dataset: File names are "DoubsSpe.csv", "DoubsEnv.csv" # Notes: Material in R script obtained from # Borcard D., Gillet F. et Legendre P., 2011. Numerical Ecology with R. Springer. #***************************************************************************************# # Loading the data #### rm(list=ls()) install.packages("vegan") install.packages("gclus") install.packages("ape") library(vegan) library(gclus) library(ape) source(file.choose()) # fichier coldiss.R # Species community data frame (fish abundance): "DoubsSpe.csv" spe<-read.csv(file.choose(), row.names=1) spe=spe[-8,] # site number 8 contains no species and must be removed # Environmental data frame: "DoubsEnv.csv" env<-read.csv(file.choose(), row.names=1) env=env[-8,] #***************************************************************************************# # 1.Explore the data ## 1.1.Species data names(spe) dim(spe) str(spe) head(spe) summary(spe) ### Species distibution, all species confounded (ab=table(unlist(spe))) barplot(ab, las=1, xlab="Abundance class", ylab="Frequency", col=grey(5:0/5)) ### Number of absences sum(spe==0) ### Proportion of zeros in the community data set sum(spe==0)/(nrow(spe)*ncol(spe)) ### Species occurences spe.pres=colSums(spe>0) # compute the number of sites where each species is present hist(spe.pres, main="Species occurence", las=1, xlab="Frequency of occurences", ylab="Number of species", breaks=seq(0,30, by=5), col="grey") ### Sites richness site.pres=rowSums(spe>0) # compute the number of species at ecah site hist(site.pres, main="Species richness", las=1, xlab="Frequency of sites", ylab="Number of species", breaks=seq(0,30, by=5), col="grey") ### Compute diversity indices ?diversity (H=diversity(spe, index="shannon")) # Shannon diversity index (N=diversity(spe, index="simpson")) # Simpson diversity index ## 1.2.Environmental data names(env) dim(env) str(env) head(env) summary(env) pairs(env, main="Bivariate Plots of the Environmental Data" ) # Standardization of the 11 environmental data env.z=decostand(env, method="standardize") apply(env.z, 2, mean) # the data are now centered (means~0) apply(env.z, 2, sd) # the data are now scaled (standard deviations=1) par(mfrow=c(1,2)) hist(env$oxy, main="Histogram of Oxygen Concentration", ylab="Frequency", ylim=c(0,10), col="grey") hist(env.z$oxy, main="Histogram of Standardized Oxygen Concentration", ylab="Frequency", ylim=c(0,10), col="grey") # Note: explanatory variables expressed in different units # must obligatory be standardized before computing distances measures and ordination analysis. #***************************************************************************************# # 2.Association measures ## 2.1. Distance measures ### Quantitative species data ?vegdist # this function compute dissimilarity indices useful for community decomposition data (see method options) (spe.db=vegdist(spe, method="bray")) # Bray-Curtis dissimilarity (spe.dj=vegdist(spe, method="jac")) # Jaccard dissimilarity (spe.dg=vegdist(spe, method="gower")) # Gower dissimilarity ### Presence-Absence species data (spe.db.pa=vegdist(spe, method="bray", binary=TRUE)) #Bray-Curtis dissimilarity (spe.dj.pa=vegdist(spe, method="jac", binary=TRUE)) # Jaccard dissimilarity (spe.dg.pa=vegdist(spe, method="gower", binary=TRUE)) # Gower dissimilarity ### Graphical display of association matrices: heat maps windows() coldiss(spe.db, byrank=FALSE, diag=TRUE) # Heat map of Bray-Curtis dissimilarity windows() coldiss(spe.dj, byrank=FALSE, diag=TRUE) # Heat map of Jaccard dissimilarity windows() coldiss(spe.dg, byrank=FALSE, diag=TRUE) # Heat map of Gower dissimilarity ### Quantitative environmental data #### Association between environmental data (Q mode) ?dist # this function also compute dissimilarity matrix env.de=dist(env.z, method = "euclidean") # euclidean distance matrix of the standardized environmental variables windows() coldiss(env.de, diag=TRUE) ### Dependance between environmental data (R mode) (env.pearson=cor(env)) # Pearson r linear correlation round(env.pearson, 2) (env.ken=cor(env, method="kendall")) # Kendall tau rank correlation round(env.ken, 2) ### Mixed types of environmental variables #### Association between mixed types of environmental data (Q mode) ##### Create a fictious data frame var.g1=rnorm(30, 0, 1) var.g2=runif(30, 0, 5) var.g3=gl(3, 10) var.g4=gl(2, 5, 30) (dat2=data.frame(var.g1, var.g2, var.g3, var.g4)) str(dat2) summary(dat2) ##### Compute the Gower dissimilarity matrix ?daisy (dat2.dg=daisy(dat2, metric="gower")) coldiss(dat2.dg) # Challenge 1 #### # Calculate by hand the Bray-Curtis and the Gower dissimilarity of species abundance CHA, TRU and VAI for sites 1, 2 and 3 # (answer below) # # # # # # # # # # # # # # # # # Bray-Curtis dissimilarity : d[jk] = (sum abs(x[ij]-x[ik]))/(sum (x[ij]+x[ik])) (spe.challenge=spe[1:3,1:3]) # Calculate the total species abundance by site (Abund.s1=sum(spe.challenge[1,])) (Abund.s2=sum(spe.challenge[2,])) (Abund.s3=sum(spe.challenge[3,])) # Calculate the species abundance differences between pairs of sites for each species Spec.s1s2=0 Spec.s1s3=0 Spec.s2s3=0 for (i in 1:3) { Spec.s1s2=Spec.s1s2+abs(sum(spe.challenge[1,i]-spe.challenge[2,i])) Spec.s1s3=Spec.s1s3+abs(sum(spe.challenge[1,i]-spe.challenge[3,i])) Spec.s2s3=Spec.s2s3+abs(sum(spe.challenge[2,i]-spe.challenge[3,i])) } # Calculate the Bray-Curtis dissimilarity (db.s1s2=Spec.s1s2/(Abund.s1+Abund.s2)) (db.s1s3=Spec.s1s3/(Abund.s1+Abund.s3)) (db.s2s3=Spec.s2s3/(Abund.s2+Abund.s3)) # Compare your results (spe.db.challenge=vegdist(spe.challenge, method="bray")) # Gower dissimilarity : d[jk] = (1/M) sum(abs(x[ij]-x[ik])/(max(x[i])-min(x[i]))) # Calculate the number of columns in your dataset M=ncol(spe.challenge) # Calculate the species abundance differences between pairs of sites for each species Spe1.s1s2=abs(spe.challenge[1,1]-spe.challenge[2,1]) Spe2.s1s2=abs(spe.challenge[1,2]-spe.challenge[2,2]) Spe3.s1s2=abs(spe.challenge[1,3]-spe.challenge[2,3]) Spe1.s1s3=abs(spe.challenge[1,1]-spe.challenge[3,1]) Spe2.s1s3=abs(spe.challenge[1,2]-spe.challenge[3,2]) Spe3.s1s3=abs(spe.challenge[1,3]-spe.challenge[3,3]) Spe1.s2s3=abs(spe.challenge[2,1]-spe.challenge[3,1]) Spe2.s2s3=abs(spe.challenge[2,2]-spe.challenge[3,2]) Spe3.s2s3=abs(spe.challenge[2,3]-spe.challenge[3,3]) # Calculate the range of each species abundance between sites Range.spe1=max(spe.challenge[,1]) - min (spe.challenge[,1]) Range.spe2=max(spe.challenge[,2]) - min (spe.challenge[,2]) Range.spe3=max(spe.challenge[,3]) - min (spe.challenge[,3]) # Calculate the Gower dissimilarity (dg.s1s2=(1/M)*((Spe2.s1s2/Range.spe2)+(Spe3.s1s2/Range.spe3))) (dg.s1s3=(1/M)*((Spe2.s1s3/Range.spe2)+(Spe3.s1s3/Range.spe3))) (dg.s2s3=(1/M)*((Spe2.s2s3/Range.spe2)+(Spe3.s2s3/Range.spe3))) # Compare your results (spe.db.challenge=vegdist(spe.challenge, method="gower")) ## 2.2. Transformations for community composition data ?decostand # this function provide some standardiztion methods for community domposition data (see method options) # Transform abundances to absence-presence (1-0) (spe.pa=decostand(spe, method="pa")) # Hellinger transformation (spe.hel=decostand(spe, method="hellinger")) # Chi.square transformation (spe.chi=decostand(spe, method="chi.square")) # Challenge 2 #### # Calculate by hand the Hellinger and the Chi-square transformed abundances data # (answer below) # # # # # # # # # # # # # # # # # # Hellinger tranformation # First calculate the total species abundances by site (site.totals=apply(spe, 1, sum)) # Scale species abundances by dividing them by site totals (scale.spe=spe/site.totals) # Calculate de square root of scaled species abundances (sqrt.scale.spe=sqrt(scale.spe)) # Compare the results sqrt.scale.spe spe.hel sqrt.scale.spe-spe.hel # or: sqrt.scale.spe/spe.hel # Chi-squre transformation # First calculate the total species abundances by site (site.totals=apply(spe, 1, sum)) # Then calculate the square root of total species abundances (sqrt.spe.totals=sqrt(apply(spe, 2, sum))) # Scale species abundances by dividing them by the site totals and the species totals scale.spe2=spe for (i in 1:nrow(spe)) { for (j in 1:ncol(spe)) { (scale.spe2[i,j]=scale.spe2[i,j]/(site.totals[i]*sqrt.spe.totals[j])) }} #Adjust the scale abundance species by multplying by the square root of the species matrix total (adjust.scale.spe2=scale.spe2*sqrt(sum(rowSums(spe)))) # Compare the results adjust.scale.spe2 spe.chi adjust.scale.spe2-spe.chi # or: adjust.scale.spe2/spe.chi #***************************************************************************************# # 3.Unconstrained Ordination ## 3.1. Principal Components Analysis (PCA) ### PCA on hellinger-transformed species data ?rda #### Run the PCA spe.h.pca=rda(spe.hel) # Identify the significant axis using the Kaiser-Guttman criterion ev=spe.h.pca$CA$eig ev[ev>mean(ev)] n=length(ev) bsm=data.frame(j=seq(1:n), p=0) bsm$p[1]=1/n for (i in 2:n) { bsm$p[i]=bsm$p[i-1]+(1/(n=1-i))} bsm$p=100*bsm$p/n bsm barplot(ev, main="Eigenvalues", col="grey", las=2) abline(h=mean(ev), col="red") legend("topright", "Average eigenvalues", lwd=1, col=2, bty="n") #### Extract the results summary(spe.h.pca) # overall results summary(spe.h.pca) summary(spe.h.pca, display=NULL) # only eigenvalues and their contribution to the variance eigen(cov(spe.hel)) # also compute the eigenvalues (spe.scores=scores(spe.h.pca, display="species", choices=c(1,2))) # species scores on the first two PCA axes (sit.scores=scores(spe.h.pca, display="sites", choices=c(1,2))) # sites scores on the first two PCA axes ### PCA on the environmental variables #### Run the PCA env.pca=rda(env.z) # or rda(env, scale=TRUE) # Identify the significant axis using the Kaiser-Guttman criterion ev=env.pca$CA$eig ev[ev>mean(ev)] n=length(ev) bsm=data.frame(j=seq(1:n), p=0) bsm$p[1]=1/n for (i in 2:n) { bsm$p[i]=bsm$p[i-1]+(1/(n=1-i))} bsm$p=100*bsm$p/n bsm barplot(ev, main="Eigenvalues", col="grey", las=2) abline(h=mean(ev), col="red") legend("topright", "Average eigenvalues", lwd=1, col=2, bty="n") #### Extract the results summary(env.pca) summary(env.pca, scaling=2) ### Construct a biplot #### Biplot of the PCA on transformed species data (scaling 1) windows() plot(spe.h.pca) windows() ?biplot biplot(spe.h.pca) windows() plot(spe.h.pca, scaling=1, type="none", # scaling 1 = distance biplot : # distances among abjects in the biplot approximate their Euclidean distances # but angles among descriptor vectors DO NOT reflect their correlation xlab=c("PC1 (%)", round((spe.h.pca$CA$eig[1]/sum(spe.h.pca$CA$eig))*100,2)), ylab=c("PC2 (%)", round((spe.h.pca$CA$eig[2]/sum(spe.h.pca$CA$eig))*100,2))) points(scores(spe.h.pca, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spe.h.pca, display="species", choices=c(1), scaling=1), scores(spe.h.pca, display="species", choices=c(2), scaling=1), labels=rownames(scores(spe.h.pca, display="species", scaling=1)), col="red", cex=0.8) #### Biplot on the environmental variables (scaling 2) windows() plot(env.pca) windows() plot(env.pca, scaling=2, type="none", # scaling 2 = correlation biplot : # distances among abjects in the biplot DO NOT approximate their Euclidean distances # but angles among descriptor vectors reflect their correlation xlab=c("PC1 (%)", round((env.pca$CA$eig[1]/sum(env.pca$CA$eig))*100,2)), ylab=c("PC2 (%)", round((env.pca$CA$eig[2]/sum(env.pca$CA$eig))*100,2)), xlim=c(-1,1), ylim=c(-1,1)) points(scores(env.pca, display="sites", choices=c(1,2), scaling=2), pch=21, col="black", bg="darkgreen", cex=1.2) text(scores(env.pca, display="species", choices=c(1), scaling=2), scores(env.pca, display="species", choices=c(2), scaling=2), labels=rownames(scores(env.pca, display="species", scaling=2)), col="red", cex=0.8) # Challenge 3 #### # Run the PCA of the mite species abundances # Which are the significant axes ? # Which group of sites can you identify ? # Which species are related to each group of sites mite.spe<-read.table(file.choose(), row.names=1, h=T) # (answer below) # # # # # # # # # # # # # # # # # # mite.spe.hel=decostand(mite.spe, method="hellinger") mite.spe.h.pca=rda(mite.spe.hel) ev=mite.spe.h.pca$CA$eig ev[ev>mean(ev)] n=length(ev) bsm=data.frame(j=seq(1:n), p=0) bsm$p[1]=1/n for (i in 2:n) { bsm$p[i]=bsm$p[i-1]+(1/(n=1-i))} bsm$p=100*bsm$p/n bsm barplot(ev, main="Eigenvalues", col="grey", las=2) abline(h=mean(ev), col="red") legend("topright", "Average eigenvalues", lwd=1, col=2, bty="n") summary(mite.spe.h.pca, display=NULL) windows() plot(mite.spe.h.pca, scaling=1, type="none", xlab=c("PC1 (%)", round((mite.spe.h.pca$CA$eig[1]/sum(mite.spe.h.pca$CA$eig))*100,2)), ylab=c("PC2 (%)", round((mite.spe.h.pca$CA$eig[2]/sum(mite.spe.h.pca$CA$eig))*100,2))) points(scores(mite.spe.h.pca, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(mite.spe.h.pca, display="species", choices=c(1), scaling=1), scores(mite.spe.h.pca, display="species", choices=c(2), scaling=1), labels=rownames(scores(mite.spe.h.pca, display="species", scaling=1)), col="red", cex=0.8) ## 3.2. Principal Coordinate Anaysis (PCoA) ### Run the PCoA on hellinger transformed species abundances ?cmdscale cmdscale(dist(spe.hel), k=(nrow(spe)-1), eig=TRUE) ?pcoa spe.h.pcoa=pcoa(dist(spe.hel)) ### Extract the results spe.h.pcoa ### Construct the biplot windows() biplot.pcoa(spe.h.pcoa, spe.hel, dir.axis2=-1) ### Run the PCoA on a Bray-Curtis dissimilarity matrix spe.db spe.bray.pcoa=pcoa(spe.db) spe.bray.pcoa windows() biplot.pcoa(spe.bray.pcoa, spe.hel, dir.axis2=-1) # The distance measure choosen thus strongly influenced the PCoA results # Challenge 4 #### # Run the PCoA of the hellinger-transformed mite species abundances # Which are the significant axes ? # Which group of sites can you identify ? # Which species are related to each group of sites ? # How does the PCoA results compare with the PCA # (answer below) # # # # # # # # # # # # # # # # # # mite.spe.h.pcoa=pcoa(dist(mite.spe.hel)) mite.spe.h.pcoa windows() biplot.pcoa(mite.spe.h.pcoa, mite.spe.hel, dir.axis2=-1) #***************************************************************************************# # 4. Canonical Analysis ## 4.1. Redundancy Analysis (RDA) ### Prepare the data env=env[,-1] # remove the "distance from the source" variable pen2 <- rep("very_steep", nrow(env)) # recode the slope variable into a factor pen2[env$pen <= quantile(env$pen)[4]] = "steep" pen2[env$pen <= quantile(env$pen)[3]] = "moderate" pen2[env$pen <= quantile(env$pen)[2]] = "low" pen2 <- factor(pen2, levels=c("low", "moderate", "steep", "very_steep")) table(pen2) env2 <- env env2$pen <- pen2 ### Run the RDA of all exlanatory variables of env2 on species abundances ?rda spe.rda=rda(spe.hel~., data=env2) ### Extract the results summary(spe.rda, display=NULL) ### Select the significant explanatory variables by forward selection ?ordiR2step ordiR2step(rda(spe.hel~1, data=env2), scope= formula(spe.rda), direction= "forward", R2scope=TRUE, pstep=1000) which(colnames(env2)=="alt") which(colnames(env2)=="oxy") (env2.signif=env2[,c(1,9)]) ### Re-run the RDA of significant explanatory variables on species abundances spe.rda.signif=rda(spe.hel~., data=env2.signif) ### Extract the results summary(spe.rda.signif, display=NULL) ### Calculate the adjusted R^2 of the RDA (R2adj <- RsquareAdj(spe.rda.signif)$adj.r.squared) ### Test the significance of the model and the significance of axis ?anova.cca anova.cca(spe.rda.signif, step=1000) anova.cca(spe.rda.signif, step=1000, by="axis") ### Plot the results #### scaling 1 windows(title="RDA scaling 1") plot(spe.rda.signif, scaling=1, main="Triplot RDA") windows(title="RDA scaling 1") plot(spe.rda.signif, scaling=1, main="Triplot RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(spe.rda.signif, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spe.rda.signif, display="species", choices=c(1), scaling=1), scores(spe.rda.signif, display="species", choices=c(2), scaling=1), labels=rownames(scores(spe.rda.signif, display="species", scaling=1)), col="grey", cex=0.8) arrows(0,0, scores(spe.rda.signif, display="bp", choices=c(1), scaling=1), scores(spe.rda.signif, display="bp", choices=c(2), scaling=1), col="red") text(scores(spe.rda.signif, display="bp", choices=c(1), scaling=1)+0.05, scores(spe.rda.signif, display="bp", choices=c(2), scaling=1)+0.05, labels=rownames(scores(spe.rda.signif, display="bp", choices=c(2), scaling=1)), col="red", cex=1) #### scaling 2 windows(title="RDA scaling 2") plot(spe.rda.signif, scaling=2, main="Triplot RDA - scaling 2", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(spe.rda.signif, display="sites", choices=c(1,2), scaling=2), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spe.rda.signif, display="species", choices=c(1), scaling=2), scores(spe.rda.signif, display="species", choices=c(2), scaling=2), labels=rownames(scores(spe.rda.signif, display="species", scaling=2)), col="grey", cex=0.8) arrows(0,0, scores(spe.rda.signif, display="bp", choices=c(1), scaling=2), scores(spe.rda.signif, display="bp", choices=c(2), scaling=2), col="red") text(scores(spe.rda.signif, display="bp", choices=c(1), scaling=2)+0.05, scores(spe.rda.signif, display="bp", choices=c(2), scaling=2)+0.05, labels=rownames(scores(spe.rda.signif, display="bp", choices=c(2), scaling=2)), col="red", cex=1) # Challenge 5 #### # Run the RDA of the mite environmental variables of the mite species abundances mite.env<-read.table(file.choose(), row.names=1, h=T) # Which are the significant explanatory variables ? # How much of the variation explain the significant explanatory variables ? # Which are the significant axis ? # Which group of sites can you identify ? # Which species are related to each group of sites ? # (answer below) # # # # # # # # # # # # # # # # # # mite.spe.rda=rda(mite.spe.hel~., data=mite.env) ordiR2step(rda(mite.spe.hel~1, data=mite.env), scope= formula(mite.spe.rda), direction= "forward", R2scope=TRUE, pstep=1000) which(colnames(mite.env)=="WatrCont") which(colnames(mite.env)=="Shrub") which(colnames(mite.env)=="Substrate") which(colnames(mite.env)=="Topo") (mite.env.signif=mite.env[,c(2,4,3,5)]) mite.spe.rda.signif=rda(mite.spe~., data=mite.env.signif) (R2adj <- RsquareAdj(mite.spe.rda.signif)$adj.r.squared) anova.cca(mite.spe.rda.signif, step=1000) anova.cca(mite.spe.rda.signif, step=1000, by="axis") summary(mite.spe.rda.signif, display=NULL) windows(title="RDA scaling 1") plot(mite.spe.rda.signif, scaling=1, main="Triplot RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(mite.spe.rda.signif, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(mite.spe.rda.signif, display="species", choices=c(1), scaling=1), scores(mite.spe.rda.signif, display="species", choices=c(2), scaling=1), labels=rownames(scores(mite.spe.rda.signif, display="species", scaling=1)), col="grey", cex=0.8) arrows(0,0, scores(mite.spe.rda.signif, display="bp", choices=c(1), scaling=1), scores(mite.spe.rda.signif, display="bp", choices=c(2), scaling=1), col="red") text(scores(mite.spe.rda.signif, display="bp", choices=c(1), scaling=1)+0.05, scores(mite.spe.rda.signif, display="bp", choices=c(2), scaling=1)+0.05, labels=rownames(scores(mite.spe.rda.signif, display="bp", choices=c(2), scaling=1)), col="red", cex=1) ## 4.2 Partial RDA : effect of water chemistry on species abundances partialling out the physiography ### Divide the env2 data frame in two data frames envtopo <- env[, c(1:3)] # Physiography : explanatory dataset 1 names(envtopo) envchem <- env[, c(4:10)] # Water quality : explanatory dataset 2 names(envchem) ### Run the partial RDA spechem.physio=rda(spe.hel, envchem, envtopo) # or : spechem.physio2=rda(spe.hel ~ pH + dur + pho + nit + amm + oxy + dbo + Condition(alt + pen + deb), data=env) ### Extract the results summary(spechem.physio, display=NULL) ### Calculate adjusted the R^2 of the partial RDA (R2adj <- RsquareAdj(spechem.physio)$adj.r.squared) ### Test of the partial RDA and the significance of axis anova.cca(spechem.physio, step=1000) anova.cca(spechem.physio2, step=1000, by="axis") ### Construct the triplots #### Scaling 1 windows(title="Partial RDA scaling 1") plot(spechem.physio, scaling=1, main="Triplot partial RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(spechem.physio, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spechem.physio, display="species", choices=c(1), scaling=1), scores(spechem.physio, display="species", choices=c(2), scaling=1), labels=rownames(scores(spechem.physio, display="species", scaling=1)), col="grey", cex=0.8) arrows(0,0, scores(spechem.physio, display="bp", choices=c(1), scaling=1), scores(spechem.physio, display="bp", choices=c(2), scaling=1), col="red") text(scores(spechem.physio, display="bp", choices=c(1), scaling=1)+0.05, scores(spechem.physio, display="bp", choices=c(2), scaling=1)+0.05, labels=rownames(scores(spechem.physio, display="bp", choices=c(2), scaling=1)), col="red", cex=1) #### Scaling 2 windows(title="Partial RDA scaling 2") plot(spechem.physio, scaling=2, main="Triplot partial RDA - scaling 2", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(spechem.physio, display="sites", choices=c(1,2), scaling=2), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spechem.physio, display="species", choices=c(1), scaling=2), scores(spechem.physio, display="species", choices=c(2), scaling=2), labels=rownames(scores(spechem.physio, display="species", scaling=2)), col="grey", cex=0.8) arrows(0,0, scores(spechem.physio, display="bp", choices=c(1), scaling=2), scores(spechem.physio, display="bp", choices=c(2), scaling=2), col="red") text(scores(spechem.physio, display="bp", choices=c(1), scaling=2)+0.05, scores(spechem.physio, display="bp", choices=c(2), scaling=2)+0.05, labels=rownames(scores(spechem.physio, display="bp", choices=c(2), scaling=2)), col="red", cex=1) # Challenge 6 #### # Run the partial RDA of the mite environmental variables of the mite species abundances # partialling out for the substrate variables (SubsDens, WaterCont and Substrate) # Which are the significant explanatory variables ? # How much of the variation explain the significant explanatory variables ? # Which are the significant axis ? # Which group of sites can you identify ? # Which species are related to each group of sites ? # (answer below) # # # # # # # # # # # # # # # # # # mite.spe.subs=rda(mite.spe.hel ~ Shrub + Topo + Condition(SubsDens + WatrCont + Substrate), data=mite.env) summary(mite.spe.subs, display=NULL) (R2adj <- RsquareAdj(mite.spe.subs)$adj.r.squared) anova.cca(mite.spe.subs, step=1000) anova.cca(mite.spe.subs, step=1000, by="axis") #### Triplot scaling 1 windows(title="Partial RDA scaling 1") plot(mite.spe.subs, scaling=1, main="Triplot partial RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(mite.spe.subs, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(mite.spe.subs, display="species", choices=c(1), scaling=1), scores(mite.spe.subs, display="species", choices=c(2), scaling=1), labels=rownames(scores(mite.spe.subs, display="species", scaling=1)), col="grey", cex=0.8) arrows(0,0, scores(mite.spe.subs, display="bp", choices=c(1), scaling=1), scores(mite.spe.subs, display="bp", choices=c(2), scaling=1), col="red") text(scores(mite.spe.subs, display="bp", choices=c(1), scaling=1)+0.05, scores(mite.spe.subs, display="bp", choices=c(2), scaling=1)+0.05, labels=rownames(scores(mite.spe.subs, display="bp", choices=c(2), scaling=1)), col="red", cex=1) #### Triplot scaling 2 windows(title="Partial RDA scaling 2") plot(mite.spe.subs, scaling=2, main="Triplot partial RDA - scaling 2", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(mite.spe.subs, display="sites", choices=c(1,2), scaling=2), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(mite.spe.subs, display="species", choices=c(1), scaling=2), scores(mite.spe.subs, display="species", choices=c(2), scaling=2), labels=rownames(scores(mite.spe.subs, display="species", scaling=2)), col="grey", cex=0.8) arrows(0,0, scores(mite.spe.subs, display="bp", choices=c(1), scaling=2), scores(mite.spe.subs, display="bp", choices=c(2), scaling=2), col="red") text(scores(mite.spe.subs, display="bp", choices=c(1), scaling=2)+0.05, scores(mite.spe.subs, display="bp", choices=c(2), scaling=2)+0.05, labels=rownames(scores(mite.spe.subs, display="bp", choices=c(2), scaling=2)), col="red", cex=1) # 4.4. Variation partitionning by partial RDA ?varpart vegandocs("partitioning.pdf") ## Variation partitioning with all explanatory variables spe.part.all <- varpart(spe.hel, envchem, envtopo) spe.part.all windows(title="Variation partitioning - all variables") plot(spe.part.all, digits=2) ## Variation partitioning with only significant variables ### Separate forward selection in each subset of environmental variables spe.chem <- rda(spe.hel~., data=envchem) R2a.all.chem <- RsquareAdj(spe.chem)$adj.r.squared ordiR2step(rda(spe.hel~1, data=envchem), scope= formula(spe.chem), direction= "forward", R2scope=TRUE, pstep=1000) names(envchem) (envchem.pars <- envchem[, c( 4, 6, 7 )]) spe.topo <- rda(spe.hel~., data=envtopo) R2a.all.topo <- RsquareAdj(spe.topo)$adj.r.squared ordiR2step(rda(spe.hel~1, data=envtopo), scope= formula(spe.topo), direction= "forward", R2scope=TRUE, pstep=1000) names(envtopo) envtopo.pars <- envtopo[, c(1,2)] ### Variation partitioning (spe.part <- varpart(spe.hel, envchem.pars, envtopo.pars)) windows(title="Variation partitioning - parsimonious subsets") plot(spe.part, digits=2) # Tests of all testable fractions anova.cca(rda(spe.hel, envchem.pars), step=1000) # Test of fractions [a+b] anova.cca(rda(spe.hel, envtopo.pars), step=1000) # Test of fractions [b+c] env.pars <- cbind(envchem.pars, envtopo.pars) anova.cca(rda(spe.hel, env.pars), step=1000) # Test of fractions [a+b+c] anova.cca(rda(spe.hel, envchem.pars, envtopo.pars), step=1000) # Test of fraction [a] anova.cca(rda(spe.hel, envtopo.pars, envchem.pars), step=1000) # Test of fraction [c] # Challenge 7 #### # Run the variation partitionning of the mite species abundances # with a first dataset for the significant substrate variables (SubsDens, WaterCont and Substrate) # and a second dataset for the significant other variables (Shrud and Topo) # Which proportion of the variation explain each dataset ? # Which are the significant fractions ? # (answer below) # # # # # # # # # # # # # # # # # # str(mite.env) (mite.subs=mite.env[,c(1,2,3)]) (mite.other=mite.env[,c(4,5)]) rda.mite.subs <- rda(mite.spe.hel~., data=mite.subs) R2a.all.chem <- RsquareAdj(rda.mite.subs)$adj.r.squared ordiR2step(rda(mite.spe.hel~1, data=mite.subs), scope= formula(rda.mite.subs), direction= "forward", R2scope=TRUE, pstep=1000) names(mite.subs) (mite.subs.pars <- mite.subs[, c(2, 3)]) rda.mite.other <- rda(mite.spe.hel~., data=mite.other) R2a.all.chem <- RsquareAdj(rda.mite.other)$adj.r.squared ordiR2step(rda(mite.spe.hel~1, data=mite.other), scope= formula(rda.mite.other), direction= "forward", R2scope=TRUE, pstep=1000) names(mite.other) (mite.other.pars <- mite.other[, c(1,2)]) ### Variation partitioning (mite.spe.part <- varpart(mite.spe.hel, ~WatrCont+Substrate, ~Shrub+Topo, data=mite.env)) windows(title="Variation partitioning - parsimonious subsets") plot(mite.spe.part, digits=2) # Tests of all testable fractions anova.cca(rda(mite.spe.hel~ WatrCont+Substrate, data=mite.env), step=1000) # Test of fractions [a+b] anova.cca(rda(mite.spe.hel~Shrub+Topo, data=mite.env), step=1000) # Test of fractions [b+c] (env.pars <- cbind(mite.env[,c(2,3,4,5)])) anova.cca(rda(mite.spe.hel~ WatrCont+Substrate+Shrub+Topo, data=env.pars), step=1000) # Test of fractions [a+b+c] anova.cca(rda(mite.spe.hel~WatrCont+Substrate + Condition(Shrub+Topo), data=env.pars), step=1000) # Test of fraction [a] anova.cca(rda(mite.spe.hel~Shrub+Topo+ Condition(WatrCont+Substrate ), data=env.pars), step=1000) # Test of fraction [c] ## 4.3. Principal Response Curves (PRC) ?prc # van den Brink, P.J. & ter Braak, C.J.F. (1999). # Principal response curves: Analysis of time-dependent multivariate responses of biological community to stress. # Environmental Toxicology and Chemistry, 18, 138-148 # Chlorpyrifos experiment and experimental design data(pyrifos) week <- gl(11, 12, labels=c(-4, -1, 0.1, 1, 2, 4, 8, 12, 15, 19, 24)) dose <- factor(rep(c(0.1, 0, 0, 0.9, 0, 44, 6, 0.1, 44, 0.9, 0, 6), 11)) # PRC mod <- prc(pyrifos, dose, week) mod # RDA summary(mod) # PRC logabu <- colSums(pyrifos) plot(mod, select = logabu > 100) # Permutations should be done only within one week, and we only # are interested on the first axis anova(mod, strata = week, first=TRUE, perm.max = 100) # PRC can be useful in community ecolgy # to compare the temporal evolution of a communty in experimental sites (e.g. restored ecosystems) # to the temporal evolution of a communty in control sites (e.g. reference ecosystems) # Challenge 8 #### # Run the PRC on the Doubs fish species abundances with last site removed considering that: # the first 14 sites correpond to 4 reference ecosystems # the last 14 sites correpond to 4 restored ecosystems # the first seven lines correponds to successive sampling of the reference sites 1 during year 1,2,3,4,5,6 and 7 # the lines 8 to 14 correponds to successive sampling of the reference sites 2 during year 1,2,3,4,5,6 and 7 # etc... # Does restoration enables the recovery of reference communities ? # Which species are related to reference ecosystems, to restored ecosystems ? # (answer below) # # # # # # # # # # # # # # # # # # dim(spe.hel) spe.hel=spe.hel[-29,] reference=rep(c(0), 14) restored=rep(c(1), 14) type.site=as.factor(c(reference, restored)) year=as.factor(rep(c(1,2,3,4,5,6,7), 4)) spe.hel.prc=prc(spe.hel, type.site, year) spe.hel.prc summary(spe.hel.prc) plot(spe.hel.prc) #***************************************************************************************# # 5. Other useful ordination methods ?metaMDS # Nonmetric Multidimesional Scaling help(lda, package=MASS) # linear discriminant analysis ?CCorA # Canonical Correlation Analysis help(coinertia, package=ade4) # Coinertia Analysis help(mfa, package=ade4) # Multiple Factorial Analysis https://r-forge.r-project.org/R/?group_id=195 # packages AEM and PCNM contain functions to perform spatial analysis