# Name: Amanda Winegardner, Xavier Giroux-Bougard and B?renger Bourgeois # Date: January 2015 # Description: Multivariate analysis # Dataset: File names are "DoubsSpe.csv", "DoubsEnv.csv" # Notes: Material in R script obtained from # Borcard D., Gillet F. et Legendre P., 2011. Numerical Ecology with R. Springer. #***************************************************************************************# # 0. Loading the data #### rm(list=ls()) install.packages("vegan") install.packages("gclus") install.packages("ape") library(vegan) library(gclus) library(ape) source(file.choose()) # fichier coldiss.R # Species community data frame (fish abundance): "DoubsSpe.csv" spe <- read.csv(file.choose(), row.names=1) spe <- spe[-8,] # site number 8 contains no species and must be removed # Environmental data frame: "DoubsEnv.csv" env <- read.csv(file.choose(), row.names=1) env <- env[-8,] #***************************************************************************************# # 1.Explore the data #### ## 1.1 Species data #### names(spe) dim(spe) str(spe) head(spe) summary(spe) ### Species distibution, all species confounded (ab <- table(unlist(spe))) barplot(ab, las=1, xlab="Abundance class", ylab="Frequency", col=grey(5:0/5)) ### Number of absences sum(spe==0) ### Proportion of zeros in the community data set sum(spe==0)/(nrow(spe)*ncol(spe)) ### Species occurences spe.pres <- colSums(spe>0) # compute the number of sites where each species is present hist(spe.pres, main="Species occurence", las=1, xlab="Frequency of occurences", ylab="Number of species", breaks=seq(0,30, by=5), col="grey") ### Sites richness site.pres <- rowSums(spe>0) # compute the number of species at ecah site hist(site.pres, main="Species richness", las=1, xlab="Frequency of sites", ylab="Number of species", breaks=seq(0,30, by=5), col="grey") ### Compute diversity indices ?diversity (H <- diversity(spe, index="shannon")) # Shannon diversity index (N <- diversity(spe, index="simpson")) # Simpson diversity index ## 1.2 Environmental data #### names(env) dim(env) str(env) head(env) summary(env) pairs(env, main="Bivariate Plots of the Environmental Data" ) # Standardization of the 11 environmental data env.z <- decostand(env, method="standardize") apply(env.z, 2, mean) # the data are now centered (means~0) apply(env.z, 2, sd) # the data are now scaled (standard deviations=1) # Note: explanatory variables expressed in different units # must obligatory be standardized before computing distances measures and ordination analysis. #***************************************************************************************# # 2.Association measures #### ## 2.1. Distance measures #### ### Quantitative species data ?vegdist # this function compute dissimilarity indices useful for community decomposition data (see method options) (spe.db<-vegdist(spe, method="bray")) # Bray-Curtis dissimilarity (spe.dj<-vegdist(spe, method="jac")) # Jaccard dissimilarity (spe.dg<-vegdist(spe, method="gower")) # Gower dissimilarity ### Presence-Absence species data (spe.db.pa<-vegdist(spe, method="bray", binary=TRUE)) #Bray-Curtis dissimilarity (spe.dj.pa<-vegdist(spe, method="jac", binary=TRUE)) # Jaccard dissimilarity (spe.dg.pa<-vegdist(spe, method="gower", binary=TRUE)) # Gower dissimilarity ### Graphical display of association matrices: heat maps windows() #use x11() or quartz() on Mac coldiss(spe.db, byrank=FALSE, diag=TRUE) # Heat map of Bray-Curtis dissimilarity windows() coldiss(spe.dj, byrank=FALSE, diag=TRUE) # Heat map of Jaccard dissimilarity windows() coldiss(spe.dg, byrank=FALSE, diag=TRUE) # Heat map of Gower dissimilarity ### Quantitative environmental data #### Association between environmental data (Q mode) ?dist # this function also compute dissimilarity matrix env.de <- dist(env.z, method = "euclidean") # euclidean distance matrix of the standardized environmental variables windows() coldiss(env.de, diag=TRUE) ### Dependance between environmental data (R mode) (env.pearson<-cor(env)) # Pearson r linear correlation round(env.pearson, 2) (env.ken<-cor(env, method="kendall")) # Kendall tau rank correlation round(env.ken, 2) ### Mixed types of environmental variables #### Association between mixed types of environmental data (Q mode) ##### Create a fictious data frame var.g1 <- rnorm(30, 0, 1) var.g2 <- runif(30, 0, 5) var.g3 <- gl(3, 10) var.g4 <- gl(2, 5, 30) (dat2 <- data.frame(var.g1, var.g2, var.g3, var.g4)) str(dat2) summary(dat2) ##### Compute the Gower dissimilarity matrix ?daisy (dat2.dg <- daisy(dat2, metric="gower")) coldiss(dat2.dg) # Challenge 1 Advanced #### # Calculate by hand the Bray-Curtis and the Gower dissimilarity of species abundance CHA, TRU and VAI for sites 1, 2 and 3 # (answer below) # # # # # # # # # # # # # # # # # Bray-Curtis dissimilarity : d[jk] = (sum abs(x[ij]-x[ik]))/(sum (x[ij]+x[ik])) (spe.challenge<-spe[1:3,1:3]) # Calculate the total species abundance by site (Abund.s1<-sum(spe.challenge[1,])) (Abund.s2<-sum(spe.challenge[2,])) (Abund.s3<-sum(spe.challenge[3,])) # Calculate the species abundance differences between pairs of sites for each species Spec.s1s2<-0 Spec.s1s3<-0 Spec.s2s3<-0 for (i in 1:3) { Spec.s1s2<-Spec.s1s2+abs(sum(spe.challenge[1,i]-spe.challenge[2,i])) Spec.s1s3<-Spec.s1s3+abs(sum(spe.challenge[1,i]-spe.challenge[3,i])) Spec.s2s3<-Spec.s2s3+abs(sum(spe.challenge[2,i]-spe.challenge[3,i])) } # Calculate the Bray-Curtis dissimilarity (db.s1s2<-Spec.s1s2/(Abund.s1+Abund.s2)) (db.s1s3<-Spec.s1s3/(Abund.s1+Abund.s3)) (db.s2s3<-Spec.s2s3/(Abund.s2+Abund.s3)) # Compare your results (spe.db.challenge<-vegdist(spe.challenge, method="bray")) # Gower dissimilarity : d[jk] = (1/M) sum(abs(x[ij]-x[ik])/(max(x[i])-min(x[i]))) # Calculate the number of columns in your dataset M<-ncol(spe.challenge) # Calculate the species abundance differences between pairs of sites for each species Spe1.s1s2<-abs(spe.challenge[1,1]-spe.challenge[2,1]) Spe2.s1s2<-abs(spe.challenge[1,2]-spe.challenge[2,2]) Spe3.s1s2<-abs(spe.challenge[1,3]-spe.challenge[2,3]) Spe1.s1s3<-abs(spe.challenge[1,1]-spe.challenge[3,1]) Spe2.s1s3<-abs(spe.challenge[1,2]-spe.challenge[3,2]) Spe3.s1s3<-abs(spe.challenge[1,3]-spe.challenge[3,3]) Spe1.s2s3<-abs(spe.challenge[2,1]-spe.challenge[3,1]) Spe2.s2s3<-abs(spe.challenge[2,2]-spe.challenge[3,2]) Spe3.s2s3<-abs(spe.challenge[2,3]-spe.challenge[3,3]) # Calculate the range of each species abundance between sites Range.spe1<-max(spe.challenge[,1]) - min (spe.challenge[,1]) Range.spe2<-max(spe.challenge[,2]) - min (spe.challenge[,2]) Range.spe3<-max(spe.challenge[,3]) - min (spe.challenge[,3]) # Calculate the Gower dissimilarity (dg.s1s2<-(1/M)*((Spe2.s1s2/Range.spe2)+(Spe3.s1s2/Range.spe3))) (dg.s1s3<-(1/M)*((Spe2.s1s3/Range.spe2)+(Spe3.s1s3/Range.spe3))) (dg.s2s3<-(1/M)*((Spe2.s2s3/Range.spe2)+(Spe3.s2s3/Range.spe3))) # Compare your results (spe.db.challenge<-vegdist(spe.challenge, method="gower")) ## 2.2. Transformations for community composition data #### ?decostand # this function provide some standardiztion methods for community domposition data (see method options) # Transform abundances to absence-presence (1-0) (spe.pa<-decostand(spe, method="pa")) # Hellinger transformation (spe.hel<-decostand(spe, method="hellinger")) # Chi.square transformation (spe.chi<-decostand(spe, method="chi.square")) # Challenge 2 Advanced #### # Calculate by hand the Hellinger and the Chi-square transformed abundances data # (answer below) # # # # # # # # # # # # # # # # # # Hellinger tranformation # First calculate the total species abundances by site (site.totals<-apply(spe, 1, sum)) # Scale species abundances by dividing them by site totals (scale.spe<-spe/site.totals) # Calculate de square root of scaled species abundances (sqrt.scale.spe<-sqrt(scale.spe)) # Compare the results sqrt.scale.spe spe.hel sqrt.scale.spe-spe.hel # or: sqrt.scale.spe/spe.hel # Chi-squre transformation # First calculate the total species abundances by site (site.totals<-apply(spe, 1, sum)) # Then calculate the square root of total species abundances (sqrt.spe.totals<-sqrt(apply(spe, 2, sum))) # Scale species abundances by dividing them by the site totals and the species totals scale.spe2<-spe for (i in 1:nrow(spe)) { for (j in 1:ncol(spe)) { (scale.spe2[i,j]<-scale.spe2[i,j]/(site.totals[i]*sqrt.spe.totals[j])) }} #Adjust the scale abundance species by multplying by the square root of the species matrix total (adjust.scale.spe2=scale.spe2*sqrt(sum(rowSums(spe)))) # Compare the results adjust.scale.spe2 spe.chi adjust.scale.spe2-spe.chi # or: adjust.scale.spe2/spe.chi ## 2.3 Clustering #### ### Calculating Hellinger distance spe.dhel<-vegdist(spe.hel,method="euclidean") #generates the distance matrix from Hellinger transformed data # See difference between the two matrices head(spe.hel)# Hellinger-transformed species data head(spe.dhel)# Hellinger distances among sites ### Comparison of single and complete linkage clustering #Perform single linkage clustering spe.dhel.single<-hclust(spe.dhel, method=”single”) plot(spe.dhel.single) #Perform complete linkage clustering spe.dhel.complete<-hclust(spe.dhel, method=”complete”) plot(spe.dhel.complete) ### Ward's minimum variance method # Perform Ward minimum variance clustering spe.dhel.ward<-hclust(spe.dhel, method=”ward.D2”) plot(spe.dhel.ward) # Re-plot the dendrogram by using the square roots of the fusion levels spe.dhel.ward$height<-sqrt(spe.dhel.ward$height) plot(spe.dhel.ward) plot(spe.dhel.ward, hang=-1) # hang=-1 aligns all objets on the same line #***************************************************************************************# # 3.Unconstrained Ordination #### ## 3.1. Principal Components Analysis (PCA) #### ### PCA on hellinger-transformed species data ?rda #### Run the PCA spe.h.pca <- rda(spe.hel) # Extract the results summary(spe.h.pca) #### Extract specific parts of the results summary(spe.h.pca) # overall results summary(spe.h.pca, display=NULL) # only eigenvalues and their contribution to the variance eigen(cov(spe.hel)) # also compute the eigenvalues (spe.scores <- scores(spe.h.pca, display="species", choices=c(1,2))) # species scores on the first two PCA axes (site.scores <- scores(spe.h.pca, display="sites", choices=c(1,2))) # sites scores on the first two PCA axes #Note: if you don’t specify the number of principal components to extract (e.g. choices=c(1,2) or choices=c(1:2) then all of the scores will be extracted for all of the principal components. # Identify the significant axis using the Kaiser-Guttman criterion ev <- spe.h.pca$CA$eig ev[ev>mean(ev)] n <- length(ev) barplot(ev, main="Eigenvalues", col="grey", las=2) abline(h=mean(ev), col="red") legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n") ### PCA on the environmental variables #### Run the PCA env.pca <- rda(env.z) # or rda(env, scale=TRUE) #### Extract the results summary(env.pca) summary(env.pca, scaling=2) # Identify the significant axis using the Kaiser-Guttman criterion ev <- env.pca$CA$eig ev[ev>mean(ev)] n <- length(ev) barplot(ev, main="Eigenvalues", col="grey", las=2) abline(h=mean(ev), col="red") legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n") ### Construct a biplot #### Biplot of the PCA on transformed species data (scaling 1) windows() plot(spe.h.pca) windows() ?biplot biplot(spe.h.pca) windows() plot(spe.h.pca, scaling=1, type="none", # scaling 1 = distance biplot : # distances among abjects in the biplot approximate their Euclidean distances # but angles among descriptor vectors DO NOT reflect their correlation xlab=c("PC1 (%)", round((spe.h.pca$CA$eig[1]/sum(spe.h.pca$CA$eig))*100,2)), ylab=c("PC2 (%)", round((spe.h.pca$CA$eig[2]/sum(spe.h.pca$CA$eig))*100,2))) points(scores(spe.h.pca, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spe.h.pca, display="species", choices=c(1), scaling=1), scores(spe.h.pca, display="species", choices=c(2), scaling=1), labels=rownames(scores(spe.h.pca, display="species", scaling=1)), col="red", cex=0.8) #### Biplot on the environmental variables (scaling 2) windows() plot(env.pca) windows() plot(env.pca, scaling=2, type="none", # scaling 2 = correlation biplot : # distances among abjects in the biplot DO NOT approximate their Euclidean distances # but angles among descriptor vectors reflect their correlation xlab=c("PC1 (%)", round((env.pca$CA$eig[1]/sum(env.pca$CA$eig))*100,2)), ylab=c("PC2 (%)", round((env.pca$CA$eig[2]/sum(env.pca$CA$eig))*100,2)), xlim=c(-1,1), ylim=c(-1,1)) points(scores(env.pca, display="sites", choices=c(1,2), scaling=2), pch=21, col="black", bg="darkgreen", cex=1.2) text(scores(env.pca, display="species", choices=c(1), scaling=2), scores(env.pca, display="species", choices=c(2), scaling=2), labels=rownames(scores(env.pca, display="species", scaling=2)), col="red", cex=0.8) # Challenge 3 #### # Run the PCA of the mite species abundances # Which are the significant axes ? # Which group of sites can you identify ? # Which species are related to each group of sites data(mite) mite.spe <- mite #data from the vegan package # (answer below) # # # # # # # # # # # # # # # # # # mite.spe.hel<-decostand(mite.spe, method="hellinger") mite.spe.h.pca<-rda(mite.spe.hel) ev<-mite.spe.h.pca$CA$eig ev[ev>mean(ev)] n=length(ev) barplot(ev, main="Eigenvalues", col="grey", las=2) abline(h=mean(ev), col="red") legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n") summary(mite.spe.h.pca, display=NULL) windows() plot(mite.spe.h.pca, scaling=1, type="none", xlab=c("PC1 (%)", round((mite.spe.h.pca$CA$eig[1]/sum(mite.spe.h.pca$CA$eig))*100,2)), ylab=c("PC2 (%)", round((mite.spe.h.pca$CA$eig[2]/sum(mite.spe.h.pca$CA$eig))*100,2))) points(scores(mite.spe.h.pca, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(mite.spe.h.pca, display="species", choices=c(1), scaling=1), scores(mite.spe.h.pca, display="species", choices=c(2), scaling=1), labels=rownames(scores(mite.spe.h.pca, display="species", scaling=1)), col="red", cex=0.8) ## 3.2. Principal Coordinate Anaysis (PCoA) ### Run the PCoA on hellinger transformed species abundances ?cmdscale cmdscale(dist(spe.hel), k=(nrow(spe)-1), eig=TRUE) ?pcoa spe.h.pcoa<-pcoa(dist(spe.hel)) ### Extract the results spe.h.pcoa ### Construct the biplot windows() biplot.pcoa(spe.h.pcoa, spe.hel) ### Run the PCoA on a Bray-Curtis dissimilarity matrix spe.db spe.bray.pcoa<-pcoa(spe.db, correction="cailliez") spe.bray.pcoa windows() biplot.pcoa(spe.bray.pcoa) # The distance measure choosen thus strongly influenced the PCoA results # Challenge 4 #### # Run the PCoA of the hellinger-transformed mite species abundances # Which are the significant axes ? # Which group of sites can you identify ? # Which species are related to each group of sites ? # How does the PCoA results compare with the PCA # (answer below) # # # # # # # # # # # # # # # # # # mite.spe.h.pcoa<-pcoa(dist(mite.spe.hel)) mite.spe.h.pcoa windows() biplot.pcoa(mite.spe.h.pcoa, mite.spe.hel) #***************************************************************************************# # 4. Canonical Analysis ## 4.1. Redundancy Analysis (RDA) ### Prepare the data env <- subset(env, select = -das) # remove the "distance from the source" variable ### Run the RDA of all exlanatory variables of env2 on species abundances ?rda spe.rda <- rda(spe.hel~., data=env) ### Extract the results summary(spe.rda, display=NULL) ### Select the significant explanatory variables by forward selection ?ordiR2step ordiR2step(rda(spe.hel~1, data=env), scope= formula(spe.rda), direction= "forward", R2scope=TRUE, pstep=1000) env.signif <- subset(env, select = c("alt", "oxy", "dbo")) ### Re-run the RDA of significant explanatory variables on species abundances spe.rda.signif <- rda(spe.hel~., data=env.signif) ### Extract the results summary(spe.rda.signif, display=NULL) ### Calculate the adjusted R^2 of the RDA (R2adj <- RsquareAdj(spe.rda.signif)$adj.r.squared) ### Test the significance of the model and the significance of axis ?anova.cca anova(spe.rda.signif, step=1000) anova(spe.rda.signif, step=1000, by="axis") ### Plot the results #### quick plots scaling 1 and 2 windows() plot(spe.rda.signif, scaling=1, main="Triplot RDA (scaling 1)") windows() plot(spe.rda.signif, scaling=2, main="Triplot RDA (scaling 2)") #### advanced plots scaling 1 windows() plot(spe.rda.signif, scaling=1, main="Triplot RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(spe.rda.signif, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spe.rda.signif, display="species", choices=c(1), scaling=1), scores(spe.rda.signif, display="species", choices=c(2), scaling=1), labels=rownames(scores(spe.rda.signif, display="species", scaling=1)), col="grey", cex=0.8) arrows(0,0, scores(spe.rda.signif, display="bp", choices=c(1), scaling=1), scores(spe.rda.signif, display="bp", choices=c(2), scaling=1), col="red") text(scores(spe.rda.signif, display="bp", choices=c(1), scaling=1)+0.05, scores(spe.rda.signif, display="bp", choices=c(2), scaling=1)+0.05, labels=rownames(scores(spe.rda.signif, display="bp", choices=c(2), scaling=1)), col="red", cex=1) #### advanced plots scaling 2 windows() plot(spe.rda.signif, scaling=2, main="Triplot RDA - scaling 2", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(spe.rda.signif, display="sites", choices=c(1,2), scaling=2), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spe.rda.signif, display="species", choices=c(1), scaling=2), scores(spe.rda.signif, display="species", choices=c(2), scaling=2), labels=rownames(scores(spe.rda.signif, display="species", scaling=2)), col="grey", cex=0.8) arrows(0,0, scores(spe.rda.signif, display="bp", choices=c(1), scaling=2), scores(spe.rda.signif, display="bp", choices=c(2), scaling=2), col="red") text(scores(spe.rda.signif, display="bp", choices=c(1), scaling=2)+0.05, scores(spe.rda.signif, display="bp", choices=c(2), scaling=2)+0.05, labels=rownames(scores(spe.rda.signif, display="bp", choices=c(2), scaling=2)), col="red", cex=1) # Challenge 5 #### # Run the RDA of the mite environmental variables of the mite species abundances data(mite.env) #data from vegan package # Which are the significant explanatory variables ? # How much of the variation explain the significant explanatory variables ? # Which are the significant axis ? # Which group of sites can you identify ? # Which species are related to each group of sites ? # (answer below) # # # # # # # # # # # # # # # # # # mite.spe.rda <- rda(mite.spe.hel~., data=mite.env) ordiR2step(rda(mite.spe.hel~1, data=mite.env), scope= formula(mite.spe.rda), direction= "forward", R2scope=TRUE, pstep=1000) mite.env.signif <- subset(mite.env, select = c("WatrCont", "Shrub", "Substrate", "Topo", "SubsDens")) mite.spe.rda.signif <- rda(mite.spe~., data=mite.env.signif) summary(mite.spe.rda.signif, display=NULL) (R2adj <- RsquareAdj(mite.spe.rda.signif)$adj.r.squared) anova(mite.spe.rda.signif, step=1000) anova(mite.spe.rda.signif, step=1000, by="axis") windows() plot(mite.spe.rda.signif, scaling=1, main="Triplot RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(mite.spe.rda.signif, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(mite.spe.rda.signif, display="species", choices=c(1), scaling=1), scores(mite.spe.rda.signif, display="species", choices=c(2), scaling=1), labels=rownames(scores(mite.spe.rda.signif, display="species", scaling=1)), col="grey", cex=0.8) arrows(0,0, scores(mite.spe.rda.signif, display="bp", choices=c(1), scaling=1), scores(mite.spe.rda.signif, display="bp", choices=c(2), scaling=1), col="red") text(scores(mite.spe.rda.signif, display="bp", choices=c(1), scaling=1)+0.05, scores(mite.spe.rda.signif, display="bp", choices=c(2), scaling=1)+0.05, labels=rownames(scores(mite.spe.rda.signif, display="bp", choices=c(2), scaling=1)), col="red", cex=1) ## 4.2 Partial RDA : effect of water chemistry on species abundances partialling out the physiography ### Divide the env data frame in two data frames envtopo <- env[, c(1:3)] # Physiography : explanatory dataset 1 names(envtopo) envchem <- env[, c(4:10)] # Water quality : explanatory dataset 2 names(envchem) ### Run the partial RDA spechem.physio <- rda(spe.hel, envchem, envtopo) # or : spechem.physio2 <- rda(spe.hel ~ pH + dur + pho + nit + amm + oxy + dbo + Condition(alt + pen + deb), data=env) ### Extract the results summary(spechem.physio, display=NULL) ### Calculate adjusted the R^2 of the partial RDA (R2adj <- RsquareAdj(spechem.physio)$adj.r.squared) ### Test of the partial RDA and the significance of axis anova(spechem.physio, step=1000) anova(spechem.physio2, step=1000, by="axis") ### Construct the triplots #### Scaling 1 windows(title="Partial RDA scaling 1") plot(spechem.physio, scaling=1, main="Triplot partial RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(spechem.physio, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spechem.physio, display="species", choices=c(1), scaling=1), scores(spechem.physio, display="species", choices=c(2), scaling=1), labels=rownames(scores(spechem.physio, display="species", scaling=1)), col="grey", cex=0.8) arrows(0,0, scores(spechem.physio, display="bp", choices=c(1), scaling=1), scores(spechem.physio, display="bp", choices=c(2), scaling=1), col="red") text(scores(spechem.physio, display="bp", choices=c(1), scaling=1)+0.05, scores(spechem.physio, display="bp", choices=c(2), scaling=1)+0.05, labels=rownames(scores(spechem.physio, display="bp", choices=c(2), scaling=1)), col="red", cex=1) #### Scaling 2 windows(title="Partial RDA scaling 2") plot(spechem.physio, scaling=2, main="Triplot partial RDA - scaling 2", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(spechem.physio, display="sites", choices=c(1,2), scaling=2), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(spechem.physio, display="species", choices=c(1), scaling=2), scores(spechem.physio, display="species", choices=c(2), scaling=2), labels=rownames(scores(spechem.physio, display="species", scaling=2)), col="grey", cex=0.8) arrows(0,0, scores(spechem.physio, display="bp", choices=c(1), scaling=2), scores(spechem.physio, display="bp", choices=c(2), scaling=2), col="red") text(scores(spechem.physio, display="bp", choices=c(1), scaling=2)+0.05, scores(spechem.physio, display="bp", choices=c(2), scaling=2)+0.05, labels=rownames(scores(spechem.physio, display="bp", choices=c(2), scaling=2)), col="red", cex=1) # Challenge 6 #### # Run the partial RDA of the mite environmental variables of the mite species abundances # partialling out for the substrate variables (SubsDens, WaterCont and Substrate) # Is the model significant ? # Which are the significant axis ? # Interpret the obtained triplot. # (answer below) # # # # # # # # # # # # # # # # # # mite.spe.subs<-rda(mite.spe.hel ~ Shrub + Topo + Condition(SubsDens + WatrCont + Substrate), data=mite.env) summary(mite.spe.subs, display=NULL) (R2adj <- RsquareAdj(mite.spe.subs)$adj.r.squared) anova(mite.spe.subs, step=1000) anova(mite.spe.subs, step=1000, by="axis") #### Triplot scaling 1 windows(title="Partial RDA scaling 1") plot(mite.spe.subs, scaling=1, main="Triplot partial RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(mite.spe.subs, display="sites", choices=c(1,2), scaling=1), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(mite.spe.subs, display="species", choices=c(1), scaling=1), scores(mite.spe.subs, display="species", choices=c(2), scaling=1), labels=rownames(scores(mite.spe.subs, display="species", scaling=1)), col="grey", cex=0.8) arrows(0,0, scores(mite.spe.subs, display="bp", choices=c(1), scaling=1), scores(mite.spe.subs, display="bp", choices=c(2), scaling=1), col="red") text(scores(mite.spe.subs, display="bp", choices=c(1), scaling=1)+0.05, scores(mite.spe.subs, display="bp", choices=c(2), scaling=1)+0.05, labels=rownames(scores(mite.spe.subs, display="bp", choices=c(2), scaling=1)), col="red", cex=1) #### Triplot scaling 2 windows(title="Partial RDA scaling 2") plot(mite.spe.subs, scaling=2, main="Triplot partial RDA - scaling 2", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1)) points(scores(mite.spe.subs, display="sites", choices=c(1,2), scaling=2), pch=21, col="black", bg="steelblue", cex=1.2) text(scores(mite.spe.subs, display="species", choices=c(1), scaling=2), scores(mite.spe.subs, display="species", choices=c(2), scaling=2), labels=rownames(scores(mite.spe.subs, display="species", scaling=2)), col="grey", cex=0.8) arrows(0,0, scores(mite.spe.subs, display="bp", choices=c(1), scaling=2), scores(mite.spe.subs, display="bp", choices=c(2), scaling=2), col="red") text(scores(mite.spe.subs, display="bp", choices=c(1), scaling=2)+0.05, scores(mite.spe.subs, display="bp", choices=c(2), scaling=2)+0.05, labels=rownames(scores(mite.spe.subs, display="bp", choices=c(2), scaling=2)), col="red", cex=1) # 4.3. Variation partitioning by partial RDA ?varpart vegandocs("partitioning.pdf") ## Variation partitioning with all explanatory variables spe.part.all <- varpart(spe.hel, envchem, envtopo) spe.part.all windows(title="Variation partitioning - all variables") plot(spe.part.all, digits=2) ## Variation partitioning with only significant variables ### Separate forward selection in each subset of environmental variables spe.chem <- rda(spe.hel~., data=envchem) R2a.all.chem <- RsquareAdj(spe.chem)$adj.r.squared ordiR2step(rda(spe.hel~1, data=envchem), scope= formula(spe.chem), direction= "forward", R2scope=TRUE, pstep=1000) names(envchem) (envchem.pars <- envchem[, c( 4, 6, 7 )]) spe.topo <- rda(spe.hel~., data=envtopo) R2a.all.topo <- RsquareAdj(spe.topo)$adj.r.squared ordiR2step(rda(spe.hel~1, data=envtopo), scope= formula(spe.topo), direction= "forward", R2scope=TRUE, pstep=1000) names(envtopo) envtopo.pars <- envtopo[, c(1,2)] ### Variation partitioning (spe.part <- varpart(spe.hel, envchem.pars, envtopo.pars)) windows(title="Variation partitioning - parsimonious subsets") plot(spe.part, digits=2) # Tests of all testable fractions anova(rda(spe.hel, envchem.pars), step=1000) # Test of fractions [a+b] anova(rda(spe.hel, envtopo.pars), step=1000) # Test of fractions [b+c] env.pars <- cbind(envchem.pars, envtopo.pars) anova(rda(spe.hel, env.pars), step=1000) # Test of fractions [a+b+c] anova(rda(spe.hel, envchem.pars, envtopo.pars), step=1000) # Test of fraction [a] anova(rda(spe.hel, envtopo.pars, envchem.pars), step=1000) # Test of fraction [c] # Challenge 7 #### # Run the variation partitioning of the mite species abundances # with a first dataset for the significant substrate variables (SubsDens, WaterCont and Substrate) # and a second dataset for the significant other variables (Shrud and Topo) # Which proportion of the variation explain each dataset ? # Which are the significant fractions ? # (answer below) # # # # # # # # # # # # # # # # # # str(mite.env) (mite.subs<-mite.env[,c(1,2,3)]) (mite.other<-mite.env[,c(4,5)]) rda.mite.subs <- rda(mite.spe.hel~., data=mite.subs) R2a.all.chem <- RsquareAdj(rda.mite.subs)$adj.r.squared ordiR2step(rda(mite.spe.hel~1, data=mite.subs), scope= formula(rda.mite.subs), direction= "forward", R2scope=TRUE, pstep=1000) names(mite.subs) (mite.subs.pars <- mite.subs[, c(2, 3)]) rda.mite.other <- rda(mite.spe.hel~., data=mite.other) R2a.all.chem <- RsquareAdj(rda.mite.other)$adj.r.squared ordiR2step(rda(mite.spe.hel~1, data=mite.other), scope= formula(rda.mite.other), direction= "forward", R2scope=TRUE, pstep=1000) names(mite.other) (mite.other.pars <- mite.other[, c(1,2)]) ### Variation partitioning (mite.spe.part <- varpart(mite.spe.hel, ~WatrCont+Substrate, ~Shrub+Topo, data=mite.env)) windows(title="Variation partitioning - parsimonious subsets") plot(mite.spe.part, digits=2) # Tests of all testable fractions anova(rda(mite.spe.hel~ WatrCont+Substrate, data=mite.env), step=1000) # Test of fractions [a+b] anova(rda(mite.spe.hel~Shrub+Topo, data=mite.env), step=1000) # Test of fractions [b+c] (env.pars <- cbind(mite.env[,c(2,3,4,5)])) anova(rda(mite.spe.hel~ WatrCont+Substrate+Shrub+Topo, data=env.pars), step=1000) # Test of fractions [a+b+c] anova(rda(mite.spe.hel~WatrCont+Substrate + Condition(Shrub+Topo), data=env.pars), step=1000) # Test of fraction [a] anova(rda(mite.spe.hel~Shrub+Topo+ Condition(WatrCont+Substrate ), data=env.pars), step=1000) # Test of fraction [c] ## 4.4. Principal Response Curves (PRC) ?prc # van den Brink, P.J. & ter Braak, C.J.F. (1999). # Principal response curves: Analysis of time-dependent multivariate responses of biological community to stress. # Environmental Toxicology and Chemistry, 18, 138-148 # Chlorpyrifos experiment and experimental design data(pyrifos) week <- gl(11, 12, labels=c(-4, -1, 0.1, 1, 2, 4, 8, 12, 15, 19, 24)) dose <- factor(rep(c(0.1, 0, 0, 0.9, 0, 44, 6, 0.1, 44, 0.9, 0, 6), 11)) # PRC mod <- prc(pyrifos, dose, week) mod # RDA summary(mod) # PRC logabu <- colSums(pyrifos) windows() plot(mod, select = logabu > 100) # Permutations should be done only within one week, and we only # are interested on the first axis anova(mod, strata = week, first=TRUE, perm.max = 100) # PRC can be useful in community ecolgy # to compare the temporal evolution of a communty in experimental sites (e.g. restored ecosystems) # to the temporal evolution of a communty in control sites (e.g. reference ecosystems) # Challenge 8 #### # Run the PRC on the Doubs fish species abundances with last site removed considering that: # the first 14 sites correpond to 4 reference ecosystems # the last 14 sites correpond to 4 restored ecosystems # the first seven lines correponds to successive sampling of the reference sites 1 during year 1,2,3,4,5,6 and 7 # the lines 8 to 14 correponds to successive sampling of the reference sites 2 during year 1,2,3,4,5,6 and 7 # etc... # Does restoration enables the recovery of reference communities ? # Which species are related to reference ecosystems, to restored ecosystems ? # (answer below) # # # # # # # # # # # # # # # # # # dim(spe.hel) spe.hel<-spe.hel[-29,] reference<-rep(c(0), 14) restored<-rep(c(1), 14) type.site<-as.factor(c(reference, restored)) year<-as.factor(rep(c(1,2,3,4,5,6,7), 4)) spe.hel.prc<-prc(spe.hel, type.site, year) spe.hel.prc summary(spe.hel.prc) plot(spe.hel.prc) #***************************************************************************************# # 5. Other useful ordination methods ?cca # Constrained Correspondence Analysis (CCA) ?metaMDS # Nonmetric Multidimesional Scaling help(lda, package=MASS) # linear discriminant analysis ?CCorA # Canonical Correlation Analysis help(coinertia, package=ade4) # Coinertia Analysis help(mfa, package=ade4) # Multiple Factorial Analysis https://r-forge.r-project.org/R/?group_id=195 # packages AEM and PCNM contain functions to perform spatial analysis