# Name: Amanda Winegardner, Xavier Giroux-Bougard and B?renger Bourgeois  
# Date: January 2015
# Description: Ordination
# Dataset: File names are "DoubsSpe.csv", "DoubsEnv.csv"
# Notes: Material in R script obtained from 
#        Borcard D., Gillet F. et Legendre P., 2011. Numerical Ecology with R. Springer.
#***************************************************************************************#


# 0. Loading the data ####

rm(list=ls())
install.packages("vegan")
install.packages("gclus")
install.packages("ape")
library(vegan)
library(gclus)
library(ape)
source(file.choose()) # fichier coldiss.R

# Species community data frame (fish abundance): "DoubsSpe.csv"
spe <- read.csv(file.choose(), row.names=1)
spe <- spe[-8,] # site number 8 contains no species and must be removed

# Environmental data frame: "DoubsEnv.csv"
env <- read.csv(file.choose(), row.names=1)
env <- env[-8,]

#***************************************************************************************#

# 1.Explore the data ####

## 1.1 Species data ####
names(spe)
dim(spe)
str(spe)
head(spe)
summary(spe) 

### Species distibution, all species confounded
(ab <- table(unlist(spe)))
barplot(ab, las=1, xlab="Abundance class", ylab="Frequency", col=grey(5:0/5))

### Number of absences
sum(spe==0)

### Proportion of zeros in the community data set
sum(spe==0)/(nrow(spe)*ncol(spe))

### Species occurences
spe.pres <- colSums(spe>0) # compute the number of sites where each species is present
hist(spe.pres, main="Species occurence", las=1, xlab="Frequency of occurences", ylab="Number of species", breaks=seq(0,30, by=5), col="grey")

### Sites richness
site.pres <- rowSums(spe>0) # compute the number of species at ecah site
hist(site.pres, main="Species richness", las=1, xlab="Frequency of sites", ylab="Number of species", breaks=seq(0,30, by=5), col="grey")

### Compute diversity indices
?diversity
(H <- diversity(spe, index="shannon")) # Shannon diversity index
(N <- diversity(spe, index="simpson")) # Simpson diversity index

## 1.2 Environmental data ####
names(env)
dim(env)
str(env)
head(env)
summary(env)
pairs(env, main="Bivariate Plots of the Environmental Data" )

# Standardization of the 11 environmental data
env.z <- decostand(env, method="standardize")
apply(env.z, 2, mean) # the data are now centered (means~0)
apply(env.z, 2, sd)   # the data are now scaled (standard deviations=1)

# Note: explanatory variables expressed in different units
#       must obligatory be standardized before computing distances measures and ordination analysis.

#***************************************************************************************#

# 2.Association measures ####

## 2.1. Distance measures ####

### Quantitative species data
?vegdist # this function compute dissimilarity indices useful for community decomposition data (see method options)
(spe.db<-vegdist(spe, method="bray")) # Bray-Curtis dissimilarity
(spe.dj<-vegdist(spe, method="jac")) # Jaccard dissimilarity
(spe.dg<-vegdist(spe, method="gower")) # Gower dissimilarity

### Presence-Absence species data
(spe.db.pa<-vegdist(spe, method="bray", binary=TRUE)) #Bray-Curtis dissimilarity
(spe.dj.pa<-vegdist(spe, method="jac", binary=TRUE)) # Jaccard dissimilarity
(spe.dg.pa<-vegdist(spe, method="gower", binary=TRUE)) # Gower dissimilarity

### Graphical display of association matrices: heat maps
windows() #use x11() or quartz() on Mac
coldiss(spe.db, byrank=FALSE, diag=TRUE) # Heat map of Bray-Curtis dissimilarity
windows()
coldiss(spe.dj, byrank=FALSE, diag=TRUE) # Heat map of Jaccard dissimilarity
windows() 
coldiss(spe.dg, byrank=FALSE, diag=TRUE) # Heat map of Gower dissimilarity 

### Quantitative environmental data
#### Association between environmental data (Q mode)
?dist # this function also compute dissimilarity matrix
env.de <- dist(env.z, method = "euclidean") # euclidean distance matrix of the standardized environmental variables 
windows() 
coldiss(env.de, diag=TRUE) 

### Dependance between environmental data (R mode)
(env.pearson<-cor(env)) # Pearson r linear correlation
round(env.pearson, 2)
(env.ken<-cor(env, method="kendall")) # Kendall tau rank correlation
round(env.ken, 2)

### Mixed types of environmental variables
#### Association between mixed types of environmental data (Q mode)
##### Create a fictious data frame
var.g1 <- rnorm(30, 0, 1)
var.g2 <- runif(30, 0, 5)
var.g3 <- gl(3, 10)
var.g4 <- gl(2, 5, 30)
(dat2 <- data.frame(var.g1, var.g2, var.g3, var.g4))
str(dat2)
summary(dat2)
##### Compute the Gower dissimilarity matrix
?daisy
(dat2.dg <- daisy(dat2, metric="gower"))
coldiss(dat2.dg)

# Challenge 1 Advanced ####

# Calculate by hand the Bray-Curtis and the Gower dissimilarity of species abundance CHA, TRU and VAI for sites 1, 2 and 3
# (answer below)
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# Bray-Curtis dissimilarity : d[jk] = (sum abs(x[ij]-x[ik]))/(sum (x[ij]+x[ik]))
(spe.challenge<-spe[1:3,1:3])
# Calculate the total species abundance by site
(Abund.s1<-sum(spe.challenge[1,]))
(Abund.s2<-sum(spe.challenge[2,]))
(Abund.s3<-sum(spe.challenge[3,]))
# Calculate the species abundance differences between pairs of sites for each species
Spec.s1s2<-0
Spec.s1s3<-0
Spec.s2s3<-0
for (i in 1:3) {
  Spec.s1s2<-Spec.s1s2+abs(sum(spe.challenge[1,i]-spe.challenge[2,i]))
  Spec.s1s3<-Spec.s1s3+abs(sum(spe.challenge[1,i]-spe.challenge[3,i]))
  Spec.s2s3<-Spec.s2s3+abs(sum(spe.challenge[2,i]-spe.challenge[3,i])) }
# Calculate the Bray-Curtis dissimilarity  
(db.s1s2<-Spec.s1s2/(Abund.s1+Abund.s2))
(db.s1s3<-Spec.s1s3/(Abund.s1+Abund.s3)) 
(db.s2s3<-Spec.s2s3/(Abund.s2+Abund.s3)) 
# Compare your results
(spe.db.challenge<-vegdist(spe.challenge, method="bray"))

# Gower dissimilarity :    d[jk] = (1/M) sum(abs(x[ij]-x[ik])/(max(x[i])-min(x[i])))
# Calculate the number of columns in your dataset
M<-ncol(spe.challenge)
# Calculate the species abundance differences between pairs of sites for each species
Spe1.s1s2<-abs(spe.challenge[1,1]-spe.challenge[2,1])
Spe2.s1s2<-abs(spe.challenge[1,2]-spe.challenge[2,2])
Spe3.s1s2<-abs(spe.challenge[1,3]-spe.challenge[2,3])
Spe1.s1s3<-abs(spe.challenge[1,1]-spe.challenge[3,1])
Spe2.s1s3<-abs(spe.challenge[1,2]-spe.challenge[3,2])
Spe3.s1s3<-abs(spe.challenge[1,3]-spe.challenge[3,3])
Spe1.s2s3<-abs(spe.challenge[2,1]-spe.challenge[3,1])
Spe2.s2s3<-abs(spe.challenge[2,2]-spe.challenge[3,2])
Spe3.s2s3<-abs(spe.challenge[2,3]-spe.challenge[3,3])
# Calculate the range of each species abundance between sites  
Range.spe1<-max(spe.challenge[,1]) - min (spe.challenge[,1])
Range.spe2<-max(spe.challenge[,2]) - min (spe.challenge[,2])
Range.spe3<-max(spe.challenge[,3]) - min (spe.challenge[,3])
# Calculate the Gower dissimilarity  
(dg.s1s2<-(1/M)*((Spe2.s1s2/Range.spe2)+(Spe3.s1s2/Range.spe3)))
(dg.s1s3<-(1/M)*((Spe2.s1s3/Range.spe2)+(Spe3.s1s3/Range.spe3)))
(dg.s2s3<-(1/M)*((Spe2.s2s3/Range.spe2)+(Spe3.s2s3/Range.spe3)))
# Compare your results
(spe.db.challenge<-vegdist(spe.challenge, method="gower"))


## 2.2. Transformations for community composition data ####

?decostand # this function provide some standardiztion methods for community domposition data (see method options)
# Transform abundances to absence-presence (1-0)
(spe.pa<-decostand(spe, method="pa"))

# Hellinger transformation
(spe.hel<-decostand(spe, method="hellinger"))

# Chi.square transformation
(spe.chi<-decostand(spe, method="chi.square"))


# Challenge 2 Advanced ####

# Calculate by hand the Hellinger and the Chi-square transformed abundances data
# (answer below)
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
#
# Hellinger tranformation
# First calculate the total species abundances by site
(site.totals<-apply(spe, 1, sum))
# Scale species abundances by dividing them by site totals
(scale.spe<-spe/site.totals)
# Calculate de square root of scaled species abundances
(sqrt.scale.spe<-sqrt(scale.spe))
# Compare the results
sqrt.scale.spe
spe.hel
sqrt.scale.spe-spe.hel # or: sqrt.scale.spe/spe.hel

# Chi-squre transformation
# First calculate the total species abundances by site
(site.totals<-apply(spe, 1, sum))
# Then calculate the square root of total species abundances
(sqrt.spe.totals<-sqrt(apply(spe, 2, sum)))
# Scale species abundances by dividing them by the site totals and the species totals
scale.spe2<-spe
for (i in 1:nrow(spe)) {
  for (j in 1:ncol(spe)) {
    (scale.spe2[i,j]<-scale.spe2[i,j]/(site.totals[i]*sqrt.spe.totals[j]))   }}
#Adjust the scale abundance species by multplying by the square root of the species matrix total                                         
(adjust.scale.spe2=scale.spe2*sqrt(sum(rowSums(spe))))
# Compare the results
adjust.scale.spe2
spe.chi
adjust.scale.spe2-spe.chi # or: adjust.scale.spe2/spe.chi

#***************************************************************************************#

# 3.Unconstrained Ordination ####

## 3.1. Principal Components Analysis (PCA) ####

### PCA on hellinger-transformed species data
?rda
#### Run the PCA
spe.h.pca <- rda(spe.hel)
# Extract the results
summary(spe.h.pca)

#### Extract specific parts of the results
summary(spe.h.pca) # overall results
summary(spe.h.pca, display=NULL) # only eigenvalues and their contribution to the variance
eigen(cov(spe.hel)) # also compute the eigenvalues
(spe.scores <- scores(spe.h.pca, display="species", choices=c(1,2))) # species scores on the first two PCA axes
(site.scores <- scores(spe.h.pca, display="sites", choices=c(1,2))) # sites scores on the first two PCA axes
#Note: if you don’t specify the number of principal components to extract (e.g. choices=c(1,2) or choices=c(1:2) then all of the scores will be extracted for all of the principal components.

# Identify the significant axis using the Kaiser-Guttman criterion
ev <- spe.h.pca$CA$eig
ev[ev>mean(ev)]
n <- length(ev)
barplot(ev, main="Eigenvalues", col="grey", las=2)
abline(h=mean(ev), col="red") 
legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n")

### PCA on the environmental variables
#### Run the PCA
env.pca <- rda(env.z) # or rda(env, scale=TRUE)

#### Extract the results
summary(env.pca)
summary(env.pca, scaling=2)

# Identify the significant axis using the Kaiser-Guttman criterion
ev <- env.pca$CA$eig
ev[ev>mean(ev)]
n <- length(ev)
barplot(ev, main="Eigenvalues", col="grey", las=2)
abline(h=mean(ev), col="red") 
legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n")


### Construct a biplot
#### Biplot of the PCA on transformed species data (scaling 1)
windows()
plot(spe.h.pca)
windows()
?biplot 
biplot(spe.h.pca)
windows()
plot(spe.h.pca, scaling=1, type="none", # scaling 1 = distance biplot : 
     # distances among abjects in the biplot approximate their Euclidean distances
     # but angles among descriptor vectors DO NOT reflect their correlation
     xlab=c("PC1 (%)", round((spe.h.pca$CA$eig[1]/sum(spe.h.pca$CA$eig))*100,2)),
     ylab=c("PC2 (%)", round((spe.h.pca$CA$eig[2]/sum(spe.h.pca$CA$eig))*100,2)))
points(scores(spe.h.pca, display="sites", choices=c(1,2), scaling=1),
       pch=21, col="black", bg="steelblue", cex=1.2)
text(scores(spe.h.pca, display="species", choices=c(1), scaling=1),
     scores(spe.h.pca, display="species", choices=c(2), scaling=1),
     labels=rownames(scores(spe.h.pca, display="species", scaling=1)),
     col="red", cex=0.8)       

#### Biplot on the environmental variables (scaling 2)
windows()
plot(env.pca)
windows()
plot(env.pca, scaling=2, type="none", # scaling 2 = correlation biplot : 
     # distances among abjects in the biplot DO NOT approximate their Euclidean distances
     # but angles among descriptor vectors reflect their correlation
     xlab=c("PC1 (%)", round((env.pca$CA$eig[1]/sum(env.pca$CA$eig))*100,2)),
     ylab=c("PC2 (%)", round((env.pca$CA$eig[2]/sum(env.pca$CA$eig))*100,2)),
     xlim=c(-1,1), ylim=c(-1,1))
points(scores(env.pca, display="sites", choices=c(1,2), scaling=2), 
       pch=21, col="black", bg="darkgreen", cex=1.2) 
text(scores(env.pca, display="species", choices=c(1), scaling=2),
     scores(env.pca, display="species", choices=c(2), scaling=2),
     labels=rownames(scores(env.pca, display="species", scaling=2)),
     col="red", cex=0.8)

# Challenge 3 ####
# Run the PCA of the mite species abundances
# Which are the significant axes ?
# Which group of sites can you identify ?
# Which species are related to each group of sites
data(mite)
mite.spe <- mite #data from the vegan package
# (answer below)
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
#
# 
mite.spe.hel<-decostand(mite.spe, method="hellinger")
mite.spe.h.pca<-rda(mite.spe.hel)        
ev<-mite.spe.h.pca$CA$eig
ev[ev>mean(ev)]
n=length(ev)
barplot(ev, main="Eigenvalues", col="grey", las=2)
abline(h=mean(ev), col="red") 
legend("topright", "Average eigenvalue", lwd=1, col=2, bty="n")       
summary(mite.spe.h.pca, display=NULL)
windows()
plot(mite.spe.h.pca, scaling=1, type="none", 
     xlab=c("PC1 (%)", round((mite.spe.h.pca$CA$eig[1]/sum(mite.spe.h.pca$CA$eig))*100,2)),
     ylab=c("PC2 (%)", round((mite.spe.h.pca$CA$eig[2]/sum(mite.spe.h.pca$CA$eig))*100,2)))
points(scores(mite.spe.h.pca, display="sites", choices=c(1,2), scaling=1),
       pch=21, col="black", bg="steelblue", cex=1.2)
text(scores(mite.spe.h.pca, display="species", choices=c(1), scaling=1),
     scores(mite.spe.h.pca, display="species", choices=c(2), scaling=1),
     labels=rownames(scores(mite.spe.h.pca, display="species", scaling=1)),
     col="red", cex=0.8)  

## 3.2. Principal Coordinate Anaysis (PCoA)
### Run the PCoA on hellinger transformed species abundances
?cmdscale
cmdscale(dist(spe.hel), k=(nrow(spe)-1), eig=TRUE)
?pcoa
spe.h.pcoa<-pcoa(dist(spe.hel))
### Extract the results
spe.h.pcoa
### Construct the biplot
windows()
biplot.pcoa(spe.h.pcoa, spe.hel)

### Run the PCoA on a Bray-Curtis dissimilarity matrix
spe.db
spe.bray.pcoa<-pcoa(spe.db, correction="cailliez")
spe.bray.pcoa
windows()
biplot.pcoa(spe.bray.pcoa)
# The distance measure choosen thus strongly influenced the PCoA results

# Challenge 4 ####
# Run the PCoA of the hellinger-transformed mite species abundances
# Which are the significant axes ?
# Which group of sites can you identify ?
# Which species are related to each group of sites ?
# How does the PCoA results compare with the PCA
# (answer below)
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
#
# 
mite.spe.h.pcoa<-pcoa(dist(mite.spe.hel))
mite.spe.h.pcoa
windows()
biplot.pcoa(mite.spe.h.pcoa, mite.spe.hel)

#***************************************************************************************#
# 4. Canonical Analysis
## 4.1. Redundancy Analysis (RDA)

### Prepare the data
env <- subset(env, select = -das) # remove the "distance from the source" variable

### Run the RDA of all exlanatory variables of env2 on species abundances
?rda
spe.rda <- rda(spe.hel~., data=env)

### Extract the results
summary(spe.rda, display=NULL)

### Select the significant explanatory variables by forward selection
?ordiR2step
ordiR2step(rda(spe.hel~1, data=env), scope= formula(spe.rda), direction= "forward", R2scope=TRUE, pstep=1000)
env.signif <- subset(env, select = c("alt", "oxy", "dbo"))

### Re-run the RDA of significant explanatory variables on species abundances
spe.rda.signif <- rda(spe.hel~., data=env.signif)

### Extract the results
summary(spe.rda.signif, display=NULL)

### Calculate the adjusted R^2 of the RDA
(R2adj <- RsquareAdj(spe.rda.signif)$adj.r.squared)

### Test the significance of the model and the significance of axis
?anova.cca
anova(spe.rda.signif, step=1000)
anova(spe.rda.signif, step=1000, by="axis")

### Plot the results
#### quick plots scaling 1 and 2
windows()
plot(spe.rda.signif, scaling=1, main="Triplot RDA (scaling 1)")
windows()
plot(spe.rda.signif, scaling=2, main="Triplot RDA (scaling 2)")

#### advanced plots scaling 1
windows()
plot(spe.rda.signif, scaling=1, main="Triplot RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1))
points(scores(spe.rda.signif, display="sites", choices=c(1,2), scaling=1),
       pch=21, col="black", bg="steelblue", cex=1.2)
text(scores(spe.rda.signif, display="species", choices=c(1), scaling=1),
     scores(spe.rda.signif, display="species", choices=c(2), scaling=1),
     labels=rownames(scores(spe.rda.signif, display="species", scaling=1)),
     col="grey", cex=0.8)  
arrows(0,0,
       scores(spe.rda.signif, display="bp", choices=c(1), scaling=1),
       scores(spe.rda.signif, display="bp", choices=c(2), scaling=1),
       col="red")
text(scores(spe.rda.signif, display="bp", choices=c(1), scaling=1)+0.05,
     scores(spe.rda.signif, display="bp", choices=c(2), scaling=1)+0.05,
     labels=rownames(scores(spe.rda.signif, display="bp", choices=c(2), scaling=1)),
     col="red", cex=1) 

#### advanced plots scaling 2
windows()
plot(spe.rda.signif, scaling=2, main="Triplot RDA - scaling 2", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1))
points(scores(spe.rda.signif, display="sites", choices=c(1,2), scaling=2),
       pch=21, col="black", bg="steelblue", cex=1.2)
text(scores(spe.rda.signif, display="species", choices=c(1), scaling=2),
     scores(spe.rda.signif, display="species", choices=c(2), scaling=2),
     labels=rownames(scores(spe.rda.signif, display="species", scaling=2)),
     col="grey", cex=0.8)  
arrows(0,0,
       scores(spe.rda.signif, display="bp", choices=c(1), scaling=2),
       scores(spe.rda.signif, display="bp", choices=c(2), scaling=2),
       col="red")
text(scores(spe.rda.signif, display="bp", choices=c(1), scaling=2)+0.05,
     scores(spe.rda.signif, display="bp", choices=c(2), scaling=2)+0.05,
     labels=rownames(scores(spe.rda.signif, display="bp", choices=c(2), scaling=2)),
     col="red", cex=1)  

# Challenge 5 ####
# Run the RDA of the mite environmental variables of the mite species abundances
data(mite.env) #data from vegan package
# Which are the significant explanatory variables ?  
# How much of the variation explain the significant explanatory variables ?
# Which are the significant axis ?
# Which group of sites can you identify ? 
# Which species are related to each group of sites ?
# (answer below)
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
#
# 

mite.spe.rda <- rda(mite.spe.hel~., data=mite.env)
ordiR2step(rda(mite.spe.hel~1, data=mite.env), 
           scope= formula(mite.spe.rda), direction= "forward", R2scope=TRUE, pstep=1000)
mite.env.signif <- subset(mite.env, 
                          select = c("WatrCont", "Shrub", "Substrate", "Topo", "SubsDens"))
mite.spe.rda.signif <- rda(mite.spe~., data=mite.env.signif) 
summary(mite.spe.rda.signif, display=NULL)
(R2adj <- RsquareAdj(mite.spe.rda.signif)$adj.r.squared)
anova(mite.spe.rda.signif, step=1000)
anova(mite.spe.rda.signif, step=1000, by="axis")          
windows()
plot(mite.spe.rda.signif, scaling=1, main="Triplot RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1))
points(scores(mite.spe.rda.signif, display="sites", choices=c(1,2), scaling=1),
       pch=21, col="black", bg="steelblue", cex=1.2)
text(scores(mite.spe.rda.signif, display="species", choices=c(1), scaling=1),
     scores(mite.spe.rda.signif, display="species", choices=c(2), scaling=1),
     labels=rownames(scores(mite.spe.rda.signif, display="species", scaling=1)),
     col="grey", cex=0.8)  
arrows(0,0,
       scores(mite.spe.rda.signif, display="bp", choices=c(1), scaling=1),
       scores(mite.spe.rda.signif, display="bp", choices=c(2), scaling=1),
       col="red")
text(scores(mite.spe.rda.signif, display="bp", choices=c(1), scaling=1)+0.05,
     scores(mite.spe.rda.signif, display="bp", choices=c(2), scaling=1)+0.05,
     labels=rownames(scores(mite.spe.rda.signif, display="bp", choices=c(2), scaling=1)),
     col="red", cex=1) 

## 4.2  Partial RDA : effect of water chemistry on species abundances partialling out the physiography 
### Divide the env data frame in two data frames
envtopo <- env[, c(1:3)] # Physiography : explanatory dataset 1
names(envtopo)
envchem <- env[, c(4:10)] # Water quality : explanatory dataset 2
names(envchem)
### Run the partial RDA
spechem.physio <- rda(spe.hel, envchem, envtopo)
# or : 
spechem.physio2 <- rda(spe.hel ~ pH + dur + pho + nit + amm + oxy + dbo +
                         Condition(alt + pen + deb), data=env)
### Extract the results
summary(spechem.physio, display=NULL)
### Calculate adjusted the R^2 of the partial RDA
(R2adj <- RsquareAdj(spechem.physio)$adj.r.squared)
### Test of the partial RDA and the significance of axis
anova(spechem.physio, step=1000)
anova(spechem.physio2, step=1000, by="axis")
### Construct the triplots
#### Scaling 1
windows(title="Partial RDA scaling 1")
plot(spechem.physio, scaling=1, main="Triplot partial RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1))
points(scores(spechem.physio, display="sites", choices=c(1,2), scaling=1),
       pch=21, col="black", bg="steelblue", cex=1.2)
text(scores(spechem.physio, display="species", choices=c(1), scaling=1),
     scores(spechem.physio, display="species", choices=c(2), scaling=1),
     labels=rownames(scores(spechem.physio, display="species", scaling=1)),
     col="grey", cex=0.8)  
arrows(0,0,
       scores(spechem.physio, display="bp", choices=c(1), scaling=1),
       scores(spechem.physio, display="bp", choices=c(2), scaling=1),
       col="red")
text(scores(spechem.physio, display="bp", choices=c(1), scaling=1)+0.05,
     scores(spechem.physio, display="bp", choices=c(2), scaling=1)+0.05,
     labels=rownames(scores(spechem.physio, display="bp", choices=c(2), scaling=1)),
     col="red", cex=1) 
#### Scaling 2
windows(title="Partial RDA scaling 2")
plot(spechem.physio, scaling=2, main="Triplot partial RDA - scaling 2", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1))
points(scores(spechem.physio, display="sites", choices=c(1,2), scaling=2),
       pch=21, col="black", bg="steelblue", cex=1.2)
text(scores(spechem.physio, display="species", choices=c(1), scaling=2),
     scores(spechem.physio, display="species", choices=c(2), scaling=2),
     labels=rownames(scores(spechem.physio, display="species", scaling=2)),
     col="grey", cex=0.8)  
arrows(0,0,
       scores(spechem.physio, display="bp", choices=c(1), scaling=2),
       scores(spechem.physio, display="bp", choices=c(2), scaling=2),
       col="red")
text(scores(spechem.physio, display="bp", choices=c(1), scaling=2)+0.05,
     scores(spechem.physio, display="bp", choices=c(2), scaling=2)+0.05,
     labels=rownames(scores(spechem.physio, display="bp", choices=c(2), scaling=2)),
     col="red", cex=1)  

# Challenge 6 ####
# Run the partial RDA of the mite environmental variables of the mite species abundances 
# partialling out for the substrate variables (SubsDens, WaterCont and Substrate)
# Is the model significant ?
# Which are the significant axis ?
# Interpret the obtained triplot.
# (answer below)
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
#
# 
mite.spe.subs<-rda(mite.spe.hel ~ Shrub + Topo
                  + Condition(SubsDens + WatrCont + Substrate), data=mite.env)
summary(mite.spe.subs, display=NULL)
(R2adj <- RsquareAdj(mite.spe.subs)$adj.r.squared)
anova(mite.spe.subs, step=1000)
anova(mite.spe.subs, step=1000, by="axis")
#### Triplot scaling 1
windows(title="Partial RDA scaling 1")
plot(mite.spe.subs, scaling=1, main="Triplot partial RDA - scaling 1", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1))
points(scores(mite.spe.subs, display="sites", choices=c(1,2), scaling=1),
       pch=21, col="black", bg="steelblue", cex=1.2)
text(scores(mite.spe.subs, display="species", choices=c(1), scaling=1),
     scores(mite.spe.subs, display="species", choices=c(2), scaling=1),
     labels=rownames(scores(mite.spe.subs, display="species", scaling=1)),
     col="grey", cex=0.8)  
arrows(0,0,
       scores(mite.spe.subs, display="bp", choices=c(1), scaling=1),
       scores(mite.spe.subs, display="bp", choices=c(2), scaling=1),
       col="red")
text(scores(mite.spe.subs, display="bp", choices=c(1), scaling=1)+0.05,
     scores(mite.spe.subs, display="bp", choices=c(2), scaling=1)+0.05,
     labels=rownames(scores(mite.spe.subs, display="bp", choices=c(2), scaling=1)),
     col="red", cex=1) 
#### Triplot scaling 2
windows(title="Partial RDA scaling 2")
plot(mite.spe.subs, scaling=2, main="Triplot partial RDA - scaling 2", type="none", xlab=c("RDA1"), ylab=c("RDA2"), xlim=c(-1,1), ylim=c(-1,1))
points(scores(mite.spe.subs, display="sites", choices=c(1,2), scaling=2),
       pch=21, col="black", bg="steelblue", cex=1.2)
text(scores(mite.spe.subs, display="species", choices=c(1), scaling=2),
     scores(mite.spe.subs, display="species", choices=c(2), scaling=2),
     labels=rownames(scores(mite.spe.subs, display="species", scaling=2)),
     col="grey", cex=0.8)  
arrows(0,0,
       scores(mite.spe.subs, display="bp", choices=c(1), scaling=2),
       scores(mite.spe.subs, display="bp", choices=c(2), scaling=2),
       col="red")
text(scores(mite.spe.subs, display="bp", choices=c(1), scaling=2)+0.05,
     scores(mite.spe.subs, display="bp", choices=c(2), scaling=2)+0.05,
     labels=rownames(scores(mite.spe.subs, display="bp", choices=c(2), scaling=2)),
     col="red", cex=1)  

# 4.3. Variation partitioning by partial RDA
?varpart
vegandocs("partitioning.pdf")
## Variation partitioning with all explanatory variables
spe.part.all <- varpart(spe.hel, envchem, envtopo)
spe.part.all
windows(title="Variation partitioning - all variables")
plot(spe.part.all, digits=2)  
## Variation partitioning with only significant variables
### Separate forward selection in each subset of environmental  variables
spe.chem <- rda(spe.hel~., data=envchem)
R2a.all.chem <- RsquareAdj(spe.chem)$adj.r.squared
ordiR2step(rda(spe.hel~1, data=envchem), 
           scope= formula(spe.chem), direction= "forward", R2scope=TRUE, pstep=1000)
names(envchem)
(envchem.pars <- envchem[, c( 4, 6, 7 )])
spe.topo <- rda(spe.hel~., data=envtopo)
R2a.all.topo <- RsquareAdj(spe.topo)$adj.r.squared
ordiR2step(rda(spe.hel~1, data=envtopo), 
           scope= formula(spe.topo), direction= "forward", R2scope=TRUE, pstep=1000)
names(envtopo)
envtopo.pars <- envtopo[, c(1,2)]
### Variation partitioning
(spe.part <- varpart(spe.hel, envchem.pars, envtopo.pars))
windows(title="Variation partitioning - parsimonious subsets")
plot(spe.part, digits=2)
# Tests of all testable fractions
anova(rda(spe.hel, envchem.pars), step=1000) # Test of fractions [a+b]
anova(rda(spe.hel, envtopo.pars), step=1000) # Test of fractions [b+c]
env.pars <- cbind(envchem.pars, envtopo.pars) 
anova(rda(spe.hel, env.pars), step=1000) # Test of fractions [a+b+c]
anova(rda(spe.hel, envchem.pars, envtopo.pars), step=1000) # Test of fraction [a]
anova(rda(spe.hel, envtopo.pars, envchem.pars), step=1000) # Test of fraction [c]

# Challenge 7 ####
# Run the variation partitioning of the mite species abundances 
# with a first dataset for the significant substrate variables (SubsDens, WaterCont and Substrate)
# and a second dataset for the significant other variables (Shrud and Topo)
# Which proportion of the variation explain each dataset ?  
# Which are the significant fractions ?
# (answer below)
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
#
# 
str(mite.env)
(mite.subs<-mite.env[,c(1,2,3)])
(mite.other<-mite.env[,c(4,5)])
rda.mite.subs <- rda(mite.spe.hel~., data=mite.subs)
R2a.all.chem <- RsquareAdj(rda.mite.subs)$adj.r.squared
ordiR2step(rda(mite.spe.hel~1, data=mite.subs), 
           scope= formula(rda.mite.subs), direction= "forward", R2scope=TRUE, pstep=1000)
names(mite.subs)
(mite.subs.pars <- mite.subs[, c(2, 3)])
rda.mite.other <- rda(mite.spe.hel~., data=mite.other)
R2a.all.chem <- RsquareAdj(rda.mite.other)$adj.r.squared
ordiR2step(rda(mite.spe.hel~1, data=mite.other), 
           scope= formula(rda.mite.other), direction= "forward", R2scope=TRUE, pstep=1000)
names(mite.other)
(mite.other.pars <- mite.other[, c(1,2)])
### Variation partitioning
(mite.spe.part <- varpart(mite.spe.hel, ~WatrCont+Substrate, ~Shrub+Topo,
                          data=mite.env))
windows(title="Variation partitioning - parsimonious subsets")
plot(mite.spe.part, digits=2)
# Tests of all testable fractions
anova(rda(mite.spe.hel~ WatrCont+Substrate, data=mite.env), step=1000) # Test of fractions [a+b]
anova(rda(mite.spe.hel~Shrub+Topo, data=mite.env), step=1000) # Test of fractions [b+c]
(env.pars <- cbind(mite.env[,c(2,3,4,5)])) 
anova(rda(mite.spe.hel~ WatrCont+Substrate+Shrub+Topo, data=env.pars), step=1000) # Test of fractions [a+b+c]
anova(rda(mite.spe.hel~WatrCont+Substrate + Condition(Shrub+Topo), data=env.pars), step=1000) # Test of fraction [a]
anova(rda(mite.spe.hel~Shrub+Topo+ Condition(WatrCont+Substrate ), data=env.pars), step=1000) # Test of fraction [c]



## 4.4. Principal Response Curves (PRC)
?prc
# van den Brink, P.J. & ter Braak, C.J.F. (1999). 
# Principal response curves: Analysis of time-dependent multivariate responses of biological community to stress.
# Environmental Toxicology and Chemistry, 18, 138-148
# Chlorpyrifos experiment and experimental design
data(pyrifos)
week <- gl(11, 12, labels=c(-4, -1, 0.1, 1, 2, 4, 8, 12, 15, 19, 24))
dose <- factor(rep(c(0.1, 0, 0, 0.9, 0, 44, 6, 0.1, 44, 0.9, 0, 6), 11))
# PRC
mod <- prc(pyrifos, dose, week)
mod            # RDA
summary(mod)   # PRC
logabu <- colSums(pyrifos)
windows()
plot(mod, select = logabu > 100)
# Permutations should be done only within one week, and we only
# are interested on the first axis
anova(mod, strata = week, first=TRUE, perm.max = 100)

# PRC can be useful in community ecolgy
# to compare the temporal evolution of a communty in experimental sites (e.g. restored ecosystems)
# to the temporal evolution of a communty in control sites (e.g. reference ecosystems)

# Challenge 8 ####
# Run the PRC on the Doubs fish species abundances with last site removed considering that:
# the first 14 sites correpond to 4 reference ecosystems
# the last 14 sites correpond to 4 restored ecosystems
# the first seven lines correponds to successive sampling of the reference sites 1 during year 1,2,3,4,5,6 and 7
# the lines 8 to 14 correponds to successive sampling of the reference sites 2 during year 1,2,3,4,5,6 and 7
# etc...
# Does restoration enables the recovery of reference communities ?  
# Which species are related to reference ecosystems, to restored ecosystems ?
# (answer below)
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
# 
#
# 
dim(spe.hel)
spe.hel<-spe.hel[-29,]
reference<-rep(c(0), 14)
restored<-rep(c(1), 14)
type.site<-as.factor(c(reference, restored))
year<-as.factor(rep(c(1,2,3,4,5,6,7), 4))
spe.hel.prc<-prc(spe.hel, type.site, year)
spe.hel.prc
summary(spe.hel.prc)
plot(spe.hel.prc)

#***************************************************************************************#

# 5. Other useful ordination methods

?cca # Constrained Correspondence Analysis (CCA)

?metaMDS # Nonmetric Multidimesional Scaling

help(lda, package=MASS) # linear discriminant analysis

?CCorA # Canonical Correlation Analysis

help(coinertia, package=ade4) # Coinertia Analysis 

help(mfa, package=ade4) # Multiple Factorial Analysis

https://r-forge.r-project.org/R/?group_id=195  # packages AEM and PCNM contain functions to perform spatial analysis