### ### ### ### ### ### ### ### ### ### ### ### ### LMM course - UQAM jan 2019 ### taught by Kate Laskowski ### http://www.qcbs.ca/wiki/lmm.workshop ### ### ### ### ### ### ### ### ### ### ### ### # Day 1 ------------------------------------------------------------------ # set working directory where data files are stored # Here we assume that your file is store on Dropbox in the folder called "GLMM course" # setwd("C:/Users/"your.computer.name"/Dropbox/GLMM course") # setwd("~/Dropbox/GLMM course") # Download the data here -------------------------------------------------- # http://highstat.com/Books/Book2/ZuurDataMixedModelling.zip # Clams data # CHECKING MODEL ASSUMPTIONS ---------------------------------------------- clams <- read.table("Clams.txt", header = T, dec = ".") str(clams) # See that month is numeric and should be as factors head(clams) tail(clams) # Changing numeric month to as factor clams$fMONTH <- as.factor(clams$MONTH) str(clams) # Linear model in R (interaction of length and month; length + month + length:month) mod.full <- lm(AFD ~ LENGTH*fMONTH, data = clams) # Look at the summary of the linear model s summary(mod.full) # Look at the assumptions of the model plot(mod.full, 1) # Look at the residuals per group (month is the group factor ) plot(resid(mod.full) ~ clams$fMONTH) # Look that the shape of the residuals with continous variable plot(resid(mod.full) ~ clams$LENGTH) # Look at normality qqnorm(resid(mod.full)) qqline(resid(mod.full)) hist(resid(mod.full), main = 'Distribution of residuals', xlab = "Model residuals") mod.trans <- lm(LNAFD ~ LNLENGTH*fMONTH, data = clams) plot(mod.trans, 1) # Day 2 ------------------------------------------------------------------ # DATA EXPLORATION -------------------------------------------------------- loyn <- read.table("Loyn.txt", header = T, dec = ".") head(loyn) str(loyn) hist(loyn$ABUND) dotchart(loyn$ABUND) dotchart(loyn$ABUND, color = factor(loyn$GRAZE)) which(loyn$ABUND > 30) dotchart(loyn$DIST) # uh oh problem with large outlier? dotchart(loyn$LDIST, labels = row.names(loyn)) dotchart(loyn$AREA) # big problems with large values dotchart(loyn$ALT) dotchart(loyn$YR.ISOL) pairs(loyn) pairs(loyn[,2:5]) pairs(loyn[,c("ABUND", "AREA", "DIST", "LDIST", "ALT")]) loyn$L.AREA <- log(loyn$AREA) pairs(loyn[,c("ABUND", "L.AREA", "DIST", "LDIST", "ALT")]) boxplot(loyn$ABUND ~ factor(loyn$GRAZE)) table(loyn$GRAZE) # MODEL SELECTION --------------------------------------------------------- rikz <- read.table("RIKZ.txt", header = T, dec = ".") head(rikz) str(rikz) hist(rikz$Richness) dotchart(rikz$Richness, color = rikz$Beach) pairs(rikz[,c(2,3,4,5)]) # change exposure level 8 to exposure level 10 rikz$fExp <- rikz$Exposure rikz$fExp[rikz$fExp == 8] <- 10 rikz$fExp <- factor(rikz$fExp, levels = c(10,11)) str(rikz) # determining random effects with REML library(nlme) mod1 <- lme(Richness ~ NAP*fExp, method = "REML", random = ~1|Beach, data = rikz) mod0 <- gls(Richness ~ NAP*fExp, method = "REML", data = rikz) anova(mod1, mod0) # Random effect should stay in the model! # determine fixed effects with ML ----------------------------------------- mod.full <- lme(Richness ~ NAP*fExp, method = "ML", random = ~1|Beach, data = rikz) mod.twoway <- lme(Richness ~ NAP + fExp, method = "ML", random = ~1|Beach, data = rikz) # testing the two-way interaction ----------------------------------------- anova(mod.full, mod.twoway) # test the main effects --------------------------------------------------- mod.twoway <- lme(Richness ~ NAP + fExp, method = "ML", random = ~1|Beach, data = rikz) mod.nap <- lme(Richness ~ fExp, method = "ML", random = ~1|Beach, data = rikz) #testing effect of NAP anova(mod.twoway, mod.nap) mod.exp <- lme(Richness ~ NAP, method = "ML", random = ~1|Beach, data = rikz) # testing effect of Exposure anova(mod.twoway, mod.exp) # re-run final model with REML -------------------------------------------- mod.final <- lme(Richness ~ NAP + fExp, method = "REML", random = ~1|Beach, data = rikz) summary(mod.final) # posthoc testing --------------------------------------------------------- library(emmeans) mod.final <- lme(Richness ~ NAP + fExp, method = "REML", random = ~1|Beach, data = rikz) # Use this command if you want to look at the variable that is categorical emmeans(mod.final, ~fExp) CLD(emmeans(mod.final, ~fExp)) pairs(emmeans(mod.final, ~fExp)) clams <- read.table("Clams.txt", header = T, dec = ".") mod.clam <- lm(AFD ~ factor(MONTH) + LENGTH, data = clams) CLD(emmeans(mod.clam, ~MONTH)) pairs(emmeans(mod.clam, ~MONTH)) plot(CLD(emmeans(mod.clam, ~MONTH))) mod.clam2 <- lm(AFD ~ factor(MONTH)*LENGTH, data = clams) # look at differences in slopes emtrends(mod.clam2, ~"MONTH", var = "LENGTH") CLD(emtrends(mod.clam2, ~"MONTH", var = "LENGTH")) plot(emtrends(mod.clam2, ~"MONTH", var = "LENGTH")) # owl begging behavior ---------------------------------------------------- owls <- read.table("Owls.txt", header = T, dec = ".") str(owls) head(owls) # Explore data ------------------------------------------------------------ pairs(owls) dotchart(owls$ArrivalTime) dotchart(owls$BroodSize) hist(owls$NegPerChick) # Transform the data owls$LogNeg <- log10(owls$NegPerChick + 1) hist(owls$LogNeg) dotchart(owls$LogNeg) qqnorm(owls$LogNeg) # Here you can clearly see that there is structure in the data (there are a lot of "0" in the data) qqline(owls$LogNeg) boxplot(owls$LogNeg ~ owls$SexParent) # There doesn't seem to have an effect of sex boxplot(owls$LogNeg ~ owls$FoodTreatment) # Food treatment seems interesting!! Maybe our experiment worked?!?!?!?! boxplot(owls$LogNeg ~ owls$Nest) plot(owls$ArrivalTime ~ owls$LogNeg) boxplot(owls$LogNeg ~ owls$BroodSize) # Bird alone in the nest beggs more? table(owls$BroodSize) # Well, the sample size is just very small... hist(owls$ArrivalTime) # 1. beyond full model: put everything... # Test for the Random effect # Begging ~ food*arrival*food*sex, random = ~1|nest # Which is the same as # Begging ~ food + arrival + sex + food:arrival + food:sex + arrival+sex + food*arrival*food*sex, random = ~1|nest # Initial fixed (to test the fixed effects) # Model1 = Begging ~ food + arrival + food:arrival + sex + food:sex, random = ~1|nest # Model2 = Begging ~ food + arrival + sex + food:sex, random = ~1|nest # Model3 = Begging ~ food + arrival + food:arrival + sex + , random = ~1|nest #test random effect mod.rand <- lme(LogNeg ~ FoodTreatment*SexParent*ArrivalTime, random = ~1|Nest, method ="REML", data = owls) summary(mod.rand) mod0 <- gls(LogNeg ~ FoodTreatment*SexParent*ArrivalTime, method ="REML", data = owls) anova(mod.rand, mod0) # Testing the different fixed effects # model.all = beg ~ food + sex + arrival # This is the new model that you have to compare with # model.fod = beg ~ sex + arrival # model.sex = beg ~ food + arrival # model.arr = beg ~ food + sex # reduce fixed effects mod.full <- lme(LogNeg ~ FoodTreatment*SexParent + FoodTreatment*ArrivalTime, random = ~1|Nest, method = "ML", data = owls) #summary(mod.full) #food * sex mod2 <- lme(LogNeg ~ SexParent + FoodTreatment*ArrivalTime, random = ~1|Nest, method = "ML", data = owls) anova(mod.full, mod2) #ns #food * arrival mod3 <- lme(LogNeg ~ FoodTreatment*SexParent + ArrivalTime, random = ~1|Nest, method = "ML", data = owls) anova(mod.full, mod3) #ns # new full model mod.full2 <- lme(LogNeg ~ FoodTreatment + SexParent + ArrivalTime, random = ~1|Nest, method = "ML", data = owls) #food mod4 <- lme(LogNeg ~ SexParent + ArrivalTime, random = ~1|Nest, method = "ML", data = owls) anova(mod.full2, mod4) #sig - retain! #sex mod5 <- lme(LogNeg ~ FoodTreatment + ArrivalTime, random = ~1|Nest, method = "ML", data = owls) anova(mod.full2, mod5) # ns #arrival mod6 <- lme(LogNeg ~ FoodTreatment + SexParent, random = ~1|Nest, method = "ML", data = owls) anova(mod.full2, mod6) #sig - retain! # final model to report parameter estimates mod.final <- lme(LogNeg ~ FoodTreatment + ArrivalTime, random = ~1|Nest, method = "REML", data = owls) summary(mod.final) plot(mod.final) plot(resid(mod.final) ~ owls$FoodTreatment) # Good homogeneity plot(resid(mod.final) ~ owls$ArrivalTime) # Tails are a bit weird, but it's fine qqnorm(resid(mod.final)) qqline(resid(mod.final)) 0.09^2/(0.09^2+0.23^2) # to find the LLR of the random effect, you need to compare to the final model again with the non random model mod.0 <- gls(LogNeg ~ FoodTreatment + ArrivalTime, method = "REML", data = owls) anova(mod.final, mod.0) # LLR to report ----------------------------------------------------------- mod.final.ml <- lme(LogNeg ~ FoodTreatment + ArrivalTime, random = ~1|Nest, method = "ML", data = owls) #food mod7 <- lme(LogNeg ~ ArrivalTime, random = ~1|Nest, method = "ML", data = owls) anova(mod.final.ml, mod7) #arrival mod8 <- lme(LogNeg ~ FoodTreatment, random = ~1|Nest, method = "ML", data = owls) anova(mod.final.ml, mod8) # Day 3 ------------------------------------------------------------------ # ESTIMATING R2 ----------------------------------------------------------- rikz <- read.table("RIKZ.txt", header = T, dec = ".") head(rikz) rikz$fExp <- rikz$Exposure rikz$fExp[rikz$fExp == 8] <- 10 rikz$fExp <- factor(rikz$fExp, levels = c(10,11)) # run final model library(nlme) mod.final <- lme(Richness ~ scale(NAP) + fExp, random = ~1|Beach, data = rikz) summary(mod.final) # extract fixed effects fixef(mod.final) fix.matrix <- model.matrix(mod.final, data = rikz) head(fix.matrix) # Solve linear equation for every data point fixed <- (fixef(mod.final)[2]*fix.matrix[,2] + fixef(mod.final)[3]*fix.matrix[,3]) # Estimate variance in our fitted values varF <- var(fixed) # This is a more effective way of doing it! Compare varF with the line of code below var(fix.matrix %*% fixef(mod.final)) # Extract random variance estimates VarCorr(mod.final) var.est <- as.numeric(VarCorr(mod.final)) # Marginal R2 - variance explained by fixed effects varF/(varF + var.est[1] + var.est[2]) # Conditional R2 - variance explained by fixed & random (varF + var.est[1])/(varF + var.est[1] + var.est[2]) library(MuMIn) r.squaredGLMM(mod.final) # DEALING WITH VARIANCE HETEROGENEITY ------------------------------------- squid <- read.table("Squid.txt", header = T, dec = ".") head(squid) squid$fMONTH <- as.factor(squid$MONTH) str(squid) # Explore data! hist(squid$Testisweight) # Skew in the data... we need to check that dotchart(squid$Testisweight) # Mmmmm what is going on here? dotchart(squid$DML) # The variation is pretty good here. We don't outlier here. pairs(squid[,4:6]) pairs(squid[,c("DML","Testisweight","fMONTH")]) # Now we'll look at the distribution of the residuals. Let's run a quick model that will give us the residuals. mod1 <- lm(Testisweight ~ DML*fMONTH, data = squid) plot(mod1, 1) hist(resid(mod1)) plot(resid(mod1) ~ squid$fMONTH) plot(resid(mod1) ~ squid$DML) # fixed variance structure ------------------------------------------------ mod.normal <- gls(Testisweight ~ DML*fMONTH, data = squid) mod.fixed <- gls(Testisweight ~ DML*fMONTH, weights = varFixed(~DML), # This is the fixed variance data = squid) anova(mod.normal, mod.fixed) # no p-value since they are not nested. plot(mod.fixed) # identity variance ------------------------------------------------------- mod.ident <- gls(Testisweight ~ DML*fMONTH, weights = varIdent(form = ~1|fMONTH), # allow the variance to be different for evey month data = squid) # only look at the AIC differences anova(mod.normal, mod.fixed, mod.ident) summary(mod.ident) weights = varIdent(form = ~1|fMONTH*factor(location)) # power variance ---------------------------------------------------------- mod.power <- gls(Testisweight ~ DML*fMONTH, weights = varPower(form = ~DML), data = squid) anova(mod.normal, mod.fixed, mod.ident, mod.power) plot(mod.power) summary(mod.power) # TEMPORAL AUTO CORRELATION ----------------------------------------------- hawaii <- read.table("Hawaii.txt", header = T, dec = ".") head(hawaii) hawaii$moorhens <- sqrt(hawaii$Moorhen.Kauai) # Explore the data plot(moorhens ~ Year, data = hawaii) plot(moorhens ~ Rainfall, data = hawaii) mod1 <- gls(moorhens ~ Year + Rainfall, data = hawaii, na.action = na.omit) plot(mod1) # ACF plot - but need to account for NAs first E <- resid(mod1, type = "normalized") I1 <- !is.na(hawaii$moorhens) Efull <- vector(length = length(hawaii$moorhens)) Efull <- NA Efull[I1] <- E acf(Efull, na.action = na.pass) # Compound symmetry mod.cs <- gls(moorhens ~ Year + Rainfall, data = hawaii, na.action = na.omit, correlation = corCompSymm(form = ~Year)) summary(mod.cs) E <- resid(mod.cs, type = "normalized") I1 <- !is.na(hawaii$moorhens) Efull <- vector(length = length(hawaii$moorhens)) Efull <- NA Efull[I1] <- E acf(Efull, na.action = na.pass) anova(mod1, mod.cs) # AR1 mod.ar <- gls(moorhens ~ Year + Rainfall, data = hawaii, na.action = na.omit, correlation = corAR1(form = ~Year)) summary(mod.ar) # ACF plot - but need to account for NAs first E <- resid(mod.ar, type = "normalized") I1 <- !is.na(hawaii$moorhens) Efull <- vector(length = length(hawaii$moorhens)) Efull <- NA Efull[I1] <- E acf(Efull, na.action = na.pass) anova(mod1, mod.cs, mod.ar) # arma arma1 <- corARMA(c(0.2), p = 1, q = 0) arma2 <- corARMA(c(0.3,-0.3), p = 2, q = 0) mod.arma1 <- gls(moorhens ~ Year + Rainfall, data = hawaii, na.action = na.omit, correlation = arma1) mod.arma2 <- gls(moorhens ~ Year + Rainfall, data = hawaii, na.action = na.omit, correlation = arma2) # RANDOM SLOPES ----------------------------------------------------------- rikz <- read.table("RIKZ.txt", header = T, dec = ".") rikz$fExp <- rikz$Exposure rikz$fExp[rikz$fExp == 8] <- 10 rikz$fExp <- factor(rikz$fExp, levels = c(10,11)) mod.null <- gls(Richness ~ NAP + fExp, method = "REML", data = rikz) mod.int <- lme(Richness ~ NAP + fExp, random = ~1|Beach, method = "REML", data = rikz) mod.slope <- lme(Richness ~ NAP + fExp, random = ~1 + NAP|Beach, method = "REML", data = rikz) summary(mod.slope) anova(mod.null, mod.int, mod.slope) ranef(mod.slope) # WHALE DATA SET ---------------------------------------------------------- whales <- read.table("Cetaceans.txt", header = T, dec = ".") I <- whales$Sex == 0 whales2 <- whales[!I,] whales2$fSex <- factor(whales2$Sex) # first explore data # second confirm random effects # third reduce fixed effects from initial # fourth validate! # Model we want to look for: # Age ~ Stain * fsex + Stain*Location, random = ~1|species/DolphinID # When selection for the nested effect, start from the outside (remove DolphinID first) # Explore the data: dotchart(whales2$Age, color = whales2$Species) plot(whales2$Age ~ whales2$Stain) plot(whales2$Age ~ whales2$Location) plot(whales2$Age ~ whales2$fSex) # Same as the plot above boxplot(whales2$Age~whales2$fSex) # Doesn't look like this is different boxplot(whales2$Age~whales2$Stain) # doesn't look like this is different boxplot(whales2$Age~whales2$Location) # WOW!! This looks different! boxplot(whales2$Age~whales2$Location+whales2$Stain) # WOW!! This looks different! # random effects mod.null <- gls(Age ~ Stain*fSex + Stain*Location, data = whales2, method = "REML") mod.sp <- lme(Age ~ Stain*fSex + Stain*Location, random = ~1|Species, data = whales2, method = "REML") mod.both <- lme(Age ~ Stain*fSex + Stain*Location, random = ~1|Species/DolphinID, data = whales2, method = "REML") anova(mod.null, mod.sp, mod.both) summary(mod.both) ### Let's now get the repeatability # Extract random variance estimates VarCorr(mod.both) var.est <- as.numeric(VarCorr(mod.both)) #dolphin + species (var.est[4] + var.est[2])/ (var.est[4] + var.est[2] + var.est[5]) #dolphin var.est[4] / (var.est[4] + var.est[2] + var.est[5]) # species var.est[2] / (var.est[4] + var.est[2] + var.est[5]) # fixed effects # Now let's take a look at the INTERACTION term for the FIXED effects (Hey, maybe you should think about ML vs REML?). In this case, we need to look at the # Age ~ Stain + fsex + Location + Stain:fsex + Stain:Location, random = ~1|Species/DolphinID # Age ~ Stain + fsex + Location + Stain:Location, random = ~1|Species/DolphinID # Age ~ Stain + fsex + Location + Stain:fsex , random = ~1|Species/DolphinID # Remember: it doesn't make sense to test for an interaction where the main effect is not present # i.e. (Age ~ Stain + fsex + Location + Stain:Location) # (Age ~ Stain + fsex + + Stain:Location) Doesn't make sens as "location" is inside the interaction # So your not technically testing only the effect of location (since there is still some "location" in the interaction) # Here is the full model mod.full.1 <- lme(Age ~ Stain*fSex + Stain*Location, random = ~1|Species/DolphinID, data = whales2, method = "ML") #stain*sex Testing interaction mod2 <- lme(Age ~ fSex + Stain*Location, random = ~1|Species/DolphinID, data = whales2, method = "ML") anova(mod.full.1, mod2) #ns #stain*location mod3 <- lme(Age ~ Stain*fSex + Location, random = ~1|Species/DolphinID, data = whales2, method = "ML") anova(mod.full.1, mod3) #highly sig! # New full model mod.full.2 <- lme(Age ~ fSex + Stain*Location, random = ~1|Species/DolphinID, data = whales2, method = "ML") # Now we'll test the simple fixed effects with our new model # Age ~ Stain + fsex + Location + Stain:Location, random = ~1|Species/DolphinID # Age ~ Stain + + Location + Stain:Location, random = ~1|Species/DolphinID # testing sex mod4 <- lme(Age ~ Stain*Location, random = ~1|Species/DolphinID, data = whales2, method = "ML") anova(mod.full.2, mod4) #ns mod.full.3 <- lme(Age ~ Stain*Location, random = ~1|Species/DolphinID, data = whales2, method = "ML") # testing stain*location mod5 <- lme(Age ~ Stain + Location, random = ~1|Species/DolphinID, data = whales2, method = "ML") anova(mod.full.3, mod5) #sig, so done! # final model mod.final <- lme(Age ~ Stain*Location, random = ~1|Species/DolphinID, data = whales2, method = "ML") summary(mod.final) # LLR for stain*location (RETEST the interaction to get the LLR and report it) mod.test <- lme(Age ~ Stain + Location, random = ~1|Species/DolphinID, data = whales2, method = "ML") anova(mod.final, mod.test) # OK, but now retest the Random effect!!! # random effects mod.final <- lme(Age ~ Stain*Location, random = ~1|Species/DolphinID, data = whales2, method = "REML") # Calculate the repeatability VarCorr(mod.final) var.est <- as.numeric(VarCorr(mod.final)) var.d = var.est[2] var.sp = var.est[4] var.red = var.est[5] # 2 different ways to get to the same result rep1 = (var.d+var.sp)/(var.d+var.sp + var.red) # [1] 0.9781067 rep2 = (var.d)/(var.d+var.sp + var.red) # [1] 0.05057009 #dolphin + species (var.est[4] + var.est[2])/ (var.est[4] + var.est[2] + var.est[5]) #dolphin var.est[4] / (var.est[4] + var.est[2] + var.est[5]) # species var.est[2] / (var.est[4] + var.est[2] + var.est[5]) mod.sp <- lme(Age ~ Stain*Location, random = ~1|Species, data = whales2, method = "REML") mod.null <- gls(Age ~ Stain*Location, #random = ~1|Species/DolphinID, data = whales2, method = "REML") anova(mod.final, mod.sp, mod.null) plot(mod.final) plot(resid(mod.final) ~ whales2$Stain) plot(resid(mod.final) ~ whales2$Location) qqnorm(resid(mod.final)) qqline(resid(mod.final)) # r2 fix.matrix <- model.matrix(mod.final, data = whales2) head(fix.matrix) Fixed <- (fixef(mod.final)[2]*fix.matrix[,2] + fixef(mod.final)[3]*fix.matrix[,3] + fixef(mod.final)[4]*fix.matrix[,4] + fixef(mod.final)[5]*fix.matrix[,5] + fixef(mod.final)[6]*fix.matrix[,6]) # estimate variance in our fitted values (both commands are equivalent, but second is less prone to error) varF <- var(Fixed) varF1 = as.numeric(var(fix.matrix %*% fixef(mod.final))) varResid <- var.est[5] varDolph <- var.est[4] varSp <- var.est[2] varF # varF1 #marginal R2 varF/(varF + varResid + varDolph + varSp) # condition R2 (varF + varDolph + varSp)/(varF + varResid + varDolph + varSp) # Calculate r-squared library(MuMIn) r.squaredGLMM(mod.final) # Look at the assumptions of the model plot(mod.final) plot(resid(mod.final) ~ whales2$Location) # You can look at heterogeneity of the variance in the location mod.final.variance.homo = lme(Age ~ Stain + Location + Stain:Location, random = ~1|Species/DolphinID, weights = varIdent(form = ~1|Location), method = "REML", data = whales2) plot(mod.final) plot(mod.final.variance.homo) anova(mod.final.variance.homo, mod.final)