#----------------------------------# ####### 0: loading packages ####### #----------------------------------# require(ggplot2) require(mgcv) library(itsadug) #----------------------------------# ###### 1: Introducing the GAM ###### #----------------------------------# # ------ 1.1 ------ # set.seed(10) n <- 250 x <- runif(n,0,5) y_model <- 3*x/(1+2*x) y_obs <- rnorm(n,y_model,0.1) data_plot <- qplot(x, y_obs) + geom_line(aes(y=y_model))+ theme_bw() print(data_plot) # ------ 1.2 ------ # linear_model <- gam(y_obs~x) model_summary <- summary(linear_model) print(model_summary) data_plot <- data_plot+ geom_line(colour="red", aes(y=fitted(linear_model))) print(data_plot) # ------ 1.3 ------ # gam_model <- gam(y_obs~s(x)) summary(gam_model) data_plot <- data_plot + geom_line(colour="blue",aes(y=fitted(gam_model))) print(data_plot) # ------ 1.4 ------ # plot(gam_model) # ------ 1.5 ------ # linear_model <- gam(y_obs~x) nested_gam_model <- gam(y_obs~s(x)+x) print(anova(linear_model, nested_gam_model, test="Chisq")) # ------ 1.6 ------ # n <- 250 x_test <- runif(n,-5,5) y_test_fit <- 4*dnorm(x_test) y_test_obs <- rnorm(n,y_test_fit, 0.2) #----------------------------------# ##### 2: Multiple smooth terms ##### #----------------------------------# # ------ 2.1 ------ # gam_data <- gamSim(eg=5) head(gam_data) # ------ 2.2 ------ # basic_model <- gam(y~x0+s(x1), data=gam_data) basic_summary <- summary(basic_model) print(basic_summary$p.table) print(basic_summary$s.table) plot(basic_model) # ------ 2.3 ------ # two_term_model <- gam(y~x0+s(x1)+x2, data=gam_data) two_term_summary <- summary(two_term_model) print(two_term_summary$p.table) print(two_term_summary$s.table) plot(two_term_model) # ------ 2.4 ------ # two_smooth_model <- gam(y~x0+s(x1)+s(x2), data=gam_data) two_smooth_summary <- summary(two_smooth_model) print(two_smooth_summary$p.table) print(two_smooth_summary$s.table) plot(two_smooth_model,page=1) # ------ 2.5 ------ # anova(basic_model,two_term_model,two_smooth_model, test="Chisq") #----------------------------------# ######### 3: Interactions ########## #----------------------------------# # ------ 3.1 ------ # categorical_interact <- gam(y~x0+s(x1)+s(x2,by=x0), data=gam_data) categorical_interact_summary <- summary(categorical_interact) print(categorical_interact_summary$s.table) plot(categorical_interact,page=1) vis.gam(categorical_interact,view=c("x2","x0"),theta=40,n.grid=500,border=NA) # or alternatively: plot using vis.gam function, where theta is the degree rotation on the x-y plane anova(two_smooth_model,categorical_interact,test="Chisq") # ------ 3.2 ------ # smooth_interact <- gam(y~x0+s(x1,x2), data=gam_data) smooth_interact_summary <- summary(smooth_interact) print(smooth_interact_summary$s.table) plot(smooth_interact,page=1,scheme=3) vis.gam(smooth_interact,view=c("x1","x2"),theta=40,n.grid=500,border=NA) # plot(smooth_interact,page=1,scheme=1) will give a similar plot anova(two_smooth_model,smooth_interact,test="Chisq") #----------------------------------# ######## 4: Changing basis ######### #----------------------------------# # ------ 4.1 ------ # data(nottem) n_years <- length(nottem)/12 nottem_month <- rep(1:12, times=n_years) nottem_year <- rep(1920:(1920+n_years-1),each=12) nottem_plot <- qplot(nottem_month,nottem, colour=factor(nottem_year), geom="line") + theme_bw() print(nottem_plot) # ------ 4.2 ------ # year_gam <- gam(nottem~s(nottem_year)+ s(nottem_month, bs="cc")) summary(year_gam)$s.table plot(year_gam,page=1, scale=0) #----------------------------------# ##### 5: Quick intro to GAMMs ###### #----------------------------------# # ------ 5.1 ------ # par(mfrow=c(1,2)) acf(resid(year_gam), lag.max = 36, main = "ACF") pacf(resid(year_gam), lag.max = 36, main = "pACF") # ------ 5.2 ------ # year_gam <- gamm(nottem~s(nottem_year)+ s(nottem_month, bs="cc")) year_gam_AR1 <- gamm(nottem~s(nottem_year)+ s(nottem_month, bs="cc"),correlation = corARMA(form = ~ 1|nottem_year, p = 1)) year_gam_AR2 <- gamm(nottem~s(nottem_year)+ s(nottem_month, bs="cc"),correlation = corARMA(form = ~ 1|nottem_year, p = 2)) anova(year_gam$lme,year_gam_AR1$lme,year_gam_AR2$lme) # ------ 5.3 ------ # data(simdat) simdat$Subject <- as.factor(simdat$Subject) gamm_intercept <- bam(Y ~ Group + s(Time, by=Group) + s(Subject, bs="re"), data=simdat) summary(gamm_intercept) par(mfrow=c(1,2), cex=1.1) plot_smooth(gamm_intercept, view="Time", cond=list(Group="Adults"), rm.ranef=TRUE, main="intercept+s(Time):GroupAdults", rug=FALSE) plot_smooth(gamm_intercept, view="Time", cond=list(Group="Adults", Subject="a01"), main="... + s(Subject)", col='red', ylim=c(-15,20), rug=FALSE) plot_smooth(gamm_intercept, view="Time", cond=list(Group="Adults", Subject="a18"), add=TRUE, col='blue') # ------ 5.4 ------ # gamm_slope <- bam(Y ~ Group + s(Time, by=Group) + s(Subject, Time, bs="re"), data=simdat) summary(gamm_slope) plot_smooth(gamm_slope, view="Time", cond=list(Group="Adults"), rm.ranef=TRUE, main="intercept+s(Time):GroupAdults", rug=FALSE) plot_smooth(gamm_slope, view="Time", cond=list(Group="Adults", Subject="a01"), main="... + s(Subject)", col='red',ylim=c(-15,20), rug=FALSE) plot_smooth(gamm_slope, view="Time", cond=list(Group="Adults", Subject="a18"), add=TRUE, col='blue') # ------ 5.5 ------ # gamm_int_slope <- bam(Y ~ Group + s(Time, by=Group) + s(Subject, bs="re") + s(Subject, Time, bs="re"), data=simdat) summary(gamm_int_slope) plot_smooth(gamm_int_slope, view="Time", cond=list(Group="Adults"), rm.ranef=TRUE, main="intercept+s(Time):GroupAdults", rug=FALSE) plot_smooth(gamm_int_slope, view="Time", cond=list(Group="Adults", Subject="a01"), main="... + s(Subject) + s(Subject, Time)", col='red', ylim=c(-15,20), rug=FALSE) plot_smooth(gamm_int_slope, view="Time", cond=list(Group="Adults", Subject="a18"), add=TRUE, col='blue', xpd=TRUE) # ------ 5.6 ------ # gamm_smooth <- bam(Y ~ Group + s(Time, by=Group) + s(Time, Subject, bs="fs", m=1), data=simdat) summary(gamm_smooth) plot_smooth(gamm_smooth, view="Time", cond=list(Group="Adults"), rm.ranef=TRUE, main="intercept+s(Time):GroupAdults", rug=FALSE) plot_smooth(gamm_smooth, view="Time", cond=list(Group="Adults", Subject="a01"), main="... + s(Time, Subject)", col='red', ylim=c(-15,20), rug=FALSE) plot_smooth(gamm_smooth, view="Time", cond=list(Group="Adults", Subject="a18"), add=TRUE, col='blue', xpd=TRUE) #----------------------------------# ##### 6: GAM behind the scenes ###### #----------------------------------# # ------ 6.1 ------ # size <- c(1.42,1.58,1.78,1.99,1.99,1.99,2.13,2.13,2.13, 2.32,2.32,2.32,2.32,2.32,2.43,2.43,2.78,2.98,2.98) wear <- c(4.0,4.2,2.5,2.6,2.8,2.4,3.2,2.4,2.6,4.8,2.9, 3.8,3.0,2.7,3.1,3.3,3.0,2.8,1.7) x <- size-min(size);x<-x/max(x) plot(x,wear,xlab="Scaled engine size",ylab="Wear index") rk <- function(x,z) # R(x,z) for cubic spline on [0,1] { ((z-0.5)^2-1/12)*((x-0.5)^2-1/12)/4- ((abs(x-z)-0.5)^4-(abs(x-z)-0.5)^2/2+7/240)/24 } spl.X <- function(x,xk) # set up model matrix for cubic penalized regression spline { q <- length(xk)+2 # number of parameters n <- length(x) # number of data X <- matrix(1,n,q) # initialized model matrix X[,2] <- x # set second column to x X[,3:q] <- outer(x,xk,FUN=rk) # and remaining to R(x,xk) X } xk <- 1:4/5 # choose some knots X <- spl.X(x,xk) # generate model matrix mod.1 <- lm(wear~X-1) # fit model xp <- 0:100/100 # x values for prediction Xp <- spl.X(xp,xk) # prediction matrix lines(xp,Xp%*%coef(mod.1)) # plot fitted spline abline(v=c(xk),lty=3) # plot knots