######### # Ducks # ######### install.packages(c("plyr","tidyr","lubridate","survival","icenReg","ggplot2","ggfortify")) library(plyr) ; library(Rcpp) ; library(tidyr) ; library(lubridate) ; library(survival) ; library(icenReg) library(ggplot2) ; library(ggfortify) ; library(eha) file.choose() #add working directory Birdsurv<-read.table("C:\\Birdsurvivaldata.csv",header=TRUE,sep=",") Birdsurv save(Birdsurv,file="Birdsurv.RData") attach(Birdsurv) Birdsurv$Age<-as.factor(Birdsurv$Age) class(Birdsurv$Age) Birdsurv$Age <- factor(Birdsurv$Age, levels = c("0","1"), labels = c("Young", "Old")) table(Birdsurv$Age) str(Birdsurv) # Kaplan-Meier estimation of survival Surv_function_KM <- survfit(Surv(Time,Indicator)~Age,data=Birdsurv) plot(Surv_function_KM, xlab = "Time (days)", lty = c(2,3), ylab="Kaplan-Meier survival function",mark.time = TRUE) legend(45, .95, c("Old","Young"), lty = 2:3) #log-rank test of survival differences survdiff(Surv(Time,Indicator)~Age,data=Birdsurv) #Cox proportional hazard model Surv(Time,Indicator) Birdsurvcox<-coxph(Surv(Time,Indicator)~Age,data=Birdsurv, model=TRUE) summary(Birdsurvcox) #Testing the proportional hazard assumption#### cox.zph(Birdsurvcox) ## Plot Martingale residuals over time bird_resid<-resid(Birdsurvcox,type="martingale") plot(Birdsurv$Time[Birdsurv$Indicator==1],bird_resid[Birdsurv$Indicator==1],xlab= "Time (days)", ylab="Martingale residuals",xlim=c(0,64),ylim=c(-0.8,1)) lines(Birdsurv$Time[Birdsurv$Indicator==0],bird_resid[Birdsurv$Indicator==0], type="p") lines(loess.smooth(Birdsurv$Time[Birdsurv$Indicator==1],bird_resid[Birdsurv$Indicator==1]), lwd=2, col="black") lines(loess.smooth(Birdsurv$Time[Birdsurv$Indicator==0],bird_resid[Birdsurv$Indicator==0]), lwd=2, lty=3, col="black") ## Plot Martingale residuals against linear predictor and covariate#### library(ggplot2) library(reshape2) Birdsurv$mg<-resid(Birdsurvcox,type="martingale") bird.melt <- melt(Birdsurv, value.name = "value", id.vars = c("Bird","mg"),measure.vars = "Age") ggplot(data = bird.melt, mapping = aes(x = value, y = mg)) + geom_point()+ scale_y_continuous(name = "Martingale Residuals") + scale_x_discrete("Age")+ theme_bw()+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) #Predicted survival function based on cox model#### Surv_function<-survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$surv #survival function plot(survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old"))), lty = c(2,3), xlab = "Time (days)", ylab="Predicted survival probabilities",mark.time = TRUE) legend(44, .98, c("Young", "Old"), lty = 2:3) survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$surv #survival function survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$time #time point for each observation #Life expectancy#### #remove the last object from each of the vectors or from the data.frame Surv1<-survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$surv[1:25,1] #survival function Surv2<-survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$surv[1:25,2] #survival function length(Surv1) Timesteps<-survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$time #time point for each observation # (from second object in the vector to the last object in the vector) - (from the first object in the vector to the before last) Timesteps Timesteps_vector<-Timesteps[2:length(Timesteps)] - Timesteps[1:(length(Timesteps)-1)] #Life expectancy at start of study Lifeexpectancy_young <- sum(Surv1*Timesteps_vector) Lifeexpectancy_old <- sum(Surv2*Timesteps_vector) #Predicted cumulative hazard function Surv1_young<-survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$surv[,1] #survival function Surv2_old<-survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$surv[,2] #survival function Timesteps<-survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$time #time point for each observation Cum_hazard_old_birds<- -log(Surv2_old) Cum_hazard_young_birds<- -log(Surv1_young) length(Timesteps) length(Cum_hazard_old_birds) plot(x=Timesteps,y=-log(Surv2_old),type="l",lty=3,ylab="Predicted cumulative hazard function",xlab="Time (days)") lines(x=Timesteps,y=-log(Surv1_young),lty=2) legend(5, .55, c("Young", "Old"), lty = 2:3) # Predicted hazard function #smooth.spline with 3 to 6 degrees of freedom Cumulative_hazard_birds<-data.frame(Timesteps, Cum_hazard_old_birds, Cum_hazard_young_birds) ssfit_old_birds_3df<-smooth.spline(x=Timesteps,y=Cum_hazard_old_birds,df=3) ssfit_old_birds_3df$fit ssfit_old_birds_4df<-smooth.spline(x=Timesteps,y=Cum_hazard_old_birds,df=4) ssfit_old_birds_4df$fit ssfit_old_birds_5df<-smooth.spline(x=Timesteps,y=Cum_hazard_old_birds,df=5) ssfit_old_birds_5df$fit ssfit_old_birds_6df<-smooth.spline(x=Timesteps,y=Cum_hazard_old_birds,df=6) ssfit_old_birds_6df$fit Cumulative_hazard_birds<-data.frame(Timesteps, Cum_hazard_old_birds, Cum_hazard_young_birds) ssfit_young_birds_3df<-smooth.spline(x=Timesteps,y=Cum_hazard_young_birds,df=3) ssfit_young_birds_3df$fit ssfit_young_birds_4df<-smooth.spline(x=Timesteps,y=Cum_hazard_young_birds,df=4) ssfit_young_birds_4df$fit ssfit_young_birds_5df<-smooth.spline(x=Timesteps,y=Cum_hazard_young_birds,df=5) ssfit_young_birds_5df$fit ssfit_young_birds_6df<-smooth.spline(x=Timesteps,y=Cum_hazard_young_birds,df=6) ssfit_young_birds_6df$fit require(graphics) par(mfrow=c(1,2)) plot(Cum_hazard_old_birds ~ Timesteps, data = Cumulative_hazard_birds, xlim=c(0,65),ylim=c(0,1.1),ylab="Predicted cumulative hazard function",xlab="Time (days)") old_birds.spl <- with(Cumulative_hazard_birds, smooth.spline(Timesteps, Cum_hazard_old_birds)) old_birds.spl lines(old_birds.spl, lty=1) ss3 <- smooth.spline(Cumulative_hazard_birds[,"Timesteps"], Cumulative_hazard_birds[,"Cum_hazard_old_birds"], df = 3) lines(ss3, lty = 2) ss4 <- smooth.spline(Cumulative_hazard_birds[,"Timesteps"], Cumulative_hazard_birds[,"Cum_hazard_old_birds"], df = 4) lines(ss4, lty=3) ss5 <- smooth.spline(Cumulative_hazard_birds[,"Timesteps"], Cumulative_hazard_birds[,"Cum_hazard_old_birds"], df = 5) lines(ss5, lty=4) ss6 <- smooth.spline(Cumulative_hazard_birds[,"Timesteps"], Cumulative_hazard_birds[,"Cum_hazard_old_birds"], df = 6) lines(ss6, lty=5) legend(5,1.1,c(paste("default [C.V.] => df =",round(old_birds.spl$df,1)), "s( * , df = 3)","s( * , df = 4)","s( * , df = 5)","s( * , df = 6)"), lty = c(1,2,3,4,5)) require(graphics) plot(Cum_hazard_young_birds ~ Timesteps, data = Cumulative_hazard_birds, xlim=c(0,65),ylim=c(0,1.1),ylab="Predicted cumulative hazard function",xlab="Time (days)") young_birds.spl <- with(Cumulative_hazard_birds, smooth.spline(Timesteps, Cum_hazard_young_birds)) young_birds.spl lines(young_birds.spl, lty=1) ssy3 <- smooth.spline(Cumulative_hazard_birds[,"Timesteps"], Cumulative_hazard_birds[,"Cum_hazard_young_birds"], df = 3) lines(ssy3, lty = 2) ssy4 <- smooth.spline(Cumulative_hazard_birds[,"Timesteps"], Cumulative_hazard_birds[,"Cum_hazard_young_birds"], df = 4) lines(ssy4, lty = 3) ssy5 <- smooth.spline(Cumulative_hazard_birds[,"Timesteps"], Cumulative_hazard_birds[,"Cum_hazard_young_birds"], df = 5) lines(ssy5, lty = 4) ssy6 <- smooth.spline(Cumulative_hazard_birds[,"Timesteps"], Cumulative_hazard_birds[,"Cum_hazard_young_birds"], df = 6) lines(ssy6, lty = 5) legend(5,1.1,c(paste("default [C.V.] => df =",round(young_birds.spl$df,1)), "s( * , df = 3)","s( * , df = 4)","s( * , df = 5)","s( * , df = 6)"), lty = c(1,2,3,4,5)) ## Residual (Tukey Anscombe) plot: #for old birds plot(residuals(old_birds.spl) ~ fitted(old_birds.spl)) abline(h = 0, col = "gray") plot(residuals(ss3) ~ fitted(ss3)) abline(h = 0, col = "gray") plot(residuals(ss4) ~ fitted(ss4)) abline(h = 0, col = "gray") plot(residuals(ss5) ~ fitted(ss5)) abline(h = 0, col = "gray") plot(residuals(ss6) ~ fitted(ss6)) abline(h = 0, col = "gray") #for young birds plot(residuals(young_birds.spl) ~ fitted(young_birds.spl)) abline(h = 0, col = "gray") plot(residuals(ssy3) ~ fitted(ssy3)) abline(h = 0, col = "gray") plot(residuals(ssy4) ~ fitted(ssy4)) abline(h = 0, col = "gray") plot(residuals(ssy5) ~ fitted(ssy5)) abline(h = 0, col = "gray") plot(residuals(ssy6) ~ fitted(ssy6)) abline(h = 0, col = "gray") ## consistency check: stopifnot(all.equal(Cumulative_hazard_birds$Cum_hazard_old_birds, fitted(old_birds.spl) + residuals(old_birds.spl))) stopifnot(all.equal(Cumulative_hazard_birds$Cum_hazard_young_birds, fitted(young_birds.spl) + residuals(young_birds.spl))) #plot hazard function hazard_old_birds<-predict(ss6, deriv=1) hazard_young_birds<-predict(ssy6, deriv=1) plot(hazard_old_birds$x,hazard_old_birds$y,ylab="Predicted hazard function",xlab="Time (days)") points(hazard_young_birds, pch=19) lines(hazard_old_birds, lty = 3) lines(hazard_young_birds, lty = 2) legend(42, .019, c("Young", "Old"), lty = 2:3) max(hazard_old_birds$y)#max reached at 32 days max(hazard_young_birds$y)#max reached at 32 days ######## # Ewes # ######## # Opening the file DataBS <- read.table("xxx/Ewes.txt", header=T,sep="\t",na.strings="NA") # Useful packages library(survival) ################# # Survival object # Survival data formatted as: (age beginning period, age end period], with '+' indicating censoring # Right censored and left truncated data : Surv(time1: truncation time i.e. when entering the period, here birth, # time2: death or censoring time i.e. end of studied period, event: 0 if censored 1 if first reproduction) surv <- Surv(rep(0,length(DataBS$Individual)),as.numeric(DataBS$Age),DataBS$FirstReproduction) ################# # Kaplan-Meier estimation of the survival function KM <- survfit(surv~1,type="kaplan-meier") plot(KM,xlab="Age (years)",ylab="Survival function",lty=2,lwd=2,xlim=c(1.9,4.3),conf.int=F) ################# # Semi-parametric Cox models # Effect of cohort on age at first reproduction (event) # No constraints on the baseline distribution cox <- coxph(surv ~ DataBS$Cohort) summary(cox) # Call: # coxph(formula = surv ~ DataBS$Cohort) # # n= 25, number of events= 18 # # coef exp(coef) se(coef) z Pr(>|z|) # DataBS$Cohort 0.3758 1.4561 0.3156 1.19 0.234 # # exp(coef) exp(-coef) lower .95 upper .95 # DataBS$Cohort 1.456 0.6868 0.7844 2.703 # # Concordance= 0.629 (se = 0.13 ) # Rsquare= 0.062 (max possible= 0.968 ) # Likelihood ratio test= 1.59 on 1 df, p=0.2067 # Wald test = 1.42 on 1 df, p=0.2339 # Score (logrank) test = 1.46 on 1 df, p=0.2274 # Test if the model respect the proportional hazard hypothesis # A p-value lower than 0.05 indicates a violation of the proportionality assumption cox.zph(cox) # The equivalent degrees of freedom and the generalized Akaike Information Criterion extractAIC(cox) # Estimated survival function for all population fit <- survfit(cox) survival <- fit$surv time <- fit$time lines(fit,lwd=2,conf.int=F) legend(3.15,1,legend=c('Kaplan-Meier','Cox model'),lty=c(2,1),lwd=2) # Life expectancy sum(survival[1:(length(survival)-1)]*(time[2:length(time)]-time[1:(length(time)-1)])) # at 2 years (beginning of the studied period) time[1]+sum(survival[1:(length(survival)-1)]*(time[2:length(time)]-time[1:(length(time)-1)])) # at birth (among the individuals living to 2 years old) # Estimated hazard function for all population Cum_hazard<- -log(survival) Cum_hazard_up<- -log(fit$upper) Cum_hazard_low<- -log(fit$lower) # smooth.spline needs at least 4 time values # Martingales residuals # Fit of the model: Martingale residuals res.mat <- residuals(cox, type="martingale", data=DataBS) plot(DataBS$Age[DataBS$FirstReproduction==1], res.mat[DataBS$FirstReproduction==1], xlim=c(2,4), ylim=c(-2.05,1), ylab="Martingale residuals", xlab="Time", pch=16) lines(DataBS$Age[DataBS$FirstReproduction==0], res.mat[DataBS$FirstReproduction==0], type="p") lines(loess.smooth(DataBS$Age[DataBS$FirstReproduction==1], res.mat[DataBS$FirstReproduction==1]), lwd=2, col="red") lines(loess.smooth(DataBS$Age[DataBS$FirstReproduction==0], res.mat[DataBS$FirstReproduction==0]), lwd=2, lty=3, col="red") # If the PH assumption was not respected, we should perform parametric models # Example: param <- survreg(Surv(as.numeric(DataBS$Age),DataBS$FirstReproduction) ~ DataBS$Cohort, dist="weibull") summary(param) # Call: # survreg(formula = Surv(as.numeric(DataBS$Age), DataBS$FirstReproduction) ~ # DataBS$Cohort, dist = "weibull") # Value Std. Error z p # (Intercept) 405.742 106.562 3.81 1.40e-04 # DataBS$Cohort -0.205 0.054 -3.80 1.46e-04 # Log(scale) -1.752 0.205 -8.55 1.25e-17 # # Scale= 0.173 # # Weibull distribution # Loglik(model)= -19.5 Loglik(intercept only)= -23.9 # Chisq= 8.8 on 1 degrees of freedom, p= 0.003 # Number of Newton-Raphson Iterations: 5 # n= 25 ############################################################################## ########### # Baboons # ########### #Packages install.packages(c("ssym", "interval", "survival", "zoo")) source("https://bioconductor.org/biocLite.R") biocLite("Icens") library(survival) ; library(MASS) ; library(zoo) ; library(ssym) ; library(GIGrvg) ; library(numDeriv) library(normalp) ; library(Formula) ; library(perm) ; library(Icens) ; library(MLEcens) ; library(interval) library(icenReg) ; library(Rcpp) ; library(coda) #Load Baboons data data(Baboons) #view names of variables in Baboons names(Baboons) #view data Baboons #summary of data summary(Baboons) #view specific rows of data Baboons[c(1,39,71,101, 150),] #number of censored and uncensored rows of data table(Baboons$cs) #add variables to Baboons Baboons <- within(Baboons, { delta <- rep(0, length(cs)) delta[cs == 0] <- 1 tt.L <- t tt.R <- t tt.L[cs == 1] <- 0.1 }) #add event variable, where 1 == end of event, 3 == interval (see Surv documentation in the survival package) Baboons$event<- ifelse(Baboons$delta==1,1,3) Surv(time=Baboons$tt.L,time2= Baboons$tt.R,event=Baboons$event,type="interval") #summary of data summary(Baboons) #view specific rows of data Baboons[c(1,39,71,101, 150),] ####predicted non-parametric, semi-parametric and parametric survival functions#### #Non-parametric model Kaplan-Meier Baboons_np<-Baboons[,4:3] baboon_fit_np<-ic_np(formula=NULL,data=Baboons_np) # Semi-parametric model baboon_Surv<-Surv(time=Baboons$tt.L, time2=Baboons$tt.R, type="interval2") baboon_fit_sp<-ic_sp(baboon_Surv~1,Baboons,model="ph") plot(baboon_fit_sp) ls(baboon_fit_sp) summary(baboon_fit_sp) #parametric models Baboons_par <- Baboons_np Baboons_par_Weibull_ph <- ic_par(formula=Surv(Baboons_par$tt.L,Baboons_par$tt.R, type="interval2")~1, data=Baboons_par, model = "ph", dist = "weibull") Baboons_par_Weibull_aft <- ic_par(formula=Surv(Baboons_par$tt.L,Baboons_par$tt.R, type="interval2")~1, data=Baboons_par, model = "aft", dist = "weibull") Baboons_par_AFT_loglogistic <- ic_par(formula=Surv(Baboons_par$tt.L,Baboons_par$tt.R, type="interval2")~1, data=Baboons_par, model = "aft", dist = "loglogistic") Baboons_par_Exponential_ph <- ic_par(formula=Surv(Baboons_par$tt.L,Baboons_par$tt.R, type="interval2")~1, data=Baboons_par, model = "ph", dist = "exponential") Baboons_par_Exponential_AFT <- ic_par(formula=Surv(Baboons_par$tt.L,Baboons_par$tt.R, type="interval2")~1, data=Baboons_par, model = "aft", dist = "exponential") plot(baboon_fit_np,ylab="Predicted survival functions",xlab="Time (hours)") lines(baboon_fit_sp,lty=2,lwd=2) abline(h = 0.5, col = "gray") Baboons_par_Weibull_ph_surv <- survCIs(Baboons_par_Weibull_ph, newdata = NULL, p = NULL, q = NULL, ci_level = 0,MC_samps = 4000) lines(Baboons_par_Weibull_ph_surv,col="green",lty=3) Baboons_par_AFT_loglogistic_surv <- survCIs(Baboons_par_AFT_loglogistic, newdata = NULL, p = NULL, q = NULL, ci_level = 0,MC_samps = 4000) lines(Baboons_par_AFT_loglogistic_surv,lty=4,col="purple") Baboons_par_Exponential_ph_surv <- survCIs(Baboons_par_Exponential_ph, newdata = NULL, p = NULL, q = NULL, ci_level = 0,MC_samps = 4000) lines(Baboons_par_Exponential_ph_surv,lty=5,col="red") legend(9,1,c("Kaplan-Meier","Cox","Weibull","Log-logistic","Exponential"), col=c("black","black","green","purple","red"),lty=c(2,1,1,1,1)) #Diagnostic plots diag_baseline(baboon_fit_sp, cols = NULL,lgdLocation = "topright") #when there are covarites, you can test the proportional hazard assumption with diag_covar #non-parametric estimation of survival function with Icens (Alternative) library(Icens) ; library(interval) result.icfit <- icfit(Surv(time=tt.L, time2=tt.R, type="interval2") ~ 1, conf.int=T, data=Baboons) #plot estimated survival curve and 95% confidence intervals plot(result.icfit, xlab="Time (hours)", ylab="Predicted Kaplan-Meier survival probability", estpar=list(col="blue", lwd=2), cipar=list(col="blue", lty=2),xlim=c(6.7,10.5)) #Life expectancy#### #remove the last object from each of the vectors or from the data.frame Baboons_survfit<-survfit(Surv(time=tt.L, time2=tt.R, type="interval2") ~1,data=Baboons)$surv[1:62] #survival function length(Baboons_survfit) Baboons_survfit_predicted_time<-survfit(Surv(time=tt.L, time2=tt.R, type="interval2") ~1,data=Baboons)$time #time point for each observation length(Baboons_survfit_predicted_time) # (from second object in the vector to the last object in the vector) - (from the first object in the vector to the before last) Baboons_survfit_predicted_time Baboons_survfit_predicted_time_vector<-Baboons_survfit_predicted_time[2:length(Baboons_survfit_predicted_time)] - Baboons_survfit_predicted_time[1:(length(Baboons_survfit_predicted_time)-1)] length(Baboons_survfit_predicted_time_vector) #Life expectancy at start of study Lifeexpectancy_baboons <- Baboons_survfit_predicted_time[1]+sum(Baboons_survfit* Baboons_survfit_predicted_time_vector) #Predicted cumulative hazard function Baboons_survfit <- survfit(Surv(time=tt.L, time2=tt.R, type="interval2") ~1,data=Baboons) plot(Baboons_survfit,fun="cumhaz", ylab="Predicted cumulative hazard function",xlab="Time (hours)") Baboons_survfit_predicted_surv<-survfit(Surv(time=tt.L, time2=tt.R, type="interval2") ~1,data=Baboons)$surv Cum_hazard_Baboons<- -log(Baboons_survfit_predicted_surv) Cum_hazard_Baboons_upper<- -log(Baboons_survfit$upper) Cum_hazard_Baboons_lower<- -log(Baboons_survfit$lower) Baboons_survfit_predicted_time<-survfit(Surv(time=tt.L, time2=tt.R, type="interval2") ~1,data=Baboons)$time plot(x=Baboons_survfit_predicted_time,y=Cum_hazard_Baboons,type="l",ylab="Predicted cumulative hazard function",xlab="Time (hours)",xlim=c(7,11)) lines(x=Baboons_survfit_predicted_time,y=Cum_hazard_Baboons_upper,lty=2) lines(x=Baboons_survfit_predicted_time,y=Cum_hazard_Baboons_lower,lty=2) # Predicted hazard function #smooth.spline with 3 to 6 degrees of freedom Baboons_Timesteps <- Baboons_survfit_predicted_time[1:61] Cum_hazard_Baboons <- Cum_hazard_Baboons[1:61] Cumulative_hazard_baboons<-data.frame(Baboons_Timesteps, Cum_hazard_Baboons) ssfit_Baboons_3df<-smooth.spline(x=Baboons_Timesteps,y=Cum_hazard_Baboons,df=3) ssfit_Baboons_3df$fit ssfit_Baboons_4df<-smooth.spline(x=Baboons_Timesteps,y=Cum_hazard_Baboons,df=4) ssfit_Baboons_4df$fit ssfit_Baboons_5df<-smooth.spline(x=Baboons_Timesteps,y=Cum_hazard_Baboons,df=5) ssfit_Baboons_5df$fit ssfit_Baboons_6df<-smooth.spline(x=Baboons_Timesteps,y=Cum_hazard_Baboons,df=6) ssfit_Baboons_6df$fit require(graphics) #without colour plot(Cum_hazard_Baboons ~ Baboons_Timesteps, data = Cumulative_hazard_baboons, xlim=c(6.7,11),xlab="Time (hours)",ylab="Predicted cumulative hazard") Baboons.spl <- with(Cumulative_hazard_baboons, smooth.spline(Baboons_Timesteps, Cum_hazard_Baboons)) Baboons.spl lines(Baboons.spl, lty=1) ss3_Baboons <- smooth.spline(Cumulative_hazard_baboons[,"Baboons_Timesteps"], Cumulative_hazard_baboons[,"Cum_hazard_Baboons"], df = 3) lines(ss3_Baboons, lty = 2) ss4_Baboons <- smooth.spline(Cumulative_hazard_baboons[,"Baboons_Timesteps"], Cumulative_hazard_baboons[,"Cum_hazard_Baboons"], df = 4) lines(ss4_Baboons, lty = 3) ss5_Baboons <- smooth.spline(Cumulative_hazard_baboons[,"Baboons_Timesteps"], Cumulative_hazard_baboons[,"Cum_hazard_Baboons"], df = 5) lines(ss5_Baboons, lty = 4) ss6_Baboons <- smooth.spline(Cumulative_hazard_baboons[,"Baboons_Timesteps"], Cumulative_hazard_baboons[,"Cum_hazard_Baboons"], df = 6) lines(ss6_Baboons, lty = 4) legend(7,4.9,c(paste("default [C.V.] => df =",round(Baboons.spl$df,1)), "s( * , df = 3)","s( * , df = 4)","s( * , df = 5)","s( * , df = 6)"), lty = c(1,2,3,4,5)) #with colour plot(Cum_hazard_Baboons ~ Baboons_Timesteps, data = Cumulative_hazard_baboons, main = "Cumhaz_baboons & smoothing splines",xlim=c(6.7,11),xlab="Time (hours)",ylab="Predicted cumulative hazard") Baboons.spl <- with(Cumulative_hazard_baboons, smooth.spline(Baboons_Timesteps, Cum_hazard_Baboons)) Baboons.spl lines(Baboons.spl, col = "blue") ss3_Baboons <- smooth.spline(Cumulative_hazard_baboons[,"Baboons_Timesteps"], Cumulative_hazard_baboons[,"Cum_hazard_Baboons"], df = 3) lines(ss3_Baboons, lty = 2, col = "orange") ss4_Baboons <- smooth.spline(Cumulative_hazard_baboons[,"Baboons_Timesteps"], Cumulative_hazard_baboons[,"Cum_hazard_Baboons"], df = 4) lines(ss4_Baboons, lty = 2, col = "green") ss5_Baboons <- smooth.spline(Cumulative_hazard_baboons[,"Baboons_Timesteps"], Cumulative_hazard_baboons[,"Cum_hazard_Baboons"], df = 5) lines(ss5_Baboons, lty = 2, col = "purple") ss6_Baboons <- smooth.spline(Cumulative_hazard_baboons[,"Baboons_Timesteps"], Cumulative_hazard_baboons[,"Cum_hazard_Baboons"], df = 6) lines(ss6_Baboons, lty = 2, col = "red") legend(7,4.9,c(paste("default [C.V.] => df =",round(Baboons.spl$df,1)), "s( * , df = 3)","s( * , df = 4)","s( * , df = 5)","s( * , df = 6)"), col = c("blue","orange","green","purple","red"), lty = c(1,2,2,2,2)) ## Residual (Tukey Anscombe) plot: plot(residuals(Baboons.spl) ~ fitted(Baboons.spl)) abline(h = 0, col = "gray") plot(residuals(ss3_Baboons) ~ fitted(ss3_Baboons)) abline(h = 0, col = "gray") plot(residuals(ss4_Baboons) ~ fitted(ss4_Baboons)) abline(h = 0, col = "gray") plot(residuals(ss5_Baboons) ~ fitted(ss5_Baboons)) abline(h = 0, col = "gray") plot(residuals(ss6_Baboons) ~ fitted(ss6_Baboons)) abline(h = 0, col = "gray") ## consistency check: stopifnot(all.equal(Cumulative_hazard_baboons$Cum_hazard_Baboons, fitted(Baboons.spl)+residuals(Baboons.spl))) #plot hazard function hazard_Baboons<-predict(ss6_Baboons, deriv=1) plot(hazard_Baboons$x,hazard_Baboons$y,ylab="Predicted hazard function",xlab="Time (hours)") lines(hazard_Baboons, lty = 3) max(hazard_Baboons$y)#max reached at 9.89 hours ############################################################################## ############## # Lemurs - 1 # ############## # Opening the file DataML<-read.table("xxx/Mortality_individual_information_data_Microcebus_murinus.txt", header=T,sep="\t",na.strings="NA") # Useful packages library(survival) ################# # Survival object # Survival data formatted as: (age beginning period, age end period], with '+' indicating censoring # Right censored and left truncated data : Surv(time1: truncation time i.e. when entering the period, # time2: death or censoring time i.e. end of period, event: 0 if censored 1 if dead) surv <- Surv(DataML$Age_Season_Start,DataML$Age_Season_End,DataML$Event) ################# # Kaplan-Meier estimation of the survival function - Example by sex KM <- survfit(surv~DataML$Sex,type="kaplan-meier") plot(KM,xlab="Age (days)",ylab="Kaplan-Meier estimation of survival", lty=c(2,1),lwd=2,xlim=c(min(KM$time),max(KM$time)+10)) legend(2000,1,legend=c('Males','Females'),lty=c(1,2),lwd=2) ################# # Semi-parametric Cox models # Effect of seasonal rhythm (Mean.SR), season (Current_Season) and identity (Individual, frailty random effect) on mortality # No constraints on the baseline distribution # We chose a gamma distribution for the frailty distribution because it is the most used in litterature cox <- coxph(surv ~ DataML$Sex + DataML$Current_Season + frailty(DataML$Individual, distribution="gamma")) summary(cox) # Call: # coxph(formula = surv ~ DataML$Sex + DataML$Current_Season + frailty(DataML$Individual, distribution = "gamma")) # # n= 3696, number of events= 258 # # coef se(coef) se2 Chisq DF p # DataML$SexM -0.1569 0.1254 0.1253 1.57 1.00 2.1e-01 # DataML$Current_SeasonSD -0.7317 0.1866 0.1866 15.38 1.00 8.8e-05 # frailty(DataML$Individual 0.01 0.01 6.9e-01 # # exp(coef) exp(-coef) lower .95 upper .95 # DataML$SexM 0.8548 1.170 0.6686 1.0929 # DataML$Current_SeasonSD 0.4811 2.079 0.3337 0.6935 # # Iterations: 5 outer, 31 Newton-Raphson # Variance of random effect= 5e-05 I-likelihood = -1384.4 # Degrees of freedom for terms= 1 1 0 # Concordance= 0.72 (se = 0.021 ) # Likelihood ratio test= 17.25 on 2.01 df, p=0.000182 # Test if the model respect the proportional hazard hypothesis # A p-value lower than 0.05 indicates a violation of the proportionality assumption cox.zph(cox) # Fit of the model: Martingale residuals res.mat <- residuals(cox, type="martingale", data=DataML) plot(DataML$Age_Season_End[DataML$Event==1], res.mat[DataML$Event==1], xlim=c(400,3010), ylim=c(-0.9,1.05),ylab="Martingale residuals", xlab="Time", pch=16) lines(DataML$Age_Season_End[DataML$Event==0], res.mat[DataML$Event==0], type="p") lines(loess.smooth(DataML$Age_Season_End[DataML$Event==1], res.mat[DataML$Event==1]), lwd=2, col="red") lines(loess.smooth(DataML$Age_Season_End[DataML$Event==0], res.mat[DataML$Event==0]), lwd=2, lty=3, col="red") # Useful metrics # The equivalent degrees of freedom and the generalized Akaike Information Criterion extractAIC(cox) ### # Cox model without frailty cox.nf <- coxph(surv ~ Sex + Current_Season, data=DataML) summary(cox.nf) # Call: # coxph(formula = surv ~ Sex + Current_Season, data = DataML) # # n= 3696, number of events= 258 # # coef exp(coef) se(coef) z Pr(>|z|) # SexM -0.1569 0.8548 0.1253 -1.252 0.211 # Current_SeasonSD -0.7317 0.4811 0.1866 -3.921 8.81e-05 *** # --- # Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 # # exp(coef) exp(-coef) lower .95 upper .95 # SexM 0.8548 1.170 0.6686 1.0929 # Current_SeasonSD 0.4811 2.079 0.3337 0.6935 # # Concordance= 0.539 (se = 0.019 ) # Rsquare= 0.005 (max possible= 0.529 ) # Likelihood ratio test= 17.23 on 2 df, p=0.0001817 # Wald test = 17 on 2 df, p=0.0002039 # Score (logrank) test = 16.99 on 2 df, p=0.0002042 # Test if the model respect the proportional hazard hypothesis # A p-value lower than 0.05 indicates a violation of the proportionality assumption cox.zph(cox.nf) # Fit of the model: Martingale residuals res.mat <- residuals(cox.nf, type="martingale", data=DataML) plot(DataML$Age_Season_End[DataML$Event==1], res.mat[DataML$Event==1], xlim=c(400,3010), ylim=c(-0.9,1.05),ylab="Martingale residuals", xlab="Time", pch=16) lines(DataML$Age_Season_End[DataML$Event==0], res.mat[DataML$Event==0], type="p") lines(loess.smooth(DataML$Age_Season_End[DataML$Event==1], res.mat[DataML$Event==1]), lwd=2, col="red") lines(loess.smooth(DataML$Age_Season_End[DataML$Event==0], res.mat[DataML$Event==0]), lwd=2, lty=3, col="red") # Useful metrics # The equivalent degrees of freedom and the generalized Akaike Information Criterion extractAIC(cox.nf) #Predicted survival function based on cox.nf model Surv_function<-survfit(cox.nf,newdata=data.frame(Sex=c("M","F"),Current_Season=c("LD","LD")))$surv #survival function plot(survfit(cox.nf,newdata=data.frame(Sex=c("M","F"),Current_Season=c("LD","LD"))), xlim=c(300,3000), lty =c(1,2), lwd=2, xlab = "Time (days)", ylab="Predicted survival function") legend(2200,1, c("Males","Females"), lty =c(1,2), lwd=2) survfit(cox.nf,newdata=data.frame(Sex=c("M","F"),Current_Season=c("LD","LD")))$surv #survival function survfit(cox.nf,newdata=data.frame(Sex=c("M","F"),Current_Season=c("LD","LD")))$time #time point for each observation #Life expectancy #remove the last object from each of the vectors or from the data.frame Surv1<-survfit(cox.nf,newdata=data.frame(Sex=c("M","F"), Current_Season=c("LD","LD")))$surv[1:(dim(Surv_function)[1]-1),1] #survival function Surv2<-survfit(cox.nf,newdata=data.frame(Sex=c("M","F"), Current_Season=c("LD","LD")))$surv[1:(dim(Surv_function)[1]-1),2] #survival function length(Surv1) Timesteps<-survfit(cox.nf,newdata=data.frame(Sex=c("M","F"),Current_Season=c("LD","LD")))$time #time point for each observation # (from second object in the vector to the last object in the vector) - (from the first object in the vector to the before last) Timesteps Timesteps_vector<-Timesteps[2:length(Timesteps)] - Timesteps[1:(length(Timesteps)-1)] #Life expectancy at start of study Lifeexpectancy_1 <- sum(Surv1*Timesteps_vector) Lifeexpectancy_2 <- sum(Surv2*Timesteps_vector) #Predicted cumulative hazard function Surv_1<-survfit(cox.nf,newdata=data.frame(Sex=c("M","F"),Current_Season=c("LD","LD")))$surv[,1] #survival function for males Surv_2<-survfit(cox.nf,newdata= data.frame(Sex=c("M","F"),Current_Season=c("LD","LD")))$surv[,2] #survival function for females Timesteps<-survfit(cox.nf,newdata= data.frame(Sex=c("M","F"),Current_Season=c("LD","LD")))$time #time point for each observation Cum_hazard_1<- -log(Surv_1) Cum_hazard_2<- -log(Surv_2) length(Timesteps) length(Cum_hazard_1) plot(x=Timesteps,y=-log(Surv_1),type="l", lty =1, lwd=2, ylim=c(0,5.2), ylab="Predicted cumulative hazard function",xlab="Time (days)") lines(x=Timesteps,y=-log(Surv_2), lty =2, lwd=2) legend(370,5.2, c("Males","Females"), lty =c(1,2), lwd=2) # Predicted hazard function #smooth.spline with 3 to 6 degrees of freedom Cumulative_hazard<-data.frame(Timesteps, Cum_hazard_1, Cum_hazard_2) ssfit_1_3df<-smooth.spline(x=Timesteps,y=Cum_hazard_1,df=3) ssfit_1_3df$fit ssfit_1_4df<-smooth.spline(x=Timesteps,y=Cum_hazard_1,df=4) ssfit_1_4df$fit ssfit_1_5df<-smooth.spline(x=Timesteps,y=Cum_hazard_1,df=5) ssfit_1_5df$fit ssfit_1_6df<-smooth.spline(x=Timesteps,y=Cum_hazard_1,df=6) ssfit_1_6df$fit ssfit_2_3df<-smooth.spline(x=Timesteps,y=Cum_hazard_2,df=3) ssfit_2_3df$fit ssfit_2_4df<-smooth.spline(x=Timesteps,y=Cum_hazard_2,df=4) ssfit_2_4df$fit ssfit_2_5df<-smooth.spline(x=Timesteps,y=Cum_hazard_2,df=5) ssfit_2_5df$fit ssfit_2_6df<-smooth.spline(x=Timesteps,y=Cum_hazard_2,df=6) ssfit_2_6df$fit require(graphics) plot(Cum_hazard_1 ~ Timesteps, data = Cumulative_hazard, type="l", ylim=c(-0.2,5.2), ylab="Predicted cumulative hazard function",xlab="Time (days)") h1.spl <- with(Cumulative_hazard, smooth.spline(Timesteps, Cum_hazard_1)) h1.spl lines(h1.spl, lty=1) ss3_1 <- smooth.spline(Cumulative_hazard[,"Timesteps"], Cumulative_hazard[,"Cum_hazard_1"], df = 3) lines(ss3_1, lty = 2) ss4_1 <- smooth.spline(Cumulative_hazard[,"Timesteps"], Cumulative_hazard[,"Cum_hazard_1"], df = 4) lines(ss4_1, lty=3) ss5_1 <- smooth.spline(Cumulative_hazard[,"Timesteps"], Cumulative_hazard[,"Cum_hazard_1"], df = 5) lines(ss5_1, lty=4) ss6_1 <- smooth.spline(Cumulative_hazard[,"Timesteps"], Cumulative_hazard[,"Cum_hazard_1"], df = 6) lines(ss6_1, lty=5) legend(370,5.2,c(paste("default [C.V.] => df =",round(h1.spl$df,1)), "s( * , df = 3)","s( * , df = 4)","s( * , df = 5)","s( * , df = 6)"), lty = c(1,2,3,4,5)) require(graphics) plot(Cum_hazard_2 ~ Timesteps, data = Cumulative_hazard, type="l",ylim=c(-0.2,5.2), ylab="Predicted cumulative hazard function",xlab="Time (days)") h2.spl <- with(Cumulative_hazard, smooth.spline(Timesteps, Cum_hazard_2)) h2.spl lines(h2.spl, lty=1) ss3_2 <- smooth.spline(Cumulative_hazard[,"Timesteps"], Cumulative_hazard[,"Cum_hazard_2"], df = 3) lines(ss3_2, lty = 2) ss4_2 <- smooth.spline(Cumulative_hazard[,"Timesteps"], Cumulative_hazard[,"Cum_hazard_2"], df = 4) lines(ss4_2, lty = 3) ss5_2 <- smooth.spline(Cumulative_hazard[,"Timesteps"], Cumulative_hazard[,"Cum_hazard_2"], df = 5) lines(ss5_2, lty = 4) ss6_2 <- smooth.spline(Cumulative_hazard[,"Timesteps"], Cumulative_hazard[,"Cum_hazard_2"], df = 6) lines(ss6_2, lty = 5) legend(370,5.2,c(paste("default [C.V.] => df =",round(h2.spl$df,1)), "s( * , df = 3)","s( * , df = 4)","s( * , df = 5)","s( * , df = 6)"), lty = c(1,2,3,4,5)) ## Residual (Tukey Anscombe) plot: plot(residuals(h1.spl) ~ fitted(h1.spl)) abline(h = 0, col = "gray") plot(residuals(ss3_1) ~ fitted(ss3_1)) abline(h = 0, col = "gray") plot(residuals(ss4_1) ~ fitted(ss4_1)) abline(h = 0, col = "gray") plot(residuals(ss5_1) ~ fitted(ss5_1)) abline(h = 0, col = "gray") plot(residuals(ss6_1) ~ fitted(ss6_1)) abline(h = 0, col = "gray") plot(residuals(h2.spl) ~ fitted(h2.spl)) abline(h = 0, col = "gray") plot(residuals(ss3_2) ~ fitted(ss3_2)) abline(h = 0, col = "gray") plot(residuals(ss4_2) ~ fitted(ss4_2)) abline(h = 0, col = "gray") plot(residuals(ss5_2) ~ fitted(ss5_2)) abline(h = 0, col = "gray") plot(residuals(ss6_2) ~ fitted(ss6_2)) abline(h = 0, col = "gray") ## consistency check: stopifnot(all.equal(Cumulative_hazard$Cum_hazard_1,fitted(h1.spl) + residuals(h1.spl))) stopifnot(all.equal(Cumulative_hazard$Cum_hazard_2,fitted(h2.spl) + residuals(h2.spl))) #plot hazard function hazard_1<-predict(ss6_1, deriv=1) hazard_2<-predict(ss6_2, deriv=1) plot(hazard_1$x,hazard_1$y,type="l",lty=1, lwd=2, ylim=c(0,0.0037), ylab="Predicted hazard function",xlab="Time (days)") lines(hazard_2, lty=2, lwd=2) legend(370,0.0035, c("Males","Females"), lty =c(1,2), lwd=2) ####### #GLMM library(lme4) names <- unique(DataML$Individual[which(DataML$Censoring==0)]) sub2 <- DataML[is.element(DataML$Individual,names)==TRUE,] sub2$alive[which(sub2$Event==1)] <- 0 sub2$alive[which(sub2$Event==0)] <- 1 glmer1 <- glmer(alive ~ Sex + Current_Season + (1|Individual), data=sub2, family="binomial") summary(glmer1) ############################################################################## ############## # Lemurs - 2 # ############## # Opening the file DataML<-read.table("xxx/Mortality_individual_information_data_Microcebus_murinus.txt", header=T,sep="\t",na.strings="NA") # Useful packages library(survival) library(eha) ################# # Survival object # Survival data formatted as: (age beginning period, age end period], with '+' indicating censoring # Right censored and left truncated data : Surv(time1: truncation time i.e. when entering the period, # time2: death or censoring time i.e. end of period, event: 0 if censored 1 if dead) surv <- Surv(DataML$Age_Season_Start,DataML$Age_Season_End,DataML$Event) ################# # Kaplan-Meier estimation of the survival function - Example by sex KM <- survfit(surv~DataML$Sex,type="kaplan-meier") plot(KM,col=c('red','blue'),xlab="Age (days)",ylab="Survival",lty=1,lwd=2,xlim=c(min(KM$time),max(KM$time)+10)) legend(1700,1,legend=c('Males','Females'),col=c('blue','red'),lty=1,lwd=2) ################# # Semi-parametric Cox models # Effect of seasonal rhythm (Mean.SR), season (Current_Season) and identity (Individual, frailty random effect) on mortality # No constraints on the baseline distribution # We chose a gamma distribution for the frailty distribution because it is the most used in litterature cox <- coxph(surv ~ DataML$Current_Season + DataML$Sex+DataML$Cluster_YearObs+frailty.gamma(DataML$Individual)) summary(cox) # Test if the model respect the proportional hazard hypothesis # A p-value lower than 0.05 indicates a violation of the proportionality assumption cox.zph(cox) # Fit of the model: Martingale residuals res.mat <- residuals(cox, type="martingale", data=DataML) plot(DataML$Age_Season_End[DataML$Event==1], res.mat[DataML$Event==1], xlim=c(400,3010), ylim=c(-0.9,1.05),ylab="Martingale residuals", xlab="Time", pch=16) lines(DataML$Age_Season_End[DataML$Event==0], res.mat[DataML$Event==0], type="p") lines(loess.smooth(DataML$Age_Season_End[DataML$Event==1], res.mat[DataML$Event==1]), lwd=2, col="red") lines(loess.smooth(DataML$Age_Season_End[DataML$Event==0], res.mat[DataML$Event==0]), lwd=2, lty=3, col="red") # Useful metrics # The equivalent degrees of freedom and the generalized Akaike Information Criterion extractAIC(cox) ################### # Parametric models ph <- phreg(surv ~ DataML$Mean.SR + DataML$Current_Season + DataML$Sex + frailty.gamma(DataML$Individual), dist="gompertz") summary(ph) extractAIC(ph) plot(ph) aft <- aftreg(surv ~ DataML$Mean.SR + DataML$Current_Season + DataML$Sex + frailty.gamma(DataML$Individual), dist="gompertz") summary(aft) extractAIC(aft) plot(aft)