#########
# Ducks #
#########

install.packages(c("plyr","tidyr","lubridate","survival","icenReg","ggplot2","ggfortify"))
library(plyr) ; library(Rcpp) ; library(tidyr) ; library(lubridate) ; library(survival) ; library(icenReg)
library(ggplot2) ; library(ggfortify) ; library(eha)

file.choose()
#add working directory
Birdsurv<-read.table("C:\\Birdsurvivaldata.csv",header=TRUE,sep=",")
Birdsurv
save(Birdsurv,file="Birdsurv.RData")
attach(Birdsurv)

Birdsurv$Age<-as.factor(Birdsurv$Age)
class(Birdsurv$Age)
Birdsurv$Age <- factor(Birdsurv$Age, levels = c("0","1"), labels = c("Young", "Old"))
table(Birdsurv$Age)
str(Birdsurv)


# Kaplan-Meier estimation of survival
Surv_function_KM <- survfit(Surv(Time,Indicator)~Age,data=Birdsurv)
plot(Surv_function_KM, xlab = "Time (days)", lty = c(2,3), ylab="Kaplan-Meier survival function",mark.time = TRUE)
legend(45, .95, c("Old","Young"), lty = 2:3)

#log-rank test of survival differences
survdiff(Surv(Time,Indicator)~Age,data=Birdsurv)


#Cox proportional hazard model
Surv(Time,Indicator)
Birdsurvcox<-coxph(Surv(Time,Indicator)~Age,data=Birdsurv, model=TRUE)
summary(Birdsurvcox)

#Testing the proportional hazard assumption####
cox.zph(Birdsurvcox)

## Plot Martingale residuals over time
bird_resid<-resid(Birdsurvcox,type="martingale")
plot(Birdsurv$Time[Birdsurv$Indicator==1],bird_resid[Birdsurv$Indicator==1],xlab= "Time (days)", ylab="Martingale residuals",xlim=c(0,64),ylim=c(-0.8,1))
lines(Birdsurv$Time[Birdsurv$Indicator==0],bird_resid[Birdsurv$Indicator==0], type="p")
lines(loess.smooth(Birdsurv$Time[Birdsurv$Indicator==1],bird_resid[Birdsurv$Indicator==1]), lwd=2, col="black")
lines(loess.smooth(Birdsurv$Time[Birdsurv$Indicator==0],bird_resid[Birdsurv$Indicator==0]), lwd=2, lty=3, col="black")

## Plot Martingale residuals against linear predictor and covariate####
library(ggplot2)
library(reshape2)
Birdsurv$mg<-resid(Birdsurvcox,type="martingale")
bird.melt <- melt(Birdsurv, value.name = "value", id.vars = c("Bird","mg"),measure.vars = "Age")
ggplot(data = bird.melt, mapping = aes(x = value, y = mg)) + geom_point()+
  scale_y_continuous(name = "Martingale Residuals") + scale_x_discrete("Age")+
  theme_bw()+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())


#Predicted survival function based on cox model####
Surv_function<-survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$surv #survival function

plot(survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old"))), lty = c(2,3), xlab = "Time (days)", ylab="Predicted survival probabilities",mark.time = TRUE)
legend(44, .98, c("Young", "Old"), lty = 2:3)
survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$surv #survival function
survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$time #time point for each observation

#Life expectancy####
#remove the last object from each of the vectors or from the data.frame
Surv1<-survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$surv[1:25,1] #survival function
Surv2<-survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$surv[1:25,2] #survival function
length(Surv1)

Timesteps<-survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$time #time point for each observation
# (from second object in the vector to the last object in the vector) - (from the first object in the vector to the before last)
Timesteps
Timesteps_vector<-Timesteps[2:length(Timesteps)] - Timesteps[1:(length(Timesteps)-1)]

#Life expectancy at start of study
Lifeexpectancy_young   <- sum(Surv1*Timesteps_vector)
Lifeexpectancy_old   <- sum(Surv2*Timesteps_vector)

#Predicted cumulative hazard function 
Surv1_young<-survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$surv[,1] #survival function
Surv2_old<-survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$surv[,2] #survival function
Timesteps<-survfit(Birdsurvcox,newdata= data.frame(Age=c("Young", "Old")))$time #time point for each observation
Cum_hazard_old_birds<- -log(Surv2_old)
Cum_hazard_young_birds<- -log(Surv1_young)
length(Timesteps)
length(Cum_hazard_old_birds)

plot(x=Timesteps,y=-log(Surv2_old),type="l",lty=3,ylab="Predicted cumulative hazard function",xlab="Time (days)")
lines(x=Timesteps,y=-log(Surv1_young),lty=2)
legend(5, .55, c("Young", "Old"), lty = 2:3)

# Predicted hazard function
#smooth.spline with 3 to 6 degrees of freedom
Cumulative_hazard_birds<-data.frame(Timesteps, Cum_hazard_old_birds, Cum_hazard_young_birds)
ssfit_old_birds_3df<-smooth.spline(x=Timesteps,y=Cum_hazard_old_birds,df=3)
ssfit_old_birds_3df$fit

ssfit_old_birds_4df<-smooth.spline(x=Timesteps,y=Cum_hazard_old_birds,df=4)
ssfit_old_birds_4df$fit

ssfit_old_birds_5df<-smooth.spline(x=Timesteps,y=Cum_hazard_old_birds,df=5)
ssfit_old_birds_5df$fit

ssfit_old_birds_6df<-smooth.spline(x=Timesteps,y=Cum_hazard_old_birds,df=6)
ssfit_old_birds_6df$fit

Cumulative_hazard_birds<-data.frame(Timesteps, Cum_hazard_old_birds, Cum_hazard_young_birds)
ssfit_young_birds_3df<-smooth.spline(x=Timesteps,y=Cum_hazard_young_birds,df=3)
ssfit_young_birds_3df$fit

ssfit_young_birds_4df<-smooth.spline(x=Timesteps,y=Cum_hazard_young_birds,df=4)
ssfit_young_birds_4df$fit

ssfit_young_birds_5df<-smooth.spline(x=Timesteps,y=Cum_hazard_young_birds,df=5)
ssfit_young_birds_5df$fit

ssfit_young_birds_6df<-smooth.spline(x=Timesteps,y=Cum_hazard_young_birds,df=6)
ssfit_young_birds_6df$fit

require(graphics)
par(mfrow=c(1,2))
plot(Cum_hazard_old_birds ~ Timesteps, data = Cumulative_hazard_birds, xlim=c(0,65),ylim=c(0,1.1),ylab="Predicted cumulative hazard function",xlab="Time (days)")
old_birds.spl <- with(Cumulative_hazard_birds, smooth.spline(Timesteps, Cum_hazard_old_birds))
old_birds.spl
lines(old_birds.spl, lty=1)
ss3 <- smooth.spline(Cumulative_hazard_birds[,"Timesteps"], Cumulative_hazard_birds[,"Cum_hazard_old_birds"], df = 3)
lines(ss3, lty = 2)
ss4 <- smooth.spline(Cumulative_hazard_birds[,"Timesteps"], Cumulative_hazard_birds[,"Cum_hazard_old_birds"], df = 4)
lines(ss4, lty=3)
ss5 <- smooth.spline(Cumulative_hazard_birds[,"Timesteps"], Cumulative_hazard_birds[,"Cum_hazard_old_birds"], df = 5)
lines(ss5, lty=4)
ss6 <- smooth.spline(Cumulative_hazard_birds[,"Timesteps"], Cumulative_hazard_birds[,"Cum_hazard_old_birds"], df = 6)
lines(ss6, lty=5)
legend(5,1.1,c(paste("default [C.V.] => df =",round(old_birds.spl$df,1)),
               "s( * , df = 3)","s( * , df = 4)","s( * , df = 5)","s( * , df = 6)"), lty = c(1,2,3,4,5))

require(graphics)
plot(Cum_hazard_young_birds ~ Timesteps, data = Cumulative_hazard_birds, xlim=c(0,65),ylim=c(0,1.1),ylab="Predicted cumulative hazard function",xlab="Time (days)")
young_birds.spl <- with(Cumulative_hazard_birds, smooth.spline(Timesteps, Cum_hazard_young_birds))
young_birds.spl
lines(young_birds.spl, lty=1)
ssy3 <- smooth.spline(Cumulative_hazard_birds[,"Timesteps"], Cumulative_hazard_birds[,"Cum_hazard_young_birds"], df = 3)
lines(ssy3, lty = 2)
ssy4 <- smooth.spline(Cumulative_hazard_birds[,"Timesteps"], Cumulative_hazard_birds[,"Cum_hazard_young_birds"], df = 4)
lines(ssy4, lty = 3)
ssy5 <- smooth.spline(Cumulative_hazard_birds[,"Timesteps"], Cumulative_hazard_birds[,"Cum_hazard_young_birds"], df = 5)
lines(ssy5, lty = 4)
ssy6 <- smooth.spline(Cumulative_hazard_birds[,"Timesteps"], Cumulative_hazard_birds[,"Cum_hazard_young_birds"], df = 6)
lines(ssy6, lty = 5)
legend(5,1.1,c(paste("default [C.V.] => df =",round(young_birds.spl$df,1)),
               "s( * , df = 3)","s( * , df = 4)","s( * , df = 5)","s( * , df = 6)"), lty = c(1,2,3,4,5))

## Residual (Tukey Anscombe) plot:
#for old birds
plot(residuals(old_birds.spl) ~ fitted(old_birds.spl))
abline(h = 0, col = "gray")
plot(residuals(ss3) ~ fitted(ss3))
abline(h = 0, col = "gray")
plot(residuals(ss4) ~ fitted(ss4))
abline(h = 0, col = "gray")
plot(residuals(ss5) ~ fitted(ss5))
abline(h = 0, col = "gray")
plot(residuals(ss6) ~ fitted(ss6))
abline(h = 0, col = "gray")

#for young birds
plot(residuals(young_birds.spl) ~ fitted(young_birds.spl))
abline(h = 0, col = "gray")
plot(residuals(ssy3) ~ fitted(ssy3))
abline(h = 0, col = "gray")
plot(residuals(ssy4) ~ fitted(ssy4))
abline(h = 0, col = "gray")
plot(residuals(ssy5) ~ fitted(ssy5))
abline(h = 0, col = "gray")
plot(residuals(ssy6) ~ fitted(ssy6))
abline(h = 0, col = "gray")

## consistency check:
stopifnot(all.equal(Cumulative_hazard_birds$Cum_hazard_old_birds,
                    fitted(old_birds.spl) + residuals(old_birds.spl)))
stopifnot(all.equal(Cumulative_hazard_birds$Cum_hazard_young_birds,
                    fitted(young_birds.spl) + residuals(young_birds.spl)))

#plot hazard function
hazard_old_birds<-predict(ss6, deriv=1) 
hazard_young_birds<-predict(ssy6, deriv=1) 
plot(hazard_old_birds$x,hazard_old_birds$y,ylab="Predicted hazard function",xlab="Time (days)")
points(hazard_young_birds, pch=19)
lines(hazard_old_birds, lty = 3)
lines(hazard_young_birds, lty = 2)
legend(42, .019, c("Young", "Old"), lty = 2:3)
max(hazard_old_birds$y)#max reached at 32 days
max(hazard_young_birds$y)#max reached at 32 days



########
# Ewes #
########

# Opening the file
DataBS <- read.table("xxx/Ewes.txt", header=T,sep="\t",na.strings="NA") 

# Useful packages
library(survival)

#################
# Survival object
# Survival data formatted as: (age beginning period, age end period], with '+' indicating censoring
# Right censored and left truncated data : Surv(time1: truncation time i.e. when entering the period, here birth,
# time2: death or censoring time i.e. end of studied period, event: 0 if censored 1 if first reproduction)
surv <- Surv(rep(0,length(DataBS$Individual)),as.numeric(DataBS$Age),DataBS$FirstReproduction)

#################
# Kaplan-Meier estimation of the survival function
KM <- survfit(surv~1,type="kaplan-meier")
plot(KM,xlab="Age (years)",ylab="Survival function",lty=2,lwd=2,xlim=c(1.9,4.3),conf.int=F)

#################
# Semi-parametric Cox models
# Effect of cohort on age at first reproduction (event)
# No constraints on the baseline distribution
cox <- coxph(surv ~ DataBS$Cohort)
summary(cox)
# Call:
#   coxph(formula = surv ~ DataBS$Cohort)
# 
# n= 25, number of events= 18 
# 
#                 coef exp(coef) se(coef)    z Pr(>|z|)
# DataBS$Cohort 0.3758    1.4561   0.3156 1.19    0.234
# 
#               exp(coef) exp(-coef) lower .95 upper .95
# DataBS$Cohort     1.456     0.6868    0.7844     2.703
# 
# Concordance= 0.629  (se = 0.13 )
# Rsquare= 0.062   (max possible= 0.968 )
# Likelihood ratio test= 1.59  on 1 df,   p=0.2067
# Wald test            = 1.42  on 1 df,   p=0.2339
# Score (logrank) test = 1.46  on 1 df,   p=0.2274

# Test if the model respect the proportional hazard hypothesis
# A p-value lower than 0.05 indicates a violation of the proportionality assumption
cox.zph(cox)

# The equivalent degrees of freedom and the generalized Akaike Information Criterion
extractAIC(cox)

# Estimated survival function for all population
fit <- survfit(cox)
survival <- fit$surv
time <- fit$time

lines(fit,lwd=2,conf.int=F)
legend(3.15,1,legend=c('Kaplan-Meier','Cox model'),lty=c(2,1),lwd=2)

# Life expectancy
sum(survival[1:(length(survival)-1)]*(time[2:length(time)]-time[1:(length(time)-1)]))         
# at 2 years (beginning of the studied period)
time[1]+sum(survival[1:(length(survival)-1)]*(time[2:length(time)]-time[1:(length(time)-1)]))  
# at birth (among the individuals living to 2 years old)

# Estimated hazard function for all population
Cum_hazard<- -log(survival)
Cum_hazard_up<- -log(fit$upper)
Cum_hazard_low<- -log(fit$lower)

# smooth.spline needs at least 4 time values

# Martingales residuals
# Fit of the model: Martingale residuals
res.mat <- residuals(cox, type="martingale", data=DataBS)
plot(DataBS$Age[DataBS$FirstReproduction==1], res.mat[DataBS$FirstReproduction==1], xlim=c(2,4), 
     ylim=c(-2.05,1), ylab="Martingale residuals", xlab="Time", pch=16)
lines(DataBS$Age[DataBS$FirstReproduction==0], res.mat[DataBS$FirstReproduction==0], type="p")
lines(loess.smooth(DataBS$Age[DataBS$FirstReproduction==1], res.mat[DataBS$FirstReproduction==1]), 
      lwd=2, col="red")
lines(loess.smooth(DataBS$Age[DataBS$FirstReproduction==0], res.mat[DataBS$FirstReproduction==0]), 
      lwd=2, lty=3, col="red")

# If the PH assumption was not respected, we should perform parametric models
# Example:

param <- survreg(Surv(as.numeric(DataBS$Age),DataBS$FirstReproduction) ~ DataBS$Cohort, dist="weibull")
summary(param)
# Call:
#   survreg(formula = Surv(as.numeric(DataBS$Age), DataBS$FirstReproduction) ~ 
#             DataBS$Cohort, dist = "weibull")
#                 Value Std. Error     z        p
# (Intercept)   405.742    106.562  3.81 1.40e-04
# DataBS$Cohort  -0.205      0.054 -3.80 1.46e-04
# Log(scale)     -1.752      0.205 -8.55 1.25e-17
# 
# Scale= 0.173 
# 
# Weibull distribution
# Loglik(model)= -19.5   Loglik(intercept only)= -23.9
# Chisq= 8.8 on 1 degrees of freedom, p= 0.003 
# Number of Newton-Raphson Iterations: 5 
# n= 25  


##############################################################################


###########
# Baboons #
###########

#Packages
install.packages(c("ssym", "interval", "survival", "zoo"))
source("https://bioconductor.org/biocLite.R")
biocLite("Icens")
library(survival) ; library(MASS) ; library(zoo) ; library(ssym) ; library(GIGrvg) ; library(numDeriv) 
library(normalp) ; library(Formula) ; library(perm) ; library(Icens) ; library(MLEcens) ; library(interval)
library(icenReg) ;   library(Rcpp) ; library(coda) 

#Load Baboons data
data(Baboons) 
#view names of variables in Baboons
names(Baboons)
#view data
Baboons
#summary of data
summary(Baboons)
#view specific rows of data
Baboons[c(1,39,71,101, 150),] 
#number of censored and uncensored rows of data
table(Baboons$cs)
#add variables to Baboons
Baboons <- within(Baboons, 
                  { delta <- rep(0, length(cs))
                  delta[cs == 0] <- 1 
                  tt.L <- t 
                  tt.R <- t
                  tt.L[cs == 1] <- 0.1 })
#add event variable, where 1 == end of event, 3 == interval (see Surv documentation in the survival package)
Baboons$event<- ifelse(Baboons$delta==1,1,3)
Surv(time=Baboons$tt.L,time2= Baboons$tt.R,event=Baboons$event,type="interval")
#summary of data
summary(Baboons)
#view specific rows of data
Baboons[c(1,39,71,101, 150),]

####predicted non-parametric, semi-parametric and parametric survival functions####
#Non-parametric model Kaplan-Meier
Baboons_np<-Baboons[,4:3]
baboon_fit_np<-ic_np(formula=NULL,data=Baboons_np)

# Semi-parametric model
baboon_Surv<-Surv(time=Baboons$tt.L, time2=Baboons$tt.R, type="interval2")
baboon_fit_sp<-ic_sp(baboon_Surv~1,Baboons,model="ph")
plot(baboon_fit_sp)
ls(baboon_fit_sp)
summary(baboon_fit_sp)

#parametric models
Baboons_par <- Baboons_np
Baboons_par_Weibull_ph <-       ic_par(formula=Surv(Baboons_par$tt.L,Baboons_par$tt.R, 
                                                    type="interval2")~1, data=Baboons_par, model = "ph", dist = "weibull")
Baboons_par_Weibull_aft <-      ic_par(formula=Surv(Baboons_par$tt.L,Baboons_par$tt.R, 
                                                    type="interval2")~1, data=Baboons_par, model = "aft", dist = "weibull")
Baboons_par_AFT_loglogistic <-  ic_par(formula=Surv(Baboons_par$tt.L,Baboons_par$tt.R, 
                                                    type="interval2")~1, data=Baboons_par, model = "aft", dist = "loglogistic")
Baboons_par_Exponential_ph <-   ic_par(formula=Surv(Baboons_par$tt.L,Baboons_par$tt.R, 
                                                    type="interval2")~1, data=Baboons_par, model = "ph", dist = "exponential")
Baboons_par_Exponential_AFT <-  ic_par(formula=Surv(Baboons_par$tt.L,Baboons_par$tt.R, 
                                                    type="interval2")~1, data=Baboons_par, model = "aft", dist = "exponential")

plot(baboon_fit_np,ylab="Predicted survival functions",xlab="Time (hours)")
lines(baboon_fit_sp,lty=2,lwd=2)
abline(h = 0.5, col = "gray")
Baboons_par_Weibull_ph_surv <- survCIs(Baboons_par_Weibull_ph, newdata = NULL, p = NULL, q = NULL, ci_level = 0,MC_samps = 4000)
lines(Baboons_par_Weibull_ph_surv,col="green",lty=3)
Baboons_par_AFT_loglogistic_surv <- survCIs(Baboons_par_AFT_loglogistic, newdata = NULL, p = NULL, q = NULL, ci_level = 0,MC_samps = 4000)
lines(Baboons_par_AFT_loglogistic_surv,lty=4,col="purple")
Baboons_par_Exponential_ph_surv <- survCIs(Baboons_par_Exponential_ph, newdata = NULL, p = NULL, q = NULL, ci_level = 0,MC_samps = 4000)
lines(Baboons_par_Exponential_ph_surv,lty=5,col="red")
legend(9,1,c("Kaplan-Meier","Cox","Weibull","Log-logistic","Exponential"), col=c("black","black","green","purple","red"),lty=c(2,1,1,1,1))
#Diagnostic plots
diag_baseline(baboon_fit_sp, cols = NULL,lgdLocation = "topright")
#when there are covarites, you can test the proportional hazard assumption with diag_covar

#non-parametric estimation of survival function with Icens (Alternative)
library(Icens) ; library(interval)
result.icfit <- icfit(Surv(time=tt.L, time2=tt.R, type="interval2") ~ 1, conf.int=T, data=Baboons) 
#plot estimated survival curve and 95% confidence intervals 
plot(result.icfit, xlab="Time (hours)", ylab="Predicted Kaplan-Meier survival probability", estpar=list(col="blue", lwd=2), cipar=list(col="blue", lty=2),xlim=c(6.7,10.5)) 

#Life expectancy####
#remove the last object from each of the vectors or from the data.frame
Baboons_survfit<-survfit(Surv(time=tt.L, time2=tt.R, type="interval2") ~1,data=Baboons)$surv[1:62] #survival function
length(Baboons_survfit)
Baboons_survfit_predicted_time<-survfit(Surv(time=tt.L, time2=tt.R, type="interval2") ~1,data=Baboons)$time #time point for each observation
length(Baboons_survfit_predicted_time)
# (from second object in the vector to the last object in the vector) - (from the first object in the vector to the before last)
Baboons_survfit_predicted_time
Baboons_survfit_predicted_time_vector<-Baboons_survfit_predicted_time[2:length(Baboons_survfit_predicted_time)] - Baboons_survfit_predicted_time[1:(length(Baboons_survfit_predicted_time)-1)]
length(Baboons_survfit_predicted_time_vector)
#Life expectancy at start of study
Lifeexpectancy_baboons   <- Baboons_survfit_predicted_time[1]+sum(Baboons_survfit*
                                                                    Baboons_survfit_predicted_time_vector)

#Predicted cumulative hazard function 
Baboons_survfit  <-    survfit(Surv(time=tt.L, time2=tt.R, type="interval2") ~1,data=Baboons)
plot(Baboons_survfit,fun="cumhaz", ylab="Predicted cumulative hazard function",xlab="Time (hours)")

Baboons_survfit_predicted_surv<-survfit(Surv(time=tt.L, time2=tt.R, type="interval2") ~1,data=Baboons)$surv
Cum_hazard_Baboons<- -log(Baboons_survfit_predicted_surv)
Cum_hazard_Baboons_upper<- -log(Baboons_survfit$upper)
Cum_hazard_Baboons_lower<- -log(Baboons_survfit$lower)
Baboons_survfit_predicted_time<-survfit(Surv(time=tt.L, time2=tt.R, type="interval2") ~1,data=Baboons)$time
plot(x=Baboons_survfit_predicted_time,y=Cum_hazard_Baboons,type="l",ylab="Predicted cumulative hazard function",xlab="Time (hours)",xlim=c(7,11))
lines(x=Baboons_survfit_predicted_time,y=Cum_hazard_Baboons_upper,lty=2)
lines(x=Baboons_survfit_predicted_time,y=Cum_hazard_Baboons_lower,lty=2)

# Predicted hazard function
#smooth.spline with 3 to 6 degrees of freedom
Baboons_Timesteps <- Baboons_survfit_predicted_time[1:61]
Cum_hazard_Baboons <- Cum_hazard_Baboons[1:61]
Cumulative_hazard_baboons<-data.frame(Baboons_Timesteps, Cum_hazard_Baboons)
ssfit_Baboons_3df<-smooth.spline(x=Baboons_Timesteps,y=Cum_hazard_Baboons,df=3)
ssfit_Baboons_3df$fit

ssfit_Baboons_4df<-smooth.spline(x=Baboons_Timesteps,y=Cum_hazard_Baboons,df=4)
ssfit_Baboons_4df$fit

ssfit_Baboons_5df<-smooth.spline(x=Baboons_Timesteps,y=Cum_hazard_Baboons,df=5)
ssfit_Baboons_5df$fit

ssfit_Baboons_6df<-smooth.spline(x=Baboons_Timesteps,y=Cum_hazard_Baboons,df=6)
ssfit_Baboons_6df$fit

require(graphics)
#without colour
plot(Cum_hazard_Baboons ~ Baboons_Timesteps, data = Cumulative_hazard_baboons, 
     xlim=c(6.7,11),xlab="Time (hours)",ylab="Predicted cumulative hazard")
Baboons.spl <- with(Cumulative_hazard_baboons, smooth.spline(Baboons_Timesteps, Cum_hazard_Baboons))
Baboons.spl
lines(Baboons.spl, lty=1)
ss3_Baboons <- smooth.spline(Cumulative_hazard_baboons[,"Baboons_Timesteps"], 
                             Cumulative_hazard_baboons[,"Cum_hazard_Baboons"], df = 3)
lines(ss3_Baboons, lty = 2)
ss4_Baboons <- smooth.spline(Cumulative_hazard_baboons[,"Baboons_Timesteps"], 
                             Cumulative_hazard_baboons[,"Cum_hazard_Baboons"], df = 4)
lines(ss4_Baboons, lty = 3)
ss5_Baboons <- smooth.spline(Cumulative_hazard_baboons[,"Baboons_Timesteps"], 
                             Cumulative_hazard_baboons[,"Cum_hazard_Baboons"], df = 5)
lines(ss5_Baboons, lty = 4)
ss6_Baboons <- smooth.spline(Cumulative_hazard_baboons[,"Baboons_Timesteps"], 
                             Cumulative_hazard_baboons[,"Cum_hazard_Baboons"], df = 6)
lines(ss6_Baboons, lty = 4)
legend(7,4.9,c(paste("default [C.V.] => df =",round(Baboons.spl$df,1)), "s( * , df = 3)","s( * , df = 4)","s( * , df = 5)","s( * , df = 6)"), lty = c(1,2,3,4,5))

#with colour
plot(Cum_hazard_Baboons ~ Baboons_Timesteps, data = Cumulative_hazard_baboons,
     main = "Cumhaz_baboons  &  smoothing splines",xlim=c(6.7,11),xlab="Time (hours)",ylab="Predicted cumulative hazard")
Baboons.spl <- with(Cumulative_hazard_baboons, smooth.spline(Baboons_Timesteps, Cum_hazard_Baboons))
Baboons.spl
lines(Baboons.spl, col = "blue")
ss3_Baboons <- smooth.spline(Cumulative_hazard_baboons[,"Baboons_Timesteps"], 
                             Cumulative_hazard_baboons[,"Cum_hazard_Baboons"], df = 3)
lines(ss3_Baboons, lty = 2, col = "orange")
ss4_Baboons <- smooth.spline(Cumulative_hazard_baboons[,"Baboons_Timesteps"], 
                             Cumulative_hazard_baboons[,"Cum_hazard_Baboons"], df = 4)
lines(ss4_Baboons, lty = 2, col = "green")
ss5_Baboons <- smooth.spline(Cumulative_hazard_baboons[,"Baboons_Timesteps"], 
                             Cumulative_hazard_baboons[,"Cum_hazard_Baboons"], df = 5)
lines(ss5_Baboons, lty = 2, col = "purple")
ss6_Baboons <- smooth.spline(Cumulative_hazard_baboons[,"Baboons_Timesteps"], 
                             Cumulative_hazard_baboons[,"Cum_hazard_Baboons"], df = 6)
lines(ss6_Baboons, lty = 2, col = "red")
legend(7,4.9,c(paste("default [C.V.] => df =",round(Baboons.spl$df,1)), "s( * , df = 3)","s( * , df = 4)","s( * , df = 5)","s( * , df = 6)"), col = c("blue","orange","green","purple","red"), lty = c(1,2,2,2,2))

## Residual (Tukey Anscombe) plot:
plot(residuals(Baboons.spl) ~ fitted(Baboons.spl))
abline(h = 0, col = "gray")
plot(residuals(ss3_Baboons) ~ fitted(ss3_Baboons))
abline(h = 0, col = "gray")
plot(residuals(ss4_Baboons) ~ fitted(ss4_Baboons))
abline(h = 0, col = "gray")
plot(residuals(ss5_Baboons) ~ fitted(ss5_Baboons))
abline(h = 0, col = "gray")
plot(residuals(ss6_Baboons) ~ fitted(ss6_Baboons))
abline(h = 0, col = "gray")

## consistency check:
stopifnot(all.equal(Cumulative_hazard_baboons$Cum_hazard_Baboons, fitted(Baboons.spl)+residuals(Baboons.spl)))

#plot hazard function
hazard_Baboons<-predict(ss6_Baboons, deriv=1) 
plot(hazard_Baboons$x,hazard_Baboons$y,ylab="Predicted hazard function",xlab="Time (hours)")
lines(hazard_Baboons, lty = 3)
max(hazard_Baboons$y)#max reached at 9.89 hours



##############################################################################

##############
# Lemurs - 1 #
##############

# Opening the file
DataML<-read.table("xxx/Mortality_individual_information_data_Microcebus_murinus.txt", header=T,sep="\t",na.strings="NA")

# Useful packages
library(survival)

#################
# Survival object
# Survival data formatted as: (age beginning period, age end period], with '+' indicating censoring
# Right censored and left truncated data : Surv(time1: truncation time i.e. when entering the period,
# time2: death or censoring time i.e. end of period, event: 0 if censored 1 if dead)
surv <- Surv(DataML$Age_Season_Start,DataML$Age_Season_End,DataML$Event)

#################
# Kaplan-Meier estimation of the survival function - Example by sex
KM <- survfit(surv~DataML$Sex,type="kaplan-meier")
plot(KM,xlab="Age (days)",ylab="Kaplan-Meier estimation of survival",
     lty=c(2,1),lwd=2,xlim=c(min(KM$time),max(KM$time)+10))
legend(2000,1,legend=c('Males','Females'),lty=c(1,2),lwd=2)

#################
# Semi-parametric Cox models
# Effect of seasonal rhythm (Mean.SR), season (Current_Season) and identity (Individual, frailty random effect) on mortality
# No constraints on the baseline distribution
# We chose a gamma distribution for the frailty distribution because it is the most used in litterature
cox <- coxph(surv ~ DataML$Sex + DataML$Current_Season + frailty(DataML$Individual, 
                                                                 distribution="gamma"))
summary(cox)
# Call:
#  coxph(formula = surv ~ DataML$Sex + DataML$Current_Season + frailty(DataML$Individual, distribution = "gamma"))
# 
# n= 3696, number of events= 258 
# 
#                                                       coef   se(coef)      se2     Chisq    DF           p      
# DataML$SexM                        -0.1569    0.1254   0.1253     1.57  1.00  2.1e-01
# DataML$Current_SeasonSD   -0.7317    0.1866   0.1866    15.38 1.00  8.8e-05
# frailty(DataML$Individual                                                     0.01  0.01  6.9e-01
#         
#                                              exp(coef) exp(-coef)  lower .95   upper .95
# DataML$SexM                         0.8548       1.170       0.6686       1.0929
# DataML$Current_SeasonSD    0.4811       2.079       0.3337       0.6935
#         
# Iterations: 5 outer, 31 Newton-Raphson
# Variance of random effect= 5e-05   I-likelihood = -1384.4 
# Degrees of freedom for terms= 1 1 0 
# Concordance= 0.72  (se = 0.021 )
# Likelihood ratio test= 17.25  on 2.01 df,   p=0.000182

# Test if the model respect the proportional hazard hypothesis
# A p-value lower than 0.05 indicates a violation of the proportionality assumption
cox.zph(cox)

# Fit of the model: Martingale residuals
res.mat <- residuals(cox, type="martingale", data=DataML)
plot(DataML$Age_Season_End[DataML$Event==1], res.mat[DataML$Event==1], xlim=c(400,3010), 
     ylim=c(-0.9,1.05),ylab="Martingale residuals", xlab="Time", pch=16)
lines(DataML$Age_Season_End[DataML$Event==0], res.mat[DataML$Event==0], type="p")
lines(loess.smooth(DataML$Age_Season_End[DataML$Event==1], res.mat[DataML$Event==1]), 
      lwd=2, col="red")
lines(loess.smooth(DataML$Age_Season_End[DataML$Event==0], res.mat[DataML$Event==0]), 
      lwd=2, lty=3, col="red")

# Useful metrics
# The equivalent degrees of freedom and the generalized Akaike Information Criterion
extractAIC(cox)

###
# Cox model without frailty
cox.nf <- coxph(surv ~ Sex + Current_Season, data=DataML)
summary(cox.nf)
# Call:
#   coxph(formula = surv ~ Sex + Current_Season, data = DataML)
# 
# n= 3696, number of events= 258 
# 
#                                     coef exp(coef) se(coef)         z Pr(>|z|)    
# SexM                      -0.1569    0.8548   0.1253 -1.252    0.211    
# Current_SeasonSD -0.7317    0.4811   0.1866 -3.921 8.81e-05 ***
#   ---
#   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
#                  exp(coef) exp(-coef) lower .95 upper .95
# SexM                0.8548      1.170    0.6686    1.0929
# Current_SeasonSD    0.4811      2.079    0.3337    0.6935
# 
# Concordance= 0.539  (se = 0.019 )
# Rsquare= 0.005   (max possible= 0.529 )
# Likelihood ratio test= 17.23  on 2 df,   p=0.0001817
# Wald test            = 17  on 2 df,   p=0.0002039
# Score (logrank) test = 16.99  on 2 df,   p=0.0002042

# Test if the model respect the proportional hazard hypothesis
# A p-value lower than 0.05 indicates a violation of the proportionality assumption
cox.zph(cox.nf)

# Fit of the model: Martingale residuals
res.mat <- residuals(cox.nf, type="martingale", data=DataML)
plot(DataML$Age_Season_End[DataML$Event==1], res.mat[DataML$Event==1], xlim=c(400,3010), 
     ylim=c(-0.9,1.05),ylab="Martingale residuals", xlab="Time", pch=16)
lines(DataML$Age_Season_End[DataML$Event==0], res.mat[DataML$Event==0], type="p")
lines(loess.smooth(DataML$Age_Season_End[DataML$Event==1], res.mat[DataML$Event==1]), 
      lwd=2, col="red")
lines(loess.smooth(DataML$Age_Season_End[DataML$Event==0], res.mat[DataML$Event==0]),     
      lwd=2, lty=3, col="red")

# Useful metrics
# The equivalent degrees of freedom and the generalized Akaike Information Criterion
extractAIC(cox.nf)

#Predicted survival function based on cox.nf model
Surv_function<-survfit(cox.nf,newdata=data.frame(Sex=c("M","F"),Current_Season=c("LD","LD")))$surv #survival function

plot(survfit(cox.nf,newdata=data.frame(Sex=c("M","F"),Current_Season=c("LD","LD"))), 
     xlim=c(300,3000), lty =c(1,2), lwd=2, xlab = "Time (days)", ylab="Predicted survival function")
legend(2200,1, c("Males","Females"), lty =c(1,2), lwd=2)
survfit(cox.nf,newdata=data.frame(Sex=c("M","F"),Current_Season=c("LD","LD")))$surv #survival function
survfit(cox.nf,newdata=data.frame(Sex=c("M","F"),Current_Season=c("LD","LD")))$time 
#time point for each observation

#Life expectancy
#remove the last object from each of the vectors or from the data.frame
Surv1<-survfit(cox.nf,newdata=data.frame(Sex=c("M","F"),
                                         Current_Season=c("LD","LD")))$surv[1:(dim(Surv_function)[1]-1),1] #survival function
Surv2<-survfit(cox.nf,newdata=data.frame(Sex=c("M","F"),
                                         Current_Season=c("LD","LD")))$surv[1:(dim(Surv_function)[1]-1),2] #survival function
length(Surv1)

Timesteps<-survfit(cox.nf,newdata=data.frame(Sex=c("M","F"),Current_Season=c("LD","LD")))$time
#time point for each observation
# (from second object in the vector to the last object in the vector) - (from the first object in the vector to the before last)
Timesteps
Timesteps_vector<-Timesteps[2:length(Timesteps)] - Timesteps[1:(length(Timesteps)-1)]

#Life expectancy at start of study
Lifeexpectancy_1   <- sum(Surv1*Timesteps_vector)
Lifeexpectancy_2   <- sum(Surv2*Timesteps_vector)

#Predicted cumulative hazard function 
Surv_1<-survfit(cox.nf,newdata=data.frame(Sex=c("M","F"),Current_Season=c("LD","LD")))$surv[,1]
#survival function for males
Surv_2<-survfit(cox.nf,newdata= data.frame(Sex=c("M","F"),Current_Season=c("LD","LD")))$surv[,2]
#survival function for females
Timesteps<-survfit(cox.nf,newdata= data.frame(Sex=c("M","F"),Current_Season=c("LD","LD")))$time
#time point for each observation
Cum_hazard_1<- -log(Surv_1)
Cum_hazard_2<- -log(Surv_2)
length(Timesteps)
length(Cum_hazard_1)

plot(x=Timesteps,y=-log(Surv_1),type="l", lty =1, lwd=2, ylim=c(0,5.2),
     ylab="Predicted cumulative hazard function",xlab="Time (days)")
lines(x=Timesteps,y=-log(Surv_2), lty =2, lwd=2)
legend(370,5.2, c("Males","Females"), lty =c(1,2), lwd=2)

# Predicted hazard function
#smooth.spline with 3 to 6 degrees of freedom
Cumulative_hazard<-data.frame(Timesteps, Cum_hazard_1, Cum_hazard_2)

ssfit_1_3df<-smooth.spline(x=Timesteps,y=Cum_hazard_1,df=3)
ssfit_1_3df$fit
ssfit_1_4df<-smooth.spline(x=Timesteps,y=Cum_hazard_1,df=4)
ssfit_1_4df$fit
ssfit_1_5df<-smooth.spline(x=Timesteps,y=Cum_hazard_1,df=5)
ssfit_1_5df$fit
ssfit_1_6df<-smooth.spline(x=Timesteps,y=Cum_hazard_1,df=6)
ssfit_1_6df$fit

ssfit_2_3df<-smooth.spline(x=Timesteps,y=Cum_hazard_2,df=3)
ssfit_2_3df$fit
ssfit_2_4df<-smooth.spline(x=Timesteps,y=Cum_hazard_2,df=4)
ssfit_2_4df$fit
ssfit_2_5df<-smooth.spline(x=Timesteps,y=Cum_hazard_2,df=5)
ssfit_2_5df$fit
ssfit_2_6df<-smooth.spline(x=Timesteps,y=Cum_hazard_2,df=6)
ssfit_2_6df$fit

require(graphics)
plot(Cum_hazard_1 ~ Timesteps, data = Cumulative_hazard, type="l", ylim=c(-0.2,5.2),
     ylab="Predicted cumulative hazard function",xlab="Time (days)")
h1.spl <- with(Cumulative_hazard, smooth.spline(Timesteps, Cum_hazard_1))
h1.spl
lines(h1.spl, lty=1)
ss3_1 <- smooth.spline(Cumulative_hazard[,"Timesteps"], Cumulative_hazard[,"Cum_hazard_1"], df = 3)
lines(ss3_1, lty = 2)
ss4_1 <- smooth.spline(Cumulative_hazard[,"Timesteps"], Cumulative_hazard[,"Cum_hazard_1"], df = 4)
lines(ss4_1, lty=3)
ss5_1 <- smooth.spline(Cumulative_hazard[,"Timesteps"], Cumulative_hazard[,"Cum_hazard_1"], df = 5)
lines(ss5_1, lty=4)
ss6_1 <- smooth.spline(Cumulative_hazard[,"Timesteps"], Cumulative_hazard[,"Cum_hazard_1"], df = 6)
lines(ss6_1, lty=5)
legend(370,5.2,c(paste("default [C.V.] => df =",round(h1.spl$df,1)),
                 "s( * , df = 3)","s( * , df = 4)","s( * , df = 5)","s( * , df = 6)"), lty = c(1,2,3,4,5))

require(graphics)
plot(Cum_hazard_2 ~ Timesteps, data = Cumulative_hazard, type="l",ylim=c(-0.2,5.2),
     ylab="Predicted cumulative hazard function",xlab="Time (days)")
h2.spl <- with(Cumulative_hazard, smooth.spline(Timesteps, Cum_hazard_2))
h2.spl
lines(h2.spl, lty=1)
ss3_2 <- smooth.spline(Cumulative_hazard[,"Timesteps"], Cumulative_hazard[,"Cum_hazard_2"], df = 3)
lines(ss3_2, lty = 2)
ss4_2 <- smooth.spline(Cumulative_hazard[,"Timesteps"], Cumulative_hazard[,"Cum_hazard_2"], df = 4)
lines(ss4_2, lty = 3)
ss5_2 <- smooth.spline(Cumulative_hazard[,"Timesteps"], Cumulative_hazard[,"Cum_hazard_2"], df = 5)
lines(ss5_2, lty = 4)
ss6_2 <- smooth.spline(Cumulative_hazard[,"Timesteps"], Cumulative_hazard[,"Cum_hazard_2"], df = 6)
lines(ss6_2, lty = 5)
legend(370,5.2,c(paste("default [C.V.] => df =",round(h2.spl$df,1)),
                 "s( * , df = 3)","s( * , df = 4)","s( * , df = 5)","s( * , df = 6)"), lty = c(1,2,3,4,5))

## Residual (Tukey Anscombe) plot:
plot(residuals(h1.spl) ~ fitted(h1.spl))
abline(h = 0, col = "gray")
plot(residuals(ss3_1) ~ fitted(ss3_1))
abline(h = 0, col = "gray")
plot(residuals(ss4_1) ~ fitted(ss4_1))
abline(h = 0, col = "gray")
plot(residuals(ss5_1) ~ fitted(ss5_1))
abline(h = 0, col = "gray")
plot(residuals(ss6_1) ~ fitted(ss6_1))
abline(h = 0, col = "gray")

plot(residuals(h2.spl) ~ fitted(h2.spl))
abline(h = 0, col = "gray")
plot(residuals(ss3_2) ~ fitted(ss3_2))
abline(h = 0, col = "gray")
plot(residuals(ss4_2) ~ fitted(ss4_2))
abline(h = 0, col = "gray")
plot(residuals(ss5_2) ~ fitted(ss5_2))
abline(h = 0, col = "gray")
plot(residuals(ss6_2) ~ fitted(ss6_2))
abline(h = 0, col = "gray")

## consistency check:
stopifnot(all.equal(Cumulative_hazard$Cum_hazard_1,fitted(h1.spl) + residuals(h1.spl)))
stopifnot(all.equal(Cumulative_hazard$Cum_hazard_2,fitted(h2.spl) + residuals(h2.spl)))

#plot hazard function
hazard_1<-predict(ss6_1, deriv=1) 
hazard_2<-predict(ss6_2, deriv=1) 
plot(hazard_1$x,hazard_1$y,type="l",lty=1, lwd=2, ylim=c(0,0.0037),
     ylab="Predicted hazard     function",xlab="Time (days)")
lines(hazard_2, lty=2, lwd=2)
legend(370,0.0035, c("Males","Females"), lty =c(1,2), lwd=2)


#######
#GLMM

library(lme4)
names <- unique(DataML$Individual[which(DataML$Censoring==0)])
sub2 <- DataML[is.element(DataML$Individual,names)==TRUE,]
sub2$alive[which(sub2$Event==1)] <- 0
sub2$alive[which(sub2$Event==0)] <- 1
glmer1 <- glmer(alive ~ Sex + Current_Season + (1|Individual), data=sub2, family="binomial")
summary(glmer1)





##############################################################################

##############
# Lemurs - 2 #
##############

# Opening the file
DataML<-read.table("xxx/Mortality_individual_information_data_Microcebus_murinus.txt", header=T,sep="\t",na.strings="NA")

# Useful packages
library(survival)
library(eha)

#################
# Survival object
# Survival data formatted as: (age beginning period, age end period], with '+' indicating censoring
# Right censored and left truncated data : Surv(time1: truncation time i.e. when entering the period,
# time2: death or censoring time i.e. end of period, event: 0 if censored 1 if dead)
surv <- Surv(DataML$Age_Season_Start,DataML$Age_Season_End,DataML$Event)

#################
# Kaplan-Meier estimation of the survival function - Example by sex
KM <- survfit(surv~DataML$Sex,type="kaplan-meier")
plot(KM,col=c('red','blue'),xlab="Age (days)",ylab="Survival",lty=1,lwd=2,xlim=c(min(KM$time),max(KM$time)+10))
legend(1700,1,legend=c('Males','Females'),col=c('blue','red'),lty=1,lwd=2)


#################
# Semi-parametric Cox models
# Effect of seasonal rhythm (Mean.SR), season (Current_Season) and identity (Individual, frailty random effect) on mortality
# No constraints on the baseline distribution
# We chose a gamma distribution for the frailty distribution because it is the most used in litterature
cox <- coxph(surv ~  DataML$Current_Season + DataML$Sex+DataML$Cluster_YearObs+frailty.gamma(DataML$Individual))
summary(cox)

# Test if the model respect the proportional hazard hypothesis
# A p-value lower than 0.05 indicates a violation of the proportionality assumption
cox.zph(cox)

# Fit of the model: Martingale residuals
res.mat <- residuals(cox, type="martingale", data=DataML)
plot(DataML$Age_Season_End[DataML$Event==1], res.mat[DataML$Event==1], xlim=c(400,3010), ylim=c(-0.9,1.05),ylab="Martingale residuals", xlab="Time", pch=16)
lines(DataML$Age_Season_End[DataML$Event==0], res.mat[DataML$Event==0], type="p")
lines(loess.smooth(DataML$Age_Season_End[DataML$Event==1], res.mat[DataML$Event==1]), lwd=2, col="red")
lines(loess.smooth(DataML$Age_Season_End[DataML$Event==0], res.mat[DataML$Event==0]), lwd=2, lty=3, col="red")

# Useful metrics
# The equivalent degrees of freedom and the generalized Akaike Information Criterion
extractAIC(cox)

###################
# Parametric models

ph <- phreg(surv ~ DataML$Mean.SR + DataML$Current_Season + DataML$Sex + frailty.gamma(DataML$Individual), dist="gompertz")
summary(ph)
extractAIC(ph)
plot(ph)

aft <- aftreg(surv ~ DataML$Mean.SR + DataML$Current_Season + DataML$Sex + frailty.gamma(DataML$Individual), dist="gompertz")
summary(aft)
extractAIC(aft)
plot(aft)