# This code was written by Daniel Ibarra for the publication Ibarra and Chamberlain (2015, American Journal of Science). 
# This example uses the dataset from Lake Miragoane, Haiti (Figure 3 of Ibarra and Chamberlain, 2015)
# The code is provided below "as is", please see the instructions file for more details ("HyBIM_Instructions.txt")

# Begin HyBIM code #
# Setup and input raw data ####
# set working directory and load "np" package for kernel smoothing
setwd("~/Desktop/Haiti_example") # Needs to be changed by user
library(np) # if np is not installed type: "install.packages("np")"
library(MASS) # if MASS is not installed type: "install.packages("MASS")"

# Import the CSV with input dataset
data = read.csv("CurtisHodell1993.csv",header=T)

# Pull Age, MgCa, SrCa and d18O out as individual vectors for kernel smoothing
age = data$Age; MgCa = data$Mg.Ca; SrCa = data$Sr.Ca; d18O = data$d18O

# Kernel smoothing and interpolation to even time series ####
# Determine cross validated bandwidth (to avoid over or undersmoothing datasets) for each time series
# See Table 2
# Other kernels can be substituted here for epanechnikov if desired by the user, see np documentation
MgCa_bw <- npregbw(age, MgCa,ckertype = "epanechnikov")$bw
SrCa_bw <- npregbw(age, SrCa,ckertype = "epanechnikov")$bw
d18O_bw <- npregbw(age, d18O,ckertype = "epanechnikov")$bw

# Kernel smooth raw datasets
MgCa_kernel = npreg(MgCa~age, ckertype = "epanechnikov",bws=MgCa_bw,residuals=TRUE)
SrCa_kernel = npreg(SrCa~age, ckertype = "epanechnikov",bws=SrCa_bw,residuals=TRUE)
d18O_kernel = npreg(d18O~age, ckertype = "epanechnikov",bws=d18O_bw,residuals=TRUE)

# Pull out residuals as individual vectors for use in Monte Carlo
MgCa_resid = MgCa_kernel$resid; SrCa_resid = SrCa_kernel$resid; d18O_resid = d18O_kernel$resid

#Bind results of kernel smoothing into matrix
names = c("age","mean","merr","number","stdev")
MgCa_data = cbind(MgCa_kernel$eval,MgCa_kernel$mean,MgCa_kernel$merr)
SrCa_data = cbind(SrCa_kernel$eval,SrCa_kernel$mean,SrCa_kernel$merr)
d18O_data = cbind(d18O_kernel$eval,d18O_kernel$mean,d18O_kernel$merr)
# set column names
colnames(MgCa_data) <- c("age","mean","merr"); colnames(SrCa_data) <- c("age","mean","merr"); colnames(d18O_data) <- c("age","mean","merr")

# Calculate sample size for each bin
count_MgCa = array(NA,dim=c(1,length(MgCa_data$age))); count_MgCa = as.vector(count_MgCa)
count_SrCa = array(NA,dim=c(1,length(SrCa_data$age))); count_SrCa = as.vector(count_SrCa)
count_d18O = array(NA,dim=c(1,length(d18O_data$age))); count_d18O = as.vector(count_d18O)

# Count how many samples fall within each bandwidth
for (i in 1:length(MgCa_data$age)){
  track = which(MgCa_data$age < MgCa_data$age[i] + (MgCa_bw/2) & MgCa_data$age > MgCa_data$age[i] - (MgCa_bw/2) & MgCa_data$mean > 0)
  count_MgCa[i] = length(track)
}
for (i in 1:length(SrCa_data$age)){
  track = which(SrCa_data$age < SrCa_data$age[i] +(SrCa_bw/2) & SrCa_data$age > SrCa_data$age[i] - (SrCa_bw/2) & SrCa_data$mean > 0)
  count_SrCa[i] = length(track)
}
for (i in 1:length(d18O_data$age)){
  track = which(d18O_data$age < d18O_data$age[i] + (d18O_bw/2) & d18O_data$age > d18O_data$age[i] - (d18O_bw/2))
  count_d18O[i] = length(track)
}

# bind in counts to matrix
# npreg outputs standard error so we use the counts from the for loop above to calculate standard deviation
# 1sigma = s.e. * square root(number of samples within each kernel bin)
MgCa_data = cbind(MgCa_data,count_MgCa,sqrt(count_MgCa)*MgCa_data$merr)
SrCa_data = cbind(SrCa_data,count_SrCa,sqrt(count_SrCa)*SrCa_data$merr)
d18O_data = cbind(d18O_data,count_d18O,sqrt(count_d18O)*d18O_data$merr)
# set column names
colnames(MgCa_data) <- c("age","mean","merr","numb","stdev"); colnames(SrCa_data) <- c("age","mean","merr","numb","stdev"); colnames(d18O_data) <- c("age","mean","merr","numb","stdev")

# interpolate to even timeseries
# Specify here the time step of the model, for this record we use 150 years
age.out = rev(seq(145,10250,150))

# Use approx function to linearly interpolate to the "age.out" values for mean and 1 sigma range
MgCa_interpmean = approx(MgCa_data$age,MgCa_data$mean,method="linear",xout=age.out,ties=mean)$y
MgCa_stdevplus = approx(MgCa_data$age,MgCa_data$mean + MgCa_data$stdev,method="linear",xout=age.out,ties=mean)$y
MgCa_stdevminus = approx(MgCa_data$age,MgCa_data$mean - MgCa_data$stdev,method="linear",xout=age.out,ties=mean)$y

SrCa_interpmean = approx(SrCa_data$age,SrCa_data$mean,method="linear",xout=age.out,ties=mean)$y
SrCa_stdevplus = approx(SrCa_data$age,SrCa_data$mean + SrCa_data$stdev,method="linear",xout=age.out,ties=mean)$y
SrCa_stdevminus = approx(SrCa_data$age,SrCa_data$mean - SrCa_data$stdev,method="linear",xout=age.out,ties=mean)$y

d18O_interpmean = approx(d18O_data$age,d18O_data$mean,method="linear",xout=age.out,ties=mean)$y
d18O_stdevplus = approx(d18O_data$age,d18O_data$mean + d18O_data$stdev,method="linear",xout=age.out,ties=mean)$y
d18O_stdevminus = approx(d18O_data$age,d18O_data$mean - d18O_data$stdev,method="linear",xout=age.out,ties=mean)$y

# Bind inpterolated time series into matrix
MgCa_interp = as.data.frame(cbind(age.out,MgCa_interpmean,MgCa_stdevplus,MgCa_stdevminus,-1*(MgCa_interpmean-MgCa_stdevplus)))
SrCa_interp = as.data.frame(cbind(age.out,SrCa_interpmean,SrCa_stdevplus,SrCa_stdevminus,-1*(SrCa_interpmean-SrCa_stdevplus)))
d18O_interp = as.data.frame(cbind(age.out,d18O_interpmean,d18O_stdevplus,d18O_stdevminus,-1*(d18O_interpmean-d18O_stdevplus)))
# set column names
colnames(MgCa_interp) <- c("age","mean","sigp","sigm","stdev"); colnames(SrCa_interp) <- c("age","mean","sigp","sigm","stdev"); colnames(d18O_interp) <- c("age","mean","sigp","sigm","stdev")

# plots of raw data and evenly spaced kernel smoothing ####
par(mfrow=c(3,1),mar=c(0.5,5,0.5,0.25),oma=c(5,0.25,0.25,0.25))
# d18O Plot
plot(age,d18O,xlab="Age (Ma)",ylab="d18O",col="darkgrey",lwd=2,xlim=c(0,10750),las=1,xaxt="n",pch=16,cex=0.5,xaxs="i")
points(age.out,d18O_interp$sigp,type="l",lty=2,lwd=3); points(age.out,d18O_interp$sigm,type="l",lty=2,lwd=2.5)
points(age.out,d18O_interp$mean,type="l",lty=1,lwd=5)

# MgCa plot
plot(age,MgCa,xlab="Age (Ma)",ylab="MgCa",col="darkgrey",lwd=2,xlim=c(0,10750),las=1,xaxt="n",pch=16,cex=1.5,xaxs="i")
points(age.out,MgCa_interp$sigp,type="l",lty=2,lwd=3); points(age.out,MgCa_interp$sigm,type="l",lty=2,lwd=2.5)
points(age.out,MgCa_interp$mean,type="l",lty=1,lwd=5)

#SrCa plot
plot(age,SrCa,xlab="Age (Ma)",ylab="SrCa",col="darkgrey",lwd=2,xlim=c(0,10750),las=1,pch=16,cex=1.5,xaxs="i")
points(age.out,SrCa_interp$sigp,type="l",lty=2,lwd=3); points(age.out,SrCa_interp$sigm,type="l",lty=2,lwd=2.5)
points(age.out,SrCa_interp$mean,type="l",lty=1,lwd=5)

# Monte Carlo routine ####
# set up Monte Carlo
MC_it = 5000 # specify the number of iterations, use 5000 and adjust as necessary/desired. 
# Note: the Monte Carlo could take a few minutes to run depending on the length of the time series
num = length(age.out) # number of time steps

# create empty matrices for results
temp_matrix = matrix(NA,num,MC_it)
volume_matrix = matrix(NA,num,MC_it)
evapvector_matrix = matrix(NA,(num-1),MC_it)
volumevector_matrix = matrix(NA,(num-1),MC_it)  

# Input parameters
temp.range = c(5,35) # range in Celsius, minimum then maximum
RH.range = c(50,90) # range in percent, minimum then maximum
d18Ovital.range = c(2.5,0.2) # Mean and standard deviation of d18O vital effect in per mil (see Table 2 for details)
d18O_in = c(-8,2) # Mean and standard deviation of meteoric water d18O
Ca_Conc = c(0.866,0.1) # Mean and standard deviation of lake water Ca concentration [mmol]
# Note: for lakes with small catchments the meteoric water (stream + rainwater) the Sr and Mg concentrations
# should be weighted as the area of the catchment relative to the area of the lake
Sr_Conc = c(0.000553527,0.00005) # Mean and standard deviation of meteoric water Sr concentration [mmol]
Mg_Conc = c(0.264966056,0.05) # Mean and standard deviation of meteoric water Mg concentration [mmol]
# Note: These partition coeffients were re-regressed from the data in Engstrom and Nelson (1991) to calculate the errors. Only MgCa is temperature dependent
# These partition coefficients should be changed for the given biogenic carbonate that was measured
SrKD_range = c(0.406,0.054) #  Mean and standard deviation of Sr KD regresssion in E&N 1991
MgKD_coeff = c(9.68*10^-5,0.16*10^-5) #  Mean and standard deviation of Mg KD regresssion in E&N 1991

# Create empty temporary vectors to store time series of each Monte Carlo iteration
d18O_temp = rep(NA,length(d18O_kernel$mean)); MgCa_temp = rep(NA,length(MgCa_kernel$mean)); SrCa_temp = rep(NA,length(SrCa_kernel$mean))

for (j in 1:MC_it){
  # set input parameters for each Monte Carlo iteration
  Ca_Conc_ini = rnorm(1,Ca_Conc [1],Ca_Conc [2]) # Set lake water Ca concentration
  Sr_Conc_ini = rnorm(1,Sr_Conc [1],Sr_Conc [2]) # Set stream water Sr concentration
  Mg_Conc_ini = rnorm(1,Mg_Conc [1],Mg_Conc [2]) # Set stream water Mg concentration
  temp = runif(1,temp.range[1],temp.range[2]) # Set a temperature from the range specified above
  SrKD = rnorm(1,SrKD_range[1],SrKD_range[2]) # set KD for Sr
  MgKD =  MgKD_coeff * temp # Set KD for Mg which is temperature dependent (see E&N 1991)
  d18O_input = rnorm(1,d18O_in[1],d18O_in[2]) # set meteoric water d18O
  RH = runif(1,RH.range[1],RH.range[2]) # set humidity
  
  # Create input dataset for Monte Carlo iteration based on mean and residuals of the original kernel smoothing
  for (k in 1:length(d18O_temp)){
    d18O_temp[k] = d18O_kernel$mean[k] + sample(d18O_resid,1)
  }
  for (k in 1:length(MgCa_temp)){
    MgCa_temp[k] = MgCa_kernel$mean[k] + sample(MgCa_resid,1)
  }
  for (k in 1:length(SrCa_temp)){
    SrCa_temp[k] = SrCa_kernel$mean[k] + sample(SrCa_resid,1)
  }

  # Kernel smooth the dataset and linear interpolation to even time step (as above but specific to this Monte Carlo iteration)
  MgCa_it_kernel = npreg(MgCa_temp~MgCa_kernel$eval[,1], ckertype = "epanechnikov",bws=MgCa_bw)
  SrCa_it_kernel = npreg(SrCa_temp~SrCa_kernel$eval[,1], ckertype = "epanechnikov",bws=SrCa_bw)
  d18O_it_kernel = npreg(d18O_temp~d18O_kernel$eval[,1], ckertype = "epanechnikov",bws=d18O_bw)
  SrCa_it = approx(SrCa_it_kernel$eval[,1],SrCa_it_kernel$mean,method="linear",xout=age.out,ties=mean)$y
  MgCa_it = approx(MgCa_it_kernel$eval[,1],MgCa_it_kernel$mean,method="linear",xout=age.out,ties=mean)$y
  d18O_it = approx(d18O_it_kernel$eval[,1],d18O_it_kernel$mean,method="linear",xout=age.out,ties=mean)$y
  
  # place datasets in lake water composition
  temp = 273.15 + temp
  d18Ovital = rnorm(1,d18Ovital.range[1],d18Ovital.range[2])
  alpha_calcwater = exp(((18.03*(10^3/(temp)))-32.42)/1000) # kim and o'neill 1997
  d18O_series_SMOW = ((d18O_it - d18Ovital)*1.03092)+30.92 # vital effect + VPDB to SMOW
  d18O_water = ((1000+d18O_series_SMOW)/alpha_calcwater)-1000 # fractionated to lake water value
  
  Mgconc = (MgCa_it*(Ca_Conc_ini/1000)*MgKD) # Mg/Ca placed in lake water concentration of Mg [mmol]
  Srconc = (SrCa_it*(Ca_Conc_ini/1000)*SrKD) # Sr/Ca placed in lake water concentration of Sr [mmol]
  
  # Take first derivate of time series (the "b vector")
  diff_d18O = diff(d18O_water)
  diff_Mgconc = diff(Mgconc)
  diff_Srconc = diff(Srconc)
  
  # create series of b vectors for matrix multiplication
  all_data_bvect = rbind(diff_d18O,diff_Srconc,diff_Mgconc)
  
  # Interpolate age time series to mid-points between each 
  age.out.rev = (age.out)
  age_plot = rep(NA,length(age.out.rev)-1)
  for (i in 1:(length(age.out)-1)){
    age_plot[i]=(age.out.rev[i]+age.out.rev[i+1])/2
  }
  
  # Create initial set of partial derivates (will be replaced below) for equations 1 to 3
  # EQUATION 1 PARTIALS for the d18O TDE
  dO.dT = as.numeric(rnorm(1,0.259,0.019)) # d18O/dT, derived for this lake in Figure A1 of paper
  equ.epsilon = -7.685 + 6.7123*(10^3/temp)-1.6664*(10^6/temp^2)+0.35041*(10^9/temp^3) # Horita and Weslowski 1994 in per mil
  kin.epsilon = (14.2*(1-RH/100)) #gonfiantini 1986 in per mil
  alpha = 1/(1+((equ.epsilon+kin.epsilon)/1000)) # into alpha total
  dO.dE = as.numeric(-1*(alpha-1)*((mean(d18O_water[i]))+1000)*(0.9)^(alpha-2))  # d18O/dFevap, first derivative of rayleigh equation, equation 7 of paper
  dO.dV = as.numeric(((mean(d18O_water)-(d18O_input))/(1.1^2))) # d18O/dFinput, equation 8
  
  # EQUATION 2 PARTIALS for the [Sr] TDE
  dSr.dT = as.numeric(0.00) # dSr/dT is zero
  dSr.dE = as.numeric((mean(Srconc)/(0.9^2))) # dSr/dFevap, equation 6
  dSr.dV  = as.numeric(((mean(Srconc)-(Sr_Conc_ini))/(1.1^2))) # dSr/dFinput, equation 9

  # EQUATION 3 PARTIALS for the [Mg] TDE
  dMg.dT = as.numeric(0.00) # dMg/dT is zero
  dMg.dE = as.numeric((mean(Mgconc)/(0.9^2))) # dMg/dFevap, equation 6
  dMg.dV  = as.numeric(((mean(Mgconc)-(Mg_Conc_ini))/(1.1^2))) # dMg/dFinput, equation 9
  
  # Create matrix of partial derivates
  d18O_part = c(dO.dT,dO.dE,dO.dV); dSr_part = c(dSr.dT,dSr.dE,dSr.dV); dMg_part = c(dMg.dT,dMg.dE,dMg.dV)
  A = matrix(c(d18O_part,dSr_part,dMg_part),nrow=3,ncol=3,byrow=TRUE)
  
  # Define time series of partial derivates based on varying lake water Sr and Mg concentrations
  coeff.Mg = rep(NA,length(age_plot)); coeff.Sr = rep(NA,length(age_plot)) # Evap coefficients
  coeff.MgV = rep(NA,length(age_plot)); coeff.SrV = rep(NA,length(age_plot)) # volume coefficeints
  coeff.d18O.dE = rep(NA,length(age_plot)); coeff.d18O.dV = rep(NA,length(age.out)-1) # d18O coefficient
  for (i in 1:(length(age.out)-1)){    
    coeff.Sr[i] = as.numeric((Srconc[i])/(0.9^2)) # dSr/dFevap, equation 6
    coeff.Mg[i] = as.numeric((Mgconc[i])/(0.9^2)) # dMg/dFevap, equation 6
    coeff.MgV[i] = as.numeric((((Mgconc[i])-(Mg_Conc_ini)/(1.1^2)))) # dMg/dFinput, equation 9
    coeff.SrV[i] = as.numeric((((Srconc[i])-(Sr_Conc_ini)/(1.1^2)))) # dSr/dFinput, equation 9
    coeff.d18O.dE[i] = as.numeric(-1*(alpha-1)*((d18O_water[i])+1000)*(0.9)^(alpha-2)) # d18O/dFevap, equation 7     
    coeff.d18O.dV[i] = as.numeric(((d18O_input)-((d18O_water[i])))/(1.1^2)) # d18O/dFinput, equation 8
  }
  
  # Matric multiplication to solve "x vector" through the time series for each Monte Carlo iteration
  x_vect = matrix(NA,3,length(age.out)-1)
  for (i in 1:length(age_plot)) {
    # First must replace partial derives for that given time step 
    A[3,2] = coeff.Mg[i]; A[2,2] = coeff.Sr[i] 
    A[3,3] = coeff.MgV[i]; A[2,3] = coeff.SrV[i]
    A[1,2] = coeff.d18O.dE[i]; A[1,3] = coeff.d18O.dV[i]
    # matrix inversion using ginv to solve for x vector of a given time step
    x_vect[,i] = ginv(A)%*%all_data_bvect[,i]
  }

  # Compute temperature and volume vectors for Monte Carlo iteration  
  # Temperature
  temp = 0 # Initial temperature
  # Calculate change along the time series
  temp_vect = rep(NA,(length(age_plot)+1)); temp_vect[1]=temp
  for (i in 1:(length(age_plot))){ 
    temp_vect[i+1]=temp_vect[i]+x_vect[1,i]
  }

  # Calculated normalized volume change
  vol = rep(NA,length(age_plot)+1)
  vol[1] = 1 # starting volume
  # Calculate change along the time series
  for (i in 1:(length(age_plot))){
    vol[i+1]=vol[i]+(x_vect[3,i])-(x_vect[2,i])
  }

  # append to matrix that stores all results of each Monte Carlo iteration
  temp_matrix[,j] = t(temp_vect)[1,]
  volume_matrix[,j] = t(vol)[1,]
}

# Calculate statistics and plot results of Monte Carlo ####
# mean and median of Monte Carlo
mean_temp = rowMeans(temp_matrix,na.rm=TRUE); mean_volume = rowMeans(volume_matrix,na.rm=TRUE)
median_temp = apply(temp_matrix,1,median); median_volume = apply(volume_matrix,1,median)

# 95% range of Monte Carlo
sd_temp = apply(temp_matrix,1,sd); sd_temp_quant = apply(temp_matrix,1,quantile,probs=c(0.025,0.975))
sd_volume = apply(volume_matrix,1,sd); sd_volume_quant = apply(volume_matrix,1,quantile,probs=c(0.025,0.975))

# Plot results ####
par(mfrow=c(2,1),mar=c(0.5,5,0.5,0.25),oma=c(5,0.25,0.25,0.25))
# Temperature plot  (Median value and 95% range)
plot(age.out,median_temp,type="l",xlab="Age (Ma)",ylab="cap T (degC)",xlim=c(0,10300),ylim=c(-12,15),lwd=2,las=1,xaxs="i",xaxt="n")
for (i in 1:MC_it){
  points(age.out,temp_matrix[,i],type="l",lwd=0.5,las=1,col="lightgrey")
}
points(age.out,sd_temp_quant[1,],type="l",lty=3,col="blue",lwd=2); points(age.out,sd_temp_quant[2,],type="l",lty=3,col="blue",lwd=2)
points(age.out,median_temp,type="l",lwd=2)

# Calculate transfer function if d(d18O)/dT was interpreted directly as temperature only
transferd18O = rep(NA,length(age.out))
transferd18O[1] = 0
diff_d18O = diff(d18O_interp$mean)
for (i in 1:length(age_plot)){
  transferd18O[i+1]=transferd18O[i]+(diff_d18O[i]/0.259)
}
# plot d18O/T transfer function 
points(age.out,transferd18O,type="l",col="red",lwd=2)
abline(h=0,col="darkgreen",lwd=2,lty=1)

# Normalized Volume Change (Median value and 95% range)
plot(age.out,median_volume,type="l",xlab="Age (Ma)",ylab="Fraction of Lake Volume",xlim=c(0,10300),lwd=2,ylim=(c(0.75,1.4)),las=1,xaxs="i")
for (i in 1:MC_it){
  points(age.out,volume_matrix[,i],type="l",lwd=1,las=1,col="lightgrey")
}
points(age.out,sd_volume_quant[1,],type="l",lty=3,col="blue",lwd=2); points(age.out,sd_volume_quant[2,],type="l",lty=3,col="blue",lwd=2)
points(age.out,median_volume,type="l",lwd=2)
abline(h=1,col="darkgreen",lwd=2,lty=1)