#! /usr/local/bin/python

########################################
#See Readme file for description
########################################

#import science modules for calculations and statistics
import numpy
import scipy
from scipy import stats

# Run mode:
#   "y" -> perform the full bootstrap GLS calculations (slow) and save the results to file
#   "n" -> skip the calculations and only load previously saved results (much faster).
# Once Main_code has been run once with mode="y", switch to mode="n" for later runs.
mode = "y"
#mode = "n"

# Start and end dates for the regression (in Ma).
# Use 0 and 4000 respectively to cover the whole of Earth history.
start = 0
finish = 4000

# Number of iterations for the bootstrap GLS
iterations = 2000

#import function that performs GLS algorithm
from GLS_shell import nGLS

# if mode="y" then do the GLS calculations
if mode == "y":

    # ---- Summary statistics: one entry is appended per data set (organics, then carbonates) ----

    # method 1: bootstrap GLS
    pmeta = []     # p values (for diagnostic purposes only)
    meanmeta = []  # mean gradient
    meta_e1 = []   # 2.5th percentile of the gradient (lower end of the 95% range)
    meta_e2 = []   # 97.5th percentile of the gradient (upper end of the 95% range)
    taus = []      # Tau values

    # method 2: classical GLS
    cGLS_mean = []  # mean gradient
    cGLS_e1 = []    # 2.5th percentile of the gradient (lower end of the 95% range)
    cGLS_e2 = []    # 97.5th percentile of the gradient (upper end of the 95% range)
    c_pval = []     # p values (for diagnostic purposes only)

    # method 3 (simple OLS) is a separate calculation further down the script.

    # ---- Raw regression output: gradients and intercepts for each data set and method ----
    o_GLS_array, oi_GLS_array = [], []    # bootstrap GLS, organics: gradients, intercepts
    o_cGLS_array, oi_cGLS_array = [], []  # classical GLS, organics: gradients, intercepts
    c_GLS_array, ci_GLS_array = [], []    # bootstrap GLS, carbonates: gradients, intercepts
    c_cGLS_array, ci_cGLS_array = [], []  # classical GLS, carbonates: gradients, intercepts

    # For each data set, the four lists that receive its raw regression output, in the order
    # (bootstrap gradients, bootstrap intercepts, classical gradients, classical intercepts).
    raw_output = {
        "o": (o_GLS_array, oi_GLS_array, o_cGLS_array, oi_cGLS_array),
        "c": (c_GLS_array, ci_GLS_array, c_cGLS_array, ci_cGLS_array),
    }

    # Run the regression for organic ("o") and then carbonate ("c") data
    for choice in ("o", "c"):
        # nGLS takes the data choice, start/end date and number of iterations and returns:
        #   grads:       array of gradients from bootstrap GLS
        #   intercepts:  array of intercepts from bootstrap GLS
        #   pval:        array of p values from bootstrap GLS
        #   T:           array of Tau values from GLS
        #   classic_GLS: array of gradients from classical GLS
        #   classGLS_i:  array of intercepts from classical GLS
        #   cpval:       array of p values from classical GLS
        (grads, intercepts, pval, T, classic_GLS, classGLS_i, cpval) = nGLS(choice, start, finish, iterations)

        # Keep the raw output for this data set
        boot_grads, boot_icepts, classic_grads, classic_icepts = raw_output[choice]
        boot_grads.append(grads)
        boot_icepts.append(intercepts)
        classic_grads.append(classic_GLS)
        classic_icepts.append(classGLS_i)

        # Bootstrap GLS summary: bounds of the 95% range, p value, mean gradient and Tau
        g2_5 = scipy.stats.scoreatpercentile(grads, 2.5, interpolation_method='fraction')
        g97_5 = scipy.stats.scoreatpercentile(grads, 97.5, interpolation_method='fraction')
        pmeta.append(pval)
        meanmeta.append(numpy.mean(grads))
        meta_e1.append(g2_5)
        meta_e2.append(g97_5)
        taus.append(T)

        # Classical GLS summary: mean gradient, bounds of the 95% range and p value
        cGLS_mean.append(numpy.mean(classic_GLS))
        cGLS_e1.append(scipy.stats.scoreatpercentile(classic_GLS, 2.5, interpolation_method='fraction'))
        cGLS_e2.append(scipy.stats.scoreatpercentile(classic_GLS, 97.5, interpolation_method='fraction'))
        c_pval.append(cpval)

    # Save the raw output so later runs can skip the bootstrap (by setting mode="n")
    meta_org = numpy.array([o_GLS_array, oi_GLS_array, o_cGLS_array, oi_cGLS_array])
    numpy.save("org_many.npy", meta_org)
    meta_carb = numpy.array([c_GLS_array, ci_GLS_array, c_cGLS_array, ci_cGLS_array])
    numpy.save("carb_many.npy", meta_carb)

    #end bootstrap
    
# Load the regression results from file. This happens in both modes: with mode="y" the
# files have just been written by the bootstrap section, with mode="n" they must already
# exist from an earlier run. Each file unpacks into four entries, in this order:
# bootstrap GLS gradients, bootstrap GLS intercepts, classical GLS gradients,
# classical GLS intercepts.
o_GLS_array, oi_GLS_array, o_cGLS_array, oi_cGLS_array = numpy.load("org_many.npy")
c_GLS_array, ci_GLS_array, c_cGLS_array, ci_cGLS_array = numpy.load("carb_many.npy")

# Time axis on which the regression lines are evaluated
t = numpy.linspace(0, 3800, 100)



#Calculate results for three different methods

############### 
#Begin bootstrap GLS calculations

## Propagate the bootstrap GLS regression results into a distribution for forg.
## Each iteration picks one organic and one carbonate regression result at random
## (gradient and intercept of a data set share the same index), evaluates both isotope
## trend lines on the time axis t, converts them to forg as a function of time, and
## stores the following values:
f_grad = []            # gradient in forg
starts = []            # initial value of forg
finishes = []          # final forg value
percent_increase = []  # percentage change in forg over Earth history

# Number of stored regression results per data set (does not change inside the loop)
n_org_samples = len(o_GLS_array[0])
n_carb_samples = len(c_GLS_array[0])

for k in range(5000):  # number of random draws used to build the forg distribution

    # numpy.random.randint excludes its upper bound, so these indices lie in 0..len-1,
    # exactly as with the deprecated numpy.random.random_integers(0, len-1).
    n = numpy.random.randint(0, n_org_samples)   # random organic index
    m = numpy.random.randint(0, n_carb_samples)  # random carbonate index

    org = oi_GLS_array[0][n] + t*o_GLS_array[0][n]   # organic isotope as function of time
    carb = ci_GLS_array[0][m] + t*c_GLS_array[0][m]  # carbonate isotope as function of time
    # NOTE(review): -5 is presumably the assumed isotopic value of the carbon input;
    # confirm against the Readme.
    f = (-5 - carb)/(org - carb)  # forg as a function of time

    # Change in forg over Earth history. Time runs backwards along t: the last
    # element is the initial (Archean) value, the first element is the present.
    starts.append(f[-1])
    finishes.append(f[0])
    percent_increase.append(100*(f[0] - f[-1])/f[-1])

    # Simple linear regression of forg against t (forg is very close to linear); only
    # the slope is needed. The sign is flipped because the time axis is reversed.
    f_grad.append(-stats.linregress(t, f)[0])


# 95% confidence interval for the percentage change in forg
mean_increase_e1 = numpy.percentile(percent_increase, 2.5)
mean_increase_e2 = numpy.percentile(percent_increase, 97.5)

# 95% confidence interval for the forg gradient
mean_f_grad_e1 = numpy.percentile(f_grad, 2.5)
mean_f_grad_e2 = numpy.percentile(f_grad, 97.5)

# Two-sided p value for the forg gradient being distinct from zero: twice the smaller
# tail of the f_grad distribution on either side of zero.
zerop = stats.percentileofscore(f_grad, 0)
pval_f_grad = 2*zerop/100.0 if zerop < 50.0 else 2*(1 - zerop/100.0)

# Central values. Despite the mean_ prefix these are medians.
mean_f_grad = numpy.median(f_grad)            # median forg gradient
mean_start = numpy.median(starts)             # median initial value of forg
mean_finish = numpy.median(finishes)          # median final value of forg
mean_increase = numpy.median(percent_increase)  # median increase in forg

#alternatively use means rather than medians:
#mean_f_grad=numpy.mean(f_grad)
#mean_start=numpy.mean(starts)
#mean_finish=numpy.mean(finishes)
#mean_increase=numpy.mean(percent_increase)

############### end Bootstrap GLS calculations






################ 
# begin classical GLS calculations

## Propagate the classical GLS regression results into a distribution for forg.
## Each iteration picks one organic and one carbonate regression result at random
## (gradient and intercept of a data set share the same index), evaluates both isotope
## trend lines on the time axis t, converts them to forg as a function of time, and
## stores the following values:
f_grad = []            # gradient in forg
starts = []            # initial value of forg
finishes = []          # final forg value
percent_increase = []  # percentage change in forg over Earth history

# Number of stored regression results per data set (does not change inside the loop)
n_org_samples = len(o_cGLS_array[0])
n_carb_samples = len(c_cGLS_array[0])

for k in range(1000):  # number of random draws used to build the forg distribution

    # numpy.random.randint excludes its upper bound, so these indices lie in 0..len-1,
    # exactly as with the deprecated numpy.random.random_integers(0, len-1).
    n = numpy.random.randint(0, n_org_samples)   # random organic index
    m = numpy.random.randint(0, n_carb_samples)  # random carbonate index

    org = oi_cGLS_array[0][n] + t*o_cGLS_array[0][n]   # organic isotope as function of time
    carb = ci_cGLS_array[0][m] + t*c_cGLS_array[0][m]  # carbonate isotope as function of time
    # NOTE(review): -5 is presumably the assumed isotopic value of the carbon input;
    # confirm against the Readme.
    f = (-5 - carb)/(org - carb)  # forg as a function of time

    # Change in forg over Earth history. Time runs backwards along t: the last
    # element is the initial (Archean) value, the first element is the present.
    starts.append(f[-1])
    finishes.append(f[0])
    percent_increase.append(100*(f[0] - f[-1])/f[-1])

    # Simple linear regression of forg against t (forg is very close to linear); only
    # the slope is needed. The sign is flipped because the time axis is reversed.
    f_grad.append(-stats.linregress(t, f)[0])

# 95% confidence interval for the percentage change in forg
cGLSmean_increase_e1 = numpy.percentile(percent_increase, 2.5)
cGLSmean_increase_e2 = numpy.percentile(percent_increase, 97.5)

# 95% confidence interval for the forg gradient
cGLSmean_f_grad_e1 = numpy.percentile(f_grad, 2.5)
cGLSmean_f_grad_e2 = numpy.percentile(f_grad, 97.5)

# Two-sided p value for the forg gradient being distinct from zero: twice the smaller
# tail of the f_grad distribution on either side of zero.
zerop = stats.percentileofscore(f_grad, 0)
cGLSpval_f_grad = 2*zerop/100.0 if zerop < 50.0 else 2*(1 - zerop/100.0)

# Mean values for forg gradient, initial value, final value and percentage increase
cGLSmean_f_grad = numpy.mean(f_grad)
cGLSmean_start = numpy.mean(starts)
cGLSmean_finish = numpy.mean(finishes)
cGLSmean_increase = numpy.mean(percent_increase)

############### end classical GLS calculations







################ simple OLS calculations
# This section carries out the ordinary least squares linear regression

#import function for loading carbonate and organic data directly from file
from data_select import draw
#load statistical function for OLS regression
import statsmodels.api as sm

#size of distribution to get uncertainties in OLS regression
dist_size = 10000


def _ols_line_samples(ages, values, size):
    """Fit values = intercept + gradient*ages by OLS and sample the fit uncertainty.

    ages and values are equal-length sequences (regressor and response).
    Returns (gradients, intercepts): two arrays of length size, each drawn
    independently from a normal distribution centred on the fitted parameter
    with that parameter's standard error as its standard deviation.
    """
    response = numpy.ones([len(ages), 1])
    response[:, 0] = values
    design = numpy.ones([len(ages), 2])  # column 0 stays at 1 (intercept term)
    design[:, 1] = ages
    fit = sm.OLS(response, design).fit()
    # The gradient is drawn before the intercept so the sequence of random draws is
    # the same as when the two fits were written out inline.
    gradients = numpy.random.randn(size)*fit.bse[1] + fit.params[1]
    intercepts = numpy.random.randn(size)*fit.bse[0] + fit.params[0]
    return gradients, intercepts


# Organics: draw returns a pair; the first entry is used as the regressor and the
# second as the response of the fit.
(oneo, twoo) = draw("o", start, finish)
org_g, org_i = _ols_line_samples(oneo, twoo, dist_size)  # gradient, intercept distributions

# Carbonates
# NOTE(review): the carbonate data are loaded with a hard-coded window (-600, 4500)
# whereas the organics use start/finish. Confirm whether this is intentional; if not,
# changing start/finish will not restrict the carbonate OLS fit.
(onec, twoc) = draw("c", -600, 4500)
carb_g, carb_i = _ols_line_samples(onec, twoc, dist_size)  # gradient, intercept distributions

## Propagate the OLS parameter distributions into a distribution for forg.
## Each iteration samples a gradient and an intercept for organics and for carbonates
## (all four with independent random indices), evaluates both trend lines on the time
## axis t, converts them to forg as a function of time, and stores the following values:
f_grad = []            # gradient in forg
starts = []            # initial value of forg
finishes = []          # final forg value
percent_increase = []  # percentage change in forg over Earth history

t = numpy.linspace(0, 3800, 100)  # time variable for calculating forg

for zzz in range(10000):  # number of random draws used to build the forg distribution
    # numpy.random.randint excludes its upper bound, so these indices lie in 0..len-1,
    # exactly as with the deprecated numpy.random.random_integers(0, len-1).
    m = numpy.random.randint(0, len(org_g))   # index for organic gradient
    n = numpy.random.randint(0, len(org_i))   # index for organic intercept
    o = numpy.random.randint(0, len(carb_g))  # index for carbonate gradient
    p = numpy.random.randint(0, len(carb_i))  # index for carbonate intercept

    org = org_i[n] + t*org_g[m]     # organic trend line for this iteration
    carb = carb_i[p] + t*carb_g[o]  # carbonate trend line for this iteration

    # NOTE(review): -5 is presumably the assumed isotopic value of the carbon input;
    # confirm against the Readme.
    f = (-5 - carb)/(org - carb)  # forg as a function of time for this iteration

    # Change in forg over Earth history. Time runs backwards along t: the last
    # element is the initial (Archean) value, the first element is the present.
    starts.append(f[-1])
    finishes.append(f[0])
    percent_increase.append(100*(f[0] - f[-1])/f[-1])

    # Simple linear regression of forg against t (forg is very close to linear); only
    # the slope is needed. The sign is flipped because the time axis is reversed.
    f_grad.append(-stats.linregress(t, f)[0])

# Mean increase in forg with 95% confidence interval
simple_mean_increase = numpy.mean(percent_increase)
simple_increase_e1 = numpy.percentile(percent_increase, 2.5)
simple_increase_e2 = numpy.percentile(percent_increase, 97.5)

# Mean gradient in forg with 95% confidence interval
simple_mean_grad = numpy.mean(f_grad)
simple_f_grad_e1 = numpy.percentile(f_grad, 2.5)
simple_f_grad_e2 = numpy.percentile(f_grad, 97.5)

# Two-sided p value for the forg gradient (null hypothesis: gradient is zero).
# The name pval must be kept: it is what the results section prints for simple OLS.
zerop = stats.percentileofscore(f_grad, 0)
pval = 2*zerop/100.0 if zerop < 50.0 else 2*(1 - zerop/100.0)

############### end simple OLS calculations




############## Calculate trends in organic and carbonate time series individually:

# This part is for extracting trends and pvalues in organic and carbonate time series individually
mult = 3800  # time span, in Ma, to multiply a trend by to get the overall change over the rock record

## Choose which method's regression output to analyse by commenting out one of the
## two options below. This only renames variables for convenience.
## option 1: Bootstrap GLS
which_organic = o_GLS_array[0]    # organic gradient distribution
which_organici = oi_GLS_array[0]  # organic intercept distribution
which_carb = c_GLS_array[0]       # carbonate gradient distribution
which_carbi = ci_GLS_array[0]     # carbonate intercept distribution

## option 2: Classical GLS
#which_organic=o_cGLS_array[0] #organic gradient distribution
#which_organici=oi_cGLS_array[0] #organic intercept distribution
#which_carb=c_cGLS_array[0] #carbonate gradient distribution
#which_carbi=ci_cGLS_array[0] #carbonate intercept distribution

# Mean gradients with 95% confidence intervals
mean_o = numpy.mean(which_organic)                  # mean organic gradient
o_lowpercent = numpy.percentile(which_organic, 2.5)
o_highpercent = numpy.percentile(which_organic, 97.5)
mean_c = numpy.mean(which_carb)                     # mean carbonate gradient
c_lowpercent = numpy.percentile(which_carb, 2.5)
c_highpercent = numpy.percentile(which_carb, 97.5)

# Two-sided p values (null hypothesis: gradient = 0): twice the smaller tail of the
# gradient distribution on either side of zero.
zerop = stats.percentileofscore(which_organic, 0)
pval_o = 2*zerop/100.0 if zerop < 50.0 else 2*(1 - zerop/100.0)  # organics

zerop = stats.percentileofscore(which_carb, 0)
pval_c = 2*zerop/100.0 if zerop < 50.0 else 2*(1 - zerop/100.0)  # carbonates

# Percentage changes in organics and carbonates respectively
percent_increase_o = []
percent_increase_c = []

# Sample the gradient and intercept distributions to build the percentage-change distributions
for zzz in range(10000):
    # Gradient and intercept are drawn with independent random indices.
    # numpy.random.randint excludes its upper bound, so these indices lie in 0..len-1,
    # exactly as with the deprecated numpy.random.random_integers(0, len-1).
    m = numpy.random.randint(0, len(which_organic))   # index for organic gradient
    n = numpy.random.randint(0, len(which_organici))  # index for organic intercept
    o = numpy.random.randint(0, len(which_carb))      # index for carbonate gradient
    p = numpy.random.randint(0, len(which_carbi))     # index for carbonate intercept

    # The "initial" values are the regression intercepts (trend line at time 0) and the
    # "final" values are the trend line evaluated at time mult. Note that time runs in
    # reverse, so the change below is measured relative to the value at time mult.
    org_initial = which_organici[n]
    org_final = which_organici[n] + mult*which_organic[m]
    carb_initial = which_carbi[p]
    carb_final = which_carbi[p] + mult*which_carb[o]

    # Percentage changes for this iteration
    percent_increase_o.append(100*(org_initial - org_final)/org_final)
    percent_increase_c.append(100*(carb_initial - carb_final)/carb_final)

# Mean percentage change with 95% confidence interval for organics and carbonates
mean_o_percent = numpy.mean(percent_increase_o)
op_lowpercent = numpy.percentile(percent_increase_o, 2.5)
op_highpercent = numpy.percentile(percent_increase_o, 97.5)
mean_c_percent = numpy.mean(percent_increase_c)
cp_lowpercent = numpy.percentile(percent_increase_c, 2.5)
cp_highpercent = numpy.percentile(percent_increase_c, 97.5)
    
# Print results from organic and carbonate analysis
# These results should be the same as those in table 1 in the main text (though slight differences due to MC)
# Each print call takes a single pre-formatted string, so the same line is valid under
# both Python 2 and Python 3. %s formats the numbers with str(), as the print statement did.
print("Organic results ")
print("Mean change in org (parts per thousand): %s 95 confidence %s %s"
      % (mult*mean_o, mult*o_lowpercent, mult*o_highpercent))
print("p value %s" % (pval_o,))
print(" ")
print("Carbonate results")
print("Mean change in carb (parts per thousand): %s 95 confidence %s %s"
      % (mult*mean_c, mult*c_lowpercent, mult*c_highpercent))
print("p value %s" % (pval_c,))
print(" ")
############## end individual organic/carbonate trend results





############ Print forg results
# Each print call takes a single pre-formatted string, so the same line is valid under
# both Python 2 and Python 3. %s formats the numbers with str(), as the print statement did.
print("F_org Results")
mult = 3800  # time span, in Ma, to multiply a trend by to get the overall change over the rock record

# One row per method: title, forg gradient (central value, lower and upper 95% bound),
# percentage increase in forg (central value, lower and upper 95% bound), p value.
# pval holds the simple OLS p value at this point in the script.
forg_results = (
    ("Bootstrap GLS results",
     mean_f_grad, mean_f_grad_e1, mean_f_grad_e2,
     mean_increase, mean_increase_e1, mean_increase_e2,
     pval_f_grad),
    ("Classical GLS results",
     cGLSmean_f_grad, cGLSmean_f_grad_e1, cGLSmean_f_grad_e2,
     cGLSmean_increase, cGLSmean_increase_e1, cGLSmean_increase_e2,
     cGLSpval_f_grad),
    ("Simple OLS results",
     simple_mean_grad, simple_f_grad_e1, simple_f_grad_e2,
     simple_mean_increase, simple_increase_e1, simple_increase_e2,
     pval),
)

# Percentage changes are converted to "factors of" change relative to Archean (initial) values.
# These results should be the same as those in table 2 in the main text (though slight differences due to MC)
for (title, grad, grad_lo, grad_hi, incr, incr_lo, incr_hi, p_value) in forg_results:
    print(" ")
    print(title)
    print("Mean change in forg %s 95 confidence %s %s"
          % (mult*grad, mult*grad_lo, mult*grad_hi))
    print("Change in forg relative to Archean %s 95 confidence %s %s"
          % (incr/100.0 + 1.0, incr_lo/100.0 + 1.0, incr_hi/100.0 + 1.0))
    print("p value %s" % (p_value,))


                                                                                                                                                                 
# Terminate the script explicitly once all results have been printed.
import sys
sys.exit()