import numpy
import scipy
from scipy import stats
import statsmodels.api as sm

########################################
# nGLS takes the organic/carbonate choice ("o" or "c"), start/end date, and number of iterations,
# performs the regression algorithm, and returns (in this order):
# B111: list of gradients from Bootstrap GLS
# B000: list of intercepts from Bootstrap GLS
# pval: p-value from Bootstrap GLS (for diagnostic purposes only)
# mean Tau: mean of the Tau values from GLS
# classic_GLS: list of gradients from classic GLS
# classGLS_i: list of intercepts from classic GLS
# cpval: p-value from classic GLS (for diagnostic purposes only)
########################################

def _covariance_matrix(S, times, Tau):
    """Return the n-by-n covariance matrix used by the GLS fit.

    Element [i, k] is S[i, 0] * S[k, 0] * exp(-|times[i] - times[k]| / Tau)
    (matrix C in the appendix).  S is an (n, 1) column of per-point errors
    and times is the length-n time array.
    """
    lags = numpy.abs(times[:, None] - times[None, :])
    # (n, 1) * (1, n) broadcasts to the outer product of the errors
    return (S * S.T) * numpy.exp(-lags / Tau)


def _estimate_tau(r, times):
    """Return the Tau in (0, 500) minimising the sum of squares of eq. 3.3.

    r holds the scaled residuals and times the matching time values; both
    are 1-D arrays of the same length.
    """
    # explicit import: a bare "import scipy" is not guaranteed to expose
    # the optimize submodule
    from scipy.optimize import minimize_scalar

    steps = numpy.diff(times)  # times[i] - times[i-1] for i = 1 .. n-1

    def SoS(Tau):
        # function that must be minimised to find Tau (equation 3.3)
        misfit = r[1:] - numpy.exp(-steps / Tau) * r[:-1]
        return numpy.sum(misfit ** 2)

    return minimize_scalar(SoS, bounds=(0, 500), method='bounded').x


def _two_sided_pvalue(gradients):
    """Return the two-sided p-value for the null hypothesis of zero gradient.

    Computed from the percentile at which 0 falls in the gradient sample.
    """
    zero_percentile = stats.percentileofscore(gradients, 0)
    if zero_percentile < 50.0:
        return 2 * zero_percentile / 100.0  # for A>B
    return 2 * (1 - zero_percentile / 100.0)  # for B > A


def nGLS(oc,start,finish,iterations):
    """Run the iterated classical + bootstrap GLS regression.

    Parameters
    ----------
    oc : "o" to use the organic data, "c" to use the carbonate data.
    start, finish : passed unchanged to data_select.draw to select the data.
    iterations : number of successful bootstrap regressions to collect.

    Returns
    -------
    A tuple (B111, B000, pval, mean_tau, classic_GLS, classGLS_i, cpval):
    B111 / B000 are the lists of bootstrap GLS gradients / intercepts,
    pval is the bootstrap p-value for a zero gradient, mean_tau is the mean
    of the Tau values of the successful fits, classic_GLS / classGLS_i are
    the lists of gradients / intercepts sampled from the classical GLS fits,
    and cpval is the classical GLS p-value for a zero gradient.

    Raises
    ------
    ValueError if oc is neither "o" nor "c".

    Notes
    -----
    A fit that fails (fill value -9999 or a non-positive/nan Tau) is retried
    with a fresh draw and does not count towards `iterations`, so the loop
    only ends once `iterations` regressions have succeeded.  Progress and
    diagnostics are written with single-argument print calls, which produce
    the same text under Python 2 and Python 3.
    """
    # define label for printing progress below; reject unknown choices
    # here instead of failing on an unbound name at the first print
    if oc == "o":
        label = "organics"
    elif oc == "c":
        label = "carbonates"
    else:
        raise ValueError(
            'oc must be "o" (organics) or "c" (carbonates), got %r' % (oc,))

    # project helpers: data loader, classical GLS fit, bootstrap GLS fit
    from data_select import draw
    from classical_GLS import GLS
    from bootstrap_GLS import shuffle

    # lists that will be outputs (defined above)
    B111 = []         # gradient for Bootstrap GLS
    B000 = []         # intercept for Bootstrap GLS
    taus = []         # Tau values
    classic_GLS = []  # gradient for Classical GLS
    classGLS_i = []   # intercept for Classical GLS

    ###################################
    # perform regression multiple times (determined by iterations input)
    j = 0
    while j < iterations:

        # prints calculation progress
        print("%s of %s iterations %s" % (j, iterations, label))

        # draws all data between start and finish dates
        (one, two) = draw(oc, start, finish)

        # create arrays more convenient for matrix multiplication
        n = numpy.size(one)  # number of data points
        t = numpy.ones([n, 2])
        Y = numpy.zeros([n, 1])
        t[:, 1] = one  # time array
        Y[:, 0] = two  # isotope values array
        times = t[:, 1]  # view on the time column of t

        # initial guesses for Tau and the error, refined in the loop below
        Tau = 10
        Sp = 10
        tau_est = None
        # dummies that cannot satisfy the convergence test on the first pass
        oldB1 = 99999        # regression gradient
        oldB0 = 99999        # regression intercept
        oldSSQG = 9999999999  # goodness-of-fit parameter

        failed = False  # set when the fit must be abandoned and redrawn

        # in this loop we fit Tau and the error, Sp, to the data
        # (max 100 passes; should converge in far fewer)
        for attempt in range(100):

            # after the first pass use the Tau estimated by the previous one
            if attempt > 0:
                Tau = tau_est
            S = Sp * numpy.ones([n, 1])

            # if Tau is unreasonable, abandon this fit before building V
            if not Tau > 0:
                print("negative or nan tau")
                print(Tau)
                failed = True
                break

            V = _covariance_matrix(S, times, Tau)

            # fit the data using classical GLS regression with the current
            # Tau and error to obtain gradient and intercept
            (B1, B0, er1, er2, SSQG) = GLS(t, Y, S, V, Tau)

            # if fit is unreasonable (gradient is fill value), abandon it
            if B1 == -9999:
                failed = True
                print("V %s" % (V,))
                print("t %s" % (t,))
                print("got inf or nan")
                print("t %s" % (t,))
                print("S %s" % (S,))
                print("Tau %s" % (Tau,))
                break

            # convergence: gradient, intercept and goodness of fit have
            # stopped changing
            converged = (abs(SSQG - oldSSQG) < 50
                         and abs(oldB1 - B1) < 0.0001
                         and abs(oldB0 - B0) < 0.0001)

            # update gradient, intercept and goodness-of-fit values
            oldB1 = B1
            oldB0 = B0
            oldSSQG = SSQG

            # residuals about the new best fit line
            fit = B0 + B1 * times
            e = Y[:, 0] - fit

            # update error from fit, then scale the residuals by it
            Sp = (numpy.sum((numpy.array(e)) ** 2 / (n - 2))) ** 0.5
            r = e / Sp

            # use these regression results to get a better estimate of Tau
            tau_est = _estimate_tau(r, times)

            # if new Tau is not reasonable, abandon this fit
            if not tau_est > 0:
                print("negative or nan Tau")
                print(tau_est)
                failed = True
                break

            # Sp, r and tau_est are refreshed once more before stopping;
            # Tau stays at the value used for the final GLS call
            if converged:
                break

        # if fit didn't work, try again with a new draw
        if failed:
            continue

        # if fit did work, sample the B1 and B0 distributions into the
        # classical GLS lists (gradient samples drawn first)
        distr2 = er1 * numpy.random.randn(10) + B1
        distrii = er2 * numpy.random.randn(10) + B0
        for gradient, intercept in zip(distr2, distrii):
            if gradient > -9999:
                classic_GLS.append(gradient)
                classGLS_i.append(intercept)

        # scaled residuals as an (n, 1) column for the bootstrap routine
        r_column = numpy.ones([n, 1])
        r_column[:, 0] = r[:]

        # apply bootstrap GLS regression to the classical GLS result
        (B11, B00, er1, er2) = shuffle(
            t, r_column, B1, B0, Sp * numpy.ones([n, 1]), Tau)

        # if bootstrap regression failed (gradient is fill value), redraw
        if B11 == -9999:
            print("got inf or nan")
            print("t %s" % (t,))
            print("Sp %s" % (Sp,))
            print("Tau %s" % (Tau,))
            continue

        # regression succeeded: record it and count the iteration
        B111.append(B11)
        B000.append(B00)
        taus.append(Tau)
        j = j + 1

    # p values (null hypothesis is gradient of zero)
    pval = _two_sided_pvalue(B111)          # bootstrap GLS
    cpval = _two_sided_pvalue(classic_GLS)  # classical GLS

    # return values (see docstring for definitions)
    return B111,B000,pval,numpy.mean(taus),classic_GLS,classGLS_i,cpval
