HydrologicalSignatures.py

import numpy as np
import read_TimeLoop as readT


class HydrologicalSignatures(object):

    def __init__(self, q_mean=-1, bfi=-1, bf_magni=-1, tau_1=-1, tau_2=-1, tau_roques=-1,
                 a_q=-1, b_q=-1, fdc_slope=-1, fdc_quantile90=-1, fdc_quantile10=-1, runoff_ratio=-1, aridity=-1):
        """"
        This method creates an object of the HydrologicalSignatures class. Instantiates attributes by default at -1.
        """
        # general indices
        self.q_mean = q_mean  # average discharge over the relationships
        self.bfi = bfi  # Baseflow index
        self.bf_magni = bf_magni
        # recession indices
        self.tau_1 = tau_1  # fast recession time in Horner (2020) methods
        self.tau_2 = tau_2  # late recession time in Horner (2020) methods
        self.tau_roques = tau_roques  # late recession time computed with the Roques et al. 2017 AWR method
        self.a_q = a_q  # a coefficient in the relationship dQ/dt =a Q^b
        self.b_q = b_q  # b coefficient in the relationship dQ/dt =a Q^b
        # flow duration curve statistics
        self.fdc_slope = fdc_slope
        self.fdc_quantile90 = fdc_quantile90
        self.fdc_quantile10 = fdc_quantile10
        self.runoff_ratio = runoff_ratio  # long Q/P ratio
        self.aridity_ratio = aridity  # long P/PET ratio


    def from_discharge_time_series(self, df_obs, column_name_discharge='Q'):
        """
        This method computes the different hydrological signatures
        of a discharge time series (stored in pandas dataframe df_obs)
        :param df_obs: pandas dataframe containing at least a Datetime column and Q_obs column (discharge series)
        :param column_name: str corresponding to the name of the dataframe column where discharge time series is stored
        :return: directly instantiates the computed signatures in the object
        """
        self.compute_q_mean(df_obs, column_name_discharge)
        self.compute_baseflow_gustard(df_obs, column_name_discharge)
        self.compute_bf_magni(df_obs, column_name_discharge)
        self.recession_extraction(df_obs, column_name_discharge, min_duration=5, max_duration=30)
        self.recession_extraction_roques_methods(df_obs)
        self.compute_fdc_slope(df_obs, column_name_discharge)
        self.compute_fdc_percentile_1090(df_obs, column_name_discharge)

    def from_discharge_and_climate_time_series(self, df_obs_mod, column_name_discharge='Q', column_name_precip='P'):
        self.compute_runoff_ratio(df_obs_mod, (column_name_discharge, column_name_precip))  # precip time series
        # is needed in the dataframe to compute this signature

    def from_climate_time_series(self, df_mod, column_name_precip='P', column_name_pet='PET'):
        self.compute_aridity_ratio(df_mod, (column_name_precip, column_name_pet))  # precip and PET time series
        # is needed in the dataframe to compute this signature

    def compute_runoff_ratio(self, df_obs, column_names=('Q', 'P')):
        """
        This method computes the different runoff ratio ie Q/P
        of a discharge time series (stored in pandas dataframe df_obs)
        :param df_obs: pandas dataframe containing at least a Datetime column and Q_obs column (discharge series)
        :param column_names: str corresponding to the name of the dataframe column where discharge time series is stored
        :return: runoff_ratio
        """
        if (column_names[0] in df_obs.columns) and (column_names[1] in df_obs.columns):
            self.runoff_ratio = df_obs[column_names[0]].mean() / df_obs[column_names[1]].mean()
        else:
            self.runoff_ratio = np.nan
            print('Cannot compute runoff ratio \n')
            if column_names[0] in df_obs.columns:
                print('Precipitation data series is lacking \n')
            if column_names[1] in df_obs.columns:
                print('Discharge data series is lacking \n')
            else:
                print('Discharge and Precipitation series are lacking \n')
        return self.runoff_ratio

    def compute_aridity_ratio(self, df_mod, column_names):
        if (column_names[0] in df_mod.columns) and (column_names[1] in df_mod.columns):
            self.aridity_ratio = df_mod[column_names[0]].mean() / df_mod[column_names[1]].mean()
        else:
            self.aridity_ratio = np.nan
            print('Cannot compute the aridity ratio \n')
        pass

    def compute_q_mean(self, df_obs, column_name='Q'):
        """
        This method compute the average discharge on the discharge time series
        :param df_obs: pandas dataframe containing at least a Datetime column and Q_obs column (discharge series)
        :param column_name: str corresponding to the name of the dataframe column where discharge time series is stored
        :return average discharge value
        """
        self.q_mean = df_obs[column_name].mean()
        return self.q_mean

    def compute_baseflow_gustard(self, df_obs, column_name='Q', k=0.9, d=5):
        """
        This method extracts the baseflow from a discharge time series with the Gustard method
        (Gustard, 1992, Low flow estimation in the United Kingdom)
        :param df_obs: pandas dataframe containing at least a Datetime column and Q_obs column (discharge series)
        :param column_name: str corresponding to the name of the dataframe column where discharge time series is stored
        :param k: filtering constant; default is 0.9
        :param d: window size; default is 5 days
        :return: baseflow index
        """
        if column_name == 'Q':
            bf_column_name = 'BF_obs'
        elif column_name == 'Q_mod':
            bf_column_name = 'BF_mod'
        else:
            bf_column_name = 'BF'

        # Baseflow extraction for column_name values
        df_obs[bf_column_name] = df_obs[column_name].groupby(np.arange(len(df_obs)) // d).transform('min')
        minima_values = df_obs[bf_column_name][df_obs[column_name] == df_obs[bf_column_name]].values
        index_pivotpoints = np.arange(len(df_obs))
        index_pivotpoints = index_pivotpoints[(df_obs[column_name] == df_obs[bf_column_name]).values]

        index_pivotpoints_tmp = np.logical_and(k * minima_values[1:-1] < minima_values[0:-2],
                                               k * minima_values[1:-1] < minima_values[2:])
        index_pivotpoints_tmp = np.insert(index_pivotpoints_tmp, 0, True)
        index_pivotpoints_tmp = np.insert(index_pivotpoints_tmp, -1, True)

        index_pivotpoints = index_pivotpoints[index_pivotpoints_tmp]
        q_bf = minima_values[index_pivotpoints_tmp]
        bf_gustard_obs = np.interp(np.arange(len(df_obs)), index_pivotpoints, q_bf)
        bf_gustard_obs[bf_gustard_obs > df_obs[column_name].values] = (
            df_obs[column_name][bf_gustard_obs > df_obs[column_name]]).values
        df_obs[bf_column_name] = bf_gustard_obs

        # get metric
        self.bfi = df_obs[bf_column_name].mean() / df_obs[column_name].mean()
        return self.bfi, df_obs

    def compute_bf_magni(self, df_obs, column_name='Q'):
        """
        This method computes the baseflow magnitude
        :param df_obs: pandas dataframe containing at least a Datetime column and Q_obs column (discharge series)
        :param column_name: str corresponding to the name of the dataframe column where discharge time series is stored
        :return: baseflow magnitude
        """
        bf_column_name = 'BF_obs'
        if (column_name == 'Q') and (not ('BF_obs' in df_obs.columns)):
            bf_column_name = 'BF_obs'
            bfi, df_obs = HydrologicalSignatures.compute_baseflow_gustard(df_obs, column_name)
        if (column_name == 'Q_mod') and (not ('BF_mod' in df_obs.columns)):
            bf_column_name = 'BF_mod'
            bfi, df_obs = HydrologicalSignatures.compute_baseflow_gustard(df_obs, column_name)

        df_obs_daily_interannual_av = readT.get_daily_interannual_average(df_obs)
        self.bf_magni = (df_obs_daily_interannual_av[bf_column_name].max() - df_obs_daily_interannual_av[
            bf_column_name].min()) / df_obs_daily_interannual_av[bf_column_name].max()
        return self.bf_magni

    def recession_extraction(self, df_obs, column_name='Q', min_duration=5, max_duration=30):
        """
        This methods extracts the recession time constant, one short and one long (tau_2), by fitting recession time
        series with an exponential.
        :param df_obs: pandas dataframe containing at least a Datetime column and Q_obs column (discharge series)
        :param column_name: str corresponding to the name of the dataframe column where discharge time series is stored
        :param min_duration: minimum duration to extract the recession after a discharge peak (??); default is 5 days
        :param max_duration: maximum duration to extract the recession after a discharge peak (??); default is 30 days
        :return: (tau_1, tau_2), the fast and the slow recession time constant.
        """
        import pandas as pd
        from sklearn.linear_model import LinearRegression

        Q_minvalue = df_obs[column_name].quantile(0.5)

        df_obs2 = df_obs.rolling(3, center=True).sum() / 3
        Nan_array = np.empty((1, len(list(df_obs2.columns))))
        Nan_array[:] = np.nan
        df_obs2 = df_obs2.append(pd.DataFrame(Nan_array, columns=list(df_obs2.columns)))
        #     df_obs2 = df_obs2.append(pd.DataFrame([np.nan], columns=column_name))
        df_obs2 = df_obs2.reset_index(drop=True)

        minmax_obs = df_obs2[column_name].diff()
        minmax_obs = minmax_obs[1:]
        minmax_obs[np.abs(minmax_obs) < 1e-9] = 0
        minmax_obs = minmax_obs.apply(np.sign)
        minmax_obs = minmax_obs.diff()
        minmax_obs = minmax_obs[1:]
        minmax_obs = minmax_obs.reset_index(drop=True)
        minmax_obs = minmax_obs.values

        ismax_obs = np.array([e < 0 if ~np.isnan(e) else False for e in minmax_obs], dtype=bool)
        ismax_obs = np.insert(ismax_obs, 0, False)
        ismax_obs = np.insert(ismax_obs, -1, False)
        ismax_obs_enough = (df_obs2[column_name] > Q_minvalue).values
        ismax_obs = ismax_obs & ismax_obs_enough

        ismin_obs = np.array([e > 0 if ~np.isnan(e) else True for e in minmax_obs], dtype=bool)
        ismin_obs = np.insert(ismin_obs, 0, False)
        ismin_obs = np.insert(ismin_obs, -1, False)

        # find end of recessions from minima and create event data.frame
        imax_obs = np.where(ismax_obs)
        imax_obs = imax_obs[0]

        imin_obs = np.where(ismin_obs)
        imin_obs = imin_obs[0]

        new_imin_obs = np.empty((len(imax_obs),))
        new_imin_obs[:] = np.nan
        for j in range(len(imax_obs) - 1):
            Values_sup = imin_obs[np.logical_and(imin_obs > imax_obs[j], imin_obs < imax_obs[j + 1])]
            if Values_sup.size != 0:
                new_imin_obs[j] = Values_sup[0]

        # last element of imax_obs
        j = len(imax_obs) - 1
        Values_sup = imin_obs[imin_obs > imax_obs[j]]
        if Values_sup.size != 0:
            new_imin_obs[j] = Values_sup[0]

        rec_events = pd.DataFrame(np.transpose(np.array([imax_obs, new_imin_obs])),
                                  columns=['start', 'end'])

        # delete recession event rows with nan values
        rec_events = rec_events.dropna()
        # delete recession events lower than min_duration
        rec_events = rec_events[rec_events['end'] - rec_events['start'] + 1 >= min_duration]
        # truncate recession events longer than max_duration
        rec_events.loc[rec_events['end'] - rec_events['start'] + 1 >= max_duration, 'end'] = \
            rec_events[rec_events['end'] - rec_events['start'] + 1 >= max_duration]['start'] + max_duration - 1
        rec_events = rec_events.astype('int64')
        rec_events = rec_events.reset_index()

        # extract short (less than 5 days) and long (more than 15 days) recession periods
        rec_short = rec_events.copy()
        rec_short['end'] = rec_short['start'] + 5
        rec_short = rec_short.astype('int64')
        rec_short = rec_short.reset_index()
        # print(rec_short)
        rec_long = rec_events.copy()
        rec_long = rec_long.loc[rec_long['end'] - rec_long['start'] + 1 >= 15]
        rec_long['start'] = rec_long['start'] + 15
        rec_long = rec_long.loc[rec_long['end'] - rec_long['start'] + 1 >= min_duration]
        rec_long['end'] = rec_long['end'] + 1
        rec_long = rec_long.astype('int64')
        rec_long = rec_long.reset_index()

        # delete null discharge values
        if (len(df_obs['Q'][df_obs['Q'] <= 0]) > 0):
            print('WARNING : ' + str(len(df_obs['Q'][df_obs[
                                                         'Q'] <= 0])) + ' discharge values were null and were replaced to compute recession time analysis. \n')
            df_obs.loc[df_obs['Q'] <= 0, 'Q'] = min(df_obs['Q'][df_obs['Q'] > 0]) / 100

        # compute short recessions
        model = LinearRegression()
        coef_short = np.empty((len(rec_short),))
        coef_short[:] = np.nan
        # fit recession to an exponential
        for index, row in rec_short.iterrows():
            x = np.linspace(1, row['end'] - row['start'], num=row['end'] - row['start']).reshape((-1, 1))
            y = np.log(df_obs[row['start']:row['end']][
                           column_name])  # - np.log(df_obs[row['start']:row['start']+1]['Q'].values)
            model.fit(x, y)
            coef_short[index] = model.coef_

        coef_short[coef_short == 0] = np.nan
        rec_short['coef'] = -1 / coef_short

        # compute long recessions
        model = LinearRegression()
        coef_long = np.empty((len(rec_long),))
        coef_long[:] = np.nan
        # fit recession to an exponential
        for index, row in rec_long.iterrows():
            x = np.linspace(1, row['end'] - row['start'], num=row['end'] - row['start']).reshape((-1, 1))
            y = np.log(df_obs[row['start']:row['end']][
                           column_name])  # - np.log(df_obs[row['start']:row['start']+1]['Q'].values)
            model.fit(x, y)
            coef_long[index] = model.coef_

        coef_long[coef_long == 0] = np.nan
        rec_long['coef'] = -1 / coef_long

        rec_short = rec_short.replace([np.inf, -np.inf], np.nan).dropna()
        rec_short = rec_short.reset_index(drop=True)
        rec_long = rec_long.replace([np.inf, -np.inf], np.nan).dropna()
        rec_long = rec_long.reset_index(drop=True)

        self.tau_1 = rec_short['coef'].median()
        self.tau_2 = rec_long['coef'].median()

        return self.tau_1, self.tau_2

    def recession_extraction_roques_methods(self, df_obs, column_names=('Datetime','Q'), min_recession_time=5, t_overland=1):
        """
        This methods extracts the recession time constant, the exponent factors in the relationship dQ/dt = a.Q^b with
        the Roques et al. (2017) method.
        Roques, C et al. (2017), Improved streamflow recession parameter
        estimation with attention to calculation of − dQ/dt, WRR.
        :param df_obs: pandas dataframe containing at least a Datetime column and Q_obs column (discharge series)
        :return: (tau_roques, a_q, b_q) the average time recession constant, the a and b factor in the relationship
        dQ/dt = a.Q^b
        """

        t = df_obs[column_names[0]].apply(lambda x: x.toordinal()).values
        q = df_obs[column_names[1]].values

        t_interp = np.arange(np.ceil(min(t)),np.floor(max(t))+1,1)  # d
        from scipy import interpolate
        f = interpolate.interp1d(t, q)
        q_interp = f(t_interp)
        t = np.transpose(t_interp)
        q = np.transpose(q_interp)

        # define prominence
        d = np.abs(np.diff(q))
        prominence = 5 * np.nanmin(d[d > 0])

        idmax, pmax, idmin, pmin = self.IDRecession(q, t, prominence, min_recession_time, t_overland)

        aH, bH, aL, bL, ts, d_all, d_L = self.SRanalysis(q, idmin, idmax)

        # import matlab.engine
        # eng = matlab.engine.start_matlab()
        # eng.addpath(eng.genpath('~/Documents/MATLAB/RecessionAnalysisRoques/'))
        # # identify individual recessions
        # visu = 0
        # q = matlab.double(q.tolist())
        # t = matlab.double(t.tolist())
        # q = eng.transpose(q)
        # t = eng.transpose(t)

        # [idmax, pmax, idmin, pmin] = eng.IDRecession(q, t, prominence.tolist(), min_recession_time, t_overland, visu, nargout=4)

        # # perform recession analysis on individual recessions
        # [aH, bH, aL, bL, ts, d_all, d_L, gofH, gofL, gofT] = eng.SRanalysis(q, idmin, idmax, visu, nargout=10)

        # aL = np.asarray(aL)
        # bL = np.asarray(bL)
        # ts = np.asarray(ts)
        # aL = np.nan
        # bL = np.nan
        # ts = np.nan
        self.tau_roques = np.nanmedian(ts)
        self.a_q = np.nanmedian(aL)
        self.b_q = np.nanmedian(bL)
        return ts, aL, bL, aH, bH, d_all, d_L

    def IDRecession(self, data, time, prominence, min_recession_time, t_overland):
        time2 = time - min(time) + 1

        # time2 = np.reshape(time2, (1,-1))
        # data = np.reshape(data, (1,-1))
        # nt = np.shape(time2)
        # nt = nt[0]
        # nd = np.shape(data)
        # nd = nd[0]
        #
        # if nt > 1:
        #     time2 = time2.transpose()
        # if nd > 1:
        #     data = data.transpose()

        from scipy import signal
        [locs_max, pks_max] = signal.find_peaks(data, height=0, prominence=prominence)
        pks_max = pks_max['peak_heights']
        [locs_min, pks_min] = signal.find_peaks(-data, height=np.nanmin(-data))
        pks_min = pks_min['peak_heights']
        pks_min = -pks_min

        #  If series start by a minimum then supress it
        if locs_min[0] < locs_max[0]:
            locs_min = locs_min[1:]
            pks_min = pks_min[1:]

        # Attribute one minimum for one peak event
        locs_min2 = np.zeros(np.shape(locs_max))
        pks_min2 = np.zeros(np.shape(locs_max))

        for pp in range(len(locs_max))[:-1]:
            bool_indic = (locs_max[pp] < locs_min) & (locs_min < locs_max[pp + 1])

            if sum(bool_indic):
                locmin_temp = locs_min[bool_indic]
                pksmin_temp = pks_min[bool_indic]
                indic2 = pksmin_temp == min(pksmin_temp)
                lcmin = locmin_temp[indic2]
                pkmin = pksmin_temp[indic2]
                locs_min2[pp] = lcmin[-1]
                pks_min2[pp] = pkmin[-1]
            else:
                locs_min2[pp] = np.nan
                pks_min2[pp] = np.nan


        # If last event is a peak then delete it
        if locs_max[-1] > locs_min2[-1]:
            locs_min2 = locs_min2[:-1]
            locs_max= locs_max[:-1]
            pks_min2 = pks_min2[:-1]
            pks_max = pks_max[:-1]

        # delete nan values in locs_min2
        locs_max = locs_max[~np.isnan(locs_min2)]
        pks_min2 = pks_min2[~np.isnan(locs_min2)]
        pks_max = pks_max[~np.isnan(locs_min2)]
        locs_min2 = locs_min2[~np.isnan(locs_min2)]

        # delete short events
        bool_short_events = locs_min2-locs_max >= min_recession_time
        locs_max = locs_max[bool_short_events]
        pks_min2 = pks_min2[bool_short_events]
        pks_max = pks_max[bool_short_events]
        locs_min2 = locs_min2[bool_short_events]

        # delete long events
        bool_long_events = locs_min2-locs_max <= 250
        locs_max = locs_max[bool_long_events]
        pks_min2 = pks_min2[bool_long_events]
        pks_max = pks_max[bool_long_events]
        locs_min2 = locs_min2[bool_long_events]

        # Find new loc max to exclude first fast overland flow
        t_overland = int(t_overland)
        locs_max = locs_max + t_overland  # after peak
        pks_max = data[locs_max]

        # Delete errors
        D = pks_max >= pks_min2
        locs_min2 = locs_min2[D]
        locs_max = locs_max[D]
        pks_min2 = pks_min2[D]
        pks_max = pks_max[D]

        # Delete recession if flow data contain NaNs
        N = np.zeros((len(locs_max),))
        for i in np.arange(0,len(locs_max)):
            Q = data[locs_max[i]:np.int(locs_min2[i])]
            N[i] = np.sum(np.isnan(Q))
        N = N==0
        locs_min2 = locs_min2[N]
        locs_max = locs_max[N]
        pks_min2 = pks_min2[N]
        pks_max = pks_max[N]

        idmax = locs_max
        pmax = pks_max
        idmin = locs_min2
        pmin = pks_min2
        return idmax, pmax, idmin, pmin

    def SRanalysis(self, q,idmin,idmax):
        aH = np.zeros((len(idmax),))
        bH = np.zeros((len(idmax),))
        aL = np.zeros((len(idmax),))
        bL = np.zeros((len(idmax),))
        ts = np.zeros((len(idmax),))
        d_all = np.zeros((len(idmax),))
        d_L = np.zeros((len(idmax),))
        gofH = np.zeros((len(idmax),))
        gofL = np.zeros((len(idmax),))
        gofT = np.zeros((len(idmax),))

        limrsq = 0

        #  Limit of recession time to fit a and b
        lr = 4
        # Define the quantile ranges for early and late times
        H1 = 1
        H2 = 0.5
        L1 = 0.5
        L2 = 0.05

        for zz in np.arange(0,len(idmax)):
            time_event = np.arange(1,(idmin[zz] - idmax[zz])+2)
            d_all[zz] = len(time_event)
            Qevent = q[int(idmax[zz]):int(idmin[zz]+1)]

            # Fit exponential function on the data
            #     [xDataexp, yDataexp] = prepareCurveData(time_event', Qevent./max(Qevent));
            xDataexp = time_event
            yDataexp = Qevent/max(Qevent)
            # Set up fittype and options.
            from scipy.optimize import curve_fit
            f = lambda x, a, b, c : a*np.exp(-b*x)+c
            try:
                popt, pcov = curve_fit(f, xDataexp, yDataexp, p0=[0.1, 0.1, 0.1], method='trf', ftol=1e-6, xtol=1e-6, maxfev=1000)#   max_nfev=800,
            except:
                popt = np.array([-1,-1])

            if popt[0] and popt[1] > 0:
                step_max = 0.25*len(time_event)
                cc = np.ceil(step_max * np.exp(-1/(popt[1]*time_event)))+1
                Lderiv = int(len(time_event)-cc[-1])
                dQ_dt = np.zeros((Lderiv,))
                Q_deriv = np.zeros((Lderiv,))
                Rsq = np.zeros((Lderiv,))
                t_deriv = np.zeros((Lderiv,))
                from sklearn.linear_model import LinearRegression

                for ee in np.arange(0,int(len(time_event)-cc[-1])):
                    X = time_event[ee:int(ee+cc[ee]+1)].reshape((-1, 1))
                    Y = Qevent[ee:int(ee+cc[ee]+1)].reshape((-1, 1))
                    model = LinearRegression(fit_intercept=True)
                    model.fit(X, Y)
                    dQdt = np.array([model.intercept_,model.coef_]).flatten()
                    Rsq[ee] = np.max([0,model.score(X,Y)])
                    if dQdt[1] > 0:
                        dQdt[1] = np.nan
                    elif Rsq[ee] < limrsq:
                        dQdt[1] = np.nan
                    elif np.log10(dQdt[1]) < -8:
                        dQdt[1] = np.nan

                    dQ_dt[ee] = -1*dQdt[1]
                    Q_deriv[ee]= np.nanmean(Y)
                    t_deriv[ee] = np.nanmean(X)
            else:
                dQ_dt = np.array(-999)
                Q_deriv = np.array(-999)
                Rsq = np.array(-999)

            # Fit the power law for a and b linear fit: log(y) = p(1) * log(x) + p(2)
            nonan_bool = (~np.isnan(Q_deriv)) & (~np.isnan(dQ_dt))
            Q_deriv = Q_deriv[nonan_bool]
            dQ_dt = dQ_dt[nonan_bool]
            Rsq = Rsq[nonan_bool]
            # fit early-time flow
            H = (Q_deriv < np.nanquantile(q, H1)) & (Q_deriv > np.nanquantile(q, H2))
            if(np.sum(H) >= lr):
                f2 = lambda x, b, a: b * x + a
                try:
                    popt, pcov = curve_fit(f2, np.log(Q_deriv[H]), np.log(dQ_dt[H]), ftol=1e-6, xtol=1e-6,
                                           maxfev=600, sigma=np.diag(1/(Rsq[H])))
                except:
                    popt = np.array([-1,-1])
                bH[zz] = popt[0]
                aH[zz] = np.exp(popt[1])
                # need to return goodness of fit ??
                if bH[zz] < 0:
                    bH[zz] = np.nan
                    aH[zz] = np.nan
            else:
                bH[zz] = np.nan
                aH[zz] = np.nan

            #  fit late-time flow
            L = (Q_deriv < np.nanquantile(q, L1)) & (Q_deriv > np.nanquantile(q, L2))
            if np.sum(L) >=lr:
                f2 = lambda x, b, a: b * x + a
                try:
                    pL, pcov = curve_fit(f2, np.log(Q_deriv[L]), np.log(dQ_dt[L]), ftol=1e-6, xtol=1e-6,
                                         maxfev=600, sigma=np.diag(1/(Rsq[L])))
                except:
                    pL = np.array([-1,-1])
                bL[zz] = pL[0]
                aL[zz] = np.exp(pL[1])
                if bL[zz] < 0:
                    bL[zz] = np.nan
                    aL[zz] = np.nan
            else:
                bL[zz] = np.nan
                aL[zz] = np.nan

            # duration of recession in late-time
            d_L[zz] = np.sum(L)
            if np.sum(L) < lr:
                d_L[zz] = np.nan

            # Fit slope of b=1 for characteristic time scale
            if np.sum(L) >= lr:
                f3 = lambda x, a: x + a
                try:
                    popt, pcov = curve_fit(f3, np.log(Q_deriv[L]), np.log(dQ_dt[L]), p0=0.1, ftol=1e-6, xtol=1e-6,
                                           maxfev=600, sigma=np.diag(1/(Rsq[L])))
                except:
                    popt = np.array(-1)
                ts[zz] = 1/(np.exp(popt))
                if bL[zz]<0: # ?? weird isn't it tz[zz]
                    ts[zz] = np.nan
            else:
                ts[zz] = np.nan

        return aH, bH, aL, bL, ts, d_all, d_L

    def compute_fdc_slope(self, df_obs, column_name='Q', portion_to_compute=(0.33333, 0.66666)):
        fdc_quantile = self.compute_fdc_quantiles(df_obs, column_name, tuple([100*x for x in portion_to_compute]))
        fdc_slope = -(np.log10(fdc_quantile[0, :]) - np.log10(fdc_quantile[1, :])) / (
                portion_to_compute[1] - portion_to_compute[0])
        self.fdc_slope = fdc_slope[0]
        return self.fdc_slope

    def compute_fdc_percentile_1090(self, df_obs, column_name='Q'):
        fdc_quantiles = self.compute_fdc_quantiles(df_obs, column_name, (10, 90))
        self.fdc_quantile90 = fdc_quantiles[0][0]
        self.fdc_quantile10 = fdc_quantiles[1][0]

    def compute_fdc_quantiles(self, df_obs, column_name='Q', quantiles=(10, 90)):
        exceedence = np.arange(1., len(np.array(df_obs)) + 1) / len(np.array(df_obs))
        exceedence *= 100

        Sort_Discharges = np.sort(df_obs[[column_name]], axis=0)[::-1]  # ,'Q_mod'
        # get quantiles and mid slope of FDC
        fdc_quantiles = (np.nanquantile(Sort_Discharges, np.asarray(quantiles) / 100, axis=0))
        return fdc_quantiles

    @staticmethod
    def test():
        import geopandas as gpd
        HydroStations_df = gpd.read_file(
            '/home/jean.marcais/Donnees/BanqueHydro/Shapefiles/StationHydro_FRmetro/StationHydro_withBHareas.shp')
        HydroStations_df.head()
        import read_BankHydro as readBH
        banque_hydro_folder = '/home/jean.marcais/Donnees/BanqueHydro/'
        val = HydroStations_df.loc[825, 'CdStationH'][:-2]
        df_obs_tmp = readBH.get_BankHydro(banque_hydro_folder, val, -1)

        HSA = HydrologicalSignatures()
        HSA.from_discharge_time_series(df_obs_tmp)
        return HSA