HydroClimaticFluxes.py 6.96 KiB
import pandas as pd


class HydroClimaticFluxes(object):

    def __init__(self, code, discharge_df=-1, safran_df=-1):
        self.code = code  # identifier of the site (e.g. gauge number)
        self.discharge_timeseries = discharge_df  # dataframe containing discharge time series
        self.safran_timeseries = safran_df  # dataframe containing P, ET0 and Tair reanalysis over 1958 to present

    def extract_discharge_timeseries(self, filepath, type_data='txt_file'):
        if type_data == 'BanqueHydro':
            import read_BankHydro as readBH
            discharge_timeseries = readBH.get_BankHydro(filepath, self.code, Area_BH=-1)
        else:
            discharge_timeseries = pd.read_csv(filepath)
        self.discharge_timeseries = discharge_timeseries

    def extract_safran_variable(self, foldername, quantity_to_retrieve='Ptot'):
        import netCDF4 as nc
        import geopandas as gpd
        import numpy as np
        import datetime
        import glob
        # find filepath with the pattern "quantity_to_retrieve"
        filepath = glob.glob(foldername+'/*'+quantity_to_retrieve+'*.nc')
        if not filepath:
            print('WARNING : no netcdf found with pattern "'+quantity_to_retrieve+'" \n')
            quantity_gdf = pd.DataFrame()
        else:
            ncdf_ds = nc.Dataset(filepath[0])
            # retrieve Time array
            Time_arr = np.array([datetime.datetime(1958, 7, 31) + datetime.timedelta(days=int(number)) for index, number in
                                 enumerate(ncdf_ds['Time'][:].data)])
            # retrieve Lambert 2 etendu coordinates
            coordinates_df = pd.DataFrame({'X': ncdf_ds['LambXg'][:].data, 'Y': ncdf_ds['LambYg'][:].data,
                                           'InFrance': ncdf_ds['in.France'][:].data})
            # retrieve quantity_df to retrieve
            quantity_df = pd.DataFrame(ncdf_ds[quantity_to_retrieve][:].data.transpose(), columns=Time_arr)
            quantity_df = pd.concat([quantity_df, coordinates_df], axis=1)
            # create polygons to spatialize the SAFRAN inputs. each polygon is 8x8km (ie 4000m from the square center)
            from shapely.geometry import Polygon
            Polygons_ = [Polygon(
                zip([quantity_df.X[i] + 4000, quantity_df.X[i] + 4000, quantity_df.X[i] - 4000, quantity_df.X[i] - 4000],
                    [quantity_df.Y[i] + 4000, quantity_df.Y[i] - 4000, quantity_df.Y[i] - 4000, quantity_df.Y[i] + 4000]))
                  for i, row in quantity_df.iterrows()]
            quantity_gdf = gpd.GeoDataFrame(quantity_df, geometry=Polygons_, crs="EPSG:27572")  # in Lambert 2 etendu
            quantity_gdf = quantity_gdf.to_crs("EPSG:2154")  # reproject in RGF93
            return quantity_gdf

    def intersect_safran_gpd_and_contour(self, quantity_gdf, catchment_contour_shp, quantity_to_retrieve='SAFRAN_forcing'):
        import geopandas as gpd
        # keep only the tile intersecting the catchment contour
        try:
            quantity_gdf = gpd.clip(quantity_gdf, catchment_contour_shp)
        except:
            quantity_gdf = gpd.overlay(quantity_gdf, catchment_contour_shp)    
        quantity_gdf['Area'] = quantity_gdf.geometry.area
        # compute area of each tile partially belonging to the catchment contour to weight safran inputs
        area_tot = sum(quantity_gdf.Area)
        quantity_gdf = quantity_gdf.drop(columns=['X', 'Y', 'InFrance', 'geometry'])
        # weight forcing tile timeseries by its relative area proportion
        quantity_gdf = quantity_gdf.apply(lambda x : x * quantity_gdf.Area)
        quantity_gdf = quantity_gdf.drop(columns='Area')
        quantity_gdf = quantity_gdf.sum(axis=0) / area_tot
        # rename dataframe column
        quantity_gdf = quantity_gdf.reset_index()
        quantity_gdf = quantity_gdf.rename(columns={'index': 'Datetime', 0: quantity_to_retrieve})
        if type(self.safran_timeseries)==int and self.safran_timeseries == -1:
            self.safran_timeseries = quantity_gdf
        else:
            self.safran_timeseries = pd.merge(self.safran_timeseries, quantity_gdf, on='Datetime')


    def extract_safran_timeseries_from_contour(self, foldername, catchment_contour_shp):
        # Ptot_gpd = self.extract_safran_variable(foldername, 'Ptot')
        # self.intersect_safran_gpd_and_contour(Ptot_gpd, catchment_contour_shp, 'Ptot')
        ET0_gpd = self.extract_safran_variable(foldername, 'ET0')
        self.intersect_safran_gpd_and_contour(ET0_gpd, catchment_contour_shp, 'ET0')
        # Tair_gpd = self.extract_safran_variable(foldername, 'Tair')
        # self.intersect_safran_gpd_and_contour(Tair_gpd, catchment_contour_shp, 'Tair')
        Snow_gpd = self.extract_safran_variable(foldername, 'Snow')
        self.intersect_safran_gpd_and_contour(Snow_gpd, catchment_contour_shp, 'Snow')
        Rain_gpd = self.extract_safran_variable(foldername, 'Rain')
        self.intersect_safran_gpd_and_contour(Rain_gpd, catchment_contour_shp, 'Rain')
        self.safran_timeseries['Ptot'] = self.safran_timeseries['Snow'] + self.safran_timeseries['Rain'] 

    def merge_time_series(self, merging_option='outer'):
        if (type(self.discharge_timeseries) == int) and (self.discharge_timeseries == -1):
            hydroclim_dataframe = self.safran_timeseries
        elif (type(self.safran_timeseries) == int) and (self.safran_timeseries == -1):
            hydroclim_dataframe = self.discharge_timeseries
        else:
            # self.safran_timeseries['Datetime'] = self.safran_timeseries['Datetime'].astype('datetime64[ns]')
            self.safran_timeseries['Datetime'] = pd.to_datetime(self.safran_timeseries['Datetime'])
            hydroclim_dataframe = pd.merge(self.discharge_timeseries, self.safran_timeseries, on='Datetime',
                                            how=merging_option).sort_values(by='Datetime')
        return hydroclim_dataframe

    @staticmethod
    def test():
        foldername = '/home/jean.marcais/Donnees/BanqueHydro/'
        code = 'W3315010'
        hydroclimaticfluxes_test = HydroClimaticFluxes(code)
        #  retrieve banque hydro discharge time series
        hydroclimaticfluxes_test.extract_discharge_timeseries(foldername, type_data='BanqueHydro')
        #  retrieve safran time series
        safran_foldername = '/home/jean.marcais/Donnees/SAFRAN/SAFRAN_Vidal/'
        safran_grid_shpfilename = '/home/jean.marcais/Donnees/SAFRAN/maille_safran/maille_meteo_fr_pr93.shp'
        from geopandas import read_file as gpd_read_file
        catchment_contour_shp = gpd_read_file('/home/jean.marcais/Donnees/DonneesLaura/BanqueHydro/Shapes'
                                              '/BassinsVersantsMetropole/BV_4207_stations.shp')
        catchment_contour_shp = catchment_contour_shp[catchment_contour_shp.loc[:, 'Code'] == code]
        hydroclimaticfluxes_test.extract_safran_timeseries_from_contour(safran_foldername, safran_grid_shpfilename,
                                                                        catchment_contour_shp)

        return hydroclimaticfluxes_test