# clip_RequiredData.py

# -*- coding: utf-8 -*-
"""
Created on Wed Mar 24 09:21:44 2021

@author: laura.lindeperg
"""

import geopandas as gpd
import pandas as pd


# **************************** Data *****************************
#
# Every location below is an absolute path on the author's workstation.
# The paths are declared first; the files are then read in one place.

# ---- Watersheds ----
# shp_stations_filepath = 'E:/DonneesLaura/BanqueHydro/Shapes/StationBHYDRO_L93.shp'
# Semicolon-separated station metadata table
df_stations_filepath = 'C:/Users/laura.lindeperg/Documents/DonneesLaura/BanqueHydro/StationsNonInfluenceesExplore2/Synthèse analyses/Synthèse_meta_selection_624.csv'
# Watershed outlines
shp_contour_filepath = 'C:/Users/laura.lindeperg/Documents/DonneesLaura/BanqueHydro/Shapes/BassinsVersantsMetropole/BV_4207_stations.shp'
# "BV_IV" outlines (used further down to look up alternative geometries)
shp_BV_IV_path = 'C:/Users/laura.lindeperg/Documents/DonneesLaura/Watersheds/BV_InvalidesValides/BVsInvalidesValides.shp'
# Folder of per-station outline shapefiles, read later as shp_foldername + <Code> + '.shp'
shp_foldername = 'C:/Users/laura.lindeperg/Documents/DonneesLaura/Watersheds/GEOMETRY/'

# ---- Banque Hydro ----
banquehydro_foldername = 'C:/Users/laura.lindeperg/Documents/DonneesLaura/BanqueHydro/Export2020/'

# ---- SAFRAN ----
safran_foldername = 'C:/Users/laura.lindeperg/Documents/DonneesLaura/SAFRAN/daily/'
# safran_grid_shpfilename = './TestData/SAFRAN/maille_meteo_fr_pr93.shp'

# ---- Geology (BD Lisa and BRGM vector layers) ----
BDLisa_shp_path = 'C:/Users/laura.lindeperg/Documents/DonneesLaura/BD_Lisa/RegionalHydrogeologyAnalysisMe/BD_Lisa_regionalhydrogeology.shp'
BRGM_geol_path = 'C:/Users/laura.lindeperg/Documents/DonneesLaura/CarteGeolBRGM/FR_vecteur/FR_vecteur/GEO001M_CART_FR_S_FGEOL_2154.shp'

# ---- Read everything into memory ----
df_stations = pd.read_csv(
    df_stations_filepath,
    sep=';',
    encoding='latin-1',
)
shp_contour = gpd.read_file(shp_contour_filepath)
shp_BV_IV = gpd.read_file(shp_BV_IV_path)
# shp_BV_VV = gpd.read_file('C:/Users/laura.lindeperg/Documents/DonneesLaura/Watersheds/BV_InvalidesValides/BVsValidesValides.shp')
BDLisa_shp = gpd.read_file(BDLisa_shp_path)
BRGM_shp = gpd.read_file(BRGM_geol_path)


# Extract the SAFRAN variables once, up front, so they can be reused for
# every watershed further down.
from HydroClimaticFluxes import HydroClimaticFluxes

# NOTE(review): code=-1 looks like a placeholder station code used only to
# get access to the extraction method -- confirm against HydroClimaticFluxes.
HCF = HydroClimaticFluxes(code=-1)

# 'Ptot' is deliberately not extracted here:
# Ptot_gpd = HCF.extract_safran_variable(safran_foldername, 'Ptot')
ET0_gpd, Tair_gpd, Snow_gpd, Rain_gpd = (
    HCF.extract_safran_variable(safran_foldername, safran_variable)
    for safran_variable in ('ET0', 'Tair', 'Snow', 'Rain')
)


# **************************** Create my own Data *****************************

# Keep the station metadata columns from 'Code' through 'YL93'
# (label-based slice, both ends included).
my_df_stations = df_stations.loc[:, 'Code':'YL93']

# Boolean flag per row: True when the same 'Code' already appeared in an
# earlier row (the first occurrence is not flagged).
duplicated_stations = my_df_stations.duplicated(subset=['Code'])
# Only the flagged rows; the boolean Series is used directly as its own mask.
index_duplicated = duplicated_stations[duplicated_stations]
# Rows that are duplicates, kept for inspection. Selected by label (.loc),
# so this stays correct even if the index is not a default 0..n-1 range.
# NOTE: this is a DataFrame holding every duplicated row (possibly empty),
# not just the first one.
stations_duplicated = my_df_stations.loc[index_duplicated.index]

# Remove every duplicated row (not only the first one found). With no
# duplicates at all this is a no-op instead of an IndexError.
no_duplicated = my_df_stations.drop(index=index_duplicated.index)

# Attach the watershed outlines to the station table. The outer join keeps
# stations without an outline as well as outlines without a station row.
# shp_watersheds = my_df_stations.merge(shp_contour, on = 'Code', how = 'outer')
shp_watersheds = no_duplicated.merge(shp_contour, on='Code', how='outer')
shp_watersheds = gpd.GeoDataFrame(shp_watersheds)

# Rows for which no outline was found. isna() is used instead of '== None':
# it also catches NaN and does not depend on how the column dtype handles
# comparison with None.
no_geometry = shp_watersheds[shp_watersheds['geometry'].isna()]

# Restrict to the stations of the de-duplicated table, then to those that
# actually have an outline.
studied_watersheds = shp_watersheds.loc[shp_watersheds['Code'].isin(no_duplicated['Code'])]
complete_watersheds = studied_watersheds[studied_watersheds['geometry'].notna()]

# complete_watersheds.to_file('C:/Users/laura.lindeperg/Documents/DonneesLaura/Watersheds/complete_df_wrong_geometries.shp')

# Same table without station 'J4742020'. A boolean filter is used so that a
# missing code simply leaves the table unchanged instead of raising.
complete_watersheds_noElle = complete_watersheds[complete_watersheds['Code'] != 'J4742020']

## Dealing with BV_IV

# Station rows whose code is present in the BV_IV shapefile.
df_BV_IV = no_duplicated[no_duplicated['Code'].isin(shp_BV_IV['Code'])]

# Only these four columns of the BV_IV layer are kept from here on.
shp_BV_IV = shp_BV_IV.loc[:, ['Code', 'S_km2', 'dt_pstn', 'geometry']]

# Outer join of station metadata and BV_IV outlines, as a GeoDataFrame.
bv_IV = gpd.GeoDataFrame(df_BV_IV.merge(shp_BV_IV, on='Code', how='outer'))

# bv_IV.to_file('C:/Users/laura.lindeperg/Documents/DonneesLaura/Watersheds/BV_IV.shp')

## Replacing problematic watersheds' geometry in final df

# Index labels of the complete_watersheds rows that have a BV_IV counterpart,
# and the (Code, geometry) pairs that would replace them.
index_BV_Inv = complete_watersheds[complete_watersheds['Code'].isin(bv_IV['Code'])].index
new_geometries = bv_IV.loc[:, ['Code', 'geometry']]

# TODO: the replacement itself is not implemented. An earlier draft looped
# over index_BV_Inv, looked up the matching bv_IV row for each code and tried
# to overwrite the row of complete_watersheds with it; that draft was left
# disabled, so complete_watersheds still carries its original geometries.


# *************************** Create watersheds' shapefiles and rasters ************************************

# Station codes of all watersheds that have an outline
watershed_code = complete_watersheds['Code']
# watershed_code_SAFRAN = complete_watersheds_noElle.loc[:,'Code']

# Codes of the BV_IV layer that are also part of watershed_code
watershed_code_IV = shp_BV_IV.loc[shp_BV_IV['Code'].isin(watershed_code), 'Code']

# Small sample for quick test runs: rows whose index label lies between
# 0 and 3 (label-based slice, both ends included).
code_for_test = watershed_code.loc[0:3]
# code_for_test = ['K9341810']
# code_for_test = ['J4742020']

## SHP and GEOL


def _clip_or_overlay(layer, contour):
    """Return the part of *layer* that lies inside *contour*.

    ``gpd.clip`` is tried first. If it raises, ``gpd.overlay`` (default
    ``how='intersection'``) is used as a fallback.

    Only ``Exception`` subclasses trigger the fallback, so Ctrl-C
    (``KeyboardInterrupt``) and ``SystemExit`` still stop the script instead
    of silently starting a second, equally long computation.

    NOTE(review): the fallback presumably exists for geometries that
    ``gpd.clip`` rejects -- confirm which error is actually expected and
    narrow the ``except`` further if possible.
    """
    try:
        return gpd.clip(layer, contour)
    except Exception:
        return gpd.overlay(layer, contour)


# for i in code_for_test:
for i in watershed_code:

    ## Get shp
    # Outline of watershed i, read from its own shapefile (named after its code)
    shpfile_contour_i = gpd.read_file(shp_foldername+i+'.shp')
    # shpfile_contour_i = shp_contour[shp_contour.loc[:, 'Code'] == i]
    # And save it in a file (identifier = its code)
    # shpfile_contour_i.to_file('C:/Users/laura.lindeperg/Documents/DonneesLaura/Watersheds/GEOMETRY/'+i+'.shp')

    ## Get watershed's geologic properties and save them in file

    # BDLisa
    shpfile_BDLisa = _clip_or_overlay(BDLisa_shp, shpfile_contour_i)
    shpfile_BDLisa.to_file('C:/Users/laura.lindeperg/Documents/DonneesLaura/Watersheds/GEOL/BDLisa/'+i+'_BDLisa.shp')

    # BRGM geol
    shpfile_BRGM = _clip_or_overlay(BRGM_shp, shpfile_contour_i)
    shpfile_BRGM.to_file('C:/Users/laura.lindeperg/Documents/DonneesLaura/Watersheds/GEOL/BRGM/'+i+'_BRGM.shp')


## SAFRAN csv

# for i in code_for_test:
for i in watershed_code_IV:

    ## Get shp
    # Outline of watershed i, taken from the merged table
    shpfile_contour_i = complete_watersheds[complete_watersheds['Code'] == i]
    # Alternative source for the outline (BV_IV geometries):
    # shpfile_contour_i = bv_IV[bv_IV.loc[:, 'Code'] == i]
    # And save it in a file (identifier = its code)
    # shpfile_contour_i.to_file('C:/Users/laura.lindeperg/Documents/DonneesLaura/Watersheds/GEOMETRY/'+i+'.shp')

    # One HydroClimaticFluxes object per watershed
    HCF = HydroClimaticFluxes(code=i)

    # Intersect each pre-extracted SAFRAN variable with the outline,
    # in the order ET0, Tair, Snow, Rain.
    for safran_gpd, variable_name in (
        (ET0_gpd, 'ET0'),
        (Tair_gpd, 'Tair'),
        (Snow_gpd, 'Snow'),
        (Rain_gpd, 'Rain'),
    ):
        HCF.intersect_safran_gpd_and_contour(safran_gpd, shpfile_contour_i, variable_name)

    # Total precipitation is the sum of the 'Snow' and 'Rain' columns, which
    # are presumably filled in by the intersect calls above -- confirm
    # against HydroClimaticFluxes.
    HCF.safran_timeseries['Ptot'] = HCF.safran_timeseries['Snow'] + HCF.safran_timeseries['Rain']

    # Export is currently disabled:
    # HCF.safran_timeseries.to_csv('C:/Users/laura.lindeperg/Documents/DonneesLaura/Watersheds/SAFRAN/'+i+'_safran_timeseries.csv', index=False)


# Test: read back one exported SAFRAN time-series file (station A1072010)
safran_test = pd.read_csv(
    'C:/Users/laura.lindeperg/Documents/DonneesLaura/Watersheds/SAFRAN/A1072010_safran_timeseries.csv',
    encoding='latin-1',
)