Commit 0ef1a7f9 authored by Laura LINDEPERG
Browse files

Hydro sig. correlation - reduced sets - filters

Boxplots
Discharge timeseries from pickled objects
parent 7289385d
......@@ -11,6 +11,8 @@ import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pickle
# **************************** Data *****************************
......@@ -23,10 +25,35 @@ df_hydro_sig = pd.read_csv(df_hydro_sig_path)
## Older files
# df_hydro_sig = df_stations.drop(columns = ['age', 'fc', 'ks', 'maingeol_age', 'maingeol_age_proportion', 'maingeol_description', 'maingeol_id', 'maingeol_proportion', 'name'])
df_hydro_sig = df_hydro_sig.drop(columns='name')
# df_hydro_sig_main = df_hydro_sig.loc[:, ['aridity_ratio', 'code', 'q_mean', 'runoff_ratio']]
# df_hydro_sig_main_long = df_hydro_sig_main.melt(id_vars = ['code'], var_name = 'hydro_sig')
# df_hydro_sig = df_hydro_sig.loc[df_hydro_sig.loc[:,'code'].isin(['K6492510', 'V2814020', 'P7041510', 'A9001050', 'H8043310', 'H4033010']) == False]


def _signature_subset(*signatures):
    """Return the 'code' column followed by the requested signature columns."""
    return df_hydro_sig.loc[:, ['code', *signatures]]


def _outer_join_on_code(left, right):
    """Outer-merge two signature subsets on their shared 'code' column."""
    return left.merge(right, on='code', how='outer')


def _to_long(wide):
    """Melt a wide table into (code, hydro_sig, value) rows."""
    return pd.melt(wide, id_vars=['code'], var_name='hydro_sig')


# Five reduced sets of signature columns, each keyed by station code.
df_hydro_sig_1 = _signature_subset('aridity_ratio', 'q_mean', 'runoff_ratio')
df_hydro_sig_2 = _signature_subset('bfi_5', 'bfi_90', 'bf_magni')
df_hydro_sig_3 = _signature_subset('fdc_quantile10', 'fdc_quantile90', 'fdc_slope')
df_hydro_sig_4 = _signature_subset('tau_1', 'tau_2', 'tau_roques')
df_hydro_sig_5 = _signature_subset('a_q', 'b_q')

# Every pairwise combination of the reduced sets (wide format).
df_hydro_sig_12 = _outer_join_on_code(df_hydro_sig_1, df_hydro_sig_2)
df_hydro_sig_13 = _outer_join_on_code(df_hydro_sig_1, df_hydro_sig_3)
df_hydro_sig_14 = _outer_join_on_code(df_hydro_sig_1, df_hydro_sig_4)
df_hydro_sig_15 = _outer_join_on_code(df_hydro_sig_1, df_hydro_sig_5)
df_hydro_sig_23 = _outer_join_on_code(df_hydro_sig_2, df_hydro_sig_3)
df_hydro_sig_24 = _outer_join_on_code(df_hydro_sig_2, df_hydro_sig_4)
df_hydro_sig_25 = _outer_join_on_code(df_hydro_sig_2, df_hydro_sig_5)
df_hydro_sig_34 = _outer_join_on_code(df_hydro_sig_3, df_hydro_sig_4)
df_hydro_sig_35 = _outer_join_on_code(df_hydro_sig_3, df_hydro_sig_5)
df_hydro_sig_45 = _outer_join_on_code(df_hydro_sig_4, df_hydro_sig_5)

# The same combinations in long format (one row per station and signature).
df_hydro_sig_12_long = _to_long(df_hydro_sig_12)
df_hydro_sig_13_long = _to_long(df_hydro_sig_13)
df_hydro_sig_14_long = _to_long(df_hydro_sig_14)
df_hydro_sig_15_long = _to_long(df_hydro_sig_15)
df_hydro_sig_23_long = _to_long(df_hydro_sig_23)
df_hydro_sig_24_long = _to_long(df_hydro_sig_24)
df_hydro_sig_25_long = _to_long(df_hydro_sig_25)
df_hydro_sig_34_long = _to_long(df_hydro_sig_34)
df_hydro_sig_35_long = _to_long(df_hydro_sig_35)
df_hydro_sig_45_long = _to_long(df_hydro_sig_45)
# Watersheds
......@@ -63,43 +90,93 @@ my_df = my_df.loc[my_df.loc[:, 'code'].isin(border_catchments)==False]
# Restrict the catchments by size: keep rows whose 'S_km2' is below 5000 ...
my_df = my_df[my_df['S_km2'] < 5000]
size = shp_watersheds[shp_watersheds['S_km2'] < 5000]
# ... and by geology: keep codes whose 'maingeol_proportion' exceeds 0.70
my_geol = df_geol[df_geol['maingeol_proportion'] > 0.70]
my_df = my_df[my_df['code'].isin(my_geol['code'])]
proportion_geol = df_geol[df_geol['maingeol_proportion'] > 0.70]
my_df = my_df[my_df['code'].isin(proportion_geol['code'])]
# # Exclude catchments which disturb scales
# odd_catchments = ['V2814020']
odd_catchments = ['V2814020', 'V5045020', 'H4033010', 'S4214010']
# my_df = my_df.loc[my_df.loc[:,'code'].isin(['K6492510', 'V2814020', 'P7041510', 'A9001050', 'H8043310', 'H4033010']) == False]

# Load the stored watershed object of every odd catchment so that each
# `timeseries_q_<code>` variable really holds the discharge of its own station.
# Previously only one pickle was read and all four variables were bound to that
# single watershed's timeseries.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
discharge_by_code = {}
for i in odd_catchments:
    filename_i = 'watershed_'+i
    # `with` closes the file even if unpickling fails.
    with open(filename_i, 'rb') as infile_i:
        my_watershed = pickle.load(infile_i)
    discharge_by_code[i] = my_watershed.hydro_climatic_fluxes.discharge_timeseries

timeseries_q_V2814020 = discharge_by_code['V2814020']
timeseries_q_V5045020 = discharge_by_code['V5045020']
timeseries_q_H4033010 = discharge_by_code['H4033010']
timeseries_q_S4214010 = discharge_by_code['S4214010']

# Line plot of the "Q" column against "Datetime" for station S4214010,
# with the date tick labels auto-formatted.
g = sns.relplot(x="Datetime", y="Q", kind="line", data=timeseries_q_S4214010)
g.fig.autofmt_xdate()
# **************************** Plots *****************************
# my_df.value=np.log(my_df.value)
# Both plots use only the rows of my_df whose 'hydro_sig' equals 'q_mean'.
# NOTE(review): the scatter plot uses 'DeltaV' for both x and y, so every point
# lies on the diagonal - confirm whether another column was intended for one axis.
sns.relplot(x='DeltaV', y='DeltaV', hue = 'maingeol_description', data=my_df.loc[my_df.loc[:, 'hydro_sig'] == 'q_mean'])
# Boxplot of 'DeltaV' per 'maingeol_description', x tick labels rotated by 45 degrees.
sns.boxplot(x='maingeol_description', y='DeltaV', data=my_df.loc[my_df.loc[:, 'hydro_sig'] == 'q_mean']).tick_params(axis='x', labelrotation=45)
## Discharge timeseries
# List of the stations' codes
watershed_code = shp_watersheds.loc[:,'Code']
# Get a sample of them for test
code_for_test = watershed_code.loc[0:3]
# For each selected catchment: unpickle its stored watershed object, plot its
# discharge timeseries ("Q" against "Datetime") and save the figure as a PNG.
# for i in watershed_code
for i in odd_catchments:
    filename_i = 'C:/Users/laura.lindeperg/Documents/INRAE_2021/CODE/fhysa/Object_watershed/watershed_'+i
    # `with` guarantees the file is closed even if unpickling raises.
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(filename_i, 'rb') as infile_i:
        watershed_i = pickle.load(infile_i)
    timeseries_q_i = watershed_i.hydro_climatic_fluxes.discharge_timeseries
    g = sns.relplot(x="Datetime", y="Q", kind="line", data=timeseries_q_i).set(title='watershed '+i)
    g.fig.autofmt_xdate()
    g.savefig('C:/Users/laura.lindeperg/Documents/INRAE_2021/FIGURES/Discharge_timeseries/discharge_timeseries_'+i+'.png', dpi=400, facecolor='w',edgecolor='w', format='png', pad_inches=0.1)
## Hydro sig correlations
# Attach the main geology description to the reduced signature set 2+5
# (pd.merge joins on the columns the two frames have in common).
geol = df_geol.loc[:, ['code', 'maingeol_description']]
my_data = pd.merge(df_hydro_sig_25, geol)
# Filtering workflow
# Drop border catchments, then keep only codes present in the size-filtered
# and geology-filtered tables, then drop the odd catchments.
my_data = my_data.loc[my_data.loc[:, 'code'].isin(border_catchments)==False]
# NOTE(review): `size` is derived from shp_watersheds, whose code column is read
# as 'Code' elsewhere in this file - confirm that a lowercase 'code' column exists.
my_data = my_data.loc[my_data.loc[:, 'code'].isin(size.loc[:,'code'])==True]
my_data = my_data.loc[my_data.loc[:, 'code'].isin(proportion_geol.loc[:,'code'])==True]
my_data = my_data.loc[my_data.loc[:, 'code'].isin(odd_catchments)==False]
# Sort so that the geology classes appear in alphabetical order.
my_data=my_data.sort_values(by=['maingeol_description'])
# NOTE(review): the next two statements narrow df_geol to two columns and rebind
# my_data to an unfiltered merge of the full signature table, which discards the
# filtering and sorting above. This text looks like a rendered diff, so these may
# be the lines the commit removed - confirm before running in this order.
df_geol = df_geol.loc[:, ['code', 'maingeol_description']]
my_data = pd.merge(df_hydro_sig, df_geol)
# Pairwise scatter plots of every numeric column, coloured by geology class.
viz = sns.PairGrid(data=my_data, hue='maingeol_description')
viz.map(sns.scatterplot)
viz.add_legend()
# NOTE(review): the file name ends in '_45' while the merge above uses
# df_hydro_sig_25 - confirm the suffix matches the set being plotted.
viz.savefig('C:/Users/laura.lindeperg/Documents/INRAE_2021/FIGURES/Hydro_sig_correlation_BordersSizeGeolOutliers_45.png', dpi=400, facecolor='w',edgecolor='w', format='png', pad_inches=0.1)
# viz.map_diag(sns.histplot)
# viz.map_offdiag(sns.scatterplot)
# viz.add_legend()
# medians = my_df.loc[my_df.loc[:, 'hydro_sig'] == 'q_mean'].groupby(['maingeol_description'])['value'].median().values
# Number of catchments per geology class, counted on the 'q_mean' rows and
# ordered by class name.
nb_bv_geol = (
    my_df.loc[my_df['hydro_sig'] == 'q_mean', 'maingeol_description']
    .value_counts()
    .sort_index()
)
# The same counts as a list of strings, ready to be written on a plot.
nobs = [str(count) for count in nb_bv_geol.tolist()]
# nobs = ["n: " + i for i in nobs]
my_df = my_df.sort_values(by=['maingeol_description'])

## Boxplot of the hydrological signatures
figure, axes = plt.subplots(nrows=4, ncols=4, figsize=(17, 17))
figure.suptitle('Hydrological signatures')
# figure.suptitle('Hydrological signatures')
for _column, _title in enumerate(('Qmean', 'Aridity ratio', 'Runoff ratio')):
    axes[0, _column].set_title(_title)
......@@ -136,8 +213,20 @@ axes[3, 3].axis("off")
## Boxplot of the hydrological signatures from their main hydrogeologic type perspective
# 4x4 grid of panels sharing one x axis.
# NOTE(review): an equivalent plt.subplots call appears again a few lines below
# and rebinds `figure` and `axes`; this text looks like a rendered diff, so this
# line may be the pre-change version - confirm only one figure should be created.
figure, axes = plt.subplots(4, 4, figsize = (17, 17), sharex=True)
# Ten colours as '#rrggbb' hex strings. Every entry needs the leading '#':
# matplotlib does not accept a bare 'rrggbb' string as a colour.
my_palette = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
              '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
# The same ten colours, in the same order, as (r, g, b) tuples of floats in [0, 1].
sns_palette_default = [(0.12156862745098039, 0.4666666666666667, 0.7058823529411765),
                       (1.0, 0.4980392156862745, 0.054901960784313725),
                       (0.17254901960784313, 0.6274509803921569, 0.17254901960784313),
                       (0.8392156862745098, 0.15294117647058825, 0.1568627450980392),
                       (0.5803921568627451, 0.403921568627451, 0.7411764705882353),
                       (0.5490196078431373, 0.33725490196078434, 0.29411764705882354),
                       (0.8901960784313725, 0.4666666666666667, 0.7607843137254902),
                       (0.4980392156862745, 0.4980392156862745, 0.4980392156862745),
                       (0.7372549019607844, 0.7411764705882353, 0.13333333333333333),
                       (0.09019607843137255, 0.7450980392156863, 0.8117647058823529)]
# 4x4 grid of panels sharing one x axis; `axes` is indexed as axes[row, column].
figure, axes = plt.subplots(4, 4, figsize = (17, 17), sharex = True)
# figure.suptitle('Hydrological signatures')
# Panel titles: one hydrological signature per panel.
axes[0, 0].set_title('Qmean')
axes[0, 1].set_title('Aridity ratio')
......@@ -154,24 +243,66 @@ axes[2, 3].set_title('tau 2')
axes[3, 0].set_title('tau Roques')
axes[3, 1].set_title('BFI 90')


def _signature_boxplot(ax, signature):
    """Draw one signature's boxplot per geology class on the given panel.

    Parameters
    ----------
    ax : matplotlib Axes
        Panel to draw into.
    signature : str
        Value of the 'hydro_sig' column of `my_df` selecting the rows to plot.

    Returns
    -------
    The object returned by `sns.boxplot`, i.e. the Axes that was drawn on.
    """
    return sns.boxplot(ax=ax, x='maingeol_description', y='value',
                       data=my_df.loc[my_df.loc[:, 'hydro_sig'] == signature])


# Each panel is drawn exactly once, and every axNN name is bound to the Axes
# itself so that later code can keep calling Axes methods on it.
ax00 = _signature_boxplot(axes[0, 0], 'q_mean')
ax01 = _signature_boxplot(axes[0, 1], 'aridity_ratio')
ax02 = _signature_boxplot(axes[0, 2], 'runoff_ratio')
ax03 = _signature_boxplot(axes[0, 3], 'bfi_5')
ax10 = _signature_boxplot(axes[1, 0], 'bf_magni')
ax11 = _signature_boxplot(axes[1, 1], 'a_q')
ax12 = _signature_boxplot(axes[1, 2], 'b_q')
ax13 = _signature_boxplot(axes[1, 3], 'fdc_quantile10')
ax20 = _signature_boxplot(axes[2, 0], 'fdc_quantile90')
ax21 = _signature_boxplot(axes[2, 1], 'fdc_slope')
ax22 = _signature_boxplot(axes[2, 2], 'tau_1')
ax23 = _signature_boxplot(axes[2, 3], 'tau_2')
ax30 = _signature_boxplot(axes[3, 0], 'tau_roques')
ax31 = _signature_boxplot(axes[3, 1], 'bfi_90')

# Remove the x-axis label on every panel except the first one. This is done as
# a separate call because `.set(...)` does not return the Axes, so chaining it
# onto the assignment would leave axNN bound to something else.
for _panel in (ax01, ax02, ax03, ax10, ax11, ax12, ax13,
               ax20, ax21, ax22, ax23, ax30, ax31):
    _panel.set(xlabel = None)

# The last two cells of the grid stay empty.
axes[3, 2].axis("off")
axes[3, 3].axis("off")
# Add it to the plot
# Write the number of catchments of each geology class (strings in `nobs`) on
# the first panel, one label per x position 0..len(nobs)-1, at y = 8.
# NOTE(review): y = 8 is a fixed position - confirm it suits the range of the
# data shown in ax00.
pos = range(len(nobs))
# for tick,label in zip(pos,ax31.get_xticklabels()):
# NOTE(review): the loop body below has lost its indentation in this text and
# must be re-indented under the `for` before it can run. `label` is not used.
for tick,label in zip(pos,nb_bv_geol.index):
ax00.text(pos[tick],
8,
nobs[tick],
horizontalalignment='center',
fontsize='x-small',
color='black',
fontweight='semibold')
# legend
# One coloured patch per geology class, labelled with the class names taken
# from nb_bv_geol.index and coloured with seaborn's "deep" palette.
# NOTE(review): indices 0..9 are hard-coded, so this assumes nb_bv_geol has at
# least ten classes after filtering - confirm. The boxplots above are drawn
# without an explicit palette; confirm that "deep" matches their colours.
import matplotlib.patches as mpatches
mont = mpatches.Patch(color=sns.color_palette("deep")[0], label = nb_bv_geol.index[0])
socle_aquif = mpatches.Patch(color= sns.color_palette("deep")[1], label = nb_bv_geol.index[1])
socle_imper = mpatches.Patch(color= sns.color_palette("deep")[2], label = nb_bv_geol.index[2])
socle_semiper = mpatches.Patch(color= sns.color_palette("deep")[3], label = nb_bv_geol.index[3])
sedim_aqu_karst = mpatches.Patch(color= sns.color_palette("deep")[4], label = nb_bv_geol.index[4])
sedim_aqu_non_karst = mpatches.Patch(color= sns.color_palette("deep")[5], label = nb_bv_geol.index[5])
sedim_imper = mpatches.Patch(color= sns.color_palette("deep")[6], label = nb_bv_geol.index[6])
sedim_semiper_karst = mpatches.Patch(color= sns.color_palette("deep")[7], label = nb_bv_geol.index[7])
sedim_semiper_non_karst = mpatches.Patch(color= sns.color_palette("deep")[8], label = nb_bv_geol.index[8])
volc = mpatches.Patch(color= sns.color_palette("deep")[9], label = nb_bv_geol.index[9])
# Legend built from the patches, attached to matplotlib's current axes.
plt.legend(handles=[mont, socle_aquif, socle_imper, socle_semiper, sedim_aqu_karst, sedim_aqu_non_karst, sedim_imper, sedim_semiper_karst, sedim_semiper_non_karst, volc])
# NOTE(review): this second, figure-level legend passes the bound method
# `ax01.legend` where legend handles (artists) are expected, and keeps the
# placeholder title "Legend Title" - it looks like a leftover experiment;
# confirm whether it should be removed or given real handles.
figure.legend([ax01.legend], # List of the line objects
labels= nb_bv_geol.index, # The labels for each line
#loc='best', # Position of the legend
borderaxespad=0.1, # Add little spacing around the legend box
title="Legend Title") # Title for the legend
figure.tight_layout()
# Stand-alone boxplot of the 'q_mean' values per geology class, x tick labels
# rotated by 45 degrees. No `ax` is given, so seaborn draws on the current axes.
sns.boxplot(x='maingeol_description', y='value', data=my_df.loc[my_df.loc[:, 'hydro_sig'] == 'q_mean']).tick_params(axis='x', labelrotation=45)
......@@ -216,6 +347,11 @@ axes[3, 3].axis("off")
# my_df.value=np.log(my_df.value)
# Both plots use only the rows of my_df whose 'hydro_sig' equals 'q_mean'.
# NOTE(review): the scatter plot uses 'DeltaV' for both x and y, so every point
# lies on the diagonal - confirm whether another column was intended for one axis.
sns.relplot(x='DeltaV', y='DeltaV', hue = 'maingeol_description', data=my_df.loc[my_df.loc[:, 'hydro_sig'] == 'q_mean'])
# Boxplot of 'DeltaV' per 'maingeol_description', x tick labels rotated by 45 degrees.
sns.boxplot(x='maingeol_description', y='DeltaV', data=my_df.loc[my_df.loc[:, 'hydro_sig'] == 'q_mean']).tick_params(axis='x', labelrotation=45)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment