Commit 07d47261 authored by Le Roux Erwan's avatar Le Roux Erwan
Browse files

[refactor] remove rolling=30 for smoothing global temperatures, use smoothing spline instead

parent 3ac9db1f
No related merge requests found
Showing with 114 additions and 59 deletions
+114 -59
...@@ -121,7 +121,7 @@ gcm_rcm_couple_to_color = { ...@@ -121,7 +121,7 @@ gcm_rcm_couple_to_color = {
gcm_to_color = { gcm_to_color = {
'CNRM-CM5': 'red', 'CNRM-CM5': 'red',
'MPI-ESM-LR': 'blue', 'MPI-ESM-LR': 'tab:blue',
'HadGEM2-ES': 'green', 'HadGEM2-ES': 'green',
...@@ -129,6 +129,6 @@ gcm_to_color = { ...@@ -129,6 +129,6 @@ gcm_to_color = {
'IPSL-CM5A-MR': 'orange', 'IPSL-CM5A-MR': 'orange',
'NorESM1-M': 'yellow', 'NorESM1-M': 'y',
} }
import calendar import calendar
import os
import os.path as op import os.path as op
import pandas as pd import pandas as pd
import subprocess import subprocess
...@@ -7,6 +8,7 @@ from datetime import datetime, timedelta ...@@ -7,6 +8,7 @@ from datetime import datetime, timedelta
import cdsapi import cdsapi
import numpy as np import numpy as np
from netCDF4._netCDF4 import Dataset, OrderedDict from netCDF4._netCDF4 import Dataset, OrderedDict
from scipy.interpolate import interp1d, UnivariateSpline
from extreme_data.meteo_france_data.adamont_data.adamont_gcm_rcm_couples import gcm_to_rnumber from extreme_data.meteo_france_data.adamont_data.adamont_gcm_rcm_couples import gcm_to_rnumber
from extreme_data.meteo_france_data.adamont_data.adamont_scenario import get_year_min_and_year_max_from_scenario, \ from extreme_data.meteo_france_data.adamont_data.adamont_scenario import get_year_min_and_year_max_from_scenario, \
...@@ -24,24 +26,29 @@ def get_scenario_name(scenario): ...@@ -24,24 +26,29 @@ def get_scenario_name(scenario):
return str(scenario).split('.')[-1] return str(scenario).split('.')[-1]
def year_to_global_mean_temp(gcm, scenario, year_min=None, year_max=None, rolling=30, anomaly=False): def year_to_global_mean_temp(gcm, scenario, year_min=None, year_max=None, spline=False, anomaly=False):
d = OrderedDict() d = OrderedDict()
years, global_mean_temps = years_and_global_mean_temps(gcm, scenario, year_min, year_max, rolling=rolling, anomaly=anomaly) years, global_mean_temps = years_and_global_mean_temps(gcm, scenario, year_min, year_max, spline=spline,
anomaly=anomaly)
for year, global_mean_temp in zip(years, global_mean_temps): for year, global_mean_temp in zip(years, global_mean_temps):
d[year] = global_mean_temp d[year] = global_mean_temp
return d return d
def years_and_global_mean_temps(gcm, scenario, year_min=None, year_max=None, rolling=30, anomaly=False): def get_column_name(anomaly, spline):
basic_column_name = 'Annual anomaly' if anomaly else 'Annual mean'
if spline:
return '{} with spline'.format(basic_column_name)
else:
return basic_column_name
def years_and_global_mean_temps(gcm, scenario, year_min=None, year_max=None, anomaly=False, spline=True):
# Compute everything # Compute everything
ensemble_member = 'r{}i1p1'.format(gcm_to_rnumber[gcm]) ensemble_member = 'r{}i1p1'.format(gcm_to_rnumber[gcm])
scenario_name = get_scenario_name(scenario) scenario_name = get_scenario_name(scenario)
# Standards # Standards
mean_annual_column_name = 'Annual mean'
anomaly_annual_column_name = 'Annual anomaly'
rolling_mean_annual_column_name = 'Rolling annual mean for window={}'.format(rolling)
rolling_anomaly_annual_column_name = 'Rolling annual anomaly for window={}'.format(rolling)
filename = 'global_tas_Amon_{}_{}_{}'.format(gcm, scenario_name, ensemble_member) filename = 'global_tas_Amon_{}_{}_{}'.format(gcm, scenario_name, ensemble_member)
dat_filepath = op.join(GLOBALTEMP_DATA_PATH, filename + '.dat') dat_filepath = op.join(GLOBALTEMP_DATA_PATH, filename + '.dat')
txt_filepath = op.join(GLOBALTEMP_DATA_PATH, filename + '.txt') txt_filepath = op.join(GLOBALTEMP_DATA_PATH, filename + '.txt')
...@@ -51,9 +58,7 @@ def years_and_global_mean_temps(gcm, scenario, year_min=None, year_max=None, rol ...@@ -51,9 +58,7 @@ def years_and_global_mean_temps(gcm, scenario, year_min=None, year_max=None, rol
download_dat(dat_filepath, txt_filepath) download_dat(dat_filepath, txt_filepath)
# Transform nc file into csv file # Transform nc file into csv file
if not op.exists(csv_filepath): if not op.exists(csv_filepath):
dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean_annual_column_name, dat_to_csv(csv_filepath, txt_filepath, spline)
anomaly_annual_column_name, rolling_anomaly_annual_column_name,
rolling=rolling)
# Load csv file # Load csv file
df = pd.read_csv(csv_filepath, index_col=0) df = pd.read_csv(csv_filepath, index_col=0)
...@@ -65,21 +70,12 @@ def years_and_global_mean_temps(gcm, scenario, year_min=None, year_max=None, rol ...@@ -65,21 +70,12 @@ def years_and_global_mean_temps(gcm, scenario, year_min=None, year_max=None, rol
years = list(df.index) years = list(df.index)
assert years[0] >= year_min assert years[0] >= year_min
assert years[-1] <= year_max assert years[-1] <= year_max
if rolling: global_mean_temp = list(df[get_column_name(anomaly, spline)])
if anomaly: # os.remove(csv_filepath)
global_mean_temp = list(df[rolling_anomaly_annual_column_name])
else:
global_mean_temp = list(df[rolling_mean_annual_column_name])
else:
if anomaly:
global_mean_temp = list(df[anomaly_annual_column_name])
else:
global_mean_temp = list(df[mean_annual_column_name])
return years, global_mean_temp return years, global_mean_temp
def dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean_annual_column_name, def dat_to_csv(csv_filepath, txt_filepath, spline):
anomaly_annual_column_name, rolling_anomaly_annual_column_name, rolling=30):
d = OrderedDict() d = OrderedDict()
with open(txt_filepath, 'r') as f: with open(txt_filepath, 'r') as f:
for i, l in enumerate(f): for i, l in enumerate(f):
...@@ -98,6 +94,10 @@ def dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean ...@@ -98,6 +94,10 @@ def dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean
l /= 12 l /= 12
l = [np.nan] + list(l) l = [np.nan] + list(l)
assert len(l) == len(df.index) assert len(l) == len(df.index)
# First we compute the standard column
mean_annual_column_name, anomaly_annual_column_name = [get_column_name(anomaly=anomaly, spline=False)
for anomaly in [False, True]]
df[mean_annual_column_name] = l df[mean_annual_column_name] = l
s_mean_for_reference_period_1850_to_1900 = df.loc[1850:1900, mean_annual_column_name] s_mean_for_reference_period_1850_to_1900 = df.loc[1850:1900, mean_annual_column_name]
# Sometimes some initial global mean temperatures are negative for the first years, # Sometimes some initial global mean temperatures are negative for the first years,
...@@ -105,13 +105,34 @@ def dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean ...@@ -105,13 +105,34 @@ def dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean
ind = s_mean_for_reference_period_1850_to_1900 > 0 ind = s_mean_for_reference_period_1850_to_1900 > 0
mean_for_reference_period_1850_to_1900 = s_mean_for_reference_period_1850_to_1900.loc[ind].mean() mean_for_reference_period_1850_to_1900 = s_mean_for_reference_period_1850_to_1900.loc[ind].mean()
df[anomaly_annual_column_name] = df[mean_annual_column_name] - mean_for_reference_period_1850_to_1900 df[anomaly_annual_column_name] = df[mean_annual_column_name] - mean_for_reference_period_1850_to_1900
# Computing the rolling
if rolling is not None: # Then we regress some cubic spline on these columns
df[rolling_mean_annual_column_name] = df[mean_annual_column_name].rolling(window=rolling).mean() for anomaly in [True, False]:
df[rolling_anomaly_annual_column_name] = df[anomaly_annual_column_name].rolling(window=rolling).mean() noisy_data = df[get_column_name(anomaly, spline=False)]
ind = noisy_data > -50
spline_data = noisy_data.copy()
spline_data.loc[ind] = apply_cubic_spline(noisy_data.loc[ind].index.values,
noisy_data.loc[ind].values)
df[get_column_name(anomaly, spline=True)] = spline_data
df.to_csv(csv_filepath) df.to_csv(csv_filepath)
def apply_cubic_spline(x, y):
"""
s is THE important parameter, that controls as how far the points of the spline are from the original points.
w[i] corresponds to constant weight in our case.
sum((w[i] * (y[i]-spl(x[i])))**2, axis=0) <= s
"""
# s = 3 # it was working well except for the blue one.
s = 5
f = UnivariateSpline(x, y, s=s, w=None)
new_y = f(x)
return new_y
def download_dat(dat_filepath, txt_filepath): def download_dat(dat_filepath, txt_filepath):
web_filepath = op.join(GLOBALTEMP_WEB_PATH, op.basename(dat_filepath)) web_filepath = op.join(GLOBALTEMP_WEB_PATH, op.basename(dat_filepath))
dirname = op.dirname(dat_filepath) dirname = op.dirname(dat_filepath)
...@@ -126,7 +147,7 @@ def download_dat(dat_filepath, txt_filepath): ...@@ -126,7 +147,7 @@ def download_dat(dat_filepath, txt_filepath):
def main_example(): def main_example():
scenario = AdamontScenario.rcp45 scenario = AdamontScenario.rcp45
gcm = 'EC-EARTH' gcm = 'EC-EARTH'
year_to_global_mean_temp(gcm, scenario) print(year_to_global_mean_temp(gcm, scenario))
def main_test_cmip5_loader(): def main_test_cmip5_loader():
...@@ -138,14 +159,6 @@ def main_test_cmip5_loader(): ...@@ -138,14 +159,6 @@ def main_test_cmip5_loader():
print(temps) print(temps)
def test_rolling():
df = pd.DataFrame([1, 2, 3, 4, 5])
print(df)
df2 = df.rolling(window=3).mean()
print(df2)
if __name__ == '__main__': if __name__ == '__main__':
# main_example() main_example()
# test_rolling() # main_test_cmip5_loader()
main_test_cmip5_loader()
from typing import Union
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from matplotlib.lines import Line2D from matplotlib.lines import Line2D
...@@ -6,44 +8,84 @@ from extreme_data.meteo_france_data.adamont_data.adamont_scenario import get_lin ...@@ -6,44 +8,84 @@ from extreme_data.meteo_france_data.adamont_data.adamont_scenario import get_lin
adamont_scenarios_real, AdamontScenario, scenario_to_str, get_gcm_list, rcp_scenarios adamont_scenarios_real, AdamontScenario, scenario_to_str, get_gcm_list, rcp_scenarios
from extreme_data.meteo_france_data.adamont_data.cmip5.climate_explorer_cimp5 import year_to_global_mean_temp, \ from extreme_data.meteo_france_data.adamont_data.cmip5.climate_explorer_cimp5 import year_to_global_mean_temp, \
years_and_global_mean_temps years_and_global_mean_temps
from extreme_data.meteo_france_data.scm_models_data.visualization.study_visualizer import VisualizationParameters, \
StudyVisualizer
from root_utils import VERSION_TIME
def main_plot_temperature(anomaly=False): def main_plot_temperature_all(anomaly=False, spline=False):
rolling = 30
ax = plt.gca() ax = plt.gca()
for gcm in get_gcm_list(adamont_version=2)[:]: for gcm in get_gcm_list(adamont_version=2)[:]:
for scenario in rcp_scenarios[:]:
label=gcm if scenario == rcp_scenarios[0] else None
plot_temperature_for_rcp_gcm(ax, gcm, scenario, label=label, year_min=2005, year_max=2100, spline=spline, anomaly=anomaly)
end_plot(anomaly, ax, spline)
def main_plot_temperature_with_spline_on_top(anomaly=False):
spline = None
for gcm in get_gcm_list(adamont_version=2)[:]:
ax = plt.gca()
# Plot the historical part in solid line (this part is the same between the different scenarios) # Plot the historical part in solid line (this part is the same between the different scenarios)
linestyle = get_linestyle_from_scenario(AdamontScenario.histo) linestyle = get_linestyle_from_scenario(AdamontScenario.histo)
plot_temperature_for_rcp_gcm(ax, gcm, AdamontScenario.rcp45, year_min=1951, year_max=2005, linestyle=linestyle, scenarios = rcp_scenarios
label=gcm, rolling=rolling, anomaly=anomaly) for scenario in scenarios:
for scenario in rcp_scenarios: label = gcm if scenario == scenarios[0] else None
plot_temperature_for_rcp_gcm(ax, gcm, scenario, year_min=2005, year_max=2100, rolling=rolling, anomaly=anomaly) plot_temperature_for_rcp_gcm(ax, gcm, scenario, year_min=1951, year_max=2005, linestyle=linestyle,
label=label, spline=spline, anomaly=anomaly)
plot_temperature_for_rcp_gcm(ax, gcm, scenario, year_min=2005, year_max=2100, spline=spline, anomaly=anomaly)
end_plot(anomaly, ax, spline, gcm)
def end_plot(anomaly, ax, spline, title=None):
ax2 = ax.twinx() ax2 = ax.twinx()
legend_elements = [ legend_elements = [
Line2D([0], [0], color='k', lw=1, label=scenario_to_str(s), Line2D([0], [0], color='k', lw=1, label=scenario_to_str(s),
linestyle=get_linestyle_from_scenario(s)) for s in adamont_scenarios_real linestyle=get_linestyle_from_scenario(s)) for s in adamont_scenarios_real
] ]
ax2.legend(handles=legend_elements, loc='upper center') ax2.legend(handles=legend_elements, loc='center left')
ax2.set_yticks([]) ax2.set_yticks([])
ax.legend(loc='upper left') ax.legend(loc='upper left')
ax.set_xlabel('Years') ax.set_xlabel('Years')
add_str = ' averaged on the last {} years'.format(rolling) if rolling is not None else '' if spline is None:
add_str = ' with and without spline'
elif spline:
add_str = ' with spline'
else:
add_str = ''
add_str1 = 'anomaly of temperature' if anomaly else 'mean Temperature' add_str1 = 'anomaly of temperature' if anomaly else 'mean Temperature'
ax.set_ylabel('Global {}{} (K)\n' ax.set_ylabel('Global {}{} (K)\n'
'mean temperature is taken on the year centered on the winter'.format(add_str1, add_str)) 'mean temperature is taken on the year centered on the winter'.format(add_str1, add_str))
plt.show() if title is None:
plt.show()
else:
filename = "{}/{}".format(VERSION_TIME, title)
StudyVisualizer.savefig_in_results(filename, transparent=False)
plt.close()
def plot_temperature_for_rcp_gcm(ax, gcm, scenario, year_min, year_max, linestyle=None, label=None, rolling=None, anomaly=False): def plot_temperature_for_rcp_gcm(ax, gcm, scenario, year_min, year_max, linestyle=None,
years, global_mean_temp = years_and_global_mean_temps(gcm, scenario, year_min=year_min, year_max=year_max, rolling=rolling, anomaly=anomaly) label=None, spline: Union[None, bool] = False, anomaly=False):
color = gcm_to_color[gcm] splines = [spline] if spline is not None else [True, False]
if linestyle is None: for spline in splines:
linestyle = get_linestyle_from_scenario(scenario) years, global_mean_temp = years_and_global_mean_temps(gcm, scenario, year_min=year_min, year_max=year_max, spline=spline, anomaly=anomaly)
ax.plot(years, global_mean_temp, linestyle=linestyle, color=color, label=label) if len(splines) == 2:
if spline:
color = 'k'
label_plot = None if label is None else label + ' with spline'
else:
color = gcm_to_color[gcm]
label_plot = None if label is None else label + ' without spline'
else:
color = gcm_to_color[gcm]
label_plot = label
if linestyle is None:
linestyle = get_linestyle_from_scenario(scenario)
ax.plot(years, global_mean_temp, linestyle=linestyle, color=color, label=label_plot)
if __name__ == '__main__': if __name__ == '__main__':
main_plot_temperature(anomaly=True) main_plot_temperature_with_spline_on_top(anomaly=True)
# main_plot_temperature_all(anomaly=True, spline=False)
...@@ -9,8 +9,8 @@ from extreme_data.meteo_france_data.adamont_data.cmip5.climate_explorer_cimp5 im ...@@ -9,8 +9,8 @@ from extreme_data.meteo_france_data.adamont_data.cmip5.climate_explorer_cimp5 im
def temperature_minmax_to_year_minmax(gcm, scenario, temperature_min, temperature_max): def temperature_minmax_to_year_minmax(gcm, scenario, temperature_min, temperature_max):
years, global_mean_temps = years_and_global_mean_temps(gcm, scenario, year_min=2005, year_max=2100, years, global_mean_temps = years_and_global_mean_temps(gcm, scenario, year_min=2005, year_max=2100, anomaly=True,
rolling=30, anomaly=True) spline=True)
years, global_mean_temps = np.array(years), np.array(global_mean_temps) years, global_mean_temps = np.array(years), np.array(global_mean_temps)
ind = temperature_min < global_mean_temps ind = temperature_min < global_mean_temps
ind &= global_mean_temps < temperature_max ind &= global_mean_temps < temperature_max
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment