[refactor] remove rolling=30 for smoothing global temperatures, use smoothing spline instead

07d47261 · Le Roux Erwan · 3ac9db1f · 07d47261 · 07d47261 · 07d47261
Commit 07d47261 authored 4 years ago by Le Roux Erwan
Hide whitespace changes
Inline Side-by-side

Showing

with 114 additions and 59 deletions
+114 -59
--- a/extreme_data/meteo_france_data/adamont_data/adamont_gcm_rcm_couples.py
+++ b/extreme_data/meteo_france_data/adamont_data/adamont_gcm_rcm_couples.py
@@ -121,7 +121,7 @@ gcm_rcm_couple_to_color = {
 gcm_to_color = {
    'CNRM-CM5': 'red',
-    'MPI-ESM-LR': 'blue',
+    'MPI-ESM-LR': 'tab:blue',
    'HadGEM2-ES': 'green',
@@ -129,6 +129,6 @@ gcm_to_color = {
    'IPSL-CM5A-MR': 'orange',
-    'NorESM1-M': 'yellow',
+    'NorESM1-M': 'y',
 }
--- a/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py
+++ b/extreme_data/meteo_france_data/adamont_data/cmip5/climate_explorer_cimp5.py
 import calendar
+import os
 import os.path as op
 import pandas as pd
 import subprocess
@@ -7,6 +8,7 @@ from datetime import datetime, timedelta
 import cdsapi
 import numpy as np
 from netCDF4._netCDF4 import Dataset, OrderedDict
+from scipy.interpolate import interp1d, UnivariateSpline
 from extreme_data.meteo_france_data.adamont_data.adamont_gcm_rcm_couples import gcm_to_rnumber
 from extreme_data.meteo_france_data.adamont_data.adamont_scenario import get_year_min_and_year_max_from_scenario, \
@@ -24,24 +26,29 @@ def get_scenario_name(scenario):
        return str(scenario).split('.')[-1]
-def year_to_global_mean_temp(gcm, scenario, year_min=None, year_max=None, rolling=30, anomaly=False):
+def year_to_global_mean_temp(gcm, scenario, year_min=None, year_max=None, spline=False, anomaly=False):
    d = OrderedDict()
-    years, global_mean_temps = years_and_global_mean_temps(gcm, scenario, year_min, year_max, rolling=rolling, anomaly=anomaly)
+    years, global_mean_temps = years_and_global_mean_temps(gcm, scenario, year_min, year_max, spline=spline,
+                                                           anomaly=anomaly)
    for year, global_mean_temp in zip(years, global_mean_temps):
        d[year] = global_mean_temp
    return d
-def years_and_global_mean_temps(gcm, scenario, year_min=None, year_max=None, rolling=30, anomaly=False):
+def get_column_name(anomaly, spline):
+    basic_column_name = 'Annual anomaly' if anomaly else 'Annual mean'
+    if spline:
+        return '{} with spline'.format(basic_column_name)
+    else:
+        return basic_column_name
+def years_and_global_mean_temps(gcm, scenario, year_min=None, year_max=None, anomaly=False, spline=True):
    # Compute everything
    ensemble_member = 'r{}i1p1'.format(gcm_to_rnumber[gcm])
    scenario_name = get_scenario_name(scenario)
    # Standards
-    mean_annual_column_name = 'Annual mean'
-    anomaly_annual_column_name = 'Annual anomaly'
-    rolling_mean_annual_column_name = 'Rolling annual mean for window={}'.format(rolling)
-    rolling_anomaly_annual_column_name = 'Rolling annual anomaly for window={}'.format(rolling)
    filename = 'global_tas_Amon_{}_{}_{}'.format(gcm, scenario_name, ensemble_member)
    dat_filepath = op.join(GLOBALTEMP_DATA_PATH, filename + '.dat')
    txt_filepath = op.join(GLOBALTEMP_DATA_PATH, filename + '.txt')
@@ -51,9 +58,7 @@ def years_and_global_mean_temps(gcm, scenario, year_min=None, year_max=None, rol
        download_dat(dat_filepath, txt_filepath)
    # Transform nc file into csv file
    if not op.exists(csv_filepath):
-        dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean_annual_column_name,
+        dat_to_csv(csv_filepath, txt_filepath, spline)
-                   anomaly_annual_column_name,  rolling_anomaly_annual_column_name,
-                   rolling=rolling)
    # Load csv file
    df = pd.read_csv(csv_filepath, index_col=0)
@@ -65,21 +70,12 @@ def years_and_global_mean_temps(gcm, scenario, year_min=None, year_max=None, rol
    years = list(df.index)
    assert years[0] >= year_min
    assert years[-1] <= year_max
-    if rolling:
+    global_mean_temp = list(df[get_column_name(anomaly, spline)])
-        if anomaly:
+    # os.remove(csv_filepath)
-            global_mean_temp = list(df[rolling_anomaly_annual_column_name])
-        else:
-            global_mean_temp = list(df[rolling_mean_annual_column_name])
-    else:
-        if anomaly:
-            global_mean_temp = list(df[anomaly_annual_column_name])
-        else:
-            global_mean_temp = list(df[mean_annual_column_name])
    return years, global_mean_temp
-def dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean_annual_column_name,
+def dat_to_csv(csv_filepath, txt_filepath, spline):
-               anomaly_annual_column_name,  rolling_anomaly_annual_column_name, rolling=30):
    d = OrderedDict()
    with open(txt_filepath, 'r') as f:
        for i, l in enumerate(f):
@@ -98,6 +94,10 @@ def dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean
    l /= 12
    l = [np.nan] + list(l)
    assert len(l) == len(df.index)
+    # First we compute the standard column
+    mean_annual_column_name, anomaly_annual_column_name = [get_column_name(anomaly=anomaly, spline=False)
+                                                           for anomaly in [False, True]]
    df[mean_annual_column_name] = l
    s_mean_for_reference_period_1850_to_1900 = df.loc[1850:1900, mean_annual_column_name]
    # Sometimes some initial global mean temperatures are negative for the first years,
@@ -105,13 +105,34 @@ def dat_to_csv(csv_filepath, txt_filepath, mean_annual_column_name, rolling_mean
    ind = s_mean_for_reference_period_1850_to_1900 > 0
    mean_for_reference_period_1850_to_1900 = s_mean_for_reference_period_1850_to_1900.loc[ind].mean()
    df[anomaly_annual_column_name] = df[mean_annual_column_name] - mean_for_reference_period_1850_to_1900
-    # Computing the rolling
-    if rolling is not None:
+    # Then we regress some cubic spline on these columns
-        df[rolling_mean_annual_column_name] = df[mean_annual_column_name].rolling(window=rolling).mean()
+    for anomaly in [True, False]:
-        df[rolling_anomaly_annual_column_name] = df[anomaly_annual_column_name].rolling(window=rolling).mean()
+        noisy_data = df[get_column_name(anomaly, spline=False)]
+        ind = noisy_data > -50
+        spline_data = noisy_data.copy()
+        spline_data.loc[ind] = apply_cubic_spline(noisy_data.loc[ind].index.values,
+                                                                       noisy_data.loc[ind].values)
+        df[get_column_name(anomaly, spline=True)] = spline_data
    df.to_csv(csv_filepath)
+def apply_cubic_spline(x, y):
+    """
+    s is THE important parameter, that controls as how far the points of the spline are from the original points.
+    w[i] corresponds to constant weight in our case.
+    sum((w[i] * (y[i]-spl(x[i])))**2, axis=0) <= s
+    """
+    # s = 3 # it was working well except for the blue one.
+    s = 5
+    f = UnivariateSpline(x, y, s=s, w=None)
+    new_y = f(x)
+    return new_y
 def download_dat(dat_filepath, txt_filepath):
    web_filepath = op.join(GLOBALTEMP_WEB_PATH, op.basename(dat_filepath))
    dirname = op.dirname(dat_filepath)
@@ -126,7 +147,7 @@ def download_dat(dat_filepath, txt_filepath):
 def main_example():
    scenario = AdamontScenario.rcp45
    gcm = 'EC-EARTH'
-    year_to_global_mean_temp(gcm, scenario)
+    print(year_to_global_mean_temp(gcm, scenario))
 def main_test_cmip5_loader():
@@ -138,14 +159,6 @@ def main_test_cmip5_loader():
            print(temps)
-def test_rolling():
-    df = pd.DataFrame([1, 2, 3, 4, 5])
-    print(df)
-    df2 = df.rolling(window=3).mean()
-    print(df2)
 if __name__ == '__main__':
-    # main_example()
+    main_example()
-    # test_rolling()
+    # main_test_cmip5_loader()
-    main_test_cmip5_loader()
--- a/extreme_data/meteo_france_data/adamont_data/cmip5/plot_temperatures.py
+++ b/extreme_data/meteo_france_data/adamont_data/cmip5/plot_temperatures.py
+from typing import Union
 import matplotlib.pyplot as plt
 from matplotlib.lines import Line2D
@@ -6,44 +8,84 @@ from extreme_data.meteo_france_data.adamont_data.adamont_scenario import get_lin
    adamont_scenarios_real, AdamontScenario, scenario_to_str, get_gcm_list, rcp_scenarios
 from extreme_data.meteo_france_data.adamont_data.cmip5.climate_explorer_cimp5 import year_to_global_mean_temp, \
    years_and_global_mean_temps
+from extreme_data.meteo_france_data.scm_models_data.visualization.study_visualizer import VisualizationParameters, \
+    StudyVisualizer
+from root_utils import VERSION_TIME
-def main_plot_temperature(anomaly=False):
+def main_plot_temperature_all(anomaly=False, spline=False):
-    rolling = 30
    ax = plt.gca()
    for gcm in get_gcm_list(adamont_version=2)[:]:
+        for scenario in rcp_scenarios[:]:
+            label=gcm if scenario == rcp_scenarios[0] else None
+            plot_temperature_for_rcp_gcm(ax, gcm, scenario, label=label, year_min=2005, year_max=2100, spline=spline, anomaly=anomaly)
+    end_plot(anomaly, ax, spline)
+def main_plot_temperature_with_spline_on_top(anomaly=False):
+    spline = None
+    for gcm in get_gcm_list(adamont_version=2)[:]:
+        ax = plt.gca()
        # Plot the historical part in solid line (this part is the same between the different scenarios)
        linestyle = get_linestyle_from_scenario(AdamontScenario.histo)
-        plot_temperature_for_rcp_gcm(ax, gcm, AdamontScenario.rcp45, year_min=1951, year_max=2005, linestyle=linestyle,
+        scenarios = rcp_scenarios
-                                     label=gcm, rolling=rolling, anomaly=anomaly)
+        for scenario in scenarios:
-        for scenario in rcp_scenarios:
+            label = gcm if scenario == scenarios[0] else None
-            plot_temperature_for_rcp_gcm(ax, gcm, scenario, year_min=2005, year_max=2100, rolling=rolling, anomaly=anomaly)
+            plot_temperature_for_rcp_gcm(ax, gcm, scenario, year_min=1951, year_max=2005, linestyle=linestyle,
+                                         label=label, spline=spline, anomaly=anomaly)
+            plot_temperature_for_rcp_gcm(ax, gcm, scenario, year_min=2005, year_max=2100, spline=spline, anomaly=anomaly)
+        end_plot(anomaly, ax, spline, gcm)
+def end_plot(anomaly, ax, spline, title=None):
    ax2 = ax.twinx()
    legend_elements = [
        Line2D([0], [0], color='k', lw=1, label=scenario_to_str(s),
               linestyle=get_linestyle_from_scenario(s)) for s in adamont_scenarios_real
    ]
-    ax2.legend(handles=legend_elements, loc='upper center')
+    ax2.legend(handles=legend_elements, loc='center left')
    ax2.set_yticks([])
    ax.legend(loc='upper left')
    ax.set_xlabel('Years')
-    add_str = ' averaged on the last {} years'.format(rolling) if rolling is not None else ''
+    if spline is None:
+        add_str = ' with and without spline'
+    elif spline:
+        add_str = ' with spline'
+    else:
+        add_str = ''
    add_str1 = 'anomaly of temperature' if anomaly else 'mean Temperature'
    ax.set_ylabel('Global {}{} (K)\n'
                  'mean temperature is taken on the year centered on the winter'.format(add_str1, add_str))
-    plt.show()
+    if title is None:
+        plt.show()
+    else:
+        filename = "{}/{}".format(VERSION_TIME, title)
+        StudyVisualizer.savefig_in_results(filename, transparent=False)
+    plt.close()
-def plot_temperature_for_rcp_gcm(ax, gcm, scenario, year_min, year_max, linestyle=None, label=None, rolling=None, anomaly=False):
+def plot_temperature_for_rcp_gcm(ax, gcm, scenario, year_min, year_max, linestyle=None,
-    years, global_mean_temp = years_and_global_mean_temps(gcm, scenario, year_min=year_min, year_max=year_max, rolling=rolling, anomaly=anomaly)
+                                 label=None, spline: Union[None, bool] = False, anomaly=False):
-    color = gcm_to_color[gcm]
+    splines = [spline] if spline is not None else [True, False]
-    if linestyle is None:
+    for spline in splines:
-        linestyle = get_linestyle_from_scenario(scenario)
+        years, global_mean_temp = years_and_global_mean_temps(gcm, scenario, year_min=year_min, year_max=year_max, spline=spline, anomaly=anomaly)
-    ax.plot(years, global_mean_temp, linestyle=linestyle, color=color, label=label)
+        if len(splines) == 2:
+            if spline:
+                color = 'k'
+                label_plot = None if label is None else label + ' with spline'
+            else:
+                color = gcm_to_color[gcm]
+                label_plot = None if label is None else label + ' without spline'
+        else:
+            color = gcm_to_color[gcm]
+            label_plot = label
+        if linestyle is None:
+            linestyle = get_linestyle_from_scenario(scenario)
+        ax.plot(years, global_mean_temp, linestyle=linestyle, color=color, label=label_plot)
 if __name__ == '__main__':
-    main_plot_temperature(anomaly=True)
+    main_plot_temperature_with_spline_on_top(anomaly=True)
+    # main_plot_temperature_all(anomaly=True, spline=False)
--- a/extreme_data/meteo_france_data/adamont_data/cmip5/temperature_to_year.py
+++ b/extreme_data/meteo_france_data/adamont_data/cmip5/temperature_to_year.py
@@ -9,8 +9,8 @@ from extreme_data.meteo_france_data.adamont_data.cmip5.climate_explorer_cimp5 im
 def temperature_minmax_to_year_minmax(gcm, scenario, temperature_min, temperature_max):
-    years, global_mean_temps = years_and_global_mean_temps(gcm, scenario, year_min=2005, year_max=2100,
+    years, global_mean_temps = years_and_global_mean_temps(gcm, scenario, year_min=2005, year_max=2100, anomaly=True,
-                                                           rolling=30, anomaly=True)
+                                                           spline=True)
    years, global_mean_temps = np.array(years), np.array(global_mean_temps)
    ind = temperature_min < global_mean_temps
    ind &= global_mean_temps < temperature_max