Commit 89e9e74e authored by Le Roux Erwan
Browse files

[contrasting project] replace the index of df_weather_types. add test_weather_patterns_maxima

parent 598c29ab
No related merge requests found
Showing with 61 additions and 23 deletions
+61 -23
...@@ -5,23 +5,50 @@ import pandas as pd ...@@ -5,23 +5,50 @@ import pandas as pd
from extreme_data.meteo_france_data.scm_models_data.utils import date_to_str from extreme_data.meteo_france_data.scm_models_data.utils import date_to_str
from root_utils import get_full_path from root_utils import get_full_path
relative_path = r'local/EDF_data/Weather_types/CatalogueTT_EDF_France0_5308.txt' # Type of Weather
edf_filepath = get_full_path(relative_path=relative_path)
# Human-readable names of the eight weather patterns, listed in the order of
# their integer codes (1 to 8).
ATLANTIC_WAVE = 'Atlantic Wave'
STEADY_OCEANIC = 'Steady Oceanic'
SOUTHWEST_CIRCULATION = 'Southwest Circulation'
SOUTH_CIRCULATION = 'South Circulation'
NORTHEAST_CIRCULATION = 'Northeast Circulation'
EAST_RETURN = 'East Return'
CENTRAL_DEPRESSION = 'Central Depression'
ANTICYCLONIC = 'Anticyclonic'

# Integer weather-pattern code -> weather-pattern name.
wp_int_to_wp_str = dict(enumerate(
    (
        ATLANTIC_WAVE,
        STEADY_OCEANIC,
        SOUTHWEST_CIRCULATION,
        SOUTH_CIRCULATION,
        NORTHEAST_CIRCULATION,
        EAST_RETURN,
        CENTRAL_DEPRESSION,
        ANTICYCLONIC,
    ),
    start=1,
))
def load_df_weather_types() -> pd.DataFrame:
    """Load the EDF weather-type catalogue into a DataFrame.

    The catalogue file is located through ``get_full_path`` and parsed line
    by line. Each retained line yields one row: a date string (produced by
    ``date_to_str``) and the name of the weather pattern, looked up in
    ``wp_int_to_wp_str``.

    :return: DataFrame indexed by 'Date' with a single column 'WP'.
    """
    relative_path = r'local/EDF_data/Weather_types/CatalogueTT_EDF_France0_5308.txt'
    edf_filepath = get_full_path(relative_path=relative_path)
    # NOTE(review): the result is also bound to the module-level name ``df``;
    # kept as-is because other code may rely on it -- confirm before removing.
    global df
    weather_types = []
    with open(edf_filepath, 'rb') as f:
        for i, raw_line in enumerate(f):
            # The first 7 lines are skipped (presumably a header -- TODO confirm).
            if i >= 7:
                # str() of a bytes object is its repr; everything after the
                # first double quote is split on the remaining double quotes.
                fields = str(raw_line).split('"')[1:]
                # The weather-pattern code is the third character of the second field.
                wp_int = int(fields[1][2])
                wp_str = wp_int_to_wp_str[wp_int]
                # The first field holds the date as day/month/year.
                day, month, year = [int(e) for e in fields[0].split('/')]
                date_str = date_to_str(datetime(year=year, month=month, day=day))
                weather_types.append((date_str, wp_str))
    df = pd.DataFrame(weather_types, columns=['Date', 'WP'])
    df.set_index('Date', inplace=True)
    return df
...@@ -82,7 +82,7 @@ class AbstractStudy(object): ...@@ -82,7 +82,7 @@ class AbstractStudy(object):
# Map each year to the 'days since year-08-01 06:00:00' # Map each year to the 'days since year-08-01 06:00:00'
year_to_days = OrderedDict() year_to_days = OrderedDict()
for year in self.ordered_years: for year in self.ordered_years:
date = datetime.datetime(year=year-1, month=8, day=1, hour=6, minute=0, second=0) date = datetime.datetime(year=year - 1, month=8, day=1, hour=6, minute=0, second=0)
days = [] days = []
for i in range(366): for i in range(366):
days.append(date_to_str(date)) days.append(date_to_str(date))
...@@ -96,11 +96,19 @@ class AbstractStudy(object): ...@@ -96,11 +96,19 @@ class AbstractStudy(object):
def year_to_wps(self):
    """Map each year to the weather patterns of its days.

    For every (year, days) pair of ``self.year_to_days``, the rows of
    ``self.df_weather_types`` labelled by ``days`` are selected and the
    values of their first column are stored.

    :return: OrderedDict mapping year -> array of weather patterns.
    :raises AssertionError: if the study years are outside
        [1954, WP_PATTERN_MAX_YEAR].
    """
    assert 1954 <= self.year_min and self.year_max <= WP_PATTERN_MAX_YEAR, \
        'Weather patterns are not available between {} and {}'.format(self.year_min, self.year_max)
    year_to_wps = OrderedDict()
    for year, days in self.year_to_days.items():
        year_to_wps[year] = self.df_weather_types.loc[days].iloc[:, 0].values
    return year_to_wps
@cached_property
def year_to_wp_for_annual_maxima(self):
    """Map each year to the weather pattern(s) observed at its annual maxima.

    For every (year, idx) pair of ``self.year_to_annual_maxima_index``, the
    entry ``self.year_to_wps[year]`` is indexed with ``idx``.

    :return: OrderedDict mapping year -> ``self.year_to_wps[year][idx]``,
        in the iteration order of ``self.year_to_annual_maxima_index``.
    """
    # ``self.year_to_wps`` is looked up per year (not hoisted) so that it is
    # never evaluated when there is no annual maxima index at all.
    return OrderedDict(
        (year, self.year_to_wps[year][idx])
        for year, idx in self.year_to_annual_maxima_index.items()
    )
@cached_property
def df_weather_types(self):
    """Return the weather-types DataFrame produced by ``load_df_weather_types``."""
    return load_df_weather_types()
...@@ -190,8 +198,6 @@ class AbstractStudy(object): ...@@ -190,8 +198,6 @@ class AbstractStudy(object):
""" Load daily observations """ """ Load daily observations """
@cached_property @cached_property
def year_to_daily_time_serie_array(self) -> OrderedDict: def year_to_daily_time_serie_array(self) -> OrderedDict:
return self._year_to_daily_time_serie_array return self._year_to_daily_time_serie_array
...@@ -252,7 +258,7 @@ class AbstractStudy(object): ...@@ -252,7 +258,7 @@ class AbstractStudy(object):
@cached_property @cached_property
def ordered_years_and_path_files(self): def ordered_years_and_path_files(self):
nc_files = [(int(f.split('_')[-2][:4])+1, f) for f in os.listdir(self.study_full_path) if f.endswith('.nc')] nc_files = [(int(f.split('_')[-2][:4]) + 1, f) for f in os.listdir(self.study_full_path) if f.endswith('.nc')]
ordered_years, path_files = zip(*[(year, op.join(self.study_full_path, nc_file)) ordered_years, path_files = zip(*[(year, op.join(self.study_full_path, nc_file))
for year, nc_file in sorted(nc_files, key=lambda t: t[0]) for year, nc_file in sorted(nc_files, key=lambda t: t[0])
if self.year_min <= year <= self.year_max]) if self.year_min <= year <= self.year_max])
...@@ -609,7 +615,3 @@ class AbstractStudy(object): ...@@ -609,7 +615,3 @@ class AbstractStudy(object):
mask_massif = np.array(img) mask_massif = np.array(img)
mask_french_alps += mask_massif mask_french_alps += mask_massif
return ~np.array(mask_french_alps, dtype=bool) return ~np.array(mask_french_alps, dtype=bool)
...@@ -4,7 +4,7 @@ from datetime import datetime ...@@ -4,7 +4,7 @@ from datetime import datetime
import pandas as pd import pandas as pd
from extreme_data.edf_data.weather_types import load_df_weather_types from extreme_data.edf_data.weather_types import load_df_weather_types, wp_int_to_wp_str, ANTICYCLONIC, STEADY_OCEANIC
from extreme_data.meteo_france_data.scm_models_data.crocus.crocus import CrocusSwe3Days from extreme_data.meteo_france_data.scm_models_data.crocus.crocus import CrocusSwe3Days
from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranTemperature, SafranPrecipitation1Day from extreme_data.meteo_france_data.scm_models_data.safran.safran import SafranTemperature, SafranPrecipitation1Day
from extreme_data.meteo_france_data.scm_models_data.utils import date_to_str from extreme_data.meteo_france_data.scm_models_data.utils import date_to_str
...@@ -16,8 +16,8 @@ class TestWeatherTypes(unittest.TestCase): ...@@ -16,8 +16,8 @@ class TestWeatherTypes(unittest.TestCase):
df = load_df_weather_types() df = load_df_weather_types()
self.assertEqual(len(df), 20354) self.assertEqual(len(df), 20354)
# Assert values # Assert values
self.assertEqual(df.iloc[0, :].values[0], 5) self.assertEqual(df.iloc[0, :].values[0], wp_int_to_wp_str[5])
self.assertEqual(df.iloc[-1, :].values[0], 8) self.assertEqual(df.iloc[-1, :].values[0], wp_int_to_wp_str[8])
# Assert keys # Assert keys
self.assertEqual(date_to_str(datetime(year=1953, month=1, day=1)), df.index[0]) self.assertEqual(date_to_str(datetime(year=1953, month=1, day=1)), df.index[0])
self.assertEqual(date_to_str(datetime(year=2008, month=9, day=22)), df.index[-1]) self.assertEqual(date_to_str(datetime(year=2008, month=9, day=22)), df.index[-1])
...@@ -34,14 +34,14 @@ class TestWeatherTypes(unittest.TestCase): ...@@ -34,14 +34,14 @@ class TestWeatherTypes(unittest.TestCase):
def test_yearly_percentages(self):
    """Compare the observed share of each weather pattern with the expected percentages."""
    # Expected percentages come from the original paper
    expected_percentages = [7, 23, 8, 18, 7, 6, 3, 28]
    wp_str = [wp_int_to_wp_str[wp_int] for wp_int in range(1, 9)]
    wp_to_expected_percentages = dict(zip(wp_str, expected_percentages))
    # Compute the percentage of days attributed to each weather pattern
    df = load_df_weather_types()
    wp_to_found_percentages = 100 * df['WP'].value_counts() / len(df)
    wp_to_found_percentages = {k: round(v) for k, v in wp_to_found_percentages.to_dict().items()}
    # They remove one point from wp1 so that the percentages sum to 100
    wp_to_found_percentages[wp_int_to_wp_str[1]] -= 1
    self.assertEqual(sum(wp_to_found_percentages.values()), 100)
    self.assertEqual(wp_to_expected_percentages, wp_to_found_percentages)
...@@ -53,7 +53,7 @@ class TestWeatherTypes(unittest.TestCase): ...@@ -53,7 +53,7 @@ class TestWeatherTypes(unittest.TestCase):
for year, wps in study.year_to_wps.items(): for year, wps in study.year_to_wps.items():
daily_time_serie_array = study.year_to_daily_time_serie_array[year] daily_time_serie_array = study.year_to_daily_time_serie_array[year]
self.assertEqual(len(daily_time_serie_array), len(wps)) self.assertEqual(len(daily_time_serie_array), len(wps))
mask = np.array(wps) == 8 mask = np.array(wps) == ANTICYCLONIC
no_rain.extend(np.max(daily_time_serie_array[mask, :], axis=1)) no_rain.extend(np.max(daily_time_serie_array[mask, :], axis=1))
rain.extend(np.max(daily_time_serie_array[~mask, :], axis=1)) rain.extend(np.max(daily_time_serie_array[~mask, :], axis=1))
# For 90% of the anticyclonic days, the daily max precipitation (snowfall + rainfall) for a massifs is < 0.2mm # For 90% of the anticyclonic days, the daily max precipitation (snowfall + rainfall) for a massifs is < 0.2mm
...@@ -61,6 +61,15 @@ class TestWeatherTypes(unittest.TestCase): ...@@ -61,6 +61,15 @@ class TestWeatherTypes(unittest.TestCase):
self.assertLess(np.quantile(no_rain, 0.5), 0.2) self.assertLess(np.quantile(no_rain, 0.5), 0.2)
self.assertLess(5, np.quantile(rain, 0.5)) self.assertLess(5, np.quantile(rain, 0.5))
def test_weather_patterns_maxima(self):
    """Check which weather patterns occur most and least often at the annual maxima."""
    study = SafranPrecipitation1Day(altitude=900, year_min=1954, year_max=2008)
    # Pool the weather patterns of the annual maxima over all the years
    s = pd.Series(np.concatenate(list(study.year_to_wp_for_annual_maxima.values())))
    # value_counts sorts the patterns by decreasing number of occurrences
    storms_ranking = s.value_counts()
    self.assertEqual(storms_ranking.index[0], STEADY_OCEANIC)
    self.assertEqual(storms_ranking.index[-1], ANTICYCLONIC)
    self.assertEqual(storms_ranking.values[0], 376)
    self.assertEqual(storms_ranking.values[-1], 9)
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment