Commit f6d13838 authored by Le Roux Erwan
Browse files

[refactor] add warning when there are too many zero values. improve warning test.

parent c5b17398
No related merge requests found
Showing with 36 additions and 20 deletions
+36 -20
...@@ -15,10 +15,10 @@ class AbstractEstimator(object): ...@@ -15,10 +15,10 @@ class AbstractEstimator(object):
self.dataset = dataset # type: AbstractDataset self.dataset = dataset # type: AbstractDataset
self._result_from_fit = None # type: Union[None, AbstractResultFromModelFit] self._result_from_fit = None # type: Union[None, AbstractResultFromModelFit]
# Class constructor # Class constructor (shortcut to initialize some subclasses)
@classmethod @classmethod
def from_dataset(cls, dataset: AbstractDataset): def from_dataset(cls, dataset: AbstractDataset):
raise NotImplementedError return cls(dataset)
# Fit estimator # Fit estimator
...@@ -28,7 +28,7 @@ class AbstractEstimator(object): ...@@ -28,7 +28,7 @@ class AbstractEstimator(object):
def _fit(self) -> AbstractResultFromModelFit: def _fit(self) -> AbstractResultFromModelFit:
raise NotImplementedError raise NotImplementedError
# Results from model fit # Fit results
@property @property
def result_from_model_fit(self) -> AbstractResultFromModelFit: def result_from_model_fit(self) -> AbstractResultFromModelFit:
......
...@@ -39,8 +39,9 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel): ...@@ -39,8 +39,9 @@ class AbstractTemporalLinearMarginModel(LinearMarginModel):
def fitmargin_from_maxima_gev(self, data: np.ndarray, df_coordinates_spat: pd.DataFrame, def fitmargin_from_maxima_gev(self, data: np.ndarray, df_coordinates_spat: pd.DataFrame,
df_coordinates_temp: pd.DataFrame) -> AbstractResultFromModelFit: df_coordinates_temp: pd.DataFrame) -> AbstractResultFromModelFit:
assert data.shape[1] == len(df_coordinates_temp.values) data = data[0]
x = ro.FloatVector(data[0]) assert len(data) == len(df_coordinates_temp.values)
x = ro.FloatVector(data)
if self.fit_method == TemporalMarginFitMethod.is_mev_gev_fit: if self.fit_method == TemporalMarginFitMethod.is_mev_gev_fit:
return self.ismev_gev_fit(x, df_coordinates_temp) return self.ismev_gev_fit(x, df_coordinates_temp)
if self.fit_method == TemporalMarginFitMethod.extremes_fevd_bayesian: if self.fit_method == TemporalMarginFitMethod.extremes_fevd_bayesian:
......
...@@ -62,6 +62,10 @@ class WarningWhileRunningR(Warning): ...@@ -62,6 +62,10 @@ class WarningWhileRunningR(Warning):
pass pass
class WarningTooMuchZeroValues(Warning):
pass
class WarningMaximumAbsoluteValueTooHigh(Warning): class WarningMaximumAbsoluteValueTooHigh(Warning):
pass pass
...@@ -74,7 +78,7 @@ class SafeRunException(Exception): ...@@ -74,7 +78,7 @@ class SafeRunException(Exception):
pass pass
def safe_run_r_estimator(function, data=None, use_start=False, threshold_max_abs_value=100, maxit=1000000, def safe_run_r_estimator(function, data=None, use_start=False, max_ratio_between_two_extremes_values=10, maxit=1000000,
**parameters) -> robjects.ListVector: **parameters) -> robjects.ListVector:
if OptimizationConstants.USE_MAXIT: if OptimizationConstants.USE_MAXIT:
# Add optimization parameters # Add optimization parameters
...@@ -83,14 +87,21 @@ def safe_run_r_estimator(function, data=None, use_start=False, threshold_max_abs ...@@ -83,14 +87,21 @@ def safe_run_r_estimator(function, data=None, use_start=False, threshold_max_abs
# Some checks for Spatial Extremes # Some checks for Spatial Extremes
if data is not None: if data is not None:
# Raise warning if the maximum absolute value is above a threshold
if isinstance(data, np.ndarray): if isinstance(data, np.ndarray):
maximum_absolute_value = np.max(np.abs(data)) # Raise warning if the gap is too important between the two biggest values of data
if maximum_absolute_value > threshold_max_abs_value: sorted_data = sorted(data.flatten())
print(data)
if sorted_data[-2] * max_ratio_between_two_extremes_values < sorted_data[-1]:
msg = "maxmimum absolute value in data {} is too high, i.e. above the defined threshold {}" \ msg = "maxmimum absolute value in data {} is too high, i.e. above the defined threshold {}" \
.format(maximum_absolute_value, threshold_max_abs_value) .format(sorted_data[-1], max_ratio_between_two_extremes_values)
msg += '\nPotentially in that case, data should be re-normalized' msg += '\nPotentially in that case, data should be re-normalized'
warnings.warn(msg, WarningMaximumAbsoluteValueTooHigh) warnings.warn(msg, WarningMaximumAbsoluteValueTooHigh)
# Raise warning if ratio of zeros in data is above some percentage (90% so far)
limit_percentage = 90
if 100 * np.count_nonzero(data) / len(data) < limit_percentage:
msg = 'data contains more than {}% of zero values'.format(100 - limit_percentage)
warnings.warn(msg, WarningTooMuchZeroValues)
# Add data to the parameters
parameters['data'] = data parameters['data'] = data
# First run without using start value # First run without using start value
# Then if it crashes, use start value # Then if it crashes, use start value
...@@ -131,6 +142,7 @@ def get_coord_df(df_coordinates: pd.DataFrame): ...@@ -131,6 +142,7 @@ def get_coord_df(df_coordinates: pd.DataFrame):
coord = r('data.frame')(coord, stringsAsFactors=True) coord = r('data.frame')(coord, stringsAsFactors=True)
return coord return coord
def get_null(): def get_null():
as_null = r['as.null'] as_null = r['as.null']
return as_null(1.0) return as_null(1.0)
......
...@@ -137,7 +137,6 @@ class StudyVisualizerForNonStationaryTrends(StudyVisualizer): ...@@ -137,7 +137,6 @@ class StudyVisualizerForNonStationaryTrends(StudyVisualizer):
# In both cases, we remove any massif with psnow < 0.9 # In both cases, we remove any massif with psnow < 0.9
if self.fit_only_time_series_with_ninety_percent_of_non_null_values: if self.fit_only_time_series_with_ninety_percent_of_non_null_values:
d = {m: v for m, v in d.items() if self.massif_name_to_psnow[m] >= 0.9} d = {m: v for m, v in d.items() if self.massif_name_to_psnow[m] >= 0.9}
print(d.keys())
return d return d
@property @property
......
import numpy as np import numpy as np
import unittest import unittest
from extreme_fit.model.utils import safe_run_r_estimator, WarningMaximumAbsoluteValueTooHigh from extreme_fit.model.utils import safe_run_r_estimator, WarningMaximumAbsoluteValueTooHigh, WarningTooMuchZeroValues
def function(data=None, control=None): def empty_function(data=None, control=None):
pass pass
class TestSafeRunREstimator(unittest.TestCase): class TestSafeRunREstimator(unittest.TestCase):
def test_warning(self): def test_warning_maximum_value(self):
threshold = 10 ratio = 10
value_above_threhsold = 2 * threshold data = np.array([ratio+1, 1])
datas = [np.array([value_above_threhsold]), np.ones([2, 2]) * value_above_threhsold] with self.assertWarns(WarningMaximumAbsoluteValueTooHigh):
for data in datas: safe_run_r_estimator(function=empty_function, data=data, max_ratio_between_two_extremes_values=ratio)
with self.assertWarns(WarningMaximumAbsoluteValueTooHigh):
safe_run_r_estimator(function=function, data=data, threshold_max_abs_value=threshold) def test_warning_too_much_zero(self):
n = 5
data = np.concatenate([np.zeros(n), np.ones(n)])
with self.assertWarns(WarningTooMuchZeroValues):
safe_run_r_estimator(function=empty_function, data=data)
if __name__ == '__main__': if __name__ == '__main__':
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment