Commit 617998bd authored by Dumoulin Nicolas

Pareto front calculation and bugfixes

parent 7f48eef4
@@ -14,6 +14,7 @@ from indice_biodiversite_2 import Biodiversity
 from social import Social
 from tqdm import tqdm
 from patutils import md5sum, load_pat_patches
+import seaborn as sns
 
 class Scenario:
     def __init__(self, patches):
@@ -32,11 +33,22 @@ class Scenario:
         cult_to_decrease = surfDelta[surfDelta<0].sort_values(ascending=True).keys().tolist()
         cult_to_increase = surfDelta[surfDelta>0].sort_values(ascending=False).keys().tolist()
         # Sampling the patches to reallocate
-        samples = self.patches[self.patches['cultgeopat'].isin(cult_to_decrease)].sample(n=nbPatches, random_state=rng)#.reset_index(drop=True)
+        if self.patches['cultgeopat'].isin(cult_to_decrease).sum() >= nbPatches:
+            samples = self.patches[self.patches['cultgeopat'].isin(cult_to_decrease)].sample(n=nbPatches, random_state=rng)#.reset_index(drop=True)
+        else:
+            # All cultural types have already decreased to their target
+            samples = self.patches.sample(n=nbPatches, random_state=rng)
         # Building the new cultures to be reallocated
-        factors = surfDelta[cult_to_increase]
-        factors = (factors*len(samples)/factors.sum()).map(round) # normalize on nb of samples
-        newCult = pd.Series(cult_to_increase).repeat(factors)
+        if len(cult_to_increase) > 0:
+            factors = surfDelta[cult_to_increase]
+            factors = (factors*len(samples)/factors.sum()).map(round) # normalize on nb of samples
+            newCult = pd.Series(cult_to_increase).repeat(factors)
+        else:
+            # All cultural types have already increased to their target,
+            # so we use every culture, giving more weight to the highest deltas
+            factors = -1/surfDelta
+            factors = (factors*len(samples)/factors.sum()).map(round) # normalize on nb of samples
+            newCult = pd.Series(surfDelta.keys().tolist()).repeat(factors)
         if len(newCult) < len(samples): # may be short due to the rounding of factors
             newCult = newCult.append(newCult.sample(n=len(samples)-len(newCult), random_state=rng), ignore_index=True)
         newCult = newCult.sample(frac=1, random_state=rng)[:len(samples)].reset_index(drop=True) # shuffle and drop extra elements
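The rounding in the factor normalization above can leave `newCult` shorter than `samples`, which is why the top-up branch exists. A minimal sketch of that arithmetic, with hypothetical surface deltas (the culture names and values are invented for illustration):

```python
import pandas as pd

# Hypothetical deltas for three cultural types still below their target surface
surfDelta = pd.Series({'Fruits': 440.0, 'Legumes': 440.0, 'Cereales': 120.0})
n_samples = 10

# Same normalization as in reallocate(): repeats proportional to the deltas
factors = (surfDelta * n_samples / surfDelta.sum()).map(round)
newCult = pd.Series(surfDelta.index.tolist()).repeat(factors)
print(factors.tolist())  # [4, 4, 1]
print(len(newCult))      # 9 < 10: rounding lost one element, hence the top-up branch
```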
@@ -117,25 +129,95 @@ class MCMC:
         self.targetPAT = self.patches.groupby('cultgeopat')['SURF_PARC'].sum()*(1+targetRatio)
         self.indicators = Indicators(self.mcmc_config['indicators_config'], self.patches, self.patches_md5sum, self.targetPAT)
 
+    @staticmethod
+    def is_pareto_efficient(costs, return_mask=True):
+        """
+        Find the Pareto-efficient points (lower cost is better).
+        :param costs: an (n_points, n_costs) array
+        :param return_mask: True to return a mask
+        :return: an array of indices of Pareto-efficient points.
+            If return_mask is True, this will be an (n_points, ) boolean array;
+            otherwise it will be an (n_efficient_points, ) integer array of indices.
+        Source: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
+        """
+        is_efficient = np.arange(costs.shape[0])
+        n_points = costs.shape[0]
+        next_point_index = 0  # Next index in the is_efficient array to search for
+        while next_point_index < len(costs):
+            nondominated_point_mask = np.any(costs < costs[next_point_index], axis=1)
+            nondominated_point_mask[next_point_index] = True
+            is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points
+            costs = costs[nondominated_point_mask]
+            next_point_index = np.sum(nondominated_point_mask[:next_point_index]) + 1
+        if return_mask:
+            is_efficient_mask = np.zeros(n_points, dtype=bool)
+            is_efficient_mask[is_efficient] = True
+            return is_efficient_mask
+        else:
+            return is_efficient
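Note that this helper treats *lower* values as better in every column: a point survives only while no other point is strictly smaller somewhere, so indicators that should be maximized would need to be negated before the call (e.g. `MCMC.is_pareto_efficient(-scores_array)`). A toy check of both return modes, assuming the class is importable:

```python
import numpy as np

costs = np.array([[0.0, 1.0],   # efficient: best on the second objective
                  [1.0, 0.0],   # efficient: best on the first objective
                  [2.0, 2.0]])  # dominated by both points above
print(MCMC.is_pareto_efficient(costs))                     # [ True  True False]
print(MCMC.is_pareto_efficient(costs, return_mask=False))  # [0 1]
```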
+    def step(self, iter_nb, nb_particles, scenarios, scores=None):
+        '''
+        Sample new scenarios, evaluate their scores and retain only the Pareto front.
+        :param iter_nb: (int) number of this iteration: 0 for the initial step, 1 for the first iteration, and so on.
+        :param nb_particles: number of new scenarios to sample
+        :param scenarios: (DataFrame) scenarios used as a basis for sampling the new scenarios.
+        :param scores: (DataFrame) scores of each scenario. May be None if the given
+            scenarios are only used for sampling and must not be considered for the Pareto front.
+        '''
+        new_scenarios = []
+        new_scores = []
+        # Sampling and scoring loop
+        for index, patches in tqdm(scenarios.sample(nb_particles, replace=True, random_state=self.rng).iterrows(), total=nb_particles):
+            scenario = Scenario(patches[0].copy())
+            scenario.reallocate(self.rng, self.targetPAT, self.mcmc_config['ratio_patches_to_modify'])  # 3.8 ms
+            new_scenarios.append(scenario.patches)
+            new_scores.append(self.indicators.compute_indicators(scenario.patches))
+        # Merging with the previous data
+        new_scores = pd.DataFrame(new_scores, columns=self.indicators.indicators_names)
+        new_scores['iteration'] = iter_nb
+        if scores is None:
+            scores = new_scores
+            scenarios = pd.DataFrame(new_scenarios)
+        else:
+            scores = scores.append(new_scores, ignore_index=True, sort=False)
+            scenarios = scenarios.append(pd.DataFrame(new_scenarios), ignore_index=True, sort=False)
+        # Computing the Pareto front
+        pareto_mask = MCMC.is_pareto_efficient(scores[['resilience','proximity','productivity','biodiversity','social']].values)
+        scores['pareto'] = pareto_mask
+        # Writing output data
+        scores.to_csv(self.outputdir+'/mcmc_iter_{}.csv'.format(iter_nb), index=False)
+        try:
+            sns.pairplot(scores, vars=['resilience','proximity','productivity','biodiversity','social'],
+                         diag_kind="kde", hue="pareto"
+                         ).savefig(self.outputdir+'/mcmc_iter_{}.png'.format(iter_nb))
+        except (np.linalg.LinAlgError, ValueError):
+            # When the data doesn't vary enough along a dimension, the kernel density
+            # estimate cannot be computed and an error is raised; we ignore it here.
+            pass
+        return [scenarios[pareto_mask], scores[pareto_mask]]
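A portability note: `DataFrame.append` (and the `Series.append` used in `Scenario.reallocate`) was deprecated in pandas 1.4 and removed in 2.0. On a current pandas, the merge step above would use `pd.concat` instead; a sketch using the same variable names as in `step()`:

```python
import pandas as pd

# Drop-in replacement for the two append() calls on pandas >= 2.0
scores = pd.concat([scores, new_scores], ignore_index=True, sort=False)
scenarios = pd.concat([scenarios, pd.DataFrame(new_scenarios)],
                      ignore_index=True, sort=False)
```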
     def run(self):
         # Initial sampling and evaluation
-        scores = []
-        for i in tqdm(range(self.mcmc_config['initial_nb_particles'])):
-            scenario = Scenario(self.patches.copy()) # 0.2 ms
-            scenario.reallocate(self.rng, self.targetPAT, self.mcmc_config['ratio_patches_to_modify']) # 3.8 ms
-            scores.append(self.indicators.compute_indicators(scenario.patches))
-        scores = pd.DataFrame(scores, columns=self.indicators.indicators_names)
-        scores.to_csv(self.outputdir+'mcmc.csv')
+        nb_particles = self.mcmc_config['initial_nb_particles']
+        scenarios_init = pd.DataFrame([self.patches])
+        scenarios, scores = self.step(0, nb_particles, scenarios_init)
+        # Iterations
+        for i in range(10):
+            print('Iteration #{}'.format(i))
+            scenarios, scores = self.step(i+1, nb_particles, scenarios, scores)
         # TODO
         # Storing variation of indicators
         init_var = scores.std()
         # Selecting particles
         # sequential optimization loop
+        return [scenarios, scores]
 if __name__ == '__main__':
     mcmc = MCMC('MCMC_config.yml')
     # scenario = Scenario(mcmc.patches.copy())
     # scenario.reallocate(mcmc.rng, mcmc.targetPAT, mcmc.mcmc_config['ratio_patches_to_modify'])
-    mcmc.run()
+    scenario, scores = mcmc.run()
     # print(mcmc.indicators.biodiversity(mcmc.patches))
     # print(mcmc.indicators.proximity(mcmc.patches))
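Each call to `step()` writes `mcmc_iter_{N}.csv` (and, when the KDE succeeds, a pairplot PNG) into the output directory, so the final front can be inspected offline. A hypothetical post-run check, assuming the `output_dir` from the `MCMC_config.yml` changes below:

```python
import pandas as pd

# run() executes step 0 plus 10 iterations, so iteration 10 holds the final front
scores = pd.read_csv('../output/MCMC/mcmc_iter_10.csv')
front = scores[scores['pareto']]
print(len(front), 'Pareto-efficient scenarios')
print(front[['resilience', 'proximity', 'productivity', 'biodiversity', 'social']].mean())
```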
 patches: ../output/PAT_patches/PAT_patches.shp
 target: ../resources/targetPAT.csv
-output_dir: ../output/MCMC/
-initial_nb_particles: 10
-ratio_patches_to_modify: 0.05
+output_dir: ../output/MCMC
+initial_nb_particles: 10000
+ratio_patches_to_modify: 0.01
 indicators_config:
     proximite:
         surf_totale_cible: 20165939.605135553
......
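The `MCMC` constructor receives the path to this YAML file (`MCMC('MCMC_config.yml')` above). A minimal sketch of loading it, assuming PyYAML; the actual loading code is not part of this diff:

```python
import yaml  # assumption: PyYAML; the config-loading code is not shown in this diff

with open('MCMC_config.yml') as f:
    mcmc_config = yaml.safe_load(f)

print(mcmc_config['initial_nb_particles'])     # 10000
print(mcmc_config['ratio_patches_to_modify'])  # 0.01
print(mcmc_config['indicators_config']['proximite']['surf_totale_cible'])
```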