Commit c77ff897 authored by Dumoulin Nicolas's avatar Dumoulin Nicolas
Browse files

better output management

reallocation fix: reallocation stopped when threshold reached
new indicators added: cumulated_it and previous_index
parent 69840cb4
......@@ -11,10 +11,10 @@ import patutils
from patutils import md5sum, load_pat_patches
import seaborn as sns
import matplotlib.pyplot as plt
from scenarios import ScenariosStack
from scenarios import ScenariosStack, Reallocator
class MCMC:
def __init__(self, mcmc_config_filename):
def __init__(self, mcmc_config_filename, output=True):
if not os.path.isfile(mcmc_config_filename):
print('Error: file not found "{}"'.format(mcmc_config_filename))
print('Please copy the template file "MCMC_config.sample.yml" and adjust to your settings and run again this program')
......@@ -28,27 +28,28 @@ class MCMC:
print('MCMC initialized with default seed') # self.rng.get_state()
# Copying input data in output dir
self.outputdir = self.mcmc_config['output_dir'] + '/' + time.strftime('%Y%m%d-%H%M%S')
if not os.path.exists(self.outputdir):
os.makedirs(self.outputdir)
print('All data will be written in {}'.format(self.outputdir))
else:
print('Output directory already exists! ({})'.format(self.outputdir))
sys.exit(1)
shutil.copy(mcmc_config_filename, self.outputdir)
for f in [ self.mcmc_config['target'],
self.mcmc_config['indicators_config']['resilience'],
self.mcmc_config['indicators_config']['social']['cost_matrix_filename']
]:
shutil.copy(f, self.outputdir)
with open(self.outputdir+'/seed.txt', 'w') as outfile:
outfile.write('{}\n'.format(self.rng.get_state()))
config_data = {
'patches_md5sum':self.patches_md5sum,
'biodiversity_matrix_md5sum':md5sum(self.mcmc_config['indicators_config']['biodiversity']['matrixfilename']),
'social_patches_costs_md5sum':md5sum(self.mcmc_config['indicators_config']['social']['patches_costs_filename'])
}
with open(self.outputdir+'/config.yml', 'w') as outfile:
yaml.dump(config_data, outfile, default_flow_style=False)
if output:
if not os.path.exists(self.outputdir):
os.makedirs(self.outputdir)
print('All data will be written in {}'.format(self.outputdir))
else:
print('Output directory already exists! ({})'.format(self.outputdir))
sys.exit(1)
shutil.copy(mcmc_config_filename, self.outputdir)
for f in [ self.mcmc_config['target'],
self.mcmc_config['indicators_config']['resilience'],
self.mcmc_config['indicators_config']['social']['cost_matrix_filename']
]:
shutil.copy(f, self.outputdir)
with open(self.outputdir+'/seed.txt', 'w') as outfile:
outfile.write('{}\n'.format(self.rng.get_state()))
config_data = {
'patches_md5sum':self.patches_md5sum,
'biodiversity_matrix_md5sum':md5sum(self.mcmc_config['indicators_config']['biodiversity']['matrixfilename']),
'social_patches_costs_md5sum':md5sum(self.mcmc_config['indicators_config']['social']['patches_costs_filename'])
}
with open(self.outputdir+'/config.yml', 'w') as outfile:
yaml.dump(config_data, outfile, default_flow_style=False)
# finishing init
self.patches = load_pat_patches(self.mcmc_config['patches'])
self.surfaces = self.patches['SURF_PARC']
......@@ -56,6 +57,7 @@ class MCMC:
targetRatio = (self.target['2050']-self.target['2016'])/self.target['2016']
self.targetPAT = self.patches.groupby('cultgeopat')['SURF_PARC'].sum()*(1+targetRatio)
self.indicators = Indicators(self.mcmc_config['indicators_config'], self.patches, self.patches_md5sum, self.targetPAT)
self.reallocator = Reallocator(self.rng, self.patches, self.targetPAT, self.mcmc_config['ratio_patches_to_modify'])
def is_pareto_efficient(costs, return_mask = True):
......@@ -98,10 +100,11 @@ class MCMC:
'''
# Loop of sampling and scoring
for index, scenario in tqdm(scenarios.sample(nb_particles, self.rng).iterrows(), total=nb_particles):
scenario = scenarios.reallocate(index, self.rng, self.patches, self.targetPAT, self.mcmc_config['ratio_patches_to_modify'])
if not scenario is None:
scenarios.append(iter_nb, scenario)
scenario = scenarios.reallocate(index)
if scenario is None:
scenarios.setFullReallocated(index, True)
else:
scenarios.append(iter_nb, scenario, index)
# Computing pareto front
start_time = time.time()
pareto_mask = MCMC.is_pareto_efficient(scenarios.scores[['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta']].values)
......@@ -110,21 +113,29 @@ class MCMC:
print(' Fully reallocated scenarios {}/{} - '.format(scenarios.nbFullReallocated(), len(scenarios.scores)), end="", flush=True)
print('{:.2f}% on pareto front (computed in {}) - '.format(100*np.sum(pareto_mask)/len(pareto_mask), time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
# Writing output data
write_times=[]
start_time = time.time()
scenarios.scores.to_csv(self.outputdir+'/scores_iter_{0:03d}.csv'.format(iter_nb), index=True)
scenarios.cultgeopat.to_csv(self.outputdir+'/cult_iter_{0:03d}.csv'.format(iter_nb), index=True)
try:
pairplot = sns.pairplot(scenarios.scores, vars=['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta'],
diag_kind="kde",hue="pareto"
)
pairplot.savefig(self.outputdir+'/mcmc_iter_{0:03d}.png'.format(iter_nb))
plt.close(pairplot.fig)
except (np.linalg.linalg.LinAlgError, ValueError) as e:
# when data doesn't vary enough on a dimension, it is impossible to generate the pareto density
# and an error is triggered. Here, we ignore this error.
pass
elapsed_time = time.time() - start_time
print('Scores written in {} '.format(time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
write_times.append(time.time() - start_time)
start_time = time.time()
scenarios.cultgeopat.to_hdf(self.outputdir+'/cult_iter_{0:03d}.h5'.format(iter_nb), 'cultgeopat', index=True, complevel=1)
write_times.append(time.time() - start_time)
if self.mcmc_config.get('write_data', False):
start_time = time.time()
try:
pairplot = sns.pairplot(scenarios.scores, vars=['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta'],
diag_kind="kde",hue="pareto"
)
pairplot.savefig(self.outputdir+'/mcmc_iter_{0:03d}.png'.format(iter_nb))
plt.close(pairplot.fig)
except (np.linalg.linalg.LinAlgError, ValueError, AttributeError) as e:
# when data doesn't vary enough on a dimension, it is impossible to generate the pareto density
# and an error is triggered. Here, we ignore this error.
pass
write_times.append(time.time() - start_time)
print('Scores written in {}/{}/{} '.format(*map(lambda x: time.strftime("%M:%S", time.gmtime(x)), write_times)), end="", flush=True)
else:
print('Scores written in {}/{} '.format(*map(lambda x: time.strftime("%M:%S", time.gmtime(x)), write_times)), end="", flush=True)
print()
# Retaining only optimal particules
scenarios.retain(pareto_mask)
......@@ -138,23 +149,24 @@ class MCMC:
os.makedirs(shp_dir)
counter = 0
for index,scenario in tqdm(scenarios.cultgeopat.iterrows(), total=len(scenarios.cultgeopat)):
if scenarios.scores.loc[index,'full_reallocated']:
counter += 1
concat = pd.concat([self.patches, scenario.to_frame("newcult")], axis=1)
concat = concat[concat['init_cult']!=concat['newcult']]
if len(concat)>0:
for c in ['newcult','cultgeopat','init_cult']:
patutils.decode_cultgeopat(concat, c)
concat.to_file(shp_dir+'/{:03d}_{}'.format(iter_nb, index), encoding='utf-8')
#if scenarios.scores.loc[index,'full_reallocated']:
counter += 1
concat = pd.concat([self.patches, scenario.to_frame("newcult")], axis=1)
concat = concat[concat['init_cult']!=concat['newcult']]
if len(concat)>0:
for c in ['newcult','cultgeopat','init_cult']:
patutils.decode_cultgeopat(concat, c)
concat.to_file(shp_dir+'/{:03d}_{}'.format(iter_nb, index), encoding='utf-8')
elapsed_time = time.time() - start_time
print(' {} shapefiles (only scenario fully reallocated) written in {}'.format(counter, time.strftime("%M:%S", time.gmtime(elapsed_time))))
#print(' {} shapefiles (only scenario fully reallocated) written in {}'.format(counter, time.strftime("%M:%S", time.gmtime(elapsed_time))))
print(' {} shapefiles written in {}'.format(counter, time.strftime("%M:%S", time.gmtime(elapsed_time))))
def run(self):
# Initial sampling and evaluation
nb_particles = self.mcmc_config['initial_nb_particles']
# Start with initial scenario
scenarios = ScenariosStack(self.indicators, self.patches)
scenarios = ScenariosStack(self.indicators, self.reallocator, self.patches)
# Iteration
nb_iteration=0
for i in range(self.mcmc_config['max_iterations']):
......@@ -165,8 +177,7 @@ class MCMC:
if scenarios.allFullReallocated():
print('All scenarios are fully reallocated. Simulation finished.')
break
if self.mcmc_config.get('write_data', False):
self.write_data(nb_iteration, scenarios)
self.write_data(nb_iteration, scenarios)
return scenarios
if __name__ == '__main__':
......
from MCMC import *
from scenarios import ScenariosStack
import timeit
if __name__ == '__main__':
mcmc = MCMC('MCMC_config.yml')
scenario = Scenario(mcmc.patches.copy())
for ind in ['proximity', 'resilience', 'productivity', 'biodiversity', 'social']:
mcmc = MCMC('MCMC_config.yml', False)
scenarios = ScenariosStack(mcmc.indicators, mcmc.reallocator, mcmc.patches)
for ind in ['Proximity', 'Resilience', 'Productivity', 'Biodiversity', 'Social']:
print('{}: {:.4f} s'.format(ind,
timeit.timeit(lambda: eval('mcmc.indicators._indicators[\''+ind+'\'].compute_indicator(scenario.patches)'), number=10)/10
timeit.timeit(lambda: eval('mcmc.indicators._indicators[\''+ind+'\'].compute_indicator(mcmc.patches)'), number=10)/10
))
print('\nreallocate: {:.4f} s'.format(timeit.timeit(lambda:
scenario.reallocate(mcmc.rng, mcmc.targetPAT, mcmc.mcmc_config['ratio_patches_to_modify'])
mcmc.reallocator.reallocate(mcmc.patches)
, number=10)/10))
for ind in ['proximity', 'resilience', 'productivity', 'biodiversity', 'social']:
for ind in ['Proximity', 'Resilience', 'Productivity', 'Biodiversity', 'Social']:
print('{}: {:.4f} s'.format(ind,
timeit.timeit(lambda: eval('mcmc.indicators._indicators[\''+ind+'\'].compute_indicator(scenario.patches)'), number=10)/10
timeit.timeit(lambda: eval('mcmc.indicators._indicators[\''+ind+'\'].compute_indicator(mcmc.patches)'), number=10)/10
))
'''
proximity: 0.0146 s
resilience: 0.0901 s
productivity: 0.0099 s
biodiversity: 0.0178 s
social: 0.1336 s
Proximity: 0.0137 s
Resilience: 0.0311 s
Productivity: 0.0022 s
Biodiversity: 0.0120 s
Social: 0.0841 s
reallocate: 0.0469 s
proximity: 0.0167 s
resilience: 0.0874 s
productivity: 0.0117 s
biodiversity: 0.0176 s
social: 0.2209 s
reallocate: 0.0280 s
Proximity: 0.0167 s
Resilience: 0.0322 s
Productivity: 0.0049 s
Biodiversity: 0.0120 s
Social: 0.1015 s
'''
......@@ -5,52 +5,64 @@ import pandas as pd
import itertools
from patutils import load_pat_patches
def reallocate(patches, rng, targetPAT, ratioNbPatches):
nbPatches = int(len(patches)*ratioNbPatches)
surfDelta = targetPAT - patches.groupby('cultgeopat')['SURF_PARC'].sum()
cult_to_decrease = surfDelta[surfDelta<0].sort_values(ascending=True).keys().tolist()
cult_to_increase = surfDelta[surfDelta>0].sort_values(ascending=False).keys().tolist()
targetReached = False
# Sampling the patches to reallocate
patches_to_decrease = patches[patches['cultgeopat'].isin(cult_to_decrease)]
samples = patches_to_decrease.sample(n=min(nbPatches,len(patches_to_decrease)), random_state=rng)#.reset_index(drop=True)
# Building the new culture reallocated
if len(cult_to_increase) > 0:
class Reallocator:
def __init__(self, rng, patches, targetPAT, ratioNbPatches):
self.rng = rng
self.ratioNbPatches = ratioNbPatches
self.targetPAT = targetPAT
surfDelta = targetPAT - patches.groupby('cultgeopat')['SURF_PARC'].sum()
self.PAT_cult_to_decrease = surfDelta[surfDelta<0].sort_values(ascending=True).keys().tolist()
self.PAT_cult_to_increase = surfDelta[surfDelta>0].sort_values(ascending=False).keys().tolist()
def reallocate(self, patches):
nbPatches = int(len(patches)*self.ratioNbPatches)
surfDelta = self.targetPAT - patches.groupby('cultgeopat')['SURF_PARC'].sum()
cult_to_decrease = surfDelta.loc[self.PAT_cult_to_decrease][surfDelta<0].sort_values(ascending=True).keys().tolist()
cult_to_increase = surfDelta.loc[self.PAT_cult_to_increase][surfDelta>0].sort_values(ascending=True).keys().tolist()
if len(cult_to_increase)==0 or len(cult_to_decrease)==0:
# All cultural types have increased to objective, so not changing the patches
return None
# Sampling the patches to reallocate
patches_to_decrease = patches[patches['cultgeopat'].isin(cult_to_decrease)]
samples = patches_to_decrease.sample(n=min(nbPatches,len(patches_to_decrease)), random_state=self.rng)#.reset_index(drop=True)
# Building the new culture reallocated
factors = surfDelta[cult_to_increase]
factors = (factors*len(samples)/factors.sum()).map(round) # normalize on nb samples
newCult = pd.Series(cult_to_increase).repeat(factors)
else:
# All cultural types have increased to objective, so not changing the patches
return None
if len(newCult) < len(samples): # may be due to factors rounding
newCult = newCult.append(newCult.sample(n=len(samples)-len(newCult), random_state=rng), ignore_index=True)
newCult = newCult.sample(frac=1, random_state=rng)[:len(samples)].reset_index(drop=True) # shuffle and cut extra elements
# Doing the reallocation
patches.loc[samples.index.values,'cultgeopat'] = newCult.values
return patches
if len(newCult) < len(samples): # may be due to factors rounding
newCult = newCult.append(newCult.sample(n=len(samples)-len(newCult), random_state=self.rng), ignore_index=True)
newCult = newCult.sample(frac=1, random_state=self.rng)[:len(samples)].reset_index(drop=True) # shuffle and cut extra elements
# Doing the reallocation
patches.loc[samples.index.values,'cultgeopat'] = newCult.values
return patches
class ScenariosStack:
def __init__(self, indicators, initial_patches):
def __init__(self, indicators, reallocator, initial_patches):
self._counter = itertools.count()
scenario_id = next(self._counter)
self.indicators = indicators
self.reallocator = reallocator
self.initial_patches = initial_patches
self.cultgeopat = initial_patches[['cultgeopat']]
self.cultgeopat = self.cultgeopat.T
# Now we have the id_parcel as columns, and each row will represent a scenario
self.cultgeopat.rename(index={'cultgeopat':scenario_id}, inplace=True)
self.scores = pd.DataFrame([self.indicators.compute_indicators(initial_patches) + [0, False, False]],
columns=self.indicators.indicators_names+['iteration','pareto','full_reallocated'],
self.scores = pd.DataFrame([self.indicators.compute_indicators(initial_patches) + [0, 0, -1, False, False]],
columns=self.indicators.indicators_names+['iteration','cumulated_it', 'previous_index', 'pareto','full_reallocated'],
dtype='float64')
for c in ['pareto','full_reallocated']:
self.scores[c] = self.scores[c].astype('bool')
self.scores['iteration'] = self.scores['iteration'].astype('int')
for c in ['iteration','cumulated_it', 'previous_index']:
self.scores[c] = self.scores[c].astype('int')
def append(self, nb_iter, patches_cult):
def append(self, nb_iter, patches_cult, previous_index):
id = next(self._counter)
self.cultgeopat.loc[id] = patches_cult['cultgeopat'].values
self.scores.at[id] = self.indicators.compute_indicators(patches_cult) + [nb_iter, False, False]
self.scores['iteration'] = self.scores['iteration'].astype('int')
self.scores.loc[id] = self.indicators.compute_indicators(patches_cult) + [nb_iter, self.scores.loc[previous_index]['cumulated_it']+1, previous_index, False, False]
for c in ['iteration','cumulated_it', 'previous_index']:
self.scores[c] = self.scores[c].astype('int')
return id
def retain(self, mask):
self.cultgeopat = self.cultgeopat[mask]
......@@ -71,13 +83,13 @@ class ScenariosStack:
def sample(self, nb, rng):
return self.cultgeopat[self.scores['full_reallocated']==False].sample(nb, replace=True, random_state=rng)
def consolidate_scenario(self, id, patches, columns):
selected_data = patches[columns]
def consolidate_scenario(self, id, columns):
selected_data = self.initial_patches[columns]
return pd.concat([self.cultgeopat.loc[id].to_frame("cultgeopat"), selected_data],axis=1)
def reallocate(self, scen_id, rng, initial_patches, targetPAT, ratioNbPatches):
def reallocate(self, scen_id):
if self.isFullReallocated(scen_id):
return None
else:
patches = self.consolidate_scenario(scen_id, initial_patches, ['SURF_PARC', 'Bdv','init_cult', 'VALEUR_CAD'])
return reallocate(patches, rng, targetPAT, ratioNbPatches)
patches = self.consolidate_scenario(scen_id, ['SURF_PARC', 'Bdv','init_cult', 'VALEUR_CAD'])
return self.reallocator.reallocate(patches)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment