MCMC.py 8.98 KB
Newer Older
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
1
2
3
4
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pandas as pd
import geopandas as gpd
5
import os, sys, time, shutil
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
6
7
import yaml
import numpy as np
8
from indicators import Indicators
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
9
from tqdm import tqdm
10
import patutils
11
from patutils import md5sum, load_pat_patches
12
import seaborn as sns
13
import matplotlib.pyplot as plt
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
14
from scenarios import ScenariosStack, Reallocator
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
15
16

class MCMC:
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
17
    def __init__(self, mcmc_config_filename, output=True):
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
18
19
20
21
22
        if not os.path.isfile(mcmc_config_filename):
        	print('Error: file not found "{}"'.format(mcmc_config_filename))
        	print('Please copy the template file "MCMC_config.sample.yml" and adjust to your settings and run again this program')
        	sys.exit(1)
        self.mcmc_config = yaml.load(open(mcmc_config_filename,'r'))
23
24
25
26
27
28
29
        self.patches_md5sum = md5sum(self.mcmc_config['patches'])
        if 'rng_seed' in self.mcmc_config:
            self.rng = np.random.RandomState(self.mcmc_config['rng_seed'])
        else:
            self.rng = np.random.RandomState(42)
            print('MCMC initialized with default seed') # self.rng.get_state()
        # Copying input data in output dir
30
        self.outputdir = self.mcmc_config['output_dir'] + '/' + time.strftime('%Y%m%d-%H%M%S') + '_{initial_nb_particles}_{step_nb_particles}_{ratio_patches_to_modify}'.format(**self.mcmc_config)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
        if output:
            if not os.path.exists(self.outputdir):
                os.makedirs(self.outputdir)
                print('All data will be written in {}'.format(self.outputdir))
            else:
                print('Output directory already exists! ({})'.format(self.outputdir))
                sys.exit(1)
            shutil.copy(mcmc_config_filename, self.outputdir)
            for f in [ self.mcmc_config['target'],
                    self.mcmc_config['indicators_config']['resilience'],
                    self.mcmc_config['indicators_config']['social']['cost_matrix_filename']
                    ]:
                shutil.copy(f, self.outputdir)
            with open(self.outputdir+'/seed.txt', 'w') as outfile:
                outfile.write('{}\n'.format(self.rng.get_state()))
            config_data = {
                'patches_md5sum':self.patches_md5sum,
                'biodiversity_matrix_md5sum':md5sum(self.mcmc_config['indicators_config']['biodiversity']['matrixfilename']),
                'social_patches_costs_md5sum':md5sum(self.mcmc_config['indicators_config']['social']['patches_costs_filename'])
            }
            with open(self.outputdir+'/config.yml', 'w') as outfile:
                yaml.dump(config_data, outfile, default_flow_style=False)
53
        # finishing init
54
        self.patches = load_pat_patches(self.mcmc_config['patches'])
55
56
        self.surfaces = self.patches['SURF_PARC']
        self.target = pd.read_csv(self.mcmc_config['target'], sep=';',index_col=0).rename(index=patutils.code_cultgeopat)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
57
58
59
        targetRatio = (self.target['2050']-self.target['2016'])/self.target['2016']
        self.targetPAT = self.patches.groupby('cultgeopat')['SURF_PARC'].sum()*(1+targetRatio)
        self.indicators = Indicators(self.mcmc_config['indicators_config'], self.patches, self.patches_md5sum, self.targetPAT)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
60
        self.reallocator = Reallocator(self.rng, self.patches, self.targetPAT, self.mcmc_config['ratio_patches_to_modify'])
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
61

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89

    def is_pareto_efficient(costs, return_mask = True):
        """
        Find the pareto-efficient points
        :param costs: An (n_points, n_costs) array
        :param return_mask: True to return a mask
        :return: An array of indices of pareto-efficient points.
            If return_mask is True, this will be an (n_points, ) boolean array
            Otherwise it will be a (n_efficient_points, ) integer array of indices.
        Author:
        Source: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
        """
        is_efficient = np.arange(costs.shape[0])
        n_points = costs.shape[0]
        next_point_index = 0  # Next index in the is_efficient array to search for
        while next_point_index<len(costs):
            nondominated_point_mask = np.any(costs<costs[next_point_index], axis=1)
            nondominated_point_mask[next_point_index] = True
            is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points
            costs = costs[nondominated_point_mask]
            next_point_index = np.sum(nondominated_point_mask[:next_point_index])+1
        if return_mask:
            is_efficient_mask = np.zeros(n_points, dtype = bool)
            is_efficient_mask[is_efficient] = True
            return is_efficient_mask
        else:
            return is_efficient

90
91
92
    def scenario_scores(patches, indicators):
        return indicators.compute_indicators(patches)

93
    def step(self, iter_nb, nb_particles, scenarios, write_data=True, disable_progress=False):
94
95
96
        '''
        Sample new scenarios, evaluate their scores and retain only pareto front.

97
        :param iter_nb: (int) number of this iteration.
98
        :param nb_particles: number of new scenarios to sample
99
        :param scenarios: (ScenariosStack) list of the scenarios used as base for sampling the new scenarios.
100
101
        '''
        # Loop of sampling and scoring
102
        start_time = time.time()
103
        data={'scenarios_patches':[], 'scenarios_cult':[], 'previous_indexes':[], 'fullreallocated':[]}
104
        for index, scenario in tqdm(scenarios.sample(nb_particles, self.rng).iterrows(), total=nb_particles, disable=disable_progress):
105
106
107
108
109
110
            [fullreallocated,scenario] = scenarios.reallocate(index)
            data['fullreallocated'].append(fullreallocated)
            data['scenarios_patches'].append(scenario)
            data['scenarios_cult'].append(scenario['cultgeopat'])
            data['previous_indexes'].append(index)
        scenarios.reconstitute(iter_nb, **data, disable_progress=disable_progress)
111
        elapsed_time = time.time() - start_time
112
        print('Iteration duration: {} - {:.2f} scen/s '.format(time.strftime("%M:%S", time.gmtime(elapsed_time)), nb_particles/elapsed_time), end="", flush=True)
113
        # Computing pareto front
114
        start_time = time.time()
115
        pareto_mask = MCMC.is_pareto_efficient(scenarios.scores[['Resilience','Proximity','Productivity','Biodiversity','Social']].values)
116
        scenarios.scores['pareto'] = pareto_mask
117
        elapsed_time = time.time() - start_time
118
        print(' Fully reallocated scenarios {}/{} - '.format(scenarios.nbFullReallocated(), nb_particles), end="", flush=True)
119
        if (len(scenarios.scores)>0):
120
            print('{:.2f}% on pareto front (computed in {}) '.format(100*np.sum(pareto_mask)/len(pareto_mask), time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
121
        # Writing output data
122
123
124
125
126
127
128
129
        if write_data:
            write_times=[]
            start_time = time.time()
            scenarios.scores.to_csv(self.outputdir+'/scores_iter_{0:03d}.csv'.format(iter_nb), index=True)
            write_times.append(time.time() - start_time)
            start_time = time.time()
            scenarios.cultgeopat.to_hdf(self.outputdir+'/cult_iter_{0:03d}.h5'.format(iter_nb), 'cultgeopat', index=True, complevel=1)
            write_times.append(time.time() - start_time)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
130
131
132
133
134
135
136
            start_time = time.time()
            try:
                pairplot = sns.pairplot(scenarios.scores, vars=['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta'],
                        diag_kind="kde",hue="pareto"
                    )
                pairplot.savefig(self.outputdir+'/mcmc_iter_{0:03d}.png'.format(iter_nb))
                plt.close(pairplot.fig)
137
            except:
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
138
139
140
141
                # when data doesn't vary enough on a dimension, it is impossible to generate the pareto density
                # and an error is triggered. Here, we ignore this error.
                pass
            write_times.append(time.time() - start_time)
142
            print(' - Scores written in {}/{}/{} '.format(*map(lambda x: time.strftime("%M:%S", time.gmtime(x)), write_times)), end="", flush=True)
143
        print()
144
        # Retaining only optimal particules
145
146
        scenarios.retain(pareto_mask)
        return scenarios
147

148
149
    def run(self):
        # Start with initial scenario
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
150
        scenarios = ScenariosStack(self.indicators, self.reallocator, self.patches)
151
        # Iteration
152
        nb_iteration=0
153
        for i in range(self.mcmc_config['max_iterations']):
154
155
            nb_iteration=i+1
            print('Iteration #{}'.format(nb_iteration))
156
157
158
159
            if nb_iteration==1:
                nb_particles = self.mcmc_config['initial_nb_particles']
            else:
                nb_particles = self.mcmc_config['step_nb_particles']
160
            scenarios = self.step(nb_iteration, nb_particles, scenarios, write_data=self.mcmc_config.get('write_data', False))
161
162
163
            if scenarios.allFullReallocated():
                print('All scenarios are fully reallocated. Simulation finished.')
                break
164
        return scenarios
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
165
166
167

if __name__ == '__main__':
    mcmc = MCMC('MCMC_config.yml')
168
    scenarios = mcmc.run()