MCMC.py 8.89 KB
Newer Older
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
1
2
3
4
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pandas as pd
import geopandas as gpd
5
import os, sys, time, shutil
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
6
7
import yaml
import numpy as np
8
from indicators import Indicators
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
9
from tqdm import tqdm
10
import patutils
11
from patutils import md5sum, load_pat_patches
12
import seaborn as sns
13
import matplotlib.pyplot as plt
14
from scenarios import ScenariosStack
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
15
16
17
18
19
20
21
22

class MCMC:
    def __init__(self, mcmc_config_filename):
        if not os.path.isfile(mcmc_config_filename):
        	print('Error: file not found "{}"'.format(mcmc_config_filename))
        	print('Please copy the template file "MCMC_config.sample.yml" and adjust to your settings and run again this program')
        	sys.exit(1)
        self.mcmc_config = yaml.load(open(mcmc_config_filename,'r'))
23
24
25
26
27
28
29
        self.patches_md5sum = md5sum(self.mcmc_config['patches'])
        if 'rng_seed' in self.mcmc_config:
            self.rng = np.random.RandomState(self.mcmc_config['rng_seed'])
        else:
            self.rng = np.random.RandomState(42)
            print('MCMC initialized with default seed') # self.rng.get_state()
        # Copying input data in output dir
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
30
        self.outputdir = self.mcmc_config['output_dir'] + '/' + time.strftime('%Y%m%d-%H%M%S')
31
32
33
34
35
36
37
        if not os.path.exists(self.outputdir):
            os.makedirs(self.outputdir)
            print('All data will be written in {}'.format(self.outputdir))
        else:
            print('Output directory already exists! ({})'.format(self.outputdir))
            sys.exit(1)
        shutil.copy(mcmc_config_filename, self.outputdir)
38
        for f in [ self.mcmc_config['target'],
39
                self.mcmc_config['indicators_config']['resilience'],
40
                self.mcmc_config['indicators_config']['social']['cost_matrix_filename']
41
42
                ]:
            shutil.copy(f, self.outputdir)
43
44
        with open(self.outputdir+'/seed.txt', 'w') as outfile:
            outfile.write('{}\n'.format(self.rng.get_state()))
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
45
46
47
48
49
        config_data = {
            'patches_md5sum':self.patches_md5sum,
            'biodiversity_matrix_md5sum':md5sum(self.mcmc_config['indicators_config']['biodiversity']['matrixfilename']),
            'social_patches_costs_md5sum':md5sum(self.mcmc_config['indicators_config']['social']['patches_costs_filename'])
        }
50
        with open(self.outputdir+'/config.yml', 'w') as outfile:
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
51
            yaml.dump(config_data, outfile, default_flow_style=False)
52
        # finishing init
53
        self.patches = load_pat_patches(self.mcmc_config['patches'])
54
55
        self.surfaces = self.patches['SURF_PARC']
        self.target = pd.read_csv(self.mcmc_config['target'], sep=';',index_col=0).rename(index=patutils.code_cultgeopat)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
56
57
58
59
        targetRatio = (self.target['2050']-self.target['2016'])/self.target['2016']
        self.targetPAT = self.patches.groupby('cultgeopat')['SURF_PARC'].sum()*(1+targetRatio)
        self.indicators = Indicators(self.mcmc_config['indicators_config'], self.patches, self.patches_md5sum, self.targetPAT)

60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87

    def is_pareto_efficient(costs, return_mask = True):
        """
        Find the pareto-efficient points
        :param costs: An (n_points, n_costs) array
        :param return_mask: True to return a mask
        :return: An array of indices of pareto-efficient points.
            If return_mask is True, this will be an (n_points, ) boolean array
            Otherwise it will be a (n_efficient_points, ) integer array of indices.
        Author:
        Source: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
        """
        is_efficient = np.arange(costs.shape[0])
        n_points = costs.shape[0]
        next_point_index = 0  # Next index in the is_efficient array to search for
        while next_point_index<len(costs):
            nondominated_point_mask = np.any(costs<costs[next_point_index], axis=1)
            nondominated_point_mask[next_point_index] = True
            is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points
            costs = costs[nondominated_point_mask]
            next_point_index = np.sum(nondominated_point_mask[:next_point_index])+1
        if return_mask:
            is_efficient_mask = np.zeros(n_points, dtype = bool)
            is_efficient_mask[is_efficient] = True
            return is_efficient_mask
        else:
            return is_efficient

88
89
90
    def scenario_scores(patches, indicators):
        return indicators.compute_indicators(patches)

91
    def step(self, iter_nb, nb_particles, scenarios):
92
93
94
        '''
        Sample new scenarios, evaluate their scores and retain only pareto front.

95
        :param iter_nb: (int) number of this iteration.
96
        :param nb_particles: number of new scenarios to sample
97
        :param scenarios: (ScenariosStack) list of the scenarios used as base for sampling the new scenarios.
98
99
        '''
        # Loop of sampling and scoring
100
101
        for index, scenario in tqdm(scenarios.sample(nb_particles, self.rng).iterrows(), total=nb_particles):
            scenario = scenarios.reallocate(index, self.rng, self.patches, self.targetPAT, self.mcmc_config['ratio_patches_to_modify'])
102
103
            if not scenario is None:
                scenarios.append(iter_nb, scenario)
104
                scenarios.setFullReallocated(index, True)
105
        # Computing pareto front
106
        start_time = time.time()
107
108
        pareto_mask = MCMC.is_pareto_efficient(scenarios.scores[['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta']].values)
        scenarios.scores['pareto'] = pareto_mask
109
        elapsed_time = time.time() - start_time
110
111
        print(' Fully reallocated scenarios {}/{} - '.format(scenarios.nbFullReallocated(), len(scenarios.scores)), end="", flush=True)
        print('{:.2f}% on pareto front (computed in {}) - '.format(100*np.sum(pareto_mask)/len(pareto_mask), time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
112
        # Writing output data
113
        start_time = time.time()
114
115
        scenarios.scores.to_csv(self.outputdir+'/scores_iter_{0:03d}.csv'.format(iter_nb), index=True)
        scenarios.cultgeopat.to_csv(self.outputdir+'/cult_iter_{0:03d}.csv'.format(iter_nb), index=True)
116
        try:
117
            pairplot = sns.pairplot(scenarios.scores, vars=['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta'],
118
                    diag_kind="kde",hue="pareto"
119
120
121
                )
            pairplot.savefig(self.outputdir+'/mcmc_iter_{0:03d}.png'.format(iter_nb))
            plt.close(pairplot.fig)
122
123
124
125
        except (np.linalg.linalg.LinAlgError, ValueError) as e:
            # when data doesn't vary enough on a dimension, it is impossible to generate the pareto density
            # and an error is triggered. Here, we ignore this error.
            pass
126
        elapsed_time = time.time() - start_time
127
        print('Scores written in {} '.format(time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
128
        print()
129
        # Retaining only optimal particules
130
131
        scenarios.retain(pareto_mask)
        return scenarios
132

133
134
135
136
137
138
    def write_data(self, iter_nb, scenarios):
        print(' Writing modified patches of scenarios')
        # Writing shapefiles
        start_time = time.time()
        shp_dir = self.outputdir + '/patches_iter_{0:03d}'.format(iter_nb)
        os.makedirs(shp_dir)
139
        counter = 0
140
        for index,scenario in tqdm(scenarios.cultgeopat.iterrows(), total=len(scenarios.cultgeopat)):
141
142
143
144
145
146
147
148
            if scenarios.scores.loc[index,'full_reallocated']:
                counter += 1
                concat = pd.concat([self.patches, scenario.to_frame("newcult")], axis=1)
                concat = concat[concat['init_cult']!=concat['newcult']]
                if len(concat)>0:
                    for c in ['newcult','cultgeopat','init_cult']:
                        patutils.decode_cultgeopat(concat, c)
                    concat.to_file(shp_dir+'/{:03d}_{}'.format(iter_nb, index), encoding='utf-8')
149
        elapsed_time = time.time() - start_time
150
        print('  {} shapefiles (only scenario fully reallocated) written in {}'.format(counter, time.strftime("%M:%S", time.gmtime(elapsed_time))))
151
152


153
    def run(self):
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
154
        # Initial sampling and evaluation
155
        nb_particles = self.mcmc_config['initial_nb_particles']
156
157
        # Start with initial scenario
        scenarios = ScenariosStack(self.indicators, self.patches)
158
        # Iteration
159
        nb_iteration=0
160
        for i in range(self.mcmc_config['max_iterations']):
161
162
            nb_iteration=i+1
            print('Iteration #{}'.format(nb_iteration))
163
            # Write SHP only for the last iteration
164
165
166
167
168
169
            scenarios = self.step(nb_iteration, nb_particles, scenarios)
            if scenarios.allFullReallocated():
                print('All scenarios are fully reallocated. Simulation finished.')
                break
        if self.mcmc_config.get('write_data', False):
            self.write_data(nb_iteration, scenarios)
170
        return scenarios
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
171
172
173

if __name__ == '__main__':
    mcmc = MCMC('MCMC_config.yml')
174
    scenario = mcmc.run()