MCMC.py 8.57 KB
Newer Older
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
1
2
3
4
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pandas as pd
import geopandas as gpd
5
import os, sys, time, shutil
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
6
7
import yaml
import numpy as np
8
from indicators import Indicators
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
9
from tqdm import tqdm
10
import patutils
11
from patutils import md5sum, load_pat_patches
12
import seaborn as sns
13
import matplotlib.pyplot as plt
14
from scenarios import ScenariosStack
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
15
16
17
18
19
20
21
22

class MCMC:
    def __init__(self, mcmc_config_filename):
        if not os.path.isfile(mcmc_config_filename):
        	print('Error: file not found "{}"'.format(mcmc_config_filename))
        	print('Please copy the template file "MCMC_config.sample.yml" and adjust to your settings and run again this program')
        	sys.exit(1)
        self.mcmc_config = yaml.load(open(mcmc_config_filename,'r'))
23
24
25
26
27
28
29
        self.patches_md5sum = md5sum(self.mcmc_config['patches'])
        if 'rng_seed' in self.mcmc_config:
            self.rng = np.random.RandomState(self.mcmc_config['rng_seed'])
        else:
            self.rng = np.random.RandomState(42)
            print('MCMC initialized with default seed') # self.rng.get_state()
        # Copying input data in output dir
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
30
        self.outputdir = self.mcmc_config['output_dir'] + '/' + time.strftime('%Y%m%d-%H%M%S')
31
32
33
34
35
36
37
        if not os.path.exists(self.outputdir):
            os.makedirs(self.outputdir)
            print('All data will be written in {}'.format(self.outputdir))
        else:
            print('Output directory already exists! ({})'.format(self.outputdir))
            sys.exit(1)
        shutil.copy(mcmc_config_filename, self.outputdir)
38
        for f in [ self.mcmc_config['target'],
39
                self.mcmc_config['indicators_config']['resilience'],
40
                self.mcmc_config['indicators_config']['social']['cost_matrix_filename']
41
42
                ]:
            shutil.copy(f, self.outputdir)
43
44
        with open(self.outputdir+'/seed.txt', 'w') as outfile:
            outfile.write('{}\n'.format(self.rng.get_state()))
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
45
46
47
48
49
        config_data = {
            'patches_md5sum':self.patches_md5sum,
            'biodiversity_matrix_md5sum':md5sum(self.mcmc_config['indicators_config']['biodiversity']['matrixfilename']),
            'social_patches_costs_md5sum':md5sum(self.mcmc_config['indicators_config']['social']['patches_costs_filename'])
        }
50
        with open(self.outputdir+'/config.yml', 'w') as outfile:
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
51
            yaml.dump(config_data, outfile, default_flow_style=False)
52
        # finishing init
53
        self.patches = load_pat_patches(self.mcmc_config['patches'])
54
55
        self.surfaces = self.patches['SURF_PARC']
        self.target = pd.read_csv(self.mcmc_config['target'], sep=';',index_col=0).rename(index=patutils.code_cultgeopat)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
56
57
58
59
        targetRatio = (self.target['2050']-self.target['2016'])/self.target['2016']
        self.targetPAT = self.patches.groupby('cultgeopat')['SURF_PARC'].sum()*(1+targetRatio)
        self.indicators = Indicators(self.mcmc_config['indicators_config'], self.patches, self.patches_md5sum, self.targetPAT)

60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87

    def is_pareto_efficient(costs, return_mask = True):
        """
        Find the pareto-efficient points
        :param costs: An (n_points, n_costs) array
        :param return_mask: True to return a mask
        :return: An array of indices of pareto-efficient points.
            If return_mask is True, this will be an (n_points, ) boolean array
            Otherwise it will be a (n_efficient_points, ) integer array of indices.
        Author:
        Source: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
        """
        is_efficient = np.arange(costs.shape[0])
        n_points = costs.shape[0]
        next_point_index = 0  # Next index in the is_efficient array to search for
        while next_point_index<len(costs):
            nondominated_point_mask = np.any(costs<costs[next_point_index], axis=1)
            nondominated_point_mask[next_point_index] = True
            is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points
            costs = costs[nondominated_point_mask]
            next_point_index = np.sum(nondominated_point_mask[:next_point_index])+1
        if return_mask:
            is_efficient_mask = np.zeros(n_points, dtype = bool)
            is_efficient_mask[is_efficient] = True
            return is_efficient_mask
        else:
            return is_efficient

88
89
90
    def scenario_scores(patches, indicators):
        return indicators.compute_indicators(patches)

91
    def step(self, iter_nb, nb_particles, scenarios):
92
93
94
        '''
        Sample new scenarios, evaluate their scores and retain only pareto front.

95
        :param iter_nb: (int) number of this iteration.
96
        :param nb_particles: number of new scenarios to sample
97
        :param scenarios: (ScenariosStack) list of the scenarios used as base for sampling the new scenarios.
98
99
        '''
        # Loop of sampling and scoring
100
101
        for index, scenario in tqdm(scenarios.sample(nb_particles, self.rng).iterrows(), total=nb_particles):
            scenario = scenarios.reallocate(index, self.rng, self.patches, self.targetPAT, self.mcmc_config['ratio_patches_to_modify'])
102
103
            if not scenario is None:
                scenarios.append(iter_nb, scenario)
104
        # Computing pareto front
105
        start_time = time.time()
106
107
        pareto_mask = MCMC.is_pareto_efficient(scenarios.scores[['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta']].values)
        scenarios.scores['pareto'] = pareto_mask
108
        elapsed_time = time.time() - start_time
109
        print('Pareto front computed  ({:.2f}% keeped) in {} - '.format(100*np.sum(pareto_mask)/len(pareto_mask), time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
110
        # Writing output data
111
        start_time = time.time()
112
113
        scenarios.scores.to_csv(self.outputdir+'/scores_iter_{0:03d}.csv'.format(iter_nb), index=True)
        scenarios.cultgeopat.to_csv(self.outputdir+'/cult_iter_{0:03d}.csv'.format(iter_nb), index=True)
114
        try:
115
            pairplot = sns.pairplot(scenarios.scores, vars=['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta'],
116
                    diag_kind="kde",hue="pareto"
117
118
119
                )
            pairplot.savefig(self.outputdir+'/mcmc_iter_{0:03d}.png'.format(iter_nb))
            plt.close(pairplot.fig)
120
121
122
123
        except (np.linalg.linalg.LinAlgError, ValueError) as e:
            # when data doesn't vary enough on a dimension, it is impossible to generate the pareto density
            # and an error is triggered. Here, we ignore this error.
            pass
124
        elapsed_time = time.time() - start_time
125
126
        print('Scores of {} scenarios written in {} '.format(len(scenarios.scores), time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
        print()
127
        # Retaining only optimal particules
128
129
        scenarios.retain(pareto_mask)
        return scenarios
130

131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
    def write_data(self, iter_nb, scenarios):
        print(' Writing modified patches of scenarios')
        # Writing shapefiles
        start_time = time.time()
        shp_dir = self.outputdir + '/patches_iter_{0:03d}'.format(iter_nb)
        os.makedirs(shp_dir)
        for index,scenario in tqdm(scenarios.cultgeopat.iterrows(), total=len(scenarios.cultgeopat)):
            concat = pd.concat([self.patches, scenario.to_frame("newcult")], axis=1)
            concat = concat[concat['init_cult']!=concat['newcult']]
            if len(concat)>0:
                for c in ['newcult','cultgeopat','init_cult']:
                    patutils.decode_cultgeopat(concat, c)
                concat.to_file(shp_dir+'/{:03d}_{}'.format(iter_nb, index), encoding='utf-8')
        elapsed_time = time.time() - start_time
        print('  Shape files written in {}'.format(time.strftime("%M:%S", time.gmtime(elapsed_time))))


148
    def run(self):
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
149
        # Initial sampling and evaluation
150
        nb_particles = self.mcmc_config['initial_nb_particles']
151
152
        # Start with initial scenario
        scenarios = ScenariosStack(self.indicators, self.patches)
153
        # Iteration
154
        nb_iteration=0
155
        for i in range(self.mcmc_config['max_iterations']):
156
157
            nb_iteration=i+1
            print('Iteration #{}'.format(nb_iteration))
158
            # Write SHP only for the last iteration
159
160
161
162
163
164
            scenarios = self.step(nb_iteration, nb_particles, scenarios)
            if scenarios.allFullReallocated():
                print('All scenarios are fully reallocated. Simulation finished.')
                break
        if self.mcmc_config.get('write_data', False):
            self.write_data(nb_iteration, scenarios)
165
        return scenarios
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
166
167
168

if __name__ == '__main__':
    mcmc = MCMC('MCMC_config.yml')
169
    scenario = mcmc.run()