MCMC.py 8.53 KB
Newer Older
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
1
2
3
4
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pandas as pd
import geopandas as gpd
5
import os, sys, time, shutil
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
6
7
import yaml
import numpy as np
8
from indicators import Indicators
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
9
from tqdm import tqdm
10
import patutils
11
from patutils import md5sum, load_pat_patches
12
import seaborn as sns
13
import matplotlib.pyplot as plt
14
from scenarios import ScenariosStack
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
15
16
17
18
19
20
21
22

class MCMC:
    def __init__(self, mcmc_config_filename):
        if not os.path.isfile(mcmc_config_filename):
        	print('Error: file not found "{}"'.format(mcmc_config_filename))
        	print('Please copy the template file "MCMC_config.sample.yml" and adjust to your settings and run again this program')
        	sys.exit(1)
        self.mcmc_config = yaml.load(open(mcmc_config_filename,'r'))
23
        self.write_data = self.mcmc_config.get('write_data', False)
24
25
26
27
28
29
30
        self.patches_md5sum = md5sum(self.mcmc_config['patches'])
        if 'rng_seed' in self.mcmc_config:
            self.rng = np.random.RandomState(self.mcmc_config['rng_seed'])
        else:
            self.rng = np.random.RandomState(42)
            print('MCMC initialized with default seed') # self.rng.get_state()
        # Copying input data in output dir
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
31
        self.outputdir = self.mcmc_config['output_dir'] + '/' + time.strftime('%Y%m%d-%H%M%S')
32
33
34
35
36
37
38
        if not os.path.exists(self.outputdir):
            os.makedirs(self.outputdir)
            print('All data will be written in {}'.format(self.outputdir))
        else:
            print('Output directory already exists! ({})'.format(self.outputdir))
            sys.exit(1)
        shutil.copy(mcmc_config_filename, self.outputdir)
39
        for f in [ self.mcmc_config['target'],
40
                self.mcmc_config['indicators_config']['resilience'],
41
                self.mcmc_config['indicators_config']['social']['cost_matrix_filename']
42
43
                ]:
            shutil.copy(f, self.outputdir)
44
45
        with open(self.outputdir+'/seed.txt', 'w') as outfile:
            outfile.write('{}\n'.format(self.rng.get_state()))
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
46
47
48
49
50
        config_data = {
            'patches_md5sum':self.patches_md5sum,
            'biodiversity_matrix_md5sum':md5sum(self.mcmc_config['indicators_config']['biodiversity']['matrixfilename']),
            'social_patches_costs_md5sum':md5sum(self.mcmc_config['indicators_config']['social']['patches_costs_filename'])
        }
51
        with open(self.outputdir+'/config.yml', 'w') as outfile:
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
52
            yaml.dump(config_data, outfile, default_flow_style=False)
53
        # finishing init
54
        self.patches = load_pat_patches(self.mcmc_config['patches'])
55
56
        self.surfaces = self.patches['SURF_PARC']
        self.target = pd.read_csv(self.mcmc_config['target'], sep=';',index_col=0).rename(index=patutils.code_cultgeopat)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
57
58
59
60
        targetRatio = (self.target['2050']-self.target['2016'])/self.target['2016']
        self.targetPAT = self.patches.groupby('cultgeopat')['SURF_PARC'].sum()*(1+targetRatio)
        self.indicators = Indicators(self.mcmc_config['indicators_config'], self.patches, self.patches_md5sum, self.targetPAT)

61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88

    def is_pareto_efficient(costs, return_mask = True):
        """
        Find the pareto-efficient points
        :param costs: An (n_points, n_costs) array
        :param return_mask: True to return a mask
        :return: An array of indices of pareto-efficient points.
            If return_mask is True, this will be an (n_points, ) boolean array
            Otherwise it will be a (n_efficient_points, ) integer array of indices.
        Author:
        Source: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
        """
        is_efficient = np.arange(costs.shape[0])
        n_points = costs.shape[0]
        next_point_index = 0  # Next index in the is_efficient array to search for
        while next_point_index<len(costs):
            nondominated_point_mask = np.any(costs<costs[next_point_index], axis=1)
            nondominated_point_mask[next_point_index] = True
            is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points
            costs = costs[nondominated_point_mask]
            next_point_index = np.sum(nondominated_point_mask[:next_point_index])+1
        if return_mask:
            is_efficient_mask = np.zeros(n_points, dtype = bool)
            is_efficient_mask[is_efficient] = True
            return is_efficient_mask
        else:
            return is_efficient

89
90
91
    def scenario_scores(patches, indicators):
        return indicators.compute_indicators(patches)

92
    def step(self, iter_nb, nb_particles, scenarios, write_data=False):
93
94
95
        '''
        Sample new scenarios, evaluate their scores and retain only pareto front.

96
        :param iter_nb: (int) number of this iteration.
97
        :param nb_particles: number of new scenarios to sample
98
        :param scenarios: (ScenariosStack) list of the scenarios used as base for sampling the new scenarios.
99
100
        '''
        # Loop of sampling and scoring
101
102
103
        for index, scenario in tqdm(scenarios.sample(nb_particles, self.rng).iterrows(), total=nb_particles):
            scenario = scenarios.reallocate(index, self.rng, self.patches, self.targetPAT, self.mcmc_config['ratio_patches_to_modify'])
            scenarios.append(iter_nb, scenario)
104
        # Computing pareto front
105
        start_time = time.time()
106
107
        pareto_mask = MCMC.is_pareto_efficient(scenarios.scores[['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta']].values)
        scenarios.scores['pareto'] = pareto_mask
108
        elapsed_time = time.time() - start_time
109
        print('Pareto front computed  ({:.2f}% keeped) in {} - '.format(100*np.sum(pareto_mask)/len(pareto_mask), time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
110
        # Writing output data
111
        start_time = time.time()
112
113
        scenarios.scores.to_csv(self.outputdir+'/scores_iter_{0:03d}.csv'.format(iter_nb), index=True)
        scenarios.cultgeopat.to_csv(self.outputdir+'/cult_iter_{0:03d}.csv'.format(iter_nb), index=True)
114
        try:
115
            pairplot = sns.pairplot(scenarios.scores, vars=['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta'],
116
                    diag_kind="kde",hue="pareto"
117
118
119
                )
            pairplot.savefig(self.outputdir+'/mcmc_iter_{0:03d}.png'.format(iter_nb))
            plt.close(pairplot.fig)
120
121
122
123
        except (np.linalg.linalg.LinAlgError, ValueError) as e:
            # when data doesn't vary enough on a dimension, it is impossible to generate the pareto density
            # and an error is triggered. Here, we ignore this error.
            pass
124
        elapsed_time = time.time() - start_time
125
126
        print('Scores of {} scenarios written in {} '.format(len(scenarios.scores), time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
        print()
127
        # Retaining only optimal particules
128
        scenarios.retain(pareto_mask)
129
        if write_data:
130
            print(' Writing modified patches of scenarios')
131
132
            # Writing shapefiles
            start_time = time.time()
133
            shp_dir = self.outputdir + '/patches_iter_{0:03d}'.format(iter_nb)
134
            os.makedirs(shp_dir)
135
            for index,scenario in tqdm(scenarios.cultgeopat.iterrows(), total=len(scenarios.cultgeopat)):
136
137
138
139
140
141
142
                concat = pd.concat([self.patches, scenario.to_frame("newcult")], axis=1)
                concat = concat[concat['init_cult']!=concat['newcult']]
                if len(concat)>0:
                    for c in ['newcult','cultgeopat','init_cult']:
                        patutils.decode_cultgeopat(concat, c)
                    concat.to_file(shp_dir+'/{:03d}_{}'.format(iter_nb, index), encoding='utf-8')
                # scenario[scenario['cultmodified']].drop('cultmodified', axis=1).to_file(shp_dir+'/{0:03d}_{}'.format(iter_nb,index), encoding='utf-8')
143
            elapsed_time = time.time() - start_time
144
            print('  Shape files written in {}'.format(time.strftime("%M:%S", time.gmtime(elapsed_time))))
145
        return scenarios
146

147
    def run(self):
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
148
        # Initial sampling and evaluation
149
        nb_particles = self.mcmc_config['initial_nb_particles']
150
151
        # Start with initial scenario
        scenarios = ScenariosStack(self.indicators, self.patches)
152
        # Iteration
153
        for i in range(self.mcmc_config['max_iterations']):
154
            print('Iteration #{}'.format(i+1))
155
            # Write SHP only for the last iteration
156
157
            scenarios = self.step(i, nb_particles, scenarios, self.write_data and i == self.mcmc_config['max_iterations']-1)
        return scenarios
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
158
159
160

if __name__ == '__main__':
    mcmc = MCMC('MCMC_config.yml')
161
    scenario = mcmc.run()