MCMC.py 10.1 KB
Newer Older
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
1
2
3
4
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pandas as pd
import geopandas as gpd
5
import os, sys, time, shutil
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
6
7
import yaml
import numpy as np
8
from indicators import Indicators
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
9
from tqdm import tqdm
10
import patutils
11
from patutils import md5sum, load_pat_patches
12
import seaborn as sns
13
import matplotlib.pyplot as plt
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
14
from scenarios import ScenariosStack, Reallocator
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
15
16

class MCMC:
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
17
    def __init__(self, mcmc_config_filename, output=True):
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
18
19
20
21
22
        if not os.path.isfile(mcmc_config_filename):
        	print('Error: file not found "{}"'.format(mcmc_config_filename))
        	print('Please copy the template file "MCMC_config.sample.yml" and adjust to your settings and run again this program')
        	sys.exit(1)
        self.mcmc_config = yaml.load(open(mcmc_config_filename,'r'))
23
24
25
26
27
28
29
        self.patches_md5sum = md5sum(self.mcmc_config['patches'])
        if 'rng_seed' in self.mcmc_config:
            self.rng = np.random.RandomState(self.mcmc_config['rng_seed'])
        else:
            self.rng = np.random.RandomState(42)
            print('MCMC initialized with default seed') # self.rng.get_state()
        # Copying input data in output dir
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
30
        self.outputdir = self.mcmc_config['output_dir'] + '/' + time.strftime('%Y%m%d-%H%M%S')
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
        if output:
            if not os.path.exists(self.outputdir):
                os.makedirs(self.outputdir)
                print('All data will be written in {}'.format(self.outputdir))
            else:
                print('Output directory already exists! ({})'.format(self.outputdir))
                sys.exit(1)
            shutil.copy(mcmc_config_filename, self.outputdir)
            for f in [ self.mcmc_config['target'],
                    self.mcmc_config['indicators_config']['resilience'],
                    self.mcmc_config['indicators_config']['social']['cost_matrix_filename']
                    ]:
                shutil.copy(f, self.outputdir)
            with open(self.outputdir+'/seed.txt', 'w') as outfile:
                outfile.write('{}\n'.format(self.rng.get_state()))
            config_data = {
                'patches_md5sum':self.patches_md5sum,
                'biodiversity_matrix_md5sum':md5sum(self.mcmc_config['indicators_config']['biodiversity']['matrixfilename']),
                'social_patches_costs_md5sum':md5sum(self.mcmc_config['indicators_config']['social']['patches_costs_filename'])
            }
            with open(self.outputdir+'/config.yml', 'w') as outfile:
                yaml.dump(config_data, outfile, default_flow_style=False)
53
        # finishing init
54
        self.patches = load_pat_patches(self.mcmc_config['patches'])
55
56
        self.surfaces = self.patches['SURF_PARC']
        self.target = pd.read_csv(self.mcmc_config['target'], sep=';',index_col=0).rename(index=patutils.code_cultgeopat)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
57
58
59
        targetRatio = (self.target['2050']-self.target['2016'])/self.target['2016']
        self.targetPAT = self.patches.groupby('cultgeopat')['SURF_PARC'].sum()*(1+targetRatio)
        self.indicators = Indicators(self.mcmc_config['indicators_config'], self.patches, self.patches_md5sum, self.targetPAT)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
60
        self.reallocator = Reallocator(self.rng, self.patches, self.targetPAT, self.mcmc_config['ratio_patches_to_modify'])
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
61

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89

    def is_pareto_efficient(costs, return_mask = True):
        """
        Find the pareto-efficient points
        :param costs: An (n_points, n_costs) array
        :param return_mask: True to return a mask
        :return: An array of indices of pareto-efficient points.
            If return_mask is True, this will be an (n_points, ) boolean array
            Otherwise it will be a (n_efficient_points, ) integer array of indices.
        Author:
        Source: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
        """
        is_efficient = np.arange(costs.shape[0])
        n_points = costs.shape[0]
        next_point_index = 0  # Next index in the is_efficient array to search for
        while next_point_index<len(costs):
            nondominated_point_mask = np.any(costs<costs[next_point_index], axis=1)
            nondominated_point_mask[next_point_index] = True
            is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points
            costs = costs[nondominated_point_mask]
            next_point_index = np.sum(nondominated_point_mask[:next_point_index])+1
        if return_mask:
            is_efficient_mask = np.zeros(n_points, dtype = bool)
            is_efficient_mask[is_efficient] = True
            return is_efficient_mask
        else:
            return is_efficient

90
91
92
    def scenario_scores(patches, indicators):
        return indicators.compute_indicators(patches)

93
    def step(self, iter_nb, nb_particles, scenarios, write_data=True, disable_progress=False):
94
95
96
        '''
        Sample new scenarios, evaluate their scores and retain only pareto front.

97
        :param iter_nb: (int) number of this iteration.
98
        :param nb_particles: number of new scenarios to sample
99
        :param scenarios: (ScenariosStack) list of the scenarios used as base for sampling the new scenarios.
100
101
        '''
        # Loop of sampling and scoring
102
        data={'scenarios_patches':[], 'scenarios_cult':[], 'previous_indexes':[]}
103
        for index, scenario in tqdm(scenarios.sample(nb_particles, self.rng).iterrows(), total=nb_particles, disable=disable_progress):
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
104
105
            scenario = scenarios.reallocate(index)
            if scenario is None:
106
                scenarios.setFullReallocated(index, True)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
107
            else:
108
109
110
111
112
113
114
                data['scenarios_patches'].append(scenario)
                data['scenarios_cult'].append(scenario['cultgeopat'].to_list())
                data['previous_indexes'].append(index)
        start_time = time.time()
        scenarios.reconstitute(iter_nb, **data)
        elapsed_time = time.time() - start_time
        print('scenarios evaluated and stored in {} - '.format(time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
115
        # Computing pareto front
116
        start_time = time.time()
117
118
        pareto_mask = MCMC.is_pareto_efficient(scenarios.scores[['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta']].values)
        scenarios.scores['pareto'] = pareto_mask
119
        elapsed_time = time.time() - start_time
120
        print(' Fully reallocated scenarios {}/{} - '.format(scenarios.nbFullReallocated(), len(scenarios.scores)), end="", flush=True)
121
        if (len(scenarios.scores)>0):
122
            print('{:.2f}% on pareto front (computed in {}) '.format(100*np.sum(pareto_mask)/len(pareto_mask), time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
123
        # Writing output data
124
125
126
127
128
129
130
131
        if write_data:
            write_times=[]
            start_time = time.time()
            scenarios.scores.to_csv(self.outputdir+'/scores_iter_{0:03d}.csv'.format(iter_nb), index=True)
            write_times.append(time.time() - start_time)
            start_time = time.time()
            scenarios.cultgeopat.to_hdf(self.outputdir+'/cult_iter_{0:03d}.h5'.format(iter_nb), 'cultgeopat', index=True, complevel=1)
            write_times.append(time.time() - start_time)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
132
133
134
135
136
137
138
139
140
141
142
143
            start_time = time.time()
            try:
                pairplot = sns.pairplot(scenarios.scores, vars=['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta'],
                        diag_kind="kde",hue="pareto"
                    )
                pairplot.savefig(self.outputdir+'/mcmc_iter_{0:03d}.png'.format(iter_nb))
                plt.close(pairplot.fig)
            except (np.linalg.linalg.LinAlgError, ValueError, AttributeError) as e:
                # when data doesn't vary enough on a dimension, it is impossible to generate the pareto density
                # and an error is triggered. Here, we ignore this error.
                pass
            write_times.append(time.time() - start_time)
144
            print(' - Scores written in {}/{}/{} '.format(*map(lambda x: time.strftime("%M:%S", time.gmtime(x)), write_times)), end="", flush=True)
145
        print()
146
        # Retaining only optimal particules
147
148
        scenarios.retain(pareto_mask)
        return scenarios
149

150
151
152
153
154
155
    def write_data(self, iter_nb, scenarios):
        print(' Writing modified patches of scenarios')
        # Writing shapefiles
        start_time = time.time()
        shp_dir = self.outputdir + '/patches_iter_{0:03d}'.format(iter_nb)
        os.makedirs(shp_dir)
156
        counter = 0
157
        for index,scenario in tqdm(scenarios.cultgeopat.iterrows(), total=len(scenarios.cultgeopat)):
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
158
159
160
161
162
163
164
165
            #if scenarios.scores.loc[index,'full_reallocated']:
            counter += 1
            concat = pd.concat([self.patches, scenario.to_frame("newcult")], axis=1)
            concat = concat[concat['init_cult']!=concat['newcult']]
            if len(concat)>0:
                for c in ['newcult','cultgeopat','init_cult']:
                    patutils.decode_cultgeopat(concat, c)
                concat.to_file(shp_dir+'/{:03d}_{}'.format(iter_nb, index), encoding='utf-8')
166
        elapsed_time = time.time() - start_time
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
167
168
        #print('  {} shapefiles (only scenario fully reallocated) written in {}'.format(counter, time.strftime("%M:%S", time.gmtime(elapsed_time))))
        print('  {} shapefiles written in {}'.format(counter, time.strftime("%M:%S", time.gmtime(elapsed_time))))
169
170


171
172
    def run(self):
        # Start with initial scenario
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
173
        scenarios = ScenariosStack(self.indicators, self.reallocator, self.patches)
174
        # Iteration
175
        nb_iteration=0
176
        for i in range(self.mcmc_config['max_iterations']):
177
178
            nb_iteration=i+1
            print('Iteration #{}'.format(nb_iteration))
179
180
181
182
            if nb_iteration==1:
                nb_particles = self.mcmc_config['initial_nb_particles']
            else:
                nb_particles = self.mcmc_config['step_nb_particles']
183
            scenarios = self.step(nb_iteration, nb_particles, scenarios, write_data=self.mcmc_config.get('write_data', False))
184
185
186
            if scenarios.allFullReallocated():
                print('All scenarios are fully reallocated. Simulation finished.')
                break
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
187
        self.write_data(nb_iteration, scenarios)
188
        return scenarios
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
189
190
191

if __name__ == '__main__':
    mcmc = MCMC('MCMC_config.yml')
192
    scenario = mcmc.run()