MCMC.py 10.2 KB
Newer Older
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
1
2
3
4
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pandas as pd
import geopandas as gpd
5
import os, sys, time, shutil
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
6
7
import yaml
import numpy as np
8
from indicators import Indicators
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
9
from tqdm import tqdm
10
import patutils
11
from patutils import md5sum, load_pat_patches
12
import seaborn as sns
13
import matplotlib.pyplot as plt
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
14
from scenarios import ScenariosStack, Reallocator
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
15
16

class MCMC:
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
17
    def __init__(self, mcmc_config_filename, output=True):
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
18
19
20
21
22
        if not os.path.isfile(mcmc_config_filename):
        	print('Error: file not found "{}"'.format(mcmc_config_filename))
        	print('Please copy the template file "MCMC_config.sample.yml" and adjust to your settings and run again this program')
        	sys.exit(1)
        self.mcmc_config = yaml.load(open(mcmc_config_filename,'r'))
23
24
25
26
27
28
29
        self.patches_md5sum = md5sum(self.mcmc_config['patches'])
        if 'rng_seed' in self.mcmc_config:
            self.rng = np.random.RandomState(self.mcmc_config['rng_seed'])
        else:
            self.rng = np.random.RandomState(42)
            print('MCMC initialized with default seed') # self.rng.get_state()
        # Copying input data in output dir
30
        self.outputdir = self.mcmc_config['output_dir'] + '/' + time.strftime('%Y%m%d-%H%M%S') + '_{initial_nb_particles}_{step_nb_particles}_{ratio_patches_to_modify}'.format(**self.mcmc_config)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
        if output:
            if not os.path.exists(self.outputdir):
                os.makedirs(self.outputdir)
                print('All data will be written in {}'.format(self.outputdir))
            else:
                print('Output directory already exists! ({})'.format(self.outputdir))
                sys.exit(1)
            shutil.copy(mcmc_config_filename, self.outputdir)
            for f in [ self.mcmc_config['target'],
                    self.mcmc_config['indicators_config']['resilience'],
                    self.mcmc_config['indicators_config']['social']['cost_matrix_filename']
                    ]:
                shutil.copy(f, self.outputdir)
            with open(self.outputdir+'/seed.txt', 'w') as outfile:
                outfile.write('{}\n'.format(self.rng.get_state()))
            config_data = {
                'patches_md5sum':self.patches_md5sum,
                'biodiversity_matrix_md5sum':md5sum(self.mcmc_config['indicators_config']['biodiversity']['matrixfilename']),
                'social_patches_costs_md5sum':md5sum(self.mcmc_config['indicators_config']['social']['patches_costs_filename'])
            }
            with open(self.outputdir+'/config.yml', 'w') as outfile:
                yaml.dump(config_data, outfile, default_flow_style=False)
53
        # finishing init
54
        self.patches = load_pat_patches(self.mcmc_config['patches'])
55
56
        self.surfaces = self.patches['SURF_PARC']
        self.target = pd.read_csv(self.mcmc_config['target'], sep=';',index_col=0).rename(index=patutils.code_cultgeopat)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
57
58
59
        targetRatio = (self.target['2050']-self.target['2016'])/self.target['2016']
        self.targetPAT = self.patches.groupby('cultgeopat')['SURF_PARC'].sum()*(1+targetRatio)
        self.indicators = Indicators(self.mcmc_config['indicators_config'], self.patches, self.patches_md5sum, self.targetPAT)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
60
        self.reallocator = Reallocator(self.rng, self.patches, self.targetPAT, self.mcmc_config['ratio_patches_to_modify'])
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
61

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89

    def is_pareto_efficient(costs, return_mask = True):
        """
        Find the pareto-efficient points
        :param costs: An (n_points, n_costs) array
        :param return_mask: True to return a mask
        :return: An array of indices of pareto-efficient points.
            If return_mask is True, this will be an (n_points, ) boolean array
            Otherwise it will be a (n_efficient_points, ) integer array of indices.
        Author:
        Source: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
        """
        is_efficient = np.arange(costs.shape[0])
        n_points = costs.shape[0]
        next_point_index = 0  # Next index in the is_efficient array to search for
        while next_point_index<len(costs):
            nondominated_point_mask = np.any(costs<costs[next_point_index], axis=1)
            nondominated_point_mask[next_point_index] = True
            is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points
            costs = costs[nondominated_point_mask]
            next_point_index = np.sum(nondominated_point_mask[:next_point_index])+1
        if return_mask:
            is_efficient_mask = np.zeros(n_points, dtype = bool)
            is_efficient_mask[is_efficient] = True
            return is_efficient_mask
        else:
            return is_efficient

90
91
92
    def scenario_scores(patches, indicators):
        return indicators.compute_indicators(patches)

93
    def step(self, iter_nb, nb_particles, scenarios, write_data=True, disable_progress=False):
94
95
96
        '''
        Sample new scenarios, evaluate their scores and retain only pareto front.

97
        :param iter_nb: (int) number of this iteration.
98
        :param nb_particles: number of new scenarios to sample
99
        :param scenarios: (ScenariosStack) list of the scenarios used as base for sampling the new scenarios.
100
101
        '''
        # Loop of sampling and scoring
102
        start_time = time.time()
103
        data={'scenarios_patches':[], 'scenarios_cult':[], 'previous_indexes':[], 'fullreallocated':[]}
104
        for index, scenario in tqdm(scenarios.sample(nb_particles, self.rng).iterrows(), total=nb_particles, disable=disable_progress):
105
106
107
108
109
110
            [fullreallocated,scenario] = scenarios.reallocate(index)
            data['fullreallocated'].append(fullreallocated)
            data['scenarios_patches'].append(scenario)
            data['scenarios_cult'].append(scenario['cultgeopat'])
            data['previous_indexes'].append(index)
        scenarios.reconstitute(iter_nb, **data, disable_progress=disable_progress)
111
        elapsed_time = time.time() - start_time
112
        print('Iteration duration: {} - {:.2f} scen/s '.format(time.strftime("%M:%S", time.gmtime(elapsed_time)), nb_particles/elapsed_time), end="", flush=True)
113
        # Computing pareto front
114
        start_time = time.time()
115
116
        pareto_mask = MCMC.is_pareto_efficient(scenarios.scores[['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta']].values)
        scenarios.scores['pareto'] = pareto_mask
117
        elapsed_time = time.time() - start_time
118
        print(' Fully reallocated scenarios {}/{} - '.format(scenarios.nbFullReallocated(), nb_particles), end="", flush=True)
119
        if (len(scenarios.scores)>0):
120
            print('{:.2f}% on pareto front (computed in {}) '.format(100*np.sum(pareto_mask)/len(pareto_mask), time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
121
        # Writing output data
122
123
124
125
126
127
128
129
        if write_data:
            write_times=[]
            start_time = time.time()
            scenarios.scores.to_csv(self.outputdir+'/scores_iter_{0:03d}.csv'.format(iter_nb), index=True)
            write_times.append(time.time() - start_time)
            start_time = time.time()
            scenarios.cultgeopat.to_hdf(self.outputdir+'/cult_iter_{0:03d}.h5'.format(iter_nb), 'cultgeopat', index=True, complevel=1)
            write_times.append(time.time() - start_time)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
130
131
132
133
134
135
136
            start_time = time.time()
            try:
                pairplot = sns.pairplot(scenarios.scores, vars=['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta'],
                        diag_kind="kde",hue="pareto"
                    )
                pairplot.savefig(self.outputdir+'/mcmc_iter_{0:03d}.png'.format(iter_nb))
                plt.close(pairplot.fig)
137
            except:
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
138
139
140
141
                # when data doesn't vary enough on a dimension, it is impossible to generate the pareto density
                # and an error is triggered. Here, we ignore this error.
                pass
            write_times.append(time.time() - start_time)
142
            print(' - Scores written in {}/{}/{} '.format(*map(lambda x: time.strftime("%M:%S", time.gmtime(x)), write_times)), end="", flush=True)
143
        print()
144
        # Retaining only optimal particules
145
146
        scenarios.retain(pareto_mask)
        return scenarios
147

148
149
150
151
152
153
    def write_data(self, iter_nb, scenarios):
        print(' Writing modified patches of scenarios')
        # Writing shapefiles
        start_time = time.time()
        shp_dir = self.outputdir + '/patches_iter_{0:03d}'.format(iter_nb)
        os.makedirs(shp_dir)
154
        counter = 0
155
        for index,scenario in tqdm(scenarios.cultgeopat.iterrows(), total=len(scenarios.cultgeopat)):
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
156
157
158
159
160
161
162
163
            #if scenarios.scores.loc[index,'full_reallocated']:
            counter += 1
            concat = pd.concat([self.patches, scenario.to_frame("newcult")], axis=1)
            concat = concat[concat['init_cult']!=concat['newcult']]
            if len(concat)>0:
                for c in ['newcult','cultgeopat','init_cult']:
                    patutils.decode_cultgeopat(concat, c)
                concat.to_file(shp_dir+'/{:03d}_{}'.format(iter_nb, index), encoding='utf-8')
164
        elapsed_time = time.time() - start_time
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
165
166
        #print('  {} shapefiles (only scenario fully reallocated) written in {}'.format(counter, time.strftime("%M:%S", time.gmtime(elapsed_time))))
        print('  {} shapefiles written in {}'.format(counter, time.strftime("%M:%S", time.gmtime(elapsed_time))))
167
168


169
170
    def run(self):
        # Start with initial scenario
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
171
        scenarios = ScenariosStack(self.indicators, self.reallocator, self.patches)
172
        # Iteration
173
        nb_iteration=0
174
        for i in range(self.mcmc_config['max_iterations']):
175
176
            nb_iteration=i+1
            print('Iteration #{}'.format(nb_iteration))
177
178
179
180
            if nb_iteration==1:
                nb_particles = self.mcmc_config['initial_nb_particles']
            else:
                nb_particles = self.mcmc_config['step_nb_particles']
181
            scenarios = self.step(nb_iteration, nb_particles, scenarios, write_data=self.mcmc_config.get('write_data', False))
182
183
184
            if scenarios.allFullReallocated():
                print('All scenarios are fully reallocated. Simulation finished.')
                break
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
185
        self.write_data(nb_iteration, scenarios)
186
        return scenarios
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
187
188
189

if __name__ == '__main__':
    mcmc = MCMC('MCMC_config.yml')
190
    scenario = mcmc.run()