MCMC.py 9.65 KB
Newer Older
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
1
2
3
4
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pandas as pd
import geopandas as gpd
5
import os, sys, time, shutil
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
6
7
import yaml
import numpy as np
8
from indicators import Indicators
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
9
from tqdm import tqdm
10
import patutils
11
from patutils import md5sum, load_pat_patches
12
import seaborn as sns
13
import matplotlib.pyplot as plt
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
14
from scenarios import ScenariosStack, Reallocator
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
15
16

class MCMC:
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
17
    def __init__(self, mcmc_config_filename, output=True):
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
18
19
20
21
22
        if not os.path.isfile(mcmc_config_filename):
        	print('Error: file not found "{}"'.format(mcmc_config_filename))
        	print('Please copy the template file "MCMC_config.sample.yml" and adjust to your settings and run again this program')
        	sys.exit(1)
        self.mcmc_config = yaml.load(open(mcmc_config_filename,'r'))
23
24
25
26
27
28
29
        self.patches_md5sum = md5sum(self.mcmc_config['patches'])
        if 'rng_seed' in self.mcmc_config:
            self.rng = np.random.RandomState(self.mcmc_config['rng_seed'])
        else:
            self.rng = np.random.RandomState(42)
            print('MCMC initialized with default seed') # self.rng.get_state()
        # Copying input data in output dir
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
30
        self.outputdir = self.mcmc_config['output_dir'] + '/' + time.strftime('%Y%m%d-%H%M%S')
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
        if output:
            if not os.path.exists(self.outputdir):
                os.makedirs(self.outputdir)
                print('All data will be written in {}'.format(self.outputdir))
            else:
                print('Output directory already exists! ({})'.format(self.outputdir))
                sys.exit(1)
            shutil.copy(mcmc_config_filename, self.outputdir)
            for f in [ self.mcmc_config['target'],
                    self.mcmc_config['indicators_config']['resilience'],
                    self.mcmc_config['indicators_config']['social']['cost_matrix_filename']
                    ]:
                shutil.copy(f, self.outputdir)
            with open(self.outputdir+'/seed.txt', 'w') as outfile:
                outfile.write('{}\n'.format(self.rng.get_state()))
            config_data = {
                'patches_md5sum':self.patches_md5sum,
                'biodiversity_matrix_md5sum':md5sum(self.mcmc_config['indicators_config']['biodiversity']['matrixfilename']),
                'social_patches_costs_md5sum':md5sum(self.mcmc_config['indicators_config']['social']['patches_costs_filename'])
            }
            with open(self.outputdir+'/config.yml', 'w') as outfile:
                yaml.dump(config_data, outfile, default_flow_style=False)
53
        # finishing init
54
        self.patches = load_pat_patches(self.mcmc_config['patches'])
55
56
        self.surfaces = self.patches['SURF_PARC']
        self.target = pd.read_csv(self.mcmc_config['target'], sep=';',index_col=0).rename(index=patutils.code_cultgeopat)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
57
58
59
        targetRatio = (self.target['2050']-self.target['2016'])/self.target['2016']
        self.targetPAT = self.patches.groupby('cultgeopat')['SURF_PARC'].sum()*(1+targetRatio)
        self.indicators = Indicators(self.mcmc_config['indicators_config'], self.patches, self.patches_md5sum, self.targetPAT)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
60
        self.reallocator = Reallocator(self.rng, self.patches, self.targetPAT, self.mcmc_config['ratio_patches_to_modify'])
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
61

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89

    def is_pareto_efficient(costs, return_mask = True):
        """
        Find the pareto-efficient points
        :param costs: An (n_points, n_costs) array
        :param return_mask: True to return a mask
        :return: An array of indices of pareto-efficient points.
            If return_mask is True, this will be an (n_points, ) boolean array
            Otherwise it will be a (n_efficient_points, ) integer array of indices.
        Author:
        Source: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
        """
        is_efficient = np.arange(costs.shape[0])
        n_points = costs.shape[0]
        next_point_index = 0  # Next index in the is_efficient array to search for
        while next_point_index<len(costs):
            nondominated_point_mask = np.any(costs<costs[next_point_index], axis=1)
            nondominated_point_mask[next_point_index] = True
            is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points
            costs = costs[nondominated_point_mask]
            next_point_index = np.sum(nondominated_point_mask[:next_point_index])+1
        if return_mask:
            is_efficient_mask = np.zeros(n_points, dtype = bool)
            is_efficient_mask[is_efficient] = True
            return is_efficient_mask
        else:
            return is_efficient

90
91
92
    def scenario_scores(patches, indicators):
        return indicators.compute_indicators(patches)

93
    def step(self, iter_nb, nb_particles, scenarios, write_data=True, disable_progress=False):
94
95
96
        '''
        Sample new scenarios, evaluate their scores and retain only pareto front.

97
        :param iter_nb: (int) number of this iteration.
98
        :param nb_particles: number of new scenarios to sample
99
        :param scenarios: (ScenariosStack) list of the scenarios used as base for sampling the new scenarios.
100
101
        '''
        # Loop of sampling and scoring
102
        for index, scenario in tqdm(scenarios.sample(nb_particles, self.rng).iterrows(), total=nb_particles, disable=disable_progress):
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
103
104
            scenario = scenarios.reallocate(index)
            if scenario is None:
105
                scenarios.setFullReallocated(index, True)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
106
107
            else:
                scenarios.append(iter_nb, scenario, index)
108
        scenarios.retain(scenarios.scores['iteration']==iter_nb)
109
        # Computing pareto front
110
        start_time = time.time()
111
112
        pareto_mask = MCMC.is_pareto_efficient(scenarios.scores[['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta']].values)
        scenarios.scores['pareto'] = pareto_mask
113
        elapsed_time = time.time() - start_time
114
        print(' Fully reallocated scenarios {}/{} - '.format(scenarios.nbFullReallocated(), len(scenarios.scores)), end="", flush=True)
115
116
        if (len(scenarios.scores)>0):
            print('{:.2f}% on pareto front (computed in {}) - '.format(100*np.sum(pareto_mask)/len(pareto_mask), time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
117
        # Writing output data
118
119
120
121
122
123
124
125
        if write_data:
            write_times=[]
            start_time = time.time()
            scenarios.scores.to_csv(self.outputdir+'/scores_iter_{0:03d}.csv'.format(iter_nb), index=True)
            write_times.append(time.time() - start_time)
            start_time = time.time()
            scenarios.cultgeopat.to_hdf(self.outputdir+'/cult_iter_{0:03d}.h5'.format(iter_nb), 'cultgeopat', index=True, complevel=1)
            write_times.append(time.time() - start_time)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
126
127
128
129
130
131
132
133
134
135
136
137
138
            start_time = time.time()
            try:
                pairplot = sns.pairplot(scenarios.scores, vars=['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta'],
                        diag_kind="kde",hue="pareto"
                    )
                pairplot.savefig(self.outputdir+'/mcmc_iter_{0:03d}.png'.format(iter_nb))
                plt.close(pairplot.fig)
            except (np.linalg.linalg.LinAlgError, ValueError, AttributeError) as e:
                # when data doesn't vary enough on a dimension, it is impossible to generate the pareto density
                # and an error is triggered. Here, we ignore this error.
                pass
            write_times.append(time.time() - start_time)
            print('Scores written in {}/{}/{} '.format(*map(lambda x: time.strftime("%M:%S", time.gmtime(x)), write_times)), end="", flush=True)
139
        print()
140
        # Retaining only optimal particules
141
142
        scenarios.retain(pareto_mask)
        return scenarios
143

144
145
146
147
148
149
    def write_data(self, iter_nb, scenarios):
        print(' Writing modified patches of scenarios')
        # Writing shapefiles
        start_time = time.time()
        shp_dir = self.outputdir + '/patches_iter_{0:03d}'.format(iter_nb)
        os.makedirs(shp_dir)
150
        counter = 0
151
        for index,scenario in tqdm(scenarios.cultgeopat.iterrows(), total=len(scenarios.cultgeopat)):
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
152
153
154
155
156
157
158
159
            #if scenarios.scores.loc[index,'full_reallocated']:
            counter += 1
            concat = pd.concat([self.patches, scenario.to_frame("newcult")], axis=1)
            concat = concat[concat['init_cult']!=concat['newcult']]
            if len(concat)>0:
                for c in ['newcult','cultgeopat','init_cult']:
                    patutils.decode_cultgeopat(concat, c)
                concat.to_file(shp_dir+'/{:03d}_{}'.format(iter_nb, index), encoding='utf-8')
160
        elapsed_time = time.time() - start_time
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
161
162
        #print('  {} shapefiles (only scenario fully reallocated) written in {}'.format(counter, time.strftime("%M:%S", time.gmtime(elapsed_time))))
        print('  {} shapefiles written in {}'.format(counter, time.strftime("%M:%S", time.gmtime(elapsed_time))))
163
164


165
    def run(self):
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
166
        # Initial sampling and evaluation
167
        nb_particles = self.mcmc_config['initial_nb_particles']
168
        # Start with initial scenario
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
169
        scenarios = ScenariosStack(self.indicators, self.reallocator, self.patches)
170
        # Iteration
171
        nb_iteration=0
172
        for i in range(self.mcmc_config['max_iterations']):
173
174
            nb_iteration=i+1
            print('Iteration #{}'.format(nb_iteration))
175
            scenarios = self.step(nb_iteration, nb_particles, scenarios, write_data=self.mcmc_config.get('write_data', False))
176
177
178
            if scenarios.allFullReallocated():
                print('All scenarios are fully reallocated. Simulation finished.')
                break
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
179
        self.write_data(nb_iteration, scenarios)
180
        return scenarios
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
181
182
183

if __name__ == '__main__':
    mcmc = MCMC('MCMC_config.yml')
184
    scenario = mcmc.run()