MCMC.py 12.8 KB
Newer Older
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
1
2
3
4
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pandas as pd
import geopandas as gpd
5
import os, sys, time, shutil
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
6
7
import yaml
import numpy as np
8
9
10
from Indicator import Indicator
from overrides import overrides
from proximite import Proximite as Proximity
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
11
12
13
14
15
from resilience_list import Resilience
from productivite import Productivity
from indice_biodiversite_2 import Biodiversity
from social import Social
from tqdm import tqdm
16
from patutils import md5sum, load_pat_patches
17
import seaborn as sns
18
19
import multiprocessing as mp
from functools import partial
20
import itertools
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
21
22

class Scenario:
    '''A reallocation scenario: a set of land patches whose cultural
    assignment can be mutated towards a target surface distribution.'''

    # Class-wide counter giving each scenario a unique id (used to index scores).
    _counter = itertools.count()

    def __init__(self, patches):
        # patches: (Geo)DataFrame with at least the 'cultgeopat' (cultural
        # type) and 'SURF_PARC' (patch surface) columns.
        self.patches = patches
        self.id = next(Scenario._counter)

    @staticmethod
    def load_shp(shpfilename):
        '''
        Return an instance of class Scenario by loading a shapefile of initial patches.

        :param shpfilename: path to the shapefile to load.
        '''
        return Scenario(load_pat_patches(shpfilename))

    def reallocate(self, rng, targetPAT, ratioNbPatches):
        '''
        Randomly reassign the cultural type of a fraction of the patches,
        biased so that the surface per cultural type moves towards targetPAT.

        :param rng: numpy RandomState driving every sampling (reproducibility).
        :param targetPAT: Series of target surfaces indexed by cultural type.
        :param ratioNbPatches: fraction of the patches to reallocate.
        '''
        nbPatches = int(len(self.patches)*ratioNbPatches)
        # Surface delta between target and current state, per cultural type
        surfDelta = targetPAT - self.patches.groupby('cultgeopat')['SURF_PARC'].sum()
        cult_to_decrease = surfDelta[surfDelta<0].sort_values(ascending=True).keys().tolist()
        cult_to_increase = surfDelta[surfDelta>0].sort_values(ascending=False).keys().tolist()
        # Sampling the patches to reallocate.
        # FIX: the original tested len(mask), which is always the total number
        # of patches; we need the count of patches actually matching the mask.
        decrease_mask = self.patches['cultgeopat'].isin(cult_to_decrease)
        if decrease_mask.sum() >= nbPatches:
            samples = self.patches[decrease_mask].sample(n=nbPatches, random_state=rng)#.reset_index(drop=True)
        else:
            # All cultural types have decreased to objective
            samples = self.patches.sample(n=nbPatches, random_state=rng)
        # Building the new culture reallocated
        if len(cult_to_increase) > 0:
            factors = surfDelta[cult_to_increase]
            factors = (factors*len(samples)/factors.sum()).map(round) # normalize on nb samples
            newCult = pd.Series(cult_to_increase).repeat(factors)
        else:
            # All cultural types have increased to objective
            # So we used all cultures with more weight for highest delta
            factors = -1/surfDelta
            factors = (factors*len(samples)/factors.sum()).map(round) # normalize on nb samples
            newCult = pd.Series(surfDelta.keys().tolist()).repeat(factors)
        if len(newCult) < len(samples): # may be due to factors rounding
            # pd.concat replaces Series.append, which was removed in pandas>=2.0
            extra = newCult.sample(n=len(samples)-len(newCult), random_state=rng)
            newCult = pd.concat([newCult, extra], ignore_index=True)
        newCult = newCult.sample(frac=1, random_state=rng)[:len(samples)].reset_index(drop=True) # shuffle and cut extra elements
        # Doing the reallocation
        self.patches.loc[samples.index.values,'cultgeopat'] = newCult.values
        self.patches.loc[samples.index.values,'cultmodified'] = True
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
63

64
65
66
67
68
69
70
71
72
class CulturalIndicator(Indicator):
    '''Indicator reporting the total surface currently assigned to one
    cultural type.'''

    @overrides
    def __init__(self, config, initial_patches=None, patches_md5sum=None, targetPAT=None):
        # For this indicator, the config value is simply the cultural type name.
        self.cultgeopat = config

    @overrides
    def compute_indicator(self, patches):
        '''Return the summed surface of the patches of this cultural type.'''
        mask = patches['cultgeopat'] == self.cultgeopat
        return patches.loc[mask, 'SURF_PARC'].sum()

Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
73
74
class Indicators:
    '''Aggregates all scenario indicators and computes their scores.

    Scores are inverted (1/value) so that lower is better, which matches the
    minimizing Pareto-front computation done by the MCMC.
    '''

    def __init__(self, config, initial_patches, patches_md5sum, targetPAT):
        self.indicators_names = ['Proximity', 'Resilience', 'Productivity', 'Biodiversity', 'Social']
        # Look the indicator classes up by name in the module namespace
        # instead of eval(): same result, no arbitrary code evaluation.
        self._indicators = {
            indicator: globals()[indicator](config.get(indicator.lower()), initial_patches, patches_md5sum, targetPAT)
            for indicator in self.indicators_names
            }
        # One extra indicator per cultural type, measuring its total surface
        for cultgeopat in targetPAT.index.tolist():
            self._indicators[cultgeopat] = CulturalIndicator(cultgeopat)
            self.indicators_names.append(cultgeopat)

    def compute_indicators(self, patches):
        '''Return the inverted score (1/value) of every indicator for the given patches.'''
        # NOTE(review): raises ZeroDivisionError if an indicator returns 0 —
        # presumably indicator values are always strictly positive; confirm.
        return [1/self._indicators[ind].compute_indicator(patches) for ind in self.indicators_names]

    def compute_indicators_pool(self, scenarios):
        '''Compute the scores of several scenarios; one row per scenario.'''
        rows = []
        for patches in scenarios:
            rows.append(self.compute_indicators(patches))
        return pd.DataFrame(rows, columns=self.indicators_names)

class MCMC:
    '''Pareto-based MCMC optimization of land patch reallocation scenarios.

    Reads a YAML configuration, prepares a timestamped output directory with
    copies of all input data, then iteratively samples new scenarios and
    keeps only the Pareto front of their indicator scores.
    '''

    def __init__(self, mcmc_config_filename):
        '''Load the configuration, seed the RNG and set up the output directory.

        :param mcmc_config_filename: path to the YAML configuration file.
        Exits the program if the configuration file is missing or if the
        output directory already exists.
        '''
        if not os.path.isfile(mcmc_config_filename):
        	print('Error: file not found "{}"'.format(mcmc_config_filename))
        	print('Please copy the template file "MCMC_config.sample.yml" and adjust to your settings and run again this program')
        	sys.exit(1)
        # safe_load avoids arbitrary object construction from the YAML file
        # (yaml.load without an explicit Loader is deprecated and unsafe).
        with open(mcmc_config_filename, 'r') as config_file:
            self.mcmc_config = yaml.safe_load(config_file)
        self.multiprocessing = self.mcmc_config.get('multiprocessing', False)
        self.write_shp = self.mcmc_config.get('write_shp', False)
        self.patches_md5sum = md5sum(self.mcmc_config['patches'])
        if 'rng_seed' in self.mcmc_config:
            self.rng = np.random.RandomState(self.mcmc_config['rng_seed'])
        else:
            self.rng = np.random.RandomState(42)
            print('MCMC initialized with default seed') # self.rng.get_state()
        # Copying input data in output dir (timestamped; must not pre-exist)
        self.outputdir = self.mcmc_config['output_dir'] + '/' + time.strftime('%Y%m%d-%H%M%S')
        if not os.path.exists(self.outputdir):
            os.makedirs(self.outputdir)
            print('All data will be written in {}'.format(self.outputdir))
        else:
            print('Output directory already exists! ({})'.format(self.outputdir))
            sys.exit(1)
        shutil.copy(mcmc_config_filename, self.outputdir)
        # Archive every input file next to the results for reproducibility
        for f in [ self.mcmc_config['target'],
                self.mcmc_config['indicators_config']['resilience'],
                self.mcmc_config['indicators_config']['biodiversity']['matrixfilename'],
                self.mcmc_config['indicators_config']['social']['cost_matrix_filename'],
                self.mcmc_config['indicators_config']['social']['patches_costs_filename']
                ]:
            shutil.copy(f, self.outputdir)
        with open(self.outputdir+'/seed.txt', 'w') as outfile:
            outfile.write('{}\n'.format(self.rng.get_state()))
        # Record checksums of the input data actually used for this run
        config_data = {
            'patches_md5sum':self.patches_md5sum,
            'biodiversity_matrix_md5sum':md5sum(self.mcmc_config['indicators_config']['biodiversity']['matrixfilename']),
            'social_patches_costs_md5sum':md5sum(self.mcmc_config['indicators_config']['social']['patches_costs_filename'])
        }
        with open(self.outputdir+'/config.yml', 'w') as outfile:
            yaml.dump(config_data, outfile, default_flow_style=False)
        # finishing init
        self.patches = load_pat_patches(self.mcmc_config['patches'])
        self.patches['cultmodified'] = False
        # Target surfaces per cultural type, extrapolated from the 2016->2050
        # evolution ratio given in the target CSV file.
        self.target = pd.read_csv(self.mcmc_config['target'], sep=';',index_col=0)
        targetRatio = (self.target['2050']-self.target['2016'])/self.target['2016']
        self.targetPAT = self.patches.groupby('cultgeopat')['SURF_PARC'].sum()*(1+targetRatio)
        self.indicators = Indicators(self.mcmc_config['indicators_config'], self.patches, self.patches_md5sum, self.targetPAT)

    @staticmethod
    def is_pareto_efficient(costs, return_mask = True):
        """
        Find the pareto-efficient points
        :param costs: An (n_points, n_costs) array
        :param return_mask: True to return a mask
        :return: An array of indices of pareto-efficient points.
            If return_mask is True, this will be an (n_points, ) boolean array
            Otherwise it will be a (n_efficient_points, ) integer array of indices.
        Author:
        Source: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
        """
        is_efficient = np.arange(costs.shape[0])
        n_points = costs.shape[0]
        next_point_index = 0  # Next index in the is_efficient array to search for
        while next_point_index<len(costs):
            # Keep only points strictly better on at least one cost dimension
            nondominated_point_mask = np.any(costs<costs[next_point_index], axis=1)
            nondominated_point_mask[next_point_index] = True
            is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points
            costs = costs[nondominated_point_mask]
            next_point_index = np.sum(nondominated_point_mask[:next_point_index])+1
        if return_mask:
            is_efficient_mask = np.zeros(n_points, dtype = bool)
            is_efficient_mask[is_efficient] = True
            return is_efficient_mask
        else:
            return is_efficient

    @staticmethod
    def scenario_scores(patches, indicators):
        '''Compute the indicator scores of one scenario (multiprocessing helper).'''
        return indicators.compute_indicators(patches)

    def step(self, iter_nb, nb_particles, scenarios, scores=None, write_shp=False):
        '''
        Sample new scenarios, evaluate their scores and retain only pareto front.

        :param iter_nb: (int) number of this iteration, 0 for initial step, 1 for first iteration and so on.
        :param nb_particles: number of new scenarios to sample
        :param scenarios: (DataFrame) list of the scenarios used as base for sampling the new scenarios.
        :param scores: (DataFrame) list of scores for each scenario. May be None
        if given scenario are only used for sampling and will not be considered for pareto front.
        :param write_shp: (bool) if True, write the modified patches of each
        retained scenario as a shapefile in the output directory.
        :return: [scenarios, scores] restricted to the Pareto front.
        '''
        new_scenarios = []
        new_scenarios_id = []
        new_scores = []
        # Loop of sampling and scoring
        for index, patches in tqdm(scenarios.sample(nb_particles, replace=True, random_state=self.rng).iterrows(), total=nb_particles):
            scenario = Scenario(patches[0].copy())
            scenario.reallocate(self.rng, self.targetPAT, self.mcmc_config['ratio_patches_to_modify']) # 3.8 ms
            new_scenarios.append(scenario.patches)
            new_scenarios_id.append(scenario.id)
            if not self.multiprocessing:
                new_scores.append(self.indicators.compute_indicators(scenario.patches))
        if self.multiprocessing:
            new_scores = list(tqdm(self.pool.imap(partial(MCMC.scenario_scores, indicators=self.indicators), new_scenarios, chunksize=50), total=len(new_scenarios)))
        # merging with precedent data
        new_scores = pd.DataFrame(new_scores, index=new_scenarios_id, columns=self.indicators.indicators_names)
        new_scores['iteration'] = iter_nb
        if scores is None:
            scores = new_scores
            scenarios = pd.DataFrame(new_scenarios, index=new_scenarios_id)
        else:
            # DataFrame.append was removed in pandas>=2.0; concat is equivalent
            scores = pd.concat([scores, new_scores], sort=False)
            scenarios = pd.concat([scenarios, pd.DataFrame(new_scenarios, index=new_scenarios_id)], sort=False)
        # Computing pareto front
        pareto_mask = MCMC.is_pareto_efficient(scores[['Resilience','Proximity','Productivity','Biodiversity','Social']].values)
        scores['pareto'] = pareto_mask
        # Writing output data
        scores.to_csv(self.outputdir+'/mcmc_iter_{}.csv'.format(iter_nb), index=True)
        try:
            sns.pairplot(scores, vars=['Resilience','Proximity','Productivity','Biodiversity','Social'],
                    diag_kind="kde",hue="pareto"
                ).savefig(self.outputdir+'/mcmc_iter_{}.png'.format(iter_nb))
        except (np.linalg.LinAlgError, ValueError):
            # when data doesn't vary enough on a dimension, it is impossible to generate the pareto density
            # and an error is triggered. Here, we ignore this error.
            # (np.linalg.LinAlgError is the stable alias; the old
            # np.linalg.linalg path was removed from recent numpy releases.)
            pass
        scenarios = scenarios[pareto_mask]
        scores = scores[pareto_mask]
        # Writing shapefiles
        if write_shp:
            shp_dir = self.outputdir + '/patches_iter_{}'.format(iter_nb)
            os.makedirs(shp_dir)
            for index,scenario in scenarios.iterrows():
                scenario = scenario[0]
                scenario[scenario['cultmodified']].drop('cultmodified', axis=1).to_file(shp_dir+'/{}_{}'.format(iter_nb,index), encoding='utf-8')
        return [scenarios, scores]

    def run(self, nb_processes=mp.cpu_count()):
        '''Run the full optimization: initial sampling then max_iterations steps.

        :param nb_processes: pool size used when multiprocessing is enabled.
        :return: [scenarios, scores] of the final Pareto front.
        '''
        if self.multiprocessing:
            self.pool = mp.Pool(processes=nb_processes)
        # Initial sampling and evaluation
        nb_particles = self.mcmc_config['initial_nb_particles']
        scenarios_init = pd.DataFrame([self.patches])
        scenarios, scores = self.step(0, nb_particles, scenarios_init)
        # Iteration
        for i in range(self.mcmc_config['max_iterations']):
            print('Iteration #{}'.format(i))
            # Write SHP only for the last iteration
            scenarios, scores = self.step(i+1, nb_particles, scenarios, scores, i == self.mcmc_config['max_iterations']-1)
        # TODO
        # Storing variation of indicators
        init_var = scores.std()
        # sequential optimization loop
        if self.multiprocessing:
            self.pool.close()
        return [scenarios,scores]
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
247
248
249

if __name__ == '__main__':
    # Build the MCMC from the local configuration file and run the whole
    # optimization; all results are written into the timestamped output dir.
    mcmc = MCMC('MCMC_config.yml')
    scenario, scores = mcmc.run()