#!/usr/bin/python
# -*- coding: utf-8 -*-
import pandas as pd
import geopandas as gpd
import os, sys, time, shutil
import yaml
import numpy as np
from Indicator import Indicator
from overrides import overrides
from proximite import Proximite as Proximity
from resilience_list import Resilience
from productivite import Productivity
from indice_biodiversite_2 import Biodiversity
from social import Social
from tqdm import tqdm
from patutils import md5sum, load_pat_patches
import seaborn as sns
import matplotlib.pyplot as plt
import multiprocessing as mp
from functools import partial
import itertools

class Scenario:
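    '''
    A scenario holds a GeoDataFrame of land patches and a unique identifier.
    '''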
    _counter = itertools.count()
    def __init__(self, patches):
        self.patches = patches
        self.id = next(Scenario._counter)

    @staticmethod
    def load_shp(shpfilename):
        '''
        Return an instance of class Scenario by loading a shapefile of initial patches.
        '''
        scenario = Scenario(load_pat_patches(shpfilename))
        return scenario

    def reallocate(self, rng, targetPAT, ratioNbPatches):
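        '''
        Reallocate a ratio of the patches: patches are sampled among the cultures
        exceeding the target surfaces and reassigned to the cultures below their
        target, weighted by the surface delta of each culture.
        '''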
        nbPatches = int(len(self.patches)*ratioNbPatches)
        surfDelta = targetPAT - self.patches.groupby('cultgeopat')['SURF_PARC'].sum()
        cult_to_decrease = surfDelta[surfDelta<0].sort_values(ascending=True).keys().tolist()
        cult_to_increase = surfDelta[surfDelta>0].sort_values(ascending=False).keys().tolist()
        # Sampling the patches to reallocate
        if self.patches['cultgeopat'].isin(cult_to_decrease).sum() >= nbPatches:
            samples = self.patches[self.patches['cultgeopat'].isin(cult_to_decrease)].sample(n=nbPatches, random_state=rng)#.reset_index(drop=True)
        else:
            # All cultural types have decreased to objective
            samples = self.patches.sample(n=nbPatches, random_state=rng)
        # Building the new cultures to reallocate
        if len(cult_to_increase) > 0:
            factors = surfDelta[cult_to_increase]
            factors = (factors*len(samples)/factors.sum()).map(round) # normalize on nb samples
            newCult = pd.Series(cult_to_increase).repeat(factors)
        else:
            # All cultural types have increased to objective
            # So we used all cultures with more weight for highest delta
            factors = -1/surfDelta
            factors = (factors*len(samples)/factors.sum()).map(round) # normalize on nb samples
            newCult = pd.Series(surfDelta.keys().tolist()).repeat(factors)
        if len(newCult) < len(samples): # may be due to factors rounding
            newCult = pd.concat([newCult, newCult.sample(n=len(samples)-len(newCult), random_state=rng)], ignore_index=True)
        newCult = newCult.sample(frac=1, random_state=rng)[:len(samples)].reset_index(drop=True) # shuffle and cut extra elements
        # Doing the reallocation
        self.patches.loc[samples.index.values,'cultgeopat'] = newCult.values
        self.patches.loc[samples.index.values,'cultmodified'] = True

class CulturalIndicator(Indicator):
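    '''
    This indicator computes the total surface allocated to a given cultural type
    '''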
    @overrides
    def __init__(self, config, initial_patches=None, patches_md5sum=None, targetPAT=None):
        self.cultgeopat = config

    @overrides
    def compute_indicator(self, patches):
        return patches[patches['cultgeopat']==self.cultgeopat]['SURF_PARC'].sum()

class TargetDeltaIndicator(Indicator):
    '''
    This indicator computes the total absolute delta between the surfaces of the cultural types and the given target
    '''
    @overrides
    def __init__(self, config=None, initial_patches=None, patches_md5sum=None, targetPAT=None):
        self.targetPAT = targetPAT

    @overrides
    def compute_indicator(self, patches):
        surfDelta = self.targetPAT - patches.groupby('cultgeopat')['SURF_PARC'].sum()
        return surfDelta.abs().sum()

class Indicators:
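    '''
    Registry of all indicators: the five thematic indicators, one surface
    indicator per cultural type, and the TargetDelta indicator.
    '''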
    def __init__(self, config, initial_patches, patches_md5sum, targetPAT):
        self.indicators_names = ['Proximity', 'Resilience', 'Productivity', 'Biodiversity', 'Social']
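        # Each indicator name is resolved with eval() to the class of the same name imported above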
        self._indicators = {
            indicator: eval(indicator)(config.get(indicator.lower()), initial_patches, patches_md5sum, targetPAT)
            for indicator in self.indicators_names
            }
        for cultgeopat in targetPAT.index.tolist():
            self._indicators[cultgeopat] = CulturalIndicator(cultgeopat)
            self.indicators_names.append(cultgeopat)
        self._indicators['TargetDelta'] = TargetDeltaIndicator(targetPAT=targetPAT)
        self.indicators_names.append('TargetDelta')

    def compute_indicators(self, patches):
        return [self._indicators[ind].compute_indicator(patches) for ind in self.indicators_names]

    def compute_indicators_pool(self, scenarios):
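        '''
        Compute the indicators of each given scenario and return them as a
        DataFrame (one row per scenario, one column per indicator).
        '''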
        rows=[]
        for patches in scenarios:
            rows.append(self.compute_indicators(patches))
        return pd.DataFrame(rows, columns=self.indicators_names)

class MCMC:
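    '''
    Monte-Carlo exploration of reallocation scenarios: at each step, new
    scenarios are sampled from the current pareto front by reallocating a ratio
    of patches, scored with the indicators, and filtered to keep only the
    pareto-optimal ones.
    '''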
    def __init__(self, mcmc_config_filename):
        if not os.path.isfile(mcmc_config_filename):
            print('Error: file not found "{}"'.format(mcmc_config_filename))
            print('Please copy the template file "MCMC_config.sample.yml", adjust it to your settings and run this program again')
            sys.exit(1)
        self.mcmc_config = yaml.safe_load(open(mcmc_config_filename,'r'))
        self.multiprocessing = self.mcmc_config.get('multiprocessing', False)
        self.write_shp = self.mcmc_config.get('write_shp', False)
        self.patches_md5sum = md5sum(self.mcmc_config['patches'])
        if 'rng_seed' in self.mcmc_config:
            self.rng = np.random.RandomState(self.mcmc_config['rng_seed'])
        else:
            self.rng = np.random.RandomState(42)
            print('MCMC initialized with default seed') # self.rng.get_state()
        # Copying input data in output dir
        self.outputdir = self.mcmc_config['output_dir'] + '/' + time.strftime('%Y%m%d-%H%M%S')
        if not os.path.exists(self.outputdir):
            os.makedirs(self.outputdir)
            print('All data will be written in {}'.format(self.outputdir))
        else:
            print('Output directory already exists! ({})'.format(self.outputdir))
            sys.exit(1)
        shutil.copy(mcmc_config_filename, self.outputdir)
        for f in [ self.mcmc_config['target'],
                self.mcmc_config['indicators_config']['resilience'],
                self.mcmc_config['indicators_config']['biodiversity']['matrixfilename'],
                self.mcmc_config['indicators_config']['social']['cost_matrix_filename'],
                self.mcmc_config['indicators_config']['social']['patches_costs_filename']
                ]:
            shutil.copy(f, self.outputdir)
        with open(self.outputdir+'/seed.txt', 'w') as outfile:
            outfile.write('{}\n'.format(self.rng.get_state()))
        config_data = {
            'patches_md5sum':self.patches_md5sum,
            'biodiversity_matrix_md5sum':md5sum(self.mcmc_config['indicators_config']['biodiversity']['matrixfilename']),
            'social_patches_costs_md5sum':md5sum(self.mcmc_config['indicators_config']['social']['patches_costs_filename'])
        }
        with open(self.outputdir+'/config.yml', 'w') as outfile:
            yaml.dump(config_data, outfile, default_flow_style=False)
        # finishing init
        self.patches = load_pat_patches(self.mcmc_config['patches'])
        self.patches['cultmodified'] = False
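        # The target surface of each culture for 2050 is obtained by applying the
        # relative evolution between 2016 and 2050 to the current surfaces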
        self.target = pd.read_csv(self.mcmc_config['target'], sep=';',index_col=0)
        targetRatio = (self.target['2050']-self.target['2016'])/self.target['2016']
        self.targetPAT = self.patches.groupby('cultgeopat')['SURF_PARC'].sum()*(1+targetRatio)
        self.indicators = Indicators(self.mcmc_config['indicators_config'], self.patches, self.patches_md5sum, self.targetPAT)


    @staticmethod
    def is_pareto_efficient(costs, return_mask=True):
        """
        Find the pareto-efficient points
        :param costs: An (n_points, n_costs) array
        :param return_mask: True to return a mask
        :return: An array of indices of pareto-efficient points.
            If return_mask is True, this will be an (n_points, ) boolean array
            Otherwise it will be a (n_efficient_points, ) integer array of indices.
        Author:
        Source: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
        """
        is_efficient = np.arange(costs.shape[0])
        n_points = costs.shape[0]
        next_point_index = 0  # Next index in the is_efficient array to search for
        while next_point_index<len(costs):
            nondominated_point_mask = np.any(costs<costs[next_point_index], axis=1)
            nondominated_point_mask[next_point_index] = True
            is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points
            costs = costs[nondominated_point_mask]
            next_point_index = np.sum(nondominated_point_mask[:next_point_index])+1
        if return_mask:
            is_efficient_mask = np.zeros(n_points, dtype = bool)
            is_efficient_mask[is_efficient] = True
            return is_efficient_mask
        else:
            return is_efficient
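
    # Example (hypothetical data): MCMC.is_pareto_efficient(np.array([[1, 2], [2, 1], [3, 3]]))
    # returns array([ True,  True, False]): the third point is dominated by the two others.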

    @staticmethod
    def scenario_scores(patches, indicators):
        return indicators.compute_indicators(patches)

    def step(self, iter_nb, nb_particles, scenarios, scores=None, write_shp=False):
        '''
        Sample new scenarios, evaluate their scores and retain only the pareto front.

        :param iter_nb: (int) number of this iteration: 0 for the initial step, 1 for the first iteration and so on.
        :param nb_particles: (int) number of new scenarios to sample.
        :param scenarios: (DataFrame) scenarios used as a base for sampling the new scenarios.
        :param scores: (DataFrame) scores of each scenario. May be None if the given
        scenarios are only used for sampling and should not be considered for the pareto front.
        :param write_shp: (bool) whether to write the pareto-optimal scenarios as shapefiles.
        '''
        new_scenarios = []
        new_scenarios_id = []
        new_scores = []
        # Loop of sampling and scoring
        for index, patches in tqdm(scenarios.sample(nb_particles, replace=True, random_state=self.rng).iterrows(), total=nb_particles):
            scenario = Scenario(patches[0].copy())
            scenario.reallocate(self.rng, self.targetPAT, self.mcmc_config['ratio_patches_to_modify']) # 3.8 ms
            new_scenarios.append(scenario.patches)
            new_scenarios_id.append(scenario.id)
            if not self.multiprocessing:
                new_scores.append(self.indicators.compute_indicators(scenario.patches))
        if self.multiprocessing:
            new_scores = list(tqdm(self.pool.imap(partial(MCMC.scenario_scores, indicators=self.indicators), new_scenarios, chunksize=50), total=len(new_scenarios)))
        # Merging with data from previous iterations
        new_scores = pd.DataFrame(new_scores, index=new_scenarios_id, columns=self.indicators.indicators_names)
        new_scores['iteration'] = iter_nb
        if scores is None:
            scores = new_scores
            scenarios = pd.DataFrame(new_scenarios, index=new_scenarios_id)
        else:
            scores = pd.concat([scores, new_scores], sort=False)
            scenarios = pd.concat([scenarios, pd.DataFrame(new_scenarios, index=new_scenarios_id)], sort=False)
        # Computing pareto front
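        # Note: is_pareto_efficient minimizes, so all indicator values are
        # assumed to be oriented as costs (lower is better)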
        pareto_mask = MCMC.is_pareto_efficient(scores[['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta']].values)
        scores['pareto'] = pareto_mask
        # Writing output data
        scores.to_csv(self.outputdir+'/mcmc_iter_{0:03d}.csv'.format(iter_nb), index=True)
        try:
            pairplot = sns.pairplot(scores, vars=['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta'],
                    diag_kind="kde",hue="pareto"
                )
            pairplot.savefig(self.outputdir+'/mcmc_iter_{0:03d}.png'.format(iter_nb))
            plt.close(pairplot.fig)
        except (np.linalg.LinAlgError, ValueError) as e:
            # when data doesn't vary enough on a dimension, it is impossible to generate the pareto density
            # and an error is triggered. Here, we ignore this error.
            pass
        scenarios = scenarios[pareto_mask]
        scores = scores[pareto_mask]
        # Writing shapefiles
        if write_shp:
            shp_dir = self.outputdir + '/patches_iter_{0:03d}'.format(iter_nb)
            os.makedirs(shp_dir)
            for index,scenario in scenarios.iterrows():
                scenario = scenario[0]
                scenario[scenario['cultmodified']].drop('cultmodified', axis=1).to_file(shp_dir+'/{0:03d}_{1}'.format(iter_nb,index), encoding='utf-8')
        return [scenarios, scores]

    def run(self, nb_processes=mp.cpu_count()):
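        '''
        Run the chain: initial sampling from the initial patches, then
        max_iterations steps of sampling, scoring and pareto filtering.
        '''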
        if self.multiprocessing:
            self.pool = mp.Pool(processes=nb_processes)
        # Initial sampling and evaluation
        nb_particles = self.mcmc_config['initial_nb_particles']
        scenarios_init = pd.DataFrame([self.patches])
        scenarios, scores = self.step(0, nb_particles, scenarios_init)
        # Iteration
        for i in range(self.mcmc_config['max_iterations']):
            print('Iteration #{}'.format(i+1))
            # Write SHP only for the last iteration
            scenarios, scores = self.step(i+1, nb_particles, scenarios, scores, self.write_shp and i == self.mcmc_config['max_iterations']-1)
        # TODO
        # Storing variation of indicators
        init_var = scores.std()
        # sequential optimization loop
        if self.multiprocessing:
            self.pool.close()
        return [scenarios,scores]

if __name__ == '__main__':
    mcmc = MCMC('MCMC_config.yml')
    # scenario = Scenario(mcmc.patches.copy())
    # scenario.reallocate(mcmc.rng, mcmc.targetPAT, mcmc.mcmc_config['ratio_patches_to_modify'])
    scenarios,scores = mcmc.run()
    # print(mcmc.indicators.biodiversity(mcmc.patches))
    # print(mcmc.indicators.proximity(mcmc.patches))