MCMC.py 14.5 KB
Newer Older
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
1
2
3
4
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pandas as pd
import geopandas as gpd
5
import os, sys, time, shutil
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
6
7
import yaml
import numpy as np
8
9
10
from Indicator import Indicator
from overrides import overrides
from proximite import Proximite as Proximity
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
11
12
13
14
15
from resilience_list import Resilience
from productivite import Productivity
from indice_biodiversite_2 import Biodiversity
from social import Social
from tqdm import tqdm
16
from patutils import md5sum, load_pat_patches
17
import seaborn as sns
18
import matplotlib.pyplot as plt
19
20
import multiprocessing as mp
from functools import partial
21
import itertools
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
22
23

class Scenario:
    '''
    A candidate land-use scenario: a set of patches (GeoDataFrame) whose
    cultural allocation ('cultgeopat') can be mutated toward a target
    distribution of surfaces.
    '''
    # class-wide counter giving each scenario instance a unique id
    _counter = itertools.count()

    def __init__(self, patches):
        '''
        :param patches: (Geo)DataFrame of patches; must contain at least the
            columns 'cultgeopat' (cultural type) and 'SURF_PARC' (surface).
        '''
        self.patches = patches
        self.id = next(Scenario._counter)

    @staticmethod
    def load_shp(shpfilename):
        '''
        Return an instance of class Scenario by loading a shapefile of initial patches.
        '''
        return Scenario(load_pat_patches(shpfilename))

    def reallocate(self, rng, targetPAT, ratioNbPatches):
        '''
        Randomly reassign the cultural type of a fraction of the patches so
        that the per-culture surfaces move toward the target.

        :param rng: numpy RandomState used for every sampling (reproducibility)
        :param targetPAT: Series of target surface indexed by cultural type
        :param ratioNbPatches: fraction (0-1) of the patches to reallocate
        '''
        nbPatches = int(len(self.patches)*ratioNbPatches)
        surfDelta = targetPAT - self.patches.groupby('cultgeopat')['SURF_PARC'].sum()
        cult_to_decrease = surfDelta[surfDelta<0].sort_values(ascending=True).keys().tolist()
        cult_to_increase = surfDelta[surfDelta>0].sort_values(ascending=False).keys().tolist()
        # Sampling the patches to reallocate
        decrease_candidates = self.patches[self.patches['cultgeopat'].isin(cult_to_decrease)]
        if len(decrease_candidates) >= nbPatches:
            samples = decrease_candidates.sample(n=nbPatches, random_state=rng)
        else:
            # All cultural types have decreased to objective
            samples = self.patches.sample(n=nbPatches, random_state=rng)
        # Building the new culture reallocated
        if len(cult_to_increase) > 0:
            factors = surfDelta[cult_to_increase]
            factors = (factors*len(samples)/factors.sum()).map(round) # normalize on nb samples
            newCult = pd.Series(cult_to_increase).repeat(factors)
        else:
            # All cultural types have increased to objective
            # So we used all cultures with more weight for highest delta
            factors = -1/surfDelta
            factors = (factors*len(samples)/factors.sum()).map(round) # normalize on nb samples
            newCult = pd.Series(surfDelta.keys().tolist()).repeat(factors)
        if len(newCult) < len(samples): # may be due to factors rounding
            # pd.concat replaces Series.append (removed in pandas >= 2.0)
            extra = newCult.sample(n=len(samples)-len(newCult), random_state=rng)
            newCult = pd.concat([newCult, extra], ignore_index=True)
        newCult = newCult.sample(frac=1, random_state=rng)[:len(samples)].reset_index(drop=True) # shuffle and cut extra elements
        # Doing the reallocation
        self.patches.loc[samples.index.values,'cultgeopat'] = newCult.values
        self.patches.loc[samples.index.values,'cultmodified'] = True
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
64

65
66
67
68
69
70
71
72
73
class CulturalIndicator(Indicator):
    '''
    Indicator reporting the total surface currently allocated to one
    cultural type.
    '''
    @overrides
    def __init__(self, config, initial_patches=None, patches_md5sum=None, targetPAT=None):
        # here the config value is simply the name of the cultural type to track
        self.cultgeopat = config

    @overrides
    def compute_indicator(self, patches):
        mask = patches['cultgeopat'] == self.cultgeopat
        return patches.loc[mask, 'SURF_PARC'].sum()

74
75
76
77
78
79
80
81
82
83
84
85
86
class TargetDeltaIndicator(Indicator):
    '''
    This indicator computes the delta between the surfaces of cultural and the given target
    '''
    @overrides
    def __init__(self, config=None, initial_patches=None, patches_md5sum=None, targetPAT=None):
        self.targetPAT = targetPAT

    @overrides
    def compute_indicator(self, patches):
        # total absolute distance between current per-culture surfaces and the target
        current = patches.groupby('cultgeopat')['SURF_PARC'].sum()
        return (self.targetPAT - current).abs().sum()

Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
87
88
class Indicators:
    '''
    Registry of every indicator evaluated on a scenario: the five global
    indicators, one surface indicator per cultural type, and the distance
    to the target ('TargetDelta'). Order of computation is fixed by
    self.indicators_names.
    '''
    def __init__(self, config, initial_patches, patches_md5sum, targetPAT):
        # Explicit registry instead of eval(): same classes, but safer and
        # easier to trace. Insertion order preserves the original names order.
        indicator_classes = {
            'Proximity': Proximity,
            'Resilience': Resilience,
            'Productivity': Productivity,
            'Biodiversity': Biodiversity,
            'Social': Social,
        }
        self.indicators_names = list(indicator_classes)
        self._indicators = {
            name: cls(config.get(name.lower()), initial_patches, patches_md5sum, targetPAT)
            for name, cls in indicator_classes.items()
            }
        # one indicator per cultural type, tracking its allocated surface
        for cultgeopat in targetPAT.index.tolist():
            self._indicators[cultgeopat] = CulturalIndicator(cultgeopat)
            self.indicators_names.append(cultgeopat)
        # global distance to the target surfaces
        self._indicators['TargetDelta'] = TargetDeltaIndicator(targetPAT=targetPAT)
        self.indicators_names.append('TargetDelta')

    def compute_indicators(self, patches):
        '''Return the list of indicator values for patches, in indicators_names order.'''
        return [self._indicators[ind].compute_indicator(patches) for ind in self.indicators_names]

    def compute_indicators_pool(self, scenarios):
        '''Compute indicators for each scenario and return a DataFrame (one row per scenario).'''
        rows = []
        for patches in scenarios:
            rows.append(self.compute_indicators(patches))
        return pd.DataFrame(rows, columns=self.indicators_names)

class MCMC:
    def __init__(self, mcmc_config_filename):
        '''
        Load the YAML configuration, prepare a timestamped output directory
        (copying the input data for reproducibility), then initialize the
        patches, the target surfaces and the indicators.

        :param mcmc_config_filename: path of the YAML configuration file.
        '''
        if not os.path.isfile(mcmc_config_filename):
            print('Error: file not found "{}"'.format(mcmc_config_filename))
            print('Please copy the template file "MCMC_config.sample.yml" and adjust to your settings and run again this program')
            sys.exit(1)
        # safe_load: the config must not trigger arbitrary object construction;
        # 'with' also closes the handle that the previous one-liner leaked
        with open(mcmc_config_filename, 'r') as config_file:
            self.mcmc_config = yaml.safe_load(config_file)
        self.multiprocessing = self.mcmc_config.get('multiprocessing', False)
        self.write_data = self.mcmc_config.get('write_data', False)
        self.patches_md5sum = md5sum(self.mcmc_config['patches'])
        if 'rng_seed' in self.mcmc_config:
            self.rng = np.random.RandomState(self.mcmc_config['rng_seed'])
        else:
            self.rng = np.random.RandomState(42)
            print('MCMC initialized with default seed') # self.rng.get_state()
        # Copying input data in output dir
        self.outputdir = self.mcmc_config['output_dir'] + '/' + time.strftime('%Y%m%d-%H%M%S')
        if not os.path.exists(self.outputdir):
            os.makedirs(self.outputdir)
            print('All data will be written in {}'.format(self.outputdir))
        else:
            print('Output directory already exists! ({})'.format(self.outputdir))
            sys.exit(1)
        shutil.copy(mcmc_config_filename, self.outputdir)
        for f in [ self.mcmc_config['target'],
                self.mcmc_config['indicators_config']['resilience'],
                self.mcmc_config['indicators_config']['social']['cost_matrix_filename']
                ]:
            shutil.copy(f, self.outputdir)
        # Store the RNG state so a run can be audited/reproduced
        with open(self.outputdir+'/seed.txt', 'w') as outfile:
            outfile.write('{}\n'.format(self.rng.get_state()))
        # Checksums of the input data, for traceability of the results
        config_data = {
            'patches_md5sum':self.patches_md5sum,
            'biodiversity_matrix_md5sum':md5sum(self.mcmc_config['indicators_config']['biodiversity']['matrixfilename']),
            'social_patches_costs_md5sum':md5sum(self.mcmc_config['indicators_config']['social']['patches_costs_filename'])
        }
        with open(self.outputdir+'/config.yml', 'w') as outfile:
            yaml.dump(config_data, outfile, default_flow_style=False)
        # finishing init
        self.patches = load_pat_patches(self.mcmc_config['patches'])
        self.patches['cultmodified'] = False
        # Target surface per cultural type, scaled by the relative evolution
        # between the '2016' and '2050' columns of the target file
        self.target = pd.read_csv(self.mcmc_config['target'], sep=';',index_col=0)
        targetRatio = (self.target['2050']-self.target['2016'])/self.target['2016']
        self.targetPAT = self.patches.groupby('cultgeopat')['SURF_PARC'].sum()*(1+targetRatio)
        self.indicators = Indicators(self.mcmc_config['indicators_config'], self.patches, self.patches_md5sum, self.targetPAT)

155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182

    def is_pareto_efficient(costs, return_mask = True):
        """
        Find the pareto-efficient points
        :param costs: An (n_points, n_costs) array
        :param return_mask: True to return a mask
        :return: An array of indices of pareto-efficient points.
            If return_mask is True, this will be an (n_points, ) boolean array
            Otherwise it will be a (n_efficient_points, ) integer array of indices.
        Author:
        Source: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
        """
        is_efficient = np.arange(costs.shape[0])
        n_points = costs.shape[0]
        next_point_index = 0  # Next index in the is_efficient array to search for
        while next_point_index<len(costs):
            nondominated_point_mask = np.any(costs<costs[next_point_index], axis=1)
            nondominated_point_mask[next_point_index] = True
            is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points
            costs = costs[nondominated_point_mask]
            next_point_index = np.sum(nondominated_point_mask[:next_point_index])+1
        if return_mask:
            is_efficient_mask = np.zeros(n_points, dtype = bool)
            is_efficient_mask[is_efficient] = True
            return is_efficient_mask
        else:
            return is_efficient

183
184
185
    def scenario_scores(patches, indicators):
        return indicators.compute_indicators(patches)

186
    def step(self, iter_nb, nb_particles, scenarios, scores=None, write_data=False):
        '''
        Sample new scenarios, evaluate their scores and retain only pareto front.

        :param iter_nb: (int) number of this iteration, 0 for initial step, 1 for first iteration and so on.
        :param nb_particles: number of new scenarios to sample
        :param scenarios: (DataFrame) list of the scenarios used as base for sampling the new scenarios.
        :param scores: (DataFrame) list of scores for each scenario. May be None
        if given scenario are only used for sampling and will not be considered for pareto front.
        :param write_data: (bool) if True, also write the modified patches of the
        retained scenarios as shapefiles.
        :return: [scenarios, scores] restricted to the pareto-optimal particles.
        '''
        new_scenarios = []
        new_scenarios_id = []
        new_scores = []
        # Loop of sampling and scoring
        for index, patches in tqdm(scenarios.sample(nb_particles, replace=True, random_state=self.rng).iterrows(), total=nb_particles):
            scenario = Scenario(patches[0].copy())
            scenario.reallocate(self.rng, self.targetPAT, self.mcmc_config['ratio_patches_to_modify']) # 3.8 ms
            new_scenarios.append(scenario.patches)
            new_scenarios_id.append(scenario.id)
            if not self.multiprocessing:
                new_scores.append(self.indicators.compute_indicators(scenario.patches))
        if self.multiprocessing:
            new_scores = list(tqdm(self.pool.imap(partial(MCMC.scenario_scores, indicators=self.indicators), new_scenarios, chunksize=50), total=len(new_scenarios)))
        # merging with precedent data
        start_time = time.time()
        new_scores = pd.DataFrame(new_scores, index=new_scenarios_id, columns=self.indicators.indicators_names)
        new_scores['iteration'] = iter_nb
        if scores is None:
            scores = new_scores
            scenarios = pd.DataFrame(new_scenarios, columns=['patches'], index=new_scenarios_id)
            scenarios['iteration'] = iter_nb
        else:
            # pd.concat replaces DataFrame.append (removed in pandas >= 2.0)
            scores = pd.concat([scores, new_scores], sort=False)
            new_scenarios = pd.DataFrame(new_scenarios, columns=['patches'], index=new_scenarios_id)
            new_scenarios['iteration'] = iter_nb
            scenarios = pd.concat([scenarios, new_scenarios], sort=False)
        elapsed_time = time.time() - start_time
        print('Data merged in {} - '.format(time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
        # Computing pareto front
        start_time = time.time()
        pareto_mask = MCMC.is_pareto_efficient(scores[['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta']].values)
        scores['pareto'] = pareto_mask
        elapsed_time = time.time() - start_time
        print('Pareto front computed in {} - '.format(time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
        # Writing output data
        start_time = time.time()
        scores.to_csv(self.outputdir+'/mcmc_iter_{0:03d}.csv'.format(iter_nb), index=True)
        try:
            pairplot = sns.pairplot(scores, vars=['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta'],
                    diag_kind="kde",hue="pareto"
                )
            pairplot.savefig(self.outputdir+'/mcmc_iter_{0:03d}.png'.format(iter_nb))
            plt.close(pairplot.fig)
        except (np.linalg.LinAlgError, ValueError):
            # when data doesn't vary enough on a dimension, it is impossible to generate the pareto density
            # and an error is triggered. Here, we ignore this error.
            pass
        elapsed_time = time.time() - start_time
        print('Scores written in {} - '.format(time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
        # Retaining only optimal particules
        scenarios = scenarios[pareto_mask]
        scores = scores[pareto_mask]
        if write_data:
            # Writing shapefiles
            start_time = time.time()
            shp_dir = self.outputdir + '/patches_iter_{0:03d}'.format(iter_nb)
            os.makedirs(shp_dir)
            for index, scenario in scenarios.iterrows():
                scenario = scenario[0]
                # '{0:03d}_{1}': the original '{0:03d}_{}' mixed explicit and
                # automatic field numbering, which raises ValueError
                scenario[scenario['cultmodified']].drop('cultmodified', axis=1).to_file(shp_dir+'/{0:03d}_{1}'.format(iter_nb,index), encoding='utf-8')
            elapsed_time = time.time() - start_time
            print(' - Shape files written in {}'.format(time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
        print()
        return [scenarios, scores]
260

261
    def run(self, nb_processes=mp.cpu_count()):
        '''
        Execute the whole MCMC search: one initial sampling step followed by
        max_iterations refinement steps, each keeping only the pareto front.

        :param nb_processes: size of the worker pool when multiprocessing is enabled.
        :return: [scenarios, scores] of the final pareto front.
        '''
        if self.multiprocessing:
            self.pool = mp.Pool(processes=nb_processes)
        # Initial sampling and evaluation
        nb_particles = self.mcmc_config['initial_nb_particles']
        scenarios, scores = self.step(0, nb_particles, pd.DataFrame([self.patches]))
        # Iteration
        max_iterations = self.mcmc_config['max_iterations']
        for iteration in range(1, max_iterations + 1):
            print('Iteration #{}'.format(iteration))
            # Write SHP only for the last iteration
            is_last = iteration == max_iterations
            scenarios, scores = self.step(iteration, nb_particles, scenarios, scores, self.write_data and is_last)
        # TODO
        # Storing variation of indicators
        init_var = scores.std()
        # sequential optimization loop
        if self.multiprocessing:
            self.pool.close()
        return [scenarios, scores]
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
280
281
282

if __name__ == '__main__':
    # Launch a full MCMC run driven by the local configuration file.
    mcmc = MCMC('MCMC_config.yml')
    # scenario = Scenario(mcmc.patches.copy())
    # scenario.reallocate(mcmc.rng, mcmc.targetPAT, mcmc.mcmc_config['ratio_patches_to_modify'])
    scenario, scores = mcmc.run()
    # print(mcmc.indicators.biodiversity(mcmc.patches))
    # print(mcmc.indicators.proximity(mcmc.patches))