#!/usr/bin/python
# -*- coding: utf-8 -*-
import pandas as pd
import geopandas as gpd
import os, sys, time, shutil
import yaml
import numpy as np
from Indicator import Indicator
from overrides import overrides
from proximite import Proximite as Proximity
from resilience_list import Resilience
from productivite import Productivity
from indice_biodiversite_2 import Biodiversity
from social import Social
from tqdm import tqdm
from patutils import md5sum, load_pat_patches
import seaborn as sns
import multiprocessing as mp
from functools import partial

class Scenario:
    def __init__(self, patches):
        self.patches = patches

    @staticmethod
    def load_shp(shpfilename):
        '''
        Return an instance of class Scenario by loading a shapefile of initial patches.
        '''
        scenario = Scenario(load_pat_patches(shpfilename))
        return scenario
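
    # A possible usage sketch (file name and ratio are illustrative only):
    #   scenario = Scenario.load_shp('initial_patches.shp')
    #   scenario.reallocate(rng, targetPAT, ratioNbPatches=0.01)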

    def reallocate(self, rng, targetPAT, ratioNbPatches):
        nbPatches = int(len(self.patches)*ratioNbPatches)
        surfDelta = targetPAT - self.patches.groupby('cultgeopat')['SURF_PARC'].sum()
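        # surfDelta > 0: the culture is below its target surface and should gain patches;
        # surfDelta < 0: the culture is above its target and can release patches.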
        cult_to_decrease = surfDelta[surfDelta<0].sort_values(ascending=True).keys().tolist()
        cult_to_increase = surfDelta[surfDelta>0].sort_values(ascending=False).keys().tolist()
        # Sampling the patches to reallocate
        if self.patches['cultgeopat'].isin(cult_to_decrease).sum() >= nbPatches:
            samples = self.patches[self.patches['cultgeopat'].isin(cult_to_decrease)].sample(n=nbPatches, random_state=rng)#.reset_index(drop=True)
        else:
            # Not enough patches left in the cultures to decrease (their targets are reached),
            # so sample among all patches
            samples = self.patches.sample(n=nbPatches, random_state=rng)
        # Building the new cultures to assign to the sampled patches
        if len(cult_to_increase) > 0:
            factors = surfDelta[cult_to_increase]
            factors = (factors*len(samples)/factors.sum()).map(round) # normalize on nb samples
            newCult = pd.Series(cult_to_increase).repeat(factors)
        else:
            # All cultural types have already reached their objective,
            # so we use all cultures, giving more weight to the ones closest to their target (highest delta)
            factors = -1/surfDelta
            factors = (factors*len(samples)/factors.sum()).map(round) # normalize on nb samples
            newCult = pd.Series(surfDelta.keys().tolist()).repeat(factors)
        if len(newCult) < len(samples): # can be shorter due to the rounding of factors
            newCult = newCult.append(newCult.sample(n=len(samples)-len(newCult), random_state=rng), ignore_index=True)
        newCult = newCult.sample(frac=1, random_state=rng)[:len(samples)].reset_index(drop=True) # shuffle and cut extra elements
        # Doing the reallocation
        self.patches.loc[samples.index.values,'cultgeopat'] = newCult.values
        self.patches.loc[samples.index.values,'cultmodified'] = True

class CulturalIndicator(Indicator):
    @overrides
    def __init__(self, config, initial_patches=None, patches_md5sum=None, targetPAT=None):
        self.cultgeopat = config

    @overrides
    def compute_indicator(self, patches):
        return patches[patches['cultgeopat']==self.cultgeopat]['SURF_PARC'].sum()

class Indicators:
    def __init__(self, config, initial_patches, patches_md5sum, targetPAT):
        self.indicators_names = ['Proximity', 'Resilience', 'Productivity', 'Biodiversity', 'Social']
        self._indicators = {
            indicator: eval(indicator)(config.get(indicator.lower()), initial_patches, patches_md5sum, targetPAT)
            for indicator in self.indicators_names
            }
        for cultgeopat in targetPAT.index.tolist():
            self._indicators[cultgeopat] = CulturalIndicator(cultgeopat)
            self.indicators_names.append(cultgeopat)

    def compute_indicators(self, patches):
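        # The indicators are inverted (1/x) so that higher raw indicator values become
        # lower costs, which matches the minimizing Pareto filter in MCMC.is_pareto_efficient.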
        return [1/self._indicators[ind].compute_indicator(patches) for ind in self.indicators_names]

    def compute_indicators_pool(self, scenarios):
        rows=[]
        for patches in scenarios:
            rows.append(self.compute_indicators(patches))
        return pd.DataFrame(rows, columns=self.indicators_names)

class MCMC:
    def __init__(self, mcmc_config_filename):
        if not os.path.isfile(mcmc_config_filename):
            print('Error: file not found "{}"'.format(mcmc_config_filename))
            print('Please copy the template file "MCMC_config.sample.yml", adjust it to your settings and run this program again')
            sys.exit(1)
        self.mcmc_config = yaml.safe_load(open(mcmc_config_filename, 'r'))
        self.multiprocessing = self.mcmc_config.get('multiprocessing', False)
        self.patches_md5sum = md5sum(self.mcmc_config['patches'])
        if 'rng_seed' in self.mcmc_config:
            self.rng = np.random.RandomState(self.mcmc_config['rng_seed'])
        else:
            self.rng = np.random.RandomState(42)
            print('MCMC initialized with default seed') # self.rng.get_state()
        # Copying input data in output dir
        self.outputdir = self.mcmc_config['output_dir'] + '/' + time.strftime('%Y%m%d-%H%M%S')
        if not os.path.exists(self.outputdir):
            os.makedirs(self.outputdir)
            print('All data will be written to {}'.format(self.outputdir))
        else:
            print('Output directory already exists! ({})'.format(self.outputdir))
            sys.exit(1)
        shutil.copy(mcmc_config_filename, self.outputdir)
        for f in [ self.mcmc_config['target'],
                self.mcmc_config['indicators_config']['resilience'],
                self.mcmc_config['indicators_config']['biodiversity']['matrixfilename'],
                self.mcmc_config['indicators_config']['social']['cost_matrix_filename'],
                self.mcmc_config['indicators_config']['social']['patches_costs_filename']
                ]:
            shutil.copy(f, self.outputdir)
        with open(self.outputdir+'/seed.txt', 'w') as outfile:
            outfile.write('{}\n'.format(self.rng.get_state()))
        config_data = {
            'patches_md5sum':self.patches_md5sum,
            'biodiversity_matrix_md5sum':md5sum(self.mcmc_config['indicators_config']['biodiversity']['matrixfilename']),
            'social_patches_costs_md5sum':md5sum(self.mcmc_config['indicators_config']['social']['patches_costs_filename'])
        }
        with open(self.outputdir+'/config.yml', 'w') as outfile:
            yaml.dump(config_data, outfile, default_flow_style=False)
        # finishing init
        self.patches = load_pat_patches(self.mcmc_config['patches'])
        self.patches['cultmodified'] = False
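        # The target file gives surfaces per cultural type for 2016 and 2050; the relative
        # change between both years is applied to the current surfaces to obtain targetPAT.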
        self.target = pd.read_csv(self.mcmc_config['target'], sep=';',index_col=0)
        targetRatio = (self.target['2050']-self.target['2016'])/self.target['2016']
        self.targetPAT = self.patches.groupby('cultgeopat')['SURF_PARC'].sum()*(1+targetRatio)
        self.indicators = Indicators(self.mcmc_config['indicators_config'], self.patches, self.patches_md5sum, self.targetPAT)


    @staticmethod
    def is_pareto_efficient(costs, return_mask=True):
        """
        Find the Pareto-efficient points
        :param costs: An (n_points, n_costs) array
        :param return_mask: True to return a mask
        :return: An array of indices of pareto-efficient points.
            If return_mask is True, this will be an (n_points, ) boolean array
            Otherwise it will be a (n_efficient_points, ) integer array of indices.
        Author:
        Source: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
        """
        is_efficient = np.arange(costs.shape[0])
        n_points = costs.shape[0]
        next_point_index = 0  # Next index in the is_efficient array to search for
        while next_point_index<len(costs):
            nondominated_point_mask = np.any(costs<costs[next_point_index], axis=1)
            nondominated_point_mask[next_point_index] = True
            is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points
            costs = costs[nondominated_point_mask]
            next_point_index = np.sum(nondominated_point_mask[:next_point_index])+1
        if return_mask:
            is_efficient_mask = np.zeros(n_points, dtype = bool)
            is_efficient_mask[is_efficient] = True
            return is_efficient_mask
        else:
            return is_efficient
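
    # Minimal sketch of the expected behaviour (illustrative values, lower cost is better):
    #   MCMC.is_pareto_efficient(np.array([[1., 2.], [2., 1.], [3., 3.]]))
    #   -> array([ True,  True, False])   # [3., 3.] is dominated by both other points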

    def scenario_scores(patches, indicators):
        return indicators.compute_indicators(patches)

    def step(self, iter_nb, nb_particles, scenarios, scores=None):
        '''
        Sample new scenarios, evaluate their scores and retain only the Pareto front.

        :param iter_nb: (int) number of this iteration: 0 for the initial step, 1 for the first iteration and so on.
        :param nb_particles: number of new scenarios to sample
        :param scenarios: (DataFrame) scenarios used as a basis for sampling the new scenarios.
        :param scores: (DataFrame) scores of each scenario. May be None if the given
        scenarios are only used for sampling and should not be considered for the Pareto front.
        '''
        new_scenarios = []
        new_scores = []
        # Loop of sampling and scoring
        for index, patches in tqdm(scenarios.sample(nb_particles, replace=True, random_state=self.rng).iterrows(), total=nb_particles):
            scenario = Scenario(patches[0].copy())
            scenario.reallocate(self.rng, self.targetPAT, self.mcmc_config['ratio_patches_to_modify']) # 3.8 ms
            new_scenarios.append(scenario.patches)
            if not self.multiprocessing:
                new_scores.append(self.indicators.compute_indicators(scenario.patches))
        if self.multiprocessing:
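            # Score all sampled scenarios in parallel; functools.partial binds the shared
            # Indicators instance, which is pickled and sent to the worker processes.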
            new_scores = self.pool.map(partial(MCMC.scenario_scores, indicators=self.indicators), new_scenarios)
        # Merging with the data from previous iterations
        new_scores = pd.DataFrame(new_scores, columns=self.indicators.indicators_names)
        new_scores['iteration'] = iter_nb
        if scores is None:
            scores = new_scores
            scenarios = pd.DataFrame(new_scenarios)
        else:
            scores = scores.append(new_scores, ignore_index=True, sort=False)
            scenarios = scenarios.append(pd.DataFrame(new_scenarios), ignore_index=True, sort=False)
        # Computing the Pareto front
        pareto_mask = MCMC.is_pareto_efficient(scores[['Resilience','Proximity','Productivity','Biodiversity','Social']].values)
        scores['pareto'] = pareto_mask
        # Writing output data
        scores.to_csv(self.outputdir+'/mcmc_iter_{}.csv'.format(iter_nb), index=False)
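        # Pairwise scatter plot of the five indicators, with the Pareto front highlighted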
        try:
            sns.pairplot(scores, vars=['Resilience','Proximity','Productivity','Biodiversity','Social'],
                    diag_kind="kde",hue="pareto"
                ).savefig(self.outputdir+'/mcmc_iter_{}.png'.format(iter_nb))
        except (np.linalg.linalg.LinAlgError, ValueError):
            # When the data doesn't vary enough along a dimension, the kernel density
            # estimate of the diagonal plots cannot be computed and an error is raised.
            # We simply skip the plot in that case.
            pass
        scenarios = scenarios[pareto_mask]
        scores = scores[pareto_mask]
        # Writing shapefiles
        shp_dir = self.outputdir + '/patches_iter_{}'.format(iter_nb)
        os.makedirs(shp_dir)
        for index,scenario in scenarios.iterrows():
            scenario = scenario[0]
            scenario[scenario['cultmodified']].drop('cultmodified', axis=1).to_file(shp_dir+'/{}_{}'.format(iter_nb,index), encoding='utf-8')
        return [scenarios, scores]

    def run(self, nb_processes=mp.cpu_count()):
        if self.multiprocessing:
            self.pool = mp.Pool(processes=nb_processes)
        # Initial sampling and evaluation
        nb_particles = self.mcmc_config['initial_nb_particles']
        scenarios_init = pd.DataFrame([self.patches])
        scenarios, scores = self.step(0, nb_particles, scenarios_init)
        # Iterations: each step resamples from the current Pareto front
        for i in range(10):
            print('Iteration #{}'.format(i))
            scenarios, scores = self.step(i+1, nb_particles, scenarios, scores)
        # TODO
        # Storing variation of indicators
        init_var = scores.std()
        # sequential optimization loop
        if self.multiprocessing:
            self.pool.close()
        return [scenarios,scores]

if __name__ == '__main__':
    mcmc = MCMC('MCMC_config.yml')
    # scenario = Scenario(mcmc.patches.copy())
    # scenario.reallocate(mcmc.rng, mcmc.targetPAT, mcmc.mcmc_config['ratio_patches_to_modify'])
    scenario,scores = mcmc.run()
    # print(mcmc.indicators.biodiversity(mcmc.patches))
    # print(mcmc.indicators.proximity(mcmc.patches))