#!/usr/bin/python
# -*- coding: utf-8 -*-
import multiprocessing as mp
import os
import shutil
import sys
import time
from functools import partial

import geopandas as gpd
import numpy as np
import pandas as pd
import seaborn as sns
import yaml
from tqdm import tqdm

from Indicator import Indicator
from indice_biodiversite_2 import Biodiversity
from overrides import overrides
from patutils import md5sum, load_pat_patches
from productivite import Productivity
from proximite import Proximite as Proximity
from resilience_list import Resilience
from social import Social

class Scenario:
    '''
    A candidate land-use scenario: a set of patches whose cultural category
    ('cultgeopat') can be mutated towards target surfaces by category.
    '''

    def __init__(self, patches):
        # patches: (Geo)DataFrame with at least the columns
        # 'cultgeopat' (cultural category) and 'SURF_PARC' (patch surface).
        self.patches = patches

    @staticmethod
    def load_shp(shpfilename):
        '''
        Return an instance of class Scenario by loading a shapefile of initial patches.
        '''
        scenario = Scenario(load_pat_patches(shpfilename))
        return scenario

    def reallocate(self, rng, targetPAT, ratioNbPatches):
        '''
        Randomly reassign the cultural category of a fraction of the patches,
        drawing the new categories so that surfaces move towards the target.

        :param rng: numpy RandomState used for every sampling (reproducibility)
        :param targetPAT: Series of target surface indexed by cultural category
        :param ratioNbPatches: fraction (0-1) of the patches to reallocate
        '''
        nbPatches = int(len(self.patches)*ratioNbPatches)
        # Surface delta per category: negative -> category must shrink,
        # positive -> category must grow.
        surfDelta = targetPAT - self.patches.groupby('cultgeopat')['SURF_PARC'].sum()
        cult_to_decrease = surfDelta[surfDelta<0].sort_values(ascending=True).keys().tolist()
        cult_to_increase = surfDelta[surfDelta>0].sort_values(ascending=False).keys().tolist()
        # Sampling the patches to reallocate.
        # Bug fix: count the matching patches with .sum() on the boolean mask;
        # len() of the mask is always the total number of patches, so the
        # fallback branch was unreachable and sample() could raise when fewer
        # candidate patches existed than nbPatches.
        if self.patches['cultgeopat'].isin(cult_to_decrease).sum() >= nbPatches:
            samples = self.patches[self.patches['cultgeopat'].isin(cult_to_decrease)].sample(n=nbPatches, random_state=rng)#.reset_index(drop=True)
        else:
            # All cultural types have decreased to objective
            samples = self.patches.sample(n=nbPatches, random_state=rng)
        # Building the new culture reallocated
        if len(cult_to_increase) > 0:
            factors = surfDelta[cult_to_increase]
            factors = (factors*len(samples)/factors.sum()).map(round) # normalize on nb samples
            newCult = pd.Series(cult_to_increase).repeat(factors)
        else:
            # All cultural types have increased to objective
            # So we used all cultures with more weight for highest delta
            factors = -1/surfDelta
            factors = (factors*len(samples)/factors.sum()).map(round) # normalize on nb samples
            newCult = pd.Series(surfDelta.keys().tolist()).repeat(factors)
        if len(newCult) < len(samples): # may be due to factors rounding
            # pd.concat replaces the deprecated/removed Series.append
            extra = newCult.sample(n=len(samples)-len(newCult), random_state=rng)
            newCult = pd.concat([newCult, extra], ignore_index=True)
        newCult = newCult.sample(frac=1, random_state=rng)[:len(samples)].reset_index(drop=True) # shuffle and cut extra elements
        # Doing the reallocation
        self.patches.loc[samples.index.values,'cultgeopat'] = newCult.values

60
61
62
63
64
65
66
67
68
class CulturalIndicator(Indicator):
    '''
    Indicator reporting the total surface allocated to one cultural category.
    '''

    @overrides
    def __init__(self, config, initial_patches=None, patches_md5sum=None, targetPAT=None):
        # For this indicator, `config` directly holds the category name.
        self.cultgeopat = config

    @overrides
    def compute_indicator(self, patches):
        # Sum the surface of the patches belonging to this category.
        selection = patches['cultgeopat'] == self.cultgeopat
        return patches.loc[selection, 'SURF_PARC'].sum()

Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
69
70
class Indicators:
    '''
    Aggregates every indicator used to score a scenario: the five global
    indicators plus one surface indicator per cultural category of the target.
    '''

    def __init__(self, config, initial_patches, patches_md5sum, targetPAT):
        self.indicators_names = ['Proximity', 'Resilience', 'Productivity', 'Biodiversity', 'Social']
        # Look the indicator classes up in the module namespace instead of
        # eval(): same resolution, without evaluating arbitrary strings.
        self._indicators = {
            indicator: globals()[indicator](config.get(indicator.lower()), initial_patches, patches_md5sum, targetPAT)
            for indicator in self.indicators_names
            }
        # One extra indicator per cultural category of the target
        for cultgeopat in targetPAT.index.tolist():
            self._indicators[cultgeopat] = CulturalIndicator(cultgeopat)
            self.indicators_names.append(cultgeopat)

    def compute_indicators(self, patches):
        '''
        Return the list of scores (one per indicator, in indicators_names
        order) for the given patches. Scores are the reciprocal of the raw
        indicator values: the MCMC minimizes them, so maximizing a raw
        indicator minimizes its score.
        NOTE(review): raises ZeroDivisionError if a raw indicator value is 0.
        '''
        return [1/self._indicators[ind].compute_indicator(patches) for ind in self.indicators_names]

    def compute_indicators_pool(self, scenarios):
        '''
        Score several scenarios and return a DataFrame with one row per
        scenario and one column per indicator.
        '''
        rows = []
        for patches in scenarios:
            rows.append(self.compute_indicators(patches))
        return pd.DataFrame(rows, columns=self.indicators_names)

class MCMC:
    '''
    Monte-Carlo exploration of land-use reallocation scenarios.

    Starting from the initial patches, each iteration samples mutated
    scenarios, scores them with all indicators and keeps only the
    Pareto-efficient set as seeds for the next iteration. All inputs and
    results are copied/written into a timestamped output directory.
    '''

    def __init__(self, mcmc_config_filename):
        if not os.path.isfile(mcmc_config_filename):
            print('Error: file not found "{}"'.format(mcmc_config_filename))
            print('Please copy the template file "MCMC_config.sample.yml" and adjust to your settings and run again this program')
            sys.exit(1)
        # safe_load: the config holds plain scalars/mappings only, and this
        # avoids arbitrary object construction from the YAML file. The file
        # is opened in a with-block so the handle is always closed.
        with open(mcmc_config_filename, 'r') as config_file:
            self.mcmc_config = yaml.safe_load(config_file)
        self.multiprocessing = self.mcmc_config.get('multiprocessing', False)
        self.patches_md5sum = md5sum(self.mcmc_config['patches'])
        # Seeded RNG so a whole chain is reproducible
        if 'rng_seed' in self.mcmc_config:
            self.rng = np.random.RandomState(self.mcmc_config['rng_seed'])
        else:
            self.rng = np.random.RandomState(42)
            print('MCMC initialized with default seed') # self.rng.get_state()
        # Copying input data in output dir
        self.outputdir = self.mcmc_config['output_dir'] + '/' + time.strftime('%Y%m%d-%H%M%S')
        if not os.path.exists(self.outputdir):
            os.makedirs(self.outputdir)
            print('All data will be written in {}'.format(self.outputdir))
        else:
            print('Output directory already exists! ({})'.format(self.outputdir))
            sys.exit(1)
        shutil.copy(mcmc_config_filename, self.outputdir)
        for f in [ self.mcmc_config['target'],
                self.mcmc_config['indicators_config']['resilience'],
                self.mcmc_config['indicators_config']['biodiversity']['matrixfilename'],
                self.mcmc_config['indicators_config']['social']['cost_matrix_filename'],
                self.mcmc_config['indicators_config']['social']['patches_costs_filename']
                ]:
            shutil.copy(f, self.outputdir)
        # Store the RNG state and the input checksums for traceability
        with open(self.outputdir+'/seed.txt', 'w') as outfile:
            outfile.write('{}\n'.format(self.rng.get_state()))
        config_data = {
            'patches_md5sum':self.patches_md5sum,
            'biodiversity_matrix_md5sum':md5sum(self.mcmc_config['indicators_config']['biodiversity']['matrixfilename']),
            'social_patches_costs_md5sum':md5sum(self.mcmc_config['indicators_config']['social']['patches_costs_filename'])
        }
        with open(self.outputdir+'/config.yml', 'w') as outfile:
            yaml.dump(config_data, outfile, default_flow_style=False)
        # finishing init
        self.patches = load_pat_patches(self.mcmc_config['patches'])
        self.target = pd.read_csv(self.mcmc_config['target'], sep=';', index_col=0)
        # Target surfaces by category: current surfaces scaled by the
        # 2016->2050 evolution ratio read from the target file.
        targetRatio = (self.target['2050']-self.target['2016'])/self.target['2016']
        self.targetPAT = self.patches.groupby('cultgeopat')['SURF_PARC'].sum()*(1+targetRatio)
        self.indicators = Indicators(self.mcmc_config['indicators_config'], self.patches, self.patches_md5sum, self.targetPAT)

    @staticmethod
    def is_pareto_efficient(costs, return_mask=True):
        """
        Find the pareto-efficient points (minimization on every column).

        :param costs: An (n_points, n_costs) array
        :param return_mask: True to return a mask
        :return: An array of indices of pareto-efficient points.
            If return_mask is True, this will be an (n_points, ) boolean array
            Otherwise it will be a (n_efficient_points, ) integer array of indices.
        Source: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
        """
        is_efficient = np.arange(costs.shape[0])
        n_points = costs.shape[0]
        next_point_index = 0  # Next index in the is_efficient array to search for
        while next_point_index < len(costs):
            nondominated_point_mask = np.any(costs < costs[next_point_index], axis=1)
            nondominated_point_mask[next_point_index] = True
            is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points
            costs = costs[nondominated_point_mask]
            next_point_index = np.sum(nondominated_point_mask[:next_point_index]) + 1
        if return_mask:
            is_efficient_mask = np.zeros(n_points, dtype=bool)
            is_efficient_mask[is_efficient] = True
            return is_efficient_mask
        else:
            return is_efficient

    @staticmethod
    def scenario_scores(patches, indicators):
        # Kept as a static method so it can be pickled by multiprocessing.Pool
        return indicators.compute_indicators(patches)

    def step(self, iter_nb, nb_particles, scenarios, scores=None):
        '''
        Sample new scenarios, evaluate their scores and retain only pareto front.

        :param iter_nb: (int) number of this iteration, 0 for initial step, 1 for first iteration and so on.
        :param nb_particles: number of new scenarios to sample
        :param scenarios: (DataFrame) list of the scenarios used as base for sampling the new scenarios.
        :param scores: (DataFrame) list of scores for each scenario. May be None
        if given scenario are only used for sampling and will not be considered for pareto front.
        '''
        new_scenarios = []
        new_scores = []
        # Loop of sampling and scoring
        for index, patches in tqdm(scenarios.sample(nb_particles, replace=True, random_state=self.rng).iterrows(), total=nb_particles):
            scenario = Scenario(patches[0].copy())
            scenario.reallocate(self.rng, self.targetPAT, self.mcmc_config['ratio_patches_to_modify']) # 3.8 ms
            new_scenarios.append(scenario.patches)
            if not self.multiprocessing:
                new_scores.append(self.indicators.compute_indicators(scenario.patches))
        if self.multiprocessing:
            # Score all sampled scenarios in parallel
            new_scores = self.pool.map(partial(MCMC.scenario_scores, indicators=self.indicators), new_scenarios)
        # merging with precedent data
        new_scores = pd.DataFrame(new_scores, columns=self.indicators.indicators_names)
        new_scores['iteration'] = iter_nb
        if scores is None:
            scores = new_scores
            scenarios = pd.DataFrame(new_scenarios)
        else:
            # pd.concat replaces the deprecated/removed DataFrame.append
            scores = pd.concat([scores, new_scores], ignore_index=True, sort=False)
            scenarios = pd.concat([scenarios, pd.DataFrame(new_scenarios)], ignore_index=True, sort=False)
        # Computing pareto front
        pareto_mask = MCMC.is_pareto_efficient(scores[['Resilience','Proximity','Productivity','Biodiversity','Social']].values)
        scores['pareto'] = pareto_mask
        # Writing output data
        scores.to_csv(self.outputdir+'/mcmc_iter_{}.csv'.format(iter_nb), index=False)
        try:
            sns.pairplot(scores, vars=['Resilience','Proximity','Productivity','Biodiversity','Social'],
                    diag_kind="kde", hue="pareto"
                ).savefig(self.outputdir+'/mcmc_iter_{}.png'.format(iter_nb))
        except (np.linalg.LinAlgError, ValueError):
            # when data doesn't vary enough on a dimension, it is impossible to generate the pareto density
            # and an error is triggered. Here, we ignore this error.
            pass
        return [scenarios[pareto_mask], scores[pareto_mask]]

    def run(self, nb_processes=mp.cpu_count()):
        '''
        Run the chain: initial sampling followed by the iterations, keeping
        only the Pareto front at each step.

        :param nb_processes: size of the multiprocessing pool
        :return: [scenarios, scores] of the final Pareto front
        '''
        self.pool = mp.Pool(processes=nb_processes)
        # Initial sampling and evaluation
        nb_particles = self.mcmc_config['initial_nb_particles']
        scenarios_init = pd.DataFrame([self.patches])
        scenarios, scores = self.step(0, nb_particles, scenarios_init)
        # Iteration count configurable through 'nb_iterations' (default: 10,
        # the previous hard-coded value)
        for i in range(self.mcmc_config.get('nb_iterations', 10)):
            print('Iteration #{}'.format(i))
            scenarios, scores = self.step(i+1, nb_particles, scenarios, scores)
        # TODO
        # Storing variation of indicators
        init_var = scores.std()
        # sequential optimization loop
        self.pool.close()
        self.pool.join()
        return [scenarios, scores]
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
227
228
229

if __name__ == '__main__':
    # Build the chain from the default configuration file and run it.
    mcmc = MCMC('MCMC_config.yml')
    # scenario = Scenario(mcmc.patches.copy())
    # scenario.reallocate(mcmc.rng, mcmc.targetPAT, mcmc.mcmc_config['ratio_patches_to_modify'])
    final_scenarios, final_scores = mcmc.run()
    # print(mcmc.indicators.biodiversity(mcmc.patches))
    # print(mcmc.indicators.proximity(mcmc.patches))