MCMC.py 11.6 KB
Newer Older
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
1
2
3
4
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pandas as pd
import geopandas as gpd
5
import os, sys, time, shutil
6
import glob
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
7
import yaml
8
9
10
11
try:
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
12
import numpy as np
13
from indicators import Indicators
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
14
from tqdm import tqdm
15
import patutils
16
from patutils import md5sum, load_pat_patches
17
import seaborn as sns
18
import matplotlib.pyplot as plt
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
19
from scenarios import ScenariosStack, Reallocator
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
20

21
22
23
import multiprocessing as mp
from functools import partial

Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
24
class MCMC:
25
26
27
28
29
30
31
    def buildOutputDirName(config, suffix):
        dirname = config['output_dir'] + '/' + time.strftime('%Y%m%d-%H%M%S')
        dirname += '_{initial_nb_particles}_{step_nb_particles}_{ratio_patches_to_modify}'.format(**config)
        dirname += suffix
        return dirname

    def __init__(self, mcmc_config_filename, output=True, suffix='', outputDirName=None):
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
32
33
34
35
36
        if not os.path.isfile(mcmc_config_filename):
        	print('Error: file not found "{}"'.format(mcmc_config_filename))
        	print('Please copy the template file "MCMC_config.sample.yml" and adjust to your settings and run again this program')
        	sys.exit(1)
        self.mcmc_config = yaml.load(open(mcmc_config_filename,'r'))
37
        self.patches_md5sum = md5sum(self.mcmc_config['patches'])
38
39
        rngSeed = self.mcmc_config.get('rng_seed', 0)
        self.rng = np.random.RandomState(rngSeed)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
40
        if output:
41
42
43
44
            if outputDirName:
                self.outputdir = outputDirName
            else:
                self.outputdir = MCMC.buildOutputDirName(self.mcmc_config, suffix)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
45
46
47
48
49
50
            if not os.path.exists(self.outputdir):
                os.makedirs(self.outputdir)
                print('All data will be written in {}'.format(self.outputdir))
            else:
                print('Output directory already exists! ({})'.format(self.outputdir))
                sys.exit(1)
51
            # Copying some input files and scripts
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
52
53
54
55
56
57
            shutil.copy(mcmc_config_filename, self.outputdir)
            for f in [ self.mcmc_config['target'],
                    self.mcmc_config['indicators_config']['resilience'],
                    self.mcmc_config['indicators_config']['social']['cost_matrix_filename']
                    ]:
                shutil.copy(f, self.outputdir)
58
59
60
61
62
63
64
65
66
67
68
            os.makedirs(self.outputdir+'/py')
            for f in glob.glob(os.path.dirname(os.path.abspath(__file__))+'/*.py'):
                shutil.copy(f, self.outputdir+'/py')
            # Storing configuration data
            config_data = { 'rng_seed':rngSeed, 'patches_md5sum':self.patches_md5sum }
            for key,filename in [
                    ['biodiversity_matrix_md5sum', self.mcmc_config['indicators_config']['biodiversity']['matrixfilename']],
                    ['social_patches_costs_md5sum', self.mcmc_config['indicators_config']['social']['patches_costs_filename']]
                ]:
                if os.path.isfile(filename):
                    config_data[key] = md5sum(filename)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
69
70
            with open(self.outputdir+'/config.yml', 'w') as outfile:
                yaml.dump(config_data, outfile, default_flow_style=False)
71
        # finishing init
72
        self.patches = load_pat_patches(self.mcmc_config['patches'])
73
74
        self.surfaces = self.patches['SURF_PARC']
        self.target = pd.read_csv(self.mcmc_config['target'], sep=';',index_col=0).rename(index=patutils.code_cultgeopat)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
75
76
77
        targetRatio = (self.target['2050']-self.target['2016'])/self.target['2016']
        self.targetPAT = self.patches.groupby('cultgeopat')['SURF_PARC'].sum()*(1+targetRatio)
        self.indicators = Indicators(self.mcmc_config['indicators_config'], self.patches, self.patches_md5sum, self.targetPAT)
78
        self.reallocator = Reallocator(self.patches, self.targetPAT, self.mcmc_config['ratio_patches_to_modify'])
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
79

80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107

    def is_pareto_efficient(costs, return_mask = True):
        """
        Find the pareto-efficient points
        :param costs: An (n_points, n_costs) array
        :param return_mask: True to return a mask
        :return: An array of indices of pareto-efficient points.
            If return_mask is True, this will be an (n_points, ) boolean array
            Otherwise it will be a (n_efficient_points, ) integer array of indices.
        Author:
        Source: https://stackoverflow.com/questions/32791911/fast-calculation-of-pareto-front-in-python
        """
        is_efficient = np.arange(costs.shape[0])
        n_points = costs.shape[0]
        next_point_index = 0  # Next index in the is_efficient array to search for
        while next_point_index<len(costs):
            nondominated_point_mask = np.any(costs<costs[next_point_index], axis=1)
            nondominated_point_mask[next_point_index] = True
            is_efficient = is_efficient[nondominated_point_mask]  # Remove dominated points
            costs = costs[nondominated_point_mask]
            next_point_index = np.sum(nondominated_point_mask[:next_point_index])+1
        if return_mask:
            is_efficient_mask = np.zeros(n_points, dtype = bool)
            is_efficient_mask[is_efficient] = True
            return is_efficient_mask
        else:
            return is_efficient

108
109
110
111
112
113
114
115
116
117
118
119
    def getRNGState(self, iter_nb, scen_nb):
        result = 1 # First RNG is the default one for local sampling
        if iter_nb==1:
            result += scen_nb
        else:
            result += self.mcmc_config['initial_nb_particles'] + (iter_nb - 1) * self.mcmc_config['step_nb_particles']
        return result

    def reallocate_mp(rng_state_and_scen_id):
        global scenarios
        rng_state, scen_id = rng_state_and_scen_id
        return scenarios.reallocate_mp(scen_id, np.random.RandomState(rng_state))
120

121
    def step(self, iter_nb, nb_particles, scenarios, write_data=True, disable_progress=False, num_cpus=1):
122
123
124
        '''
        Sample new scenarios, evaluate their scores and retain only pareto front.

125
        :param iter_nb: (int) number of this iteration.
126
        :param nb_particles: number of new scenarios to sample
127
        :param scenarios: (ScenariosStack) list of the scenarios used as base for sampling the new scenarios.
128
129
        '''
        # Loop of sampling and scoring
130
        start_time = time.time()
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
        if num_cpus>1:
            # multiprocessing
            loop_data = [[self.getRNGState(iter_nb, loop_index), scen_id] for loop_index,scen_id in enumerate(scenarios.sample(nb_particles, self.rng).index)]
            results = list(tqdm(self.mp_pool.imap(partial(MCMC.reallocate_mp), loop_data, chunksize=10), total=len(loop_data)))
            # Consolidate data
            scenarios.reconstitute_mp(iter_nb, results, disable_progress=disable_progress)
        else:
            # Sequential
            data={'scenarios_patches':[], 'scenarios_cult':[], 'previous_indexes':[], 'fullreallocated':[]}
            for loop_index, [index, scenario] in tqdm(enumerate(scenarios.sample(nb_particles, self.rng).iterrows()), total=nb_particles, disable=disable_progress):
                rng = np.random.RandomState(self.getRNGState(iter_nb, loop_index))
                [fullreallocated,scenario] = scenarios.reallocate(index, rng)
                data['fullreallocated'].append(fullreallocated)
                data['scenarios_patches'].append(scenario)
                data['scenarios_cult'].append(scenario['cultgeopat'])
                data['previous_indexes'].append(index)
            scenarios.reconstitute(iter_nb, **data, disable_progress=disable_progress)
148
        elapsed_time = time.time() - start_time
149
        print('Iteration duration: {} - {:.2f} scen/s '.format(time.strftime("%M:%S", time.gmtime(elapsed_time)), nb_particles/elapsed_time), end="", flush=True)
150
        # Computing pareto front
151
        start_time = time.time()
152
        pareto_mask = MCMC.is_pareto_efficient(scenarios.scores[['Resilience','Proximity','Productivity','Biodiversity','Social']].values)
153
        scenarios.scores['pareto'] = pareto_mask
154
        elapsed_time = time.time() - start_time
155
        print(' Fully reallocated scenarios {}/{} - '.format(scenarios.nbFullReallocated(), nb_particles), end="", flush=True)
156
        if (len(scenarios.scores)>0):
157
            print('{:.2f}% on pareto front (computed in {}) '.format(100*np.sum(pareto_mask)/len(pareto_mask), time.strftime("%M:%S", time.gmtime(elapsed_time))), end="", flush=True)
158
        # Writing output data
159
        if hasattr(self,'outputdir') and write_data:
160
161
162
163
164
165
166
            write_times=[]
            start_time = time.time()
            scenarios.scores.to_csv(self.outputdir+'/scores_iter_{0:03d}.csv'.format(iter_nb), index=True)
            write_times.append(time.time() - start_time)
            start_time = time.time()
            scenarios.cultgeopat.to_hdf(self.outputdir+'/cult_iter_{0:03d}.h5'.format(iter_nb), 'cultgeopat', index=True, complevel=1)
            write_times.append(time.time() - start_time)
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
167
168
169
170
171
172
173
            start_time = time.time()
            try:
                pairplot = sns.pairplot(scenarios.scores, vars=['Resilience','Proximity','Productivity','Biodiversity','Social','TargetDelta'],
                        diag_kind="kde",hue="pareto"
                    )
                pairplot.savefig(self.outputdir+'/mcmc_iter_{0:03d}.png'.format(iter_nb))
                plt.close(pairplot.fig)
174
            except:
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
175
176
177
178
                # when data doesn't vary enough on a dimension, it is impossible to generate the pareto density
                # and an error is triggered. Here, we ignore this error.
                pass
            write_times.append(time.time() - start_time)
179
            print(' - Scores written in {}/{}/{} '.format(*map(lambda x: time.strftime("%M:%S", time.gmtime(x)), write_times)), end="", flush=True)
180
        print()
181
        # Retaining only optimal particules
182
183
        scenarios.retain(pareto_mask)
        return scenarios
184

185
186
    def run(self):
        # Start with initial scenario
187
        global scenarios
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
188
        scenarios = ScenariosStack(self.indicators, self.reallocator, self.patches)
189
190
191
192
193
194
195
        num_cpus = self.mcmc_config.get('num_cpus', 1)
        if num_cpus > 1:
            import signal
            mp.set_start_method('fork', force=True)
            self.mp_pool = mp.Pool(self.mcmc_config['num_cpus'],
                # for catching correctly Ctrl-C keyboard interruption
                initializer=lambda:signal.signal(signal.SIGINT, signal.SIG_IGN))
196
        # Iteration
197
        nb_iteration=0
198
        for i in range(self.mcmc_config['max_iterations']):
199
200
            nb_iteration=i+1
            print('Iteration #{}'.format(nb_iteration))
201
202
203
204
            if nb_iteration==1:
                nb_particles = self.mcmc_config['initial_nb_particles']
            else:
                nb_particles = self.mcmc_config['step_nb_particles']
205
            self.step(nb_iteration, nb_particles, scenarios, write_data=self.mcmc_config.get('write_data', False), num_cpus=num_cpus)
206
207
208
            if scenarios.allFullReallocated():
                print('All scenarios are fully reallocated. Simulation finished.')
                break
209
        return scenarios
Dumoulin Nicolas's avatar
Dumoulin Nicolas committed
210
211

if __name__ == '__main__':
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
    suffix=''
    mcmc=None
    if len(sys.argv) > 1:
        suffix = '_' + sys.argv[1]
    try:
        config = yaml.load(open('MCMC_config.yml','r'))
        outputdir = MCMC.buildOutputDirName(config, suffix)
        mcmc = MCMC('MCMC_config.yml', suffix=suffix, outputDirName=outputdir)
        scenarios = mcmc.run()
    except KeyboardInterrupt:
        if mcmc and hasattr(mcmc, 'mp_pool'):
            mcmc.mp_pool.terminate()
            mcmc.mp_pool.join()
        print('Process interrupted.')
        answer = input('Would you like delete the output data generated? [y/N] ')
        if answer=='y':
            print('Deleting {}'.format(outputdir))
            shutil.rmtree(outputdir)
        sys.exit(0)