Commit f05f13e4 authored by Dumoulin Nicolas's avatar Dumoulin Nicolas
Browse files

checksum on shpfile with metadata in npz of matrix

parent 202e8d93
......@@ -22,14 +22,18 @@ class Biodiversity:
function P(i,j) = exp(-l*d). The coefficient l (lambda) is determined for a given
minimal probability at the maximal distance d.
'''
def __init__(self, initial_patches, dmax, epsilon=0.001):
def __init__(self, initial_patches, patches_md5sum, dmax, epsilon=0.001):
'''
Creates an indicator with initial parameters
The original file giving the patches is needed for checking the correspondance
between this file and the matrix cached in a file.
:param patches_filename file will be used for computing checksum
:param dmax: the maximum distance threshold for considering a pair of patches
:param epsilon: the minimum probability threshold that should resulted of
the exponential function at distance dmax
'''
self.md5sum = patches_md5sum
self.dmax = dmax
self.epsilon = epsilon
self.final_result_ratio = pow(initial_patches['SURF_PARC'].sum(), 2) / 100000
......@@ -44,13 +48,13 @@ class Biodiversity:
# get the lambda value for given thresholds
l = -log(self.epsilon, 2)/self.dmax
# Creating a buffer for these patches, for finding patches under the distance threshold by intersection
patches = patches
buffer = patches.buffer(self.dmax)
index = buffer.sindex
# sparse matrix initialisation
self.probabilities = lil_matrix((len(patches),len(patches)), dtype='d')
geom_idx=patches.columns.get_loc('geometry')
for i in tqdm(range(len(patches))):
geom_i = patches.iloc[i].geometry
geom_i = patches.iat[i,geom_idx]
for fid in list(index.intersection(geom_i.bounds)):
# proba zero on the diagonal
if i == fid:
......@@ -58,10 +62,10 @@ class Biodiversity:
if i >= fid:
# the matrix is symetric, so we process only one half and
# affect the proba on two sides
d = patches.iloc[fid].geometry.boundary.distance(geom_i.boundary)
d = patches.iat[fid,geom_idx].boundary.distance(geom_i.boundary)
if d <= self.dmax:
# patches i and fid have a distance less than the max distance
p = patches.iloc[fid].geometry.area*geom_i.area*l*exp(-l*d)
p = patches.iat[fid,geom_idx].area*geom_i.area*l*exp(-l*d)
self.probabilities[i,fid] = p
self.probabilities[fid,i] = p
else:
......@@ -85,16 +89,23 @@ class Biodiversity:
Save the matrix of probabilities and initial parameters in a file (npz format).
'''
m = self.probabilities
np.savez(filename, dmax=self.dmax, epsilon=self.epsilon, data=m.data,
indices=m.indices, indptr=m.indptr, shape=m.shape)
np.savez(filename, dmax=self.dmax, epsilon=self.epsilon, md5sum=self.md5sum,
data=m.data, indices=m.indices, indptr=m.indptr, shape=m.shape)
def load_probabilities(self, filename):
'''
Load the matrix of probabilities and initial parameters from a file (npz format).
The correspondance will be checked between
class attributes and metadata stored in the file
'''
loader = np.load(filename)
self.dmax = loader['dmax']
self.epsilon = loader['epsilon']
error_message = ('Matrix metadata {0} does\'nt correspond to metadata given at indicator construction'
'\n\t{0} in metadata: {1} while asked: {2}'
'\n\tmatrix filename: ') + filename
for k1,k2 in [[self.dmax,'dmax'],[self.epsilon,'epsilon'],[self.md5sum,'md5sum']]:
if k2 not in loader or k1!=loader[k2]:
raise ValueError(error_message.format(k2, dict(loader.items()).get(k2, 'NA'), k1))
self.probabilities = csr_matrix(
(loader['data'], loader['indices'], loader['indptr']),
shape=loader['shape']
......@@ -102,10 +113,17 @@ class Biodiversity:
if __name__ == '__main__':
import geopandas as gpd
patches = gpd.GeoDataFrame.from_file("../output/PAT_patches/PAT_patches.shp", encoding='utf-8')
matrix_filename='../output/matrix_biodiversity.npz'
import os
biodiv = Biodiversity(patches, 1000, 0.001)
import hashlib
filename = "../output/PAT_patches/PAT_patches.shp"
patches = gpd.GeoDataFrame.from_file(filename, encoding='utf-8')
patches = patches[patches['cultgeopat']!='Non Considérée']
matrix_filename='../output/matrix_biodiversity_test.npz'
hash_md5 = hashlib.md5()
with open(filename, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
biodiv = Biodiversity(patches, hash_md5.hexdigest(), 1000, 0.001)
if not os.path.isfile(matrix_filename):
biodiv.compute_dispersal_probabilities(patches)
biodiv.save_probabilities(matrix_filename)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment