Commit 9fd92c31 authored by Dumoulin Nicolas's avatar Dumoulin Nicolas

patches merged with rpg_ilots to retrieve farmer IDs

various fixes
parent 26065602
@@ -36,28 +36,53 @@ def build_initial_PAT(rpg_parcelles_filename, rpg_ilots_filename, pat_cultural_c
rpg.geometry = rpg.geometry.centroid
patches = gpd.sjoin(rpg, municipalities, op='intersects') # 'op' makes no practical difference here: the join is between points and polygons
# sjoin with rpg_ilots to retrieve the id of the "exploitant"
rpg_ilots = gpd.GeoDataFrame.from_file(rpg_ilots_filename, encoding='utf-8')[['id_ilot','id_expl','geometry']]
rpg_ilots = gpd.GeoDataFrame.from_file(rpg_ilots_filename, encoding='utf-8')[['id_ilot','id_expl','surf_ilot','geometry']]
# restore the original geometry (the centroids were only needed for the municipalities sjoin)
patches.geometry = patches['patches']
del patches['patches']
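# drop the 'index_right' column left by the previous sjoin, otherwise the next sjoin would refuse to run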
del patches['index_right']
patches_expl = gpd.sjoin(patches, rpg_ilots, how='left', op='intersects')
del patches_expl['index_right']
# removing orphan patches (without exploitant)
orphan_patches = patches_expl[patches_expl['id_ilot'].isnull()]
print('{} patches without "id_expl" ({:.4f} % of total area)'.format(
len(orphan_patches),
100 * orphan_patches.geometry.area.sum() / patches_expl.geometry.area.sum()
))
patches_expl = patches_expl.fillna(-1)
patches_expl = patches_expl[~patches_expl['id_ilot'].isnull()]
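# buffer(0) is a common trick to repair invalid geometries (e.g. self-intersections) before the intersection computations below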
patches_expl.geometry = patches_expl.geometry.buffer(0)
rpg_ilots.geometry = rpg_ilots.geometry.buffer(0)
# the sjoin can match a patch with several "ilots": keep the "ilot" with the greatest intersection area
patches_expl['intersection_surf'] = patches_expl.apply(
lambda row: rpg_ilots[rpg_ilots['id_ilot']==row['id_ilot']]['geometry'].intersection(row['geometry']).area.sum(),
axis=1)
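# transform(max) broadcasts each parcel's best overlap onto all of its duplicate rows, so the filter below keeps only the best-matching "ilot"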
patches_expl['max_intersection_surf'] = patches_expl.groupby('ID_PARCEL')['intersection_surf'].transform(max)
patches_expl = patches_expl[patches_expl['max_intersection_surf'] == patches_expl['intersection_surf']]
# cleaning
del patches_expl['max_intersection_surf']
del patches_expl['intersection_surf']
del patches_expl['index_right']
patches_expl['id_ilot'] = patches_expl['id_ilot'].astype(int)
patches_expl['id_expl'] = patches_expl['id_expl'].astype(int)
# for the last remaining duplicates, keep the "ilot" belonging to the "exploitant" with the greatest total surface
patches_expl['expl_surf'] = patches_expl.apply(
lambda row: patches_expl[patches_expl['id_expl']==row['id_expl']]['surf_ilot'].sum(),
axis=1)
patches_expl['max_expl_surf'] = patches_expl.groupby('ID_PARCEL')['expl_surf'].transform(max)
patches_expl = patches_expl[patches_expl['max_expl_surf'] == patches_expl['expl_surf']]
del patches_expl['max_expl_surf']
del patches_expl['expl_surf']
# cultural code joining
codes = pd.read_csv(pat_cultural_classes_filename)
patches_expl = patches_expl.merge(codes[['category','Classe GéoPAT']],how='left',left_on='CODE_CULTU',right_on='category')
patches_expl = patches_expl.rename(columns={'Classe GéoPAT':'cultgeopat'})
patches_expl = patches_expl.rename(columns={
'Classe GéoPAT':'cultgeopat',
'BV2012':'Bdv'
})
# drop rows whose GéoPAT class is 'Non Considérée'
patches_expl = patches_expl[patches_expl['cultgeopat']!='Non Considérée']
# TODO join with MAJIC
return [municipalities,patches,rpg_ilots,patches_expl]
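# recompute the parcel surface from the patch geometry (area expressed in CRS units)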
patches_expl['SURF_PARC'] = patches_expl.area
return [patches_expl, municipalities]
def build_PAT_municipalities(pat_municipalities_dir, bdv_filename, bdv_fixes_filename,
adminexpress_com_filename, adminexpress_epci_filename):
@@ -94,14 +119,27 @@ def build_PAT_municipalities(pat_municipalities_dir, bdv_filename, bdv_fixes_fil
if __name__ == '__main__':
import yaml
try:
from yaml import CLoader as Loader
except ImportError:
from yaml import Loader
resources={}
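# each INDEX.yml entry is expected to define a 'variable' name and, optionally, a 'file' path; entries without 'file' fall back to the entry key itself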
for k,v in yaml.load(open('resources/INDEX.yml','r')).items():
for k,v in yaml.load(open('resources/INDEX.yml','r'), Loader=Loader).items():
resources[v['variable']] = 'resources/'+v['file'] if 'file' in v else k
municipalities,patches,rpg_ilots,patches_expl = build_initial_PAT(resources['rpg_parcelles_filename'], resources['rpg_ilots_filename'],
[patches, municipalities] = build_initial_PAT(
resources['rpg_parcelles_filename'],
resources['rpg_ilots_filename'],
resources['pat_cultural_classes_filename'],
resources['majic_filename'],
resources['pat_municipalities_dir'],
resources['bdv_filename'], resources['bdv_fixes'], resources['adminexpress_com_filename'],
resources['adminexpress_epci_filename'])
patches_expl.to_file('output/patches', encoding='utf-8')
# TODO: merging with MAJIC data is not automated yet
# FIXME: use raw data instead of these preprocessed data
majic = gpd.GeoDataFrame.from_file('Productivite/productivite_shape/Parcelle_PAT_valCad_test.shp', encoding='utf-8')[['ID_PARCEL','VALEUR_CAD']]
patches = patches.merge(majic,how='left',on='ID_PARCEL')
# some values are missing because of the preprocessed input data
patches = patches[~patches['VALEUR_CAD'].isnull()]
patches.to_file('output/PAT_patches', encoding='utf-8')
gpd.GeoDataFrame(municipalities).to_file('output/municipalities', encoding='utf-8')
pyyaml
geopandas
rtree
pandas
tqdm