Commit c9669cc9 authored by Fize Jacques

Debug

Create a server-oriented script to run Geomatching
parent c7703b30
Showing with 210 additions and 76 deletions
# coding = utf-8
import glob

from gmatch4py.bag_of_cliques import BagOfCliques
from gmatch4py.base import Base
from gmatch4py.ged.graph_edit_dist import GraphEditDistance
from gmatch4py.ged.bipartite_graph_matching_2 import BP_2
from gmatch4py.ged.greedy_edit_distance import GreedyEditDistance
from gmatch4py.ged.hausdorff_edit_distance import HED
from gmatch4py.jaccard import Jaccard
from gmatch4py.kernels.weisfeiler_lehman import *
from gmatch4py.mcs import MCS
from gmatch4py.vertex_edge_overlap import VertexEdgeOverlap

import argparse, os, sys, re, json
import networkx as nx
import numpy as np

parser = argparse.ArgumentParser()
parser.add_argument("graphs_input_dir")
parser.add_argument("matrix_output_dir")
parser.add_argument("-d", action="store_true", help="Return distance matrix")
args = parser.parse_args()

if not os.path.exists(args.graphs_input_dir):
    print("Input graph directory doesn't exist!")
    sys.exit(1)

if not os.path.exists(args.matrix_output_dir):
    print("Output matrix directory doesn't exist!")
    print("Creating directory")
    os.makedirs(args.matrix_output_dir)
    print("Directory created")

# Loading graphs
graphs = []
mapping_files_to_graphs = {}
fns = glob.glob(args.graphs_input_dir.rstrip("/") + "/*.gexf")
if not fns:
    print("Input dir is empty! No .gexf file found!")
    sys.exit(1)
for i, fn in enumerate(fns):
    graphs.append(nx.read_gexf(fn))
    mapping_files_to_graphs[i] = fn
#print(graphs)

# Compute matrices
for class_ in [BagOfCliques, GraphEditDistance, BP_2, GreedyEditDistance, HED, Jaccard, WeisfeleirLehmanKernel, MCS,
               VertexEdgeOverlap]:
    print("Computing the Similarity Matrix for {0}".format(class_.__name__))
    if class_ in (GraphEditDistance, BP_2, GreedyEditDistance, HED):
        comparator = class_(1, 1, 1, 1)
    elif class_ == WeisfeleirLehmanKernel:
        comparator = class_(h=2)
    else:
        comparator = class_()
    matrix = comparator.compare(graphs, None)
    if not args.d:
        matrix = comparator.similarity(matrix)
    else:
        matrix = comparator.distance(matrix)
    print("Matrix ready. Saving ...")
    output_fn = "{0}/{1}_{2}.npy".format(
        args.matrix_output_dir.rstrip("/"),
        class_.__name__,
        os.path.dirname(args.graphs_input_dir).replace("/", "_")
    )
    np.save(output_fn, matrix)
    print("Matrix Saved")

json.dump(mapping_files_to_graphs, open("{0}/{1}".format(args.matrix_output_dir.rstrip("/"), "metadata.json"), "w"))
print("Done")
\ No newline at end of file
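A minimal reload sketch, not part of the commit: assuming the script above was run with a hypothetical output directory matrices/, the saved matrix and the file-to-graph mapping could be read back as follows (the exact .npy file name depends on the input path passed to the script).

# Illustrative only: "matrices/" and the matrix file name are hypothetical;
# they depend on the arguments given to the script above.
import json
import numpy as np

similarity = np.load("matrices/GraphEditDistance_.npy")
mapping = json.load(open("matrices/metadata.json"))
print(similarity.shape)   # (n_graphs, n_graphs)
print(mapping["0"])       # path of the .gexf file behind row/column 0 (JSON keys become strings)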
# coding = utf-8
from ..helpers.gazeteer_helpers import get_data
import numpy as np


def flattern(A):
    # Recursively flatten nested lists and numpy arrays into a single flat list
    rt = []
    for i in A:
        if isinstance(i, list):
            rt.extend(flattern(i))
        elif isinstance(i, np.ndarray):
            rt.extend(flattern(i.tolist()))
        else:
            rt.append(i)
    return rt


def most_common(lst):
    # Return the most frequent item; "PPL" labels are dropped when the more
    # specific "P-PPL" label is also present
    if len(list(set(lst))) > 1 and "P-PPL" in set(lst):
        lst = [x for x in lst if x != "PPL"]
    return max(set(lst), key=lst.count)


def granularity(graph):
    """
    Return the granularity of a STR, i.e. the most common class
    of its spatial entities.

    :param graph: networkx graph representing the STR
    :return: most common class label, or [] if no class is found
    """
    class_list = flattern([get_data(n)["class"] for n in list(graph.nodes())])
    if not class_list:
        return []
    return most_common(class_list)
\ No newline at end of file
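A short, hedged illustration, not part of the commit, of how the two helpers above behave on plain lists:

# Illustrative only: the class labels below are made-up examples.
print(flattern([["P-PPL"], ["A-ADM1", ["P-PPL"]]]))  # ['P-PPL', 'A-ADM1', 'P-PPL']
print(most_common(["P-PPL", "P-PPL", "A-ADM1"]))     # 'P-PPL'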
import json
import os

import shapely
from scipy.spatial import ConvexHull
from shapely.geometry import Polygon, Point, shape

from ..config.configuration import config
from .gazeteer_helpers import get_data
# from .collision import collide
import geopandas as gpd

__cache = {}
__cache_adjacency = {}
__limit_cache = 400
def parseHull(hull_object, points):
    # Turn a scipy.spatial ConvexHull into the list of points lying on its hull
    hull = []
    for simplex in hull_object.simplices:
        hull.append(points[simplex[0]])
        hull.append(points[simplex[1]])
    return hull


def getConvexHull(id):
    # Build the convex hull(s) of a spatial entity's OSM boundary, if it has one
    res = get_data(id)
    if not "path" in res:
        return []
    path = res["path"]
    data = json.load(open(os.path.join(config.osm_boundaries_directory, path)))
    boundaries = data["geometry"]["coordinates"]
    if data["geometry"]["type"] == "Polygon":
        hull = parseHull(ConvexHull(boundaries[-1]), boundaries[-1])
        return [hull]
    else:
        hull = []
        for bound in boundaries[-1]:
            hull.append(parseHull(ConvexHull(bound), bound))
        return hull


def add_cache(id_, hull):
    # Very simple bounded cache: wiped entirely once it exceeds __limit_cache entries
    global __cache, __limit_cache
    if len(__cache) > __limit_cache:
        __cache = {}
    __cache[id_] = hull


def include_in(hull_1, hull_2):
    # True if more than half of hull_1's points fall inside the polygon hull_2
    n_ = len(hull_1)
    inside = 0
    poly = Polygon(hull_2)
    for p in hull_1:
        if Point(p[0], p[1]).within(poly):
            inside += 1
    if inside / n_ > 0.5:
        return True
    return False
def getGEO(id_se):
    # Return a geometry for a spatial entity: its OSM boundary if available,
    # otherwise a Point built from its coordinates
    data = get_data(id_se)
    if "path" in data:
        return gpd.read_file(os.path.join(config.osm_boundaries_directory, data["path"])).geometry
    elif "coord" in data:
        return Point(data["coord"]["lon"], data["coord"]["lat"])
    return None


def collide(se1, se2):
    # True if the geometries of the two spatial entities intersect
    try:
        if se1 in __cache:
            data_se1 = __cache[se1]
        else:
            data_se1 = gpd.GeoSeries(list(getGEO(se1).values[0]))
            add_cache(se1, data_se1)
        if se2 in __cache:
            data_se2 = __cache[se2]
        else:
            data_se2 = gpd.GeoSeries(list(getGEO(se2).values[0]))
            add_cache(se2, data_se2)
    except:
        return False
    if type(data_se1) != type(data_se2):
        if type(data_se1) == gpd.geoseries.GeoSeries:
            return data_se1.intersects(data_se2).any()
        else:
            return data_se2.intersects(data_se1).any()
    try:
        if data_se1.intersects(data_se2):
            return True
    except:
        if data_se1.intersects(data_se2).any():
            return True
    return False
def collisionTwoSEBoundaries(id_SE1, id_SE2):
    # True if the boundaries of the two spatial entities intersect.
    # Results are memoised in __cache_adjacency.
    global __cache, __cache_adjacency
    if id_SE1 in __cache_adjacency:
@@ -61,24 +69,11 @@ def collisionTwoSEBoundaries(id_SE1,id_SE2):
        if id_SE1 in __cache_adjacency[id_SE2]:
            return __cache_adjacency[id_SE2][id_SE1]

    if not id_SE1 in __cache_adjacency:
        __cache_adjacency[id_SE1] = {}

    if collide(id_SE1, id_SE2):  # and not include_in(h1,h2)
        __cache_adjacency[id_SE1][id_SE2] = True
        return True
    __cache_adjacency[id_SE1][id_SE2] = False
    return False
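A hedged usage sketch, not part of the commit: the identifiers below are hypothetical gazetteer IDs. collisionTwoSEBoundaries fetches each entity's geometry through get_data, tests whether the two geometries intersect via collide, and caches the boolean result for later calls.

# Illustrative only: "GD123" and "GD456" are hypothetical gazetteer identifiers.
if collisionTwoSEBoundaries("GD123", "GD456"):
    print("The two spatial entities are adjacent (their boundaries intersect).")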
@@ -6,7 +6,7 @@ import networkx as nx
import pandas as pd
import logging
from shapely.geometry import Point, MultiPoint, MultiLineString, LineString
from ..config.configuration import config
#logging.basicConfig(filename=config.log_file,level=logging.INFO)
@@ -17,7 +17,7 @@ from ..helpers.collision_with_gazetteer_data import collisionTwoSEBoundaries
from ..helpers.deprecated import deprecated
from ..helpers.gazeteer_helpers import get_data, get_data_by_wikidata_id
from ..nlp.ner.ner import NER
import geopandas as gpd
def get_inclusion_chain(id_, prop):
"""
@@ -223,7 +223,7 @@ class STR(object):
        p47se1 = []
        for el in data["P47"]:
            d = get_data_by_wikidata_id(el)
            if "id" in d:
                p47se1.append(d["id"])
        return p47se1
@@ -233,47 +233,39 @@ class STR(object):
        :return:
        """
        stop_class = set(["A-PCLI", "A-ADM1"])
        for se1 in self.spatial_entities:
            data_se1 = get_data(se1)
            for se2 in self.spatial_entities:
                if se1 == se2: continue
                if se1 in self.adjacency_relationships:
                    if se2 in self.adjacency_relationships[se1]:
                        continue
                if se2 in self.adjacency_relationships:
                    if se1 in self.adjacency_relationships[se2]:
                        continue
                if self.is_included_in(se1, se2) or self.is_included_in(se2, se1):
                    continue
                data_se2 = get_data(se2)
                f = False
                if "P47" in data_se2:
                    if se1 in self.getP47AdjacencyData(data_se2):
                        f = True
                if not f:
                    if "P47" in data_se2:
                        if se2 in self.getP47AdjacencyData(data_se2):
                            f = True
                            #print(data_se1["en"], data_se2["en"], "P47")
                if not f:
                    f = collisionTwoSEBoundaries(se1, se2)
                if not f:
                    #print(data_se1,data_se2)
                    if Point(data_se1["coord"]["lon"], data_se1["coord"]["lat"]).distance(
                            Point(data_se2["coord"]["lon"], data_se2["coord"]["lat"])) < 1 and len(
                            set(data_se1["class"]) & stop_class) < 1 and len(set(data_se2["class"]) & stop_class) < 1:
                        f = True
                self.add_adjacency_rel(se1, se2, f)
@@ -346,3 +338,45 @@ class STR(object):
            self.add_inclusion_rel(edge[0], edge[1])

    def set_all_shapes(self, shapes):
        self.shapes = shapes

    def map_projection(self):
        # Plot the STR's spatial entities and their adjacency (green) and
        # inclusion (red) links on a world basemap
        import matplotlib.pyplot as plt

        world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
        base = world.plot(color='white', edgecolor='black', figsize=(16, 9))

        points = []
        for se in self.spatial_entities:
            data = get_data(se)
            try:
                points.append(Point(data["coord"]["lon"], data["coord"]["lat"]))
            except:
                pass

        lines_adj = []
        for se1 in self.adjacency_relationships:
            data_se1 = get_data(se1)
            for se2 in self.adjacency_relationships[se1]:
                data_se2 = get_data(se2)
                if self.adjacency_relationships[se1][se2]:
                    lines_adj.append(
                        LineString([(data_se1["coord"]["lon"], data_se1["coord"]["lat"]),
                                    (data_se2["coord"]["lon"], data_se2["coord"]["lat"])])
                    )

        lines_inc = []
        for se1 in self.inclusion_relationships:
            data_se1 = get_data(se1)
            for se2 in self.inclusion_relationships[se1]:
                if self.inclusion_relationships[se1][se2]:
                    data_se2 = get_data(se2)
                    lines_inc.append(
                        LineString([
                            (data_se1["coord"]["lon"], data_se1["coord"]["lat"]),
                            (data_se2["coord"]["lon"], data_se2["coord"]["lat"])
                        ])
                    )

        gpd.GeoSeries(points).plot(ax=base, marker='o', markersize=5, color="blue")
        gpd.GeoSeries(lines_adj).plot(ax=base, color="green")
        gpd.GeoSeries(lines_inc).plot(ax=base, color="red")
        print("adj", gpd.GeoSeries(lines_adj))
        print("inc", gpd.GeoSeries(lines_inc))
        plt.show()