Commit 847ed7fb authored by Fize Jacques's avatar Fize Jacques
Browse files

Add Cache relation extraction using sqlite + Add entry for sqlite db in config.json

parent 4da62e60
No related merge requests found
Showing with 33 additions and 326 deletions
+33 -326
......@@ -26,4 +26,5 @@ __pycache__/
*.npy
*.pkl
*cache.json
*.gexf
\ No newline at end of file
*.gexf
temp_cluster/
......@@ -5,9 +5,13 @@ import argparse, os, re, json, glob
import pandas as pd
import networkx as nx
from strpython.eval.automatic_annotation import AnnotationAutomatic,save_cache
from strpython.eval.automatic_annotation import AnnotationAutomatic,save_cache,add_cache
from strpython.models.str import STR
from tqdm import tqdm,TqdmSynchronisationWarning
from joblib import Parallel, delayed
from multiprocessing import cpu_count
import warnings
warnings.simplefilter("ignore", TqdmSynchronisationWarning)
tqdm.pandas()
......@@ -40,10 +44,11 @@ def foo(x):
return annotater.all(strs[x.G1], strs[x.G2],x.G1, x.G2)
except KeyError as e:
print(e)
add_cache(strs[x.G1], strs[x.G2],[0, 0, 0, 0])
return [0, 0, 0, 0]
df["res"] = df.progress_apply(lambda x: foo(x), axis=1)
df["res"] = Parallel(n_jobs=cpu_count())(delayed(foo)(x) for x in tqdm(df.itertuples()))#df.progress_apply(lambda x: foo(x), axis=1)
df.res=df.res.apply(lambda x :list(map(int,x)) if x else [])
df[["c1"]] = df.res.apply(lambda x: x[0] if len(x)>0 else 0)
df[["c2"]] = df.res.apply(lambda x: x[1] if len(x)>0 else 0)
......
......@@ -12,5 +12,6 @@
"count":"/Users/jacquesfize/nas_cloud/Code/str-python/strpython/resources/count_wiki.pkl"
},
"language_resources_path":"/Users/jacquesfize/nas_cloud/Code/str-python/strpython/resources/language_resources",
"gazetteer":"geodict"
"gazetteer":"geodict",
"relation_db_path" : "/Users/jacquesfize/.services/relation_match.db"
}
\ No newline at end of file
......@@ -5,7 +5,7 @@ import re
from elasticsearch import Elasticsearch
from ..config.configuration import config
import pandas as pd
from ..helpers.objectify import objectify
import gazpy as ga
......
......@@ -5,7 +5,7 @@ import re
from elasticsearch import Elasticsearch
from ..config.configuration import config
import pandas as pd
from ..helpers.objectify import objectify
from mytoolbox.structure.objectify import objectify
es = Elasticsearch(config.es_server)
......
#!/usr/bin/env python
"""Scrap module.
Just tiny bits & bolts.
.. author: Adrian Castravete
.. modified by : Jacques Fize (Implemented for Python 3 and recursive objectification)
"""
from functools import wraps
class _AttributeDict(dict):
    """Dictionary whose string keys are mirrored as instance attributes."""

    def __setitem__(self, key, val):
        # Attribute names must be strings; any other key stays reachable
        # through item access only instead of raising TypeError.
        # NOTE: a key named like a dict method (e.g. "items") shadows that
        # method on this instance, exactly as the plain setattr always did.
        if isinstance(key, str):
            setattr(self, key, val)
        return super().__setitem__(key, val)


def _to_object(value):
    """Return an attribute-accessible copy of ``value`` when it is a dict.

    Nested dictionaries are converted recursively; any non-dict value is
    returned unchanged.
    """
    if not isinstance(value, dict):
        return value
    ret_obj = _AttributeDict()
    for key, val in value.items():
        # __setitem__ stores the item AND the attribute, so both views share
        # the same (already converted) nested object.
        ret_obj[key] = _to_object(val)
    return ret_obj


def objectify(func):
    """Mimic an object given a dictionary.

    Given a dictionary, create an object and make sure that each of its
    keys are accessible via attributes. Nested dictionaries are converted
    recursively.
    If func is a function act as decorator, otherwise just change the
    dictionary and return it.

    :param func: A function or another kind of object.
    :returns: Either the wrapper for the decorator, or the changed value.

    Example::

        >>> obj = {'old_key': 'old_value'}
        >>> oobj = objectify(obj)
        >>> oobj['new_key'] = 'new_value'
        >>> print(oobj['old_key'], oobj['new_key'], oobj.old_key, oobj.new_key)
        old_value new_value old_value new_value

        >>> @objectify
        ... def func():
        ...     return {'old_key': 'old_value'}
        >>> obj = func()
        >>> obj['new_key'] = 'new_value'
        >>> print(obj['old_key'], obj['new_key'], obj.old_key, obj.new_key)
        old_value new_value old_value new_value
    """
    # If func is callable, wrap around it and act like a decorator.
    if callable(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            """Wrapper function for the decorator.

            :returns: The objectified return value of the decorated function.
            """
            return _to_object(func(*args, **kwargs))
        return wrapper
    # Otherwise just try to objectify the value given.
    return _to_object(func)
# coding = utf-8
import copy
import logging
import os
import time
import warnings
from tqdm import tqdm
import folium
import geopandas as gpd
import networkx as nx
import pandas as pd
from shapely.geometry import MultiPoint, Polygon, Point, LineString
from sklearn.cluster import MeanShift, estimate_bandwidth, dbscan
import matplotlib.pyplot as plt
from ..helpers.collision import collisionTwoSEBoundaries, add_cache_adjacency
from ..helpers.geodict_helpers import gazetteer
from ..eval.stats import most_common
from sklearn.cluster import MeanShift, estimate_bandwidth, KMeans, dbscan
import numpy as np
# logging.basicConfig(filename=config.log_file,level=logging.INFO)
from strpython.helpers.relation_extraction import AdjacencyRelation, InclusionRelation
def get_inclusion_chain(id_, prop):
......@@ -42,13 +37,11 @@ class STR(object):
"""
Str basic structure
"""
__cache_inclusion = {} # Store inclusion relations found between spaital entities
__cache_adjacency = {} # Store adjacency relations found between spaital entities
__cache_entity_data = {} # Store data about entity requested
def __init__(self, tagged_text, spatial_entities,toponym_first=True):
"""
Constructir
Constructor
Parameters
----------
......@@ -71,6 +64,11 @@ class STR(object):
self.adjacency_relationships = {}
self.inclusion_relationships = {}
self.adj_rel_db=AdjacencyRelation()
self.inc_rel_db = InclusionRelation()
self.graph = nx.MultiDiGraph()
@staticmethod
def from_networkx_graph(g: nx.Graph, tagged_: list = []):
"""
......@@ -164,10 +162,8 @@ class STR(object):
id1, id2 = edge[0], edge[1]
if edge[2]["color"] == "green":
self.add_adjacency_rel(edge[0], edge[1])
self.add_cache__adjacency(id1, id2, True)
elif edge[2]["color"] == "red":
self.add_inclusion_rel(edge[0], edge[1])
self.add_cache_inclusion(id1, id2, True)
def add_spatial_entity(self, id, label=None, v=True):
"""
......@@ -213,7 +209,7 @@ class STR(object):
except:
label = None
self.add_spatial_entity(id, label, False)
# print(self.graph.nodes(data=True))
def add_adjacency_rel(self, se1, se2):
"""
......@@ -231,7 +227,6 @@ class STR(object):
if not se1 in self.adjacency_relationships: self.adjacency_relationships[se1] = {}
if not se2 in self.adjacency_relationships: self.adjacency_relationships[se2] = {}
self.adjacency_relationships[se1][se2], self.adjacency_relationships[se2][se1] = True, True
self.add_cache__adjacency(se1, se2, True)
def add_inclusion_rel(self, se1, se2):
"""
......@@ -248,47 +243,9 @@ class STR(object):
if not se1 in self.inclusion_relationships:
self.inclusion_relationships[se1] = {}
self.inclusion_relationships[se1][se2] = True
self.add_cache_inclusion(se1, se2, True)
def add_cache_inclusion(self, id1, id2, v=True):
    """
    Store an inclusion result for the pair (id1, id2) in the class-level cache.

    The value is written to ``STR.__cache_inclusion[id1][id2]``; the inner
    dictionary for ``id1`` is created on first use.

    Parameters
    ----------
    id1 : str
        id of the first spatial entity
    id2 : str
        id of the second spatial entity
    v : bool, optional
        whether the relation exists between the two spatial entities. Default is True
    """
    STR.__cache_inclusion.setdefault(id1, {})[id2] = v
def add_cache__adjacency(self, se1, se2, v=True):
    """
    Store an adjacency result for a pair of entities in the class-level cache.

    The value is written in both directions, i.e. to
    ``STR.__cache_adjacency[se1][se2]`` and ``STR.__cache_adjacency[se2][se1]``.

    Parameters
    ----------
    se1 : str
        id of the first spatial entity
    se2 : str
        id of the second spatial entity
    v : bool, optional
        whether the relation exists between the two spatial entities. Default is True
    """
    cache = STR.__cache_adjacency
    for left, right in ((se1, se2), (se2, se1)):
        cache.setdefault(left, {})[right] = v
def get_data(self, id_se):
"""
......@@ -376,156 +333,15 @@ class STR(object):
if self.adjacency_relationships[se1][se2]:
self.graph.add_edge(se1, se2, key=0, color="green")
def is_included_in(self, se1_id, se2_id):
    """
    Tell whether a spatial entity is included within another one.

    A relation already recorded in ``self.inclusion_relationships`` is
    returned as is. Otherwise the inclusion chains of ``se1_id`` for the
    properties "P131" and "P706" are fetched and searched for ``se2_id``;
    a match is also stored through ``add_cache_inclusion``.

    Parameters
    ----------
    se1_id : str
        id of the contained entity
    se2_id : str
        id of the entity container

    Returns
    -------
    bool
        if se1 included in se2
    """
    known = self.inclusion_relationships.get(se1_id, {})
    if se2_id in known:
        return known[se2_id]

    # Concatenate both chains before the membership test.
    chain = get_inclusion_chain(se1_id, "P131")
    chain.extend(get_inclusion_chain(se1_id, "P706"))
    if se2_id not in set(chain):
        return False

    self.add_cache_inclusion(se1_id, se2_id, True)
    return True
def is_adjacent_cache(self, se1, se2):
    """
    Look up a pair of spatial entities in the class-level adjacency cache.

    The pair is searched as (se1, se2) first, then as (se2, se1).

    Parameters
    ----------
    se1 : str
        id of the first spatial entity
    se2 : str
        id of the second spatial entity

    Returns
    -------
    bool
        the cached value for the pair, or False when the pair is not cached
    """
    cache = STR.__cache_adjacency
    for first, second in ((se1, se2), (se2, se1)):
        if first in cache and second in cache[first]:
            return cache[first][second]
    return False
def is_included_cache(self, se1, se2):
    """
    Look up the pair (se1, se2) in the class-level inclusion cache.

    Parameters
    ----------
    se1 : str
        id of the first spatial entity
    se2 : str
        id of the second spatial entity

    Returns
    -------
    bool
        the cached value for (se1, se2), or False when the pair is not cached
    """
    return STR.__cache_inclusion.get(se1, {}).get(se2, False)
def is_adjacent(self, se1, se2, datase1=None, datase2=None):
    """
    Return true if se1 is adjacent to se2.

    The checks run in this order: adjacency cache, inclusion (an entity
    included in the other one is not reported as adjacent), "P47" links of
    each entity, boundary collision, and finally coordinate proximity.

    Parameters
    ----------
    se1 : str
        id of the first spatial entity
    se2 : str
        id of the second spatial entity
    datase1 : gazpy.Element, optional
        if given, cached data concerning the spatial entity with id = se1;
        otherwise it is fetched with ``self.get_data`` (the default is None)
    datase2 : gazpy.Element, optional
        if given, cached data concerning the spatial entity with id = se2;
        otherwise it is fetched with ``self.get_data`` (the default is None)

    Returns
    -------
    bool
        true if adjacent
    """
    # Entities carrying one of these classes are excluded from the
    # coordinate-proximity test below.
    stop_class = {"A-PCLI", "A-ADM1"}

    def get_p47_adjacency_data(data):
        """Return the gazetteer ids resolved from ``data.other.P47``."""
        neighbour_ids = []
        for el in data.other.P47:
            found = gazetteer.get_by_other_id(el, "wikidata")
            if not found:
                continue
            neighbour_ids.append(found[0].id)
        return neighbour_ids

    # A pair cached as adjacent IS adjacent (this used to return False,
    # contradicting the documented contract).
    if self.is_adjacent_cache(se1, se2):
        return True

    if self.is_included_in(se1, se2) or self.is_included_in(se2, se1):
        return False

    # Reuse the data supplied by the caller instead of fetching it again.
    data_se1 = datase1 if datase1 is not None else self.get_data(se1)
    data_se2 = datase2 if datase2 is not None else self.get_data(se2)

    # Same falsy-data guard as the coordinate test further down, so missing
    # data does not fail on ``.other``.
    if data_se2 and "P47" in data_se2.other and se1 in get_p47_adjacency_data(data_se2):
        return True
    if data_se1 and "P47" in data_se1.other and se2 in get_p47_adjacency_data(data_se1):
        return True

    if collisionTwoSEBoundaries(se1, se2):
        return True

    if data_se1 and data_se2 and "coord" in data_se1.other and "coord" in data_se2.other:
        # NOTE(review): the threshold 1 is presumably in degrees (lon/lat
        # points) - confirm.
        distance = Point(data_se1.coord.lon, data_se1.coord.lat).distance(
            Point(data_se2.coord.lon, data_se2.coord.lat))
        if (distance < 1
                and not set(data_se1.class_) & stop_class
                and not set(data_se2.class_) & stop_class):
            return True
    return False
def get_inclusion_relationships(self):
"""
Find all the inclusion relationships between the spatial entities declared in the current STR.
"""
for se_ in tqdm(self.spatial_entities, desc="Extract Inclusion"):
inc_chain_P131 = get_inclusion_chain(se_, "P131")
inc_chain_P706 = get_inclusion_chain(se_, "P706")
inc_chain = inc_chain_P131
inc_chain.extend(inc_chain_P706)
inc_chain = set(inc_chain)
for se2_ in self.spatial_entities:
if se2_ in inc_chain:
if se_ != se2_ and self.inc_rel_db.is_relation(se_,se2_):
self.add_inclusion_rel(se_, se2_)
def get_adjacency_relationships(self):
......@@ -533,21 +349,11 @@ class STR(object):
Find all the adjacency relationships between the spatial entities declared in the current STR.
"""
data = {se: self.get_data(se) for se in self.spatial_entities}
for se1 in tqdm(self.spatial_entities, desc="Extract Adjacency Relationship"):
data_se1 = data[se1]
for se2 in self.spatial_entities:
if se1 == se2: continue
if se1 in self.adjacency_relationships:
if se2 in self.adjacency_relationships[se1]:
continue
if se2 in self.adjacency_relationships:
if se1 in self.adjacency_relationships[se2]:
continue
data_se2 = data[se2]
if self.is_adjacent(se1, se2, data_se1, data_se2):
self.add_adjacency_rel(se1, se2)
if se1 != se2 and self.adj_rel_db.is_relation(se1, se2):
self.add_adjacency_rel(se1,se2)
def build(self, inc=True, adj=True, verbose=False):
"""
......@@ -576,7 +382,6 @@ class STR(object):
graph.add_nodes_from(nodes)
if adj:
debut = time.time()
self.get_adjacency_relationships()
for se1 in self.adjacency_relationships:
for se2 in self.adjacency_relationships[se1]:
......@@ -585,7 +390,6 @@ class STR(object):
graph.add_edge(se2, se1, key=0, color="green")
if inc:
debut = time.time()
self.get_inclusion_relationships()
for se1 in self.inclusion_relationships:
for se2 in self.inclusion_relationships[se1]:
......@@ -616,7 +420,7 @@ class STR(object):
except:
print("Error while saving STR to {0}".format(format))
def getUndirected(self):
def get_undirected(self,simple_graph=True):
"""
Return the Undirected form of a STR graph.
......@@ -625,8 +429,9 @@ class STR(object):
networkx.Graph
unidirected graph
"""
return nx.Graph(self.graph)
if simple_graph:
return nx.Graph(self.graph)
return nx.MultiGraph(self.graph)
def get_geo_data_of_se(self):
"""
......@@ -762,7 +567,7 @@ class STR(object):
Matplotlib figure instance
"""
import matplotlib.pyplot as plt
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
base = world.plot(color='white', edgecolor='black', figsize=(16, 9))
points = []
......@@ -804,24 +609,6 @@ class STR(object):
plt.show()
# def to_Multipoints(x):
# """
# Return a polygon buffered representation for a set of point
# Parameters
# ----------
# x : pandas.Series
# coordinates columns
# Returns
# -------
# shapely.geometry.Polygon
# polygon
# """
# #print(x[["x","y"]].values)
# return Polygon([Point(z) for z in x[["x","y"]].values]).buffer(1)
def to_Polygon(x):
"""
Return a polygon buffered representation for a set of points.
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment