From 847ed7fbcd853341280f6a3e79c8a901e471b11e Mon Sep 17 00:00:00 2001
From: Fize Jacques <jacques.fize@cirad.fr>
Date: Wed, 13 Mar 2019 15:12:21 +0100
Subject: [PATCH] Add relation extraction cache using SQLite + add entry for
 SQLite db path in config.json

---
 .gitignore                               |   3 +-
 auto_fill_annotation.py                  |   9 +-
 strpython/config/config.json             |   3 +-
 strpython/helpers/geodict_helpers.py     |   2 +-
 strpython/helpers/geodict_helpers_old.py |   2 +-
 strpython/helpers/objectify.py           |  87 --------
 strpython/models/str.py                  | 253 ++---------------------
 7 files changed, 33 insertions(+), 326 deletions(-)
 delete mode 100644 strpython/helpers/objectify.py

diff --git a/.gitignore b/.gitignore
index 4a97748..f380ba1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,4 +26,5 @@ __pycache__/
 *.npy
 *.pkl
 *cache.json
-*.gexf
\ No newline at end of file
+*.gexf
+temp_cluster/
diff --git a/auto_fill_annotation.py b/auto_fill_annotation.py
index abcddfc..d683b74 100644
--- a/auto_fill_annotation.py
+++ b/auto_fill_annotation.py
@@ -5,9 +5,13 @@ import argparse, os, re, json, glob
 import pandas as pd
 import networkx as nx
 
-from strpython.eval.automatic_annotation import AnnotationAutomatic,save_cache
+from strpython.eval.automatic_annotation import AnnotationAutomatic,save_cache,add_cache
 from strpython.models.str import STR
 from tqdm import tqdm,TqdmSynchronisationWarning
+from joblib import Parallel, delayed
+from multiprocessing import cpu_count
+
+
 import warnings
 warnings.simplefilter("ignore", TqdmSynchronisationWarning)
 tqdm.pandas()
@@ -40,10 +44,11 @@ def foo(x):
         return annotater.all(strs[x.G1], strs[x.G2],x.G1, x.G2)
     except KeyError as e:
         print(e)
+        add_cache(strs[x.G1], strs[x.G2],[0, 0, 0, 0])
         return [0, 0, 0, 0]
 
 
-df["res"] = df.progress_apply(lambda x: foo(x), axis=1)
+df["res"] = Parallel(n_jobs=cpu_count())(delayed(foo)(x) for x in tqdm(df.itertuples()))#df.progress_apply(lambda x: foo(x), axis=1)
 df.res=df.res.apply(lambda x :list(map(int,x)) if x else [])
 df[["c1"]] = df.res.apply(lambda x: x[0] if len(x)>0 else 0)
 df[["c2"]] = df.res.apply(lambda x: x[1] if len(x)>0 else 0)
diff --git a/strpython/config/config.json b/strpython/config/config.json
index 2a86282..5f32fb2 100644
--- a/strpython/config/config.json
+++ b/strpython/config/config.json
@@ -12,5 +12,6 @@
     "count":"/Users/jacquesfize/nas_cloud/Code/str-python/strpython/resources/count_wiki.pkl"
   },
   "language_resources_path":"/Users/jacquesfize/nas_cloud/Code/str-python/strpython/resources/language_resources",
-  "gazetteer":"geodict"
+  "gazetteer":"geodict",
+  "relation_db_path" : "/Users/jacquesfize/.services/relation_match.db"
 }
\ No newline at end of file
diff --git a/strpython/helpers/geodict_helpers.py b/strpython/helpers/geodict_helpers.py
index 1425206..6c174d3 100644
--- a/strpython/helpers/geodict_helpers.py
+++ b/strpython/helpers/geodict_helpers.py
@@ -5,7 +5,7 @@ import re
 from elasticsearch import Elasticsearch
 from ..config.configuration import config
 import pandas as pd
-from ..helpers.objectify import objectify
+
 
 import gazpy as ga
 
diff --git a/strpython/helpers/geodict_helpers_old.py b/strpython/helpers/geodict_helpers_old.py
index 7da757d..dffac38 100644
--- a/strpython/helpers/geodict_helpers_old.py
+++ b/strpython/helpers/geodict_helpers_old.py
@@ -5,7 +5,7 @@ import re
 from elasticsearch import Elasticsearch
 from ..config.configuration import config
 import pandas as pd
-from ..helpers.objectify import objectify
+from mytoolbox.structure.objectify import objectify
 
 es = Elasticsearch(config.es_server)
 
diff --git a/strpython/helpers/objectify.py b/strpython/helpers/objectify.py
deleted file mode 100644
index 1bf4780..0000000
--- a/strpython/helpers/objectify.py
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/usr/bin/env python
-
-
-"""Scrap module.
-
-Just tiny bits & bolts.
-
-.. author: Adrian Castravete
-.. modified by : Jacques Fize (Implemented for Python 3 and recursive objectification)
-"""
-
-from functools import wraps
-
-
-def objectify(func):
-    """Mimic an object given a dictionary.
-
-    Given a dictionary, create an object and make sure that each of its
-    keys are accessible via attributes.
-    If func is a function act as decorator, otherwise just change the dictionary
-    and return it.
-    :param func: A function or another kind of object.
-    :returns: Either the wrapper for the decorator, or the changed value.
-
-    Example::
-
-    >>> obj = {'old_key': 'old_value'}
-    >>> oobj = objectify(obj)
-    >>> oobj['new_key'] = 'new_value'
-    >>> print oobj['old_key'], oobj['new_key'], oobj.old_key, oobj.new_key
-
-    >>> @objectify
-    ... def func():
-    ...     return {'old_key': 'old_value'}
-    >>> obj = func()
-    >>> obj['new_key'] = 'new_value'
-    >>> print obj['old_key'], obj['new_key'], obj.old_key, obj.new_key
-
-    """
-
-    def create_object(value):
-        """Create the object.
-
-        Given a dictionary, create an object and make sure that each of its
-        keys are accessible via attributes.
-        Ignore everything if the given value is not a dictionary.
-        :param value: A dictionary or another kind of object.
-        :returns: Either the created object or the given value.
-
-        """
-        if isinstance(value, dict):
-            # Build a simple generic object.
-            class Object(dict):
-                def __setitem__(self, key, val):
-                    setattr(self, key, val)
-                    return super(Object, self).__setitem__(key, val)
-
-            # Create that simple generic object.
-            ret_obj = Object()
-            # Assign the attributes given the dictionary keys.
-            for key, val in value.items():
-                if isinstance(val,dict):
-                    ret_obj[key] = objectify(val)
-                else:
-                    ret_obj[key] = val
-                setattr(ret_obj, key, val)
-            return ret_obj
-        else:
-            return value
-
-    # If func is a function, wrap around and act like a decorator.
-    if hasattr(func, '__call__'):
-        @wraps(func)
-        def wrapper(*args, **kwargs):
-            """Wrapper function for the decorator.
-
-            :returns: The return value of the decorated function.
-
-            """
-            value = func(*args, **kwargs)
-            return create_object(value)
-
-        return wrapper
-
-    # Else just try to objectify the value given.
-    else:
-        return create_object(func)
diff --git a/strpython/models/str.py b/strpython/models/str.py
index 72aa9ca..e2d70b9 100644
--- a/strpython/models/str.py
+++ b/strpython/models/str.py
@@ -1,26 +1,21 @@
 # coding = utf-8
 import copy
-import logging
 import os
 import time
 import warnings
 
+
 from tqdm import tqdm
 import folium
 import geopandas as gpd
 import networkx as nx
 import pandas as pd
 from shapely.geometry import MultiPoint, Polygon, Point, LineString
+from sklearn.cluster import MeanShift, estimate_bandwidth, dbscan
+import matplotlib.pyplot as plt
 
-from ..helpers.collision import collisionTwoSEBoundaries, add_cache_adjacency
 from ..helpers.geodict_helpers import gazetteer
-from ..eval.stats import most_common
-
-from sklearn.cluster import MeanShift, estimate_bandwidth, KMeans, dbscan
-import numpy as np
-
-
-# logging.basicConfig(filename=config.log_file,level=logging.INFO)
+from strpython.helpers.relation_extraction import AdjacencyRelation, InclusionRelation
 
 
 def get_inclusion_chain(id_, prop):
@@ -42,13 +37,11 @@ class STR(object):
     """
     Str basic structure
     """
-    __cache_inclusion = {}  # Store inclusion relations found between spaital entities
-    __cache_adjacency = {}  # Store adjacency relations found between spaital entities
     __cache_entity_data = {}  #  Store data about entity requested
 
     def __init__(self, tagged_text, spatial_entities,toponym_first=True):
         """
-        Constructir
+        Constructor
 
         Parameters
         ----------
@@ -71,6 +64,11 @@ class STR(object):
         self.adjacency_relationships = {}
         self.inclusion_relationships = {}
 
+        self.adj_rel_db=AdjacencyRelation()
+        self.inc_rel_db = InclusionRelation()
+
+        self.graph = nx.MultiDiGraph()
+
     @staticmethod
     def from_networkx_graph(g: nx.Graph, tagged_: list = []):
         """
@@ -164,10 +162,8 @@ class STR(object):
             id1, id2 = edge[0], edge[1]
             if edge[2]["color"] == "green":
                 self.add_adjacency_rel(edge[0], edge[1])
-                self.add_cache__adjacency(id1, id2, True)
             elif edge[2]["color"] == "red":
                 self.add_inclusion_rel(edge[0], edge[1])
-                self.add_cache_inclusion(id1, id2, True)
 
     def add_spatial_entity(self, id, label=None, v=True):
         """
@@ -213,7 +209,7 @@ class STR(object):
             except:
                 label = None
             self.add_spatial_entity(id, label, False)
-        # print(self.graph.nodes(data=True))
+
 
     def add_adjacency_rel(self, se1, se2):
         """
@@ -231,7 +227,6 @@ class STR(object):
         if not se1 in self.adjacency_relationships: self.adjacency_relationships[se1] = {}
         if not se2 in self.adjacency_relationships: self.adjacency_relationships[se2] = {}
         self.adjacency_relationships[se1][se2], self.adjacency_relationships[se2][se1] = True, True
-        self.add_cache__adjacency(se1, se2, True)
 
     def add_inclusion_rel(self, se1, se2):
         """
@@ -248,47 +243,9 @@ class STR(object):
         if not se1 in self.inclusion_relationships:
             self.inclusion_relationships[se1] = {}
         self.inclusion_relationships[se1][se2] = True
-        self.add_cache_inclusion(se1, se2, True)
-
-    def add_cache_inclusion(self, id1, id2, v=True):
-        """
-        Add a relation of inclusion in a cache variable
-
-        Parameters
-        ----------
-        id1 : str
-            id of the first spatial entity
-        id2 : str
-            id of the second spatial entity
-        v : bool, optional
-            if the relation exists between the two spatial entities. Default is True
-
-        """
 
-        if not id1 in STR.__cache_inclusion:
-            STR.__cache_inclusion[id1] = {}
-        STR.__cache_inclusion[id1][id2] = v
 
-    def add_cache__adjacency(self, se1, se2, v=True):
-        """
-        Add a relation of adjacency in a cache variable
 
-        Parameters
-        ----------
-        id1 : str
-            id of the first spatial entity
-        id2 : str
-            id of the second spatial entity
-        v : bool, optional
-            if the relation exists between the two spatial entities. Default is True
-
-        """
-        if not se1 in STR.__cache_adjacency:
-            STR.__cache_adjacency[se1] = {}
-        if not se2 in STR.__cache_adjacency:
-            STR.__cache_adjacency[se2] = {}
-        STR.__cache_adjacency[se1][se2] = v
-        STR.__cache_adjacency[se2][se1] = v
 
     def get_data(self, id_se):
         """
@@ -376,156 +333,15 @@ class STR(object):
                 if self.adjacency_relationships[se1][se2]:
                     self.graph.add_edge(se1, se2, key=0, color="green")
 
-    def is_included_in(self, se1_id, se2_id):
-        """
-        Return True if a spatial entity is included within another one.
-
-        Parameters
-        ----------
-        se1_id : str
-            id of the contained entity
-        se2_id : str
-            id of the entity container
-
-        Returns
-        -------
-        bool
-            if se1 included in se2
-        """
-
-        if se1_id in self.inclusion_relationships:
-            if se2_id in self.inclusion_relationships[se1_id]:
-                return self.inclusion_relationships[se1_id][se2_id]
-
-        inc_chain_P131 = get_inclusion_chain(se1_id, "P131")
-        inc_chain_P706 = get_inclusion_chain(se1_id, "P706")
-        inc_chain = inc_chain_P131
-        inc_chain.extend(inc_chain_P706)
-        inc_chain = set(inc_chain)
-        if se2_id in inc_chain:
-            self.add_cache_inclusion(se1_id, se2_id, True)
-            return True
-
-        return False
-
-    def is_adjacent_cache(self, se1, se2):
-        """
-        Return true if two spatial entities were found adjacent previously.
-
-        Parameters
-        ----------
-        se1 : str
-            id of the first spatial entity
-        se2 : str
-            id of the second spatial entity
-
-        Returns
-        -------
-        bool
-            if se1 adjacent to se2
-        """
-
-        if se1 in STR.__cache_adjacency:
-            if se2 in STR.__cache_adjacency[se1]:
-                return STR.__cache_adjacency[se1][se2]
-        if se2 in STR.__cache_adjacency:
-            if se1 in STR.__cache_adjacency[se2]:
-                return STR.__cache_adjacency[se2][se1]
-        return False
-
-    def is_included_cache(self, se1, se2):
-        """
-        Return true if a spatial entity were found included previously in an other one.
-
-        Parameters
-        ----------
-        se1 : str
-            id of the first spatial entity
-        se2 : str
-            id of the second spatial entity
-
-        Returns
-        -------
-        bool
-            if se1 included to se2
-        """
-        if se1 in STR.__cache_inclusion:
-            if se2 in STR.__cache_inclusion[se1]:
-                return STR.__cache_inclusion[se1][se2]
-        return False
-
-    def is_adjacent(self, se1, se2, datase1=None, datase2=None):
-        """
-        Return true if se1 is adjacent to se2.
-
-        Parameters
-        ----------
-        se1 : str
-            id of the first spatial entity
-        se2 : str
-            id of the second spatial entity
-        datase1 : gazpy.Element, optional
-            if given cached data concerning the spatial entity with id = se1 (the default is None)
-        datase2 : gazpy.Element, optional
-            if given cached data concerning the spatial entity with id = se2 (the default is None)
-
-        Returns
-        -------
-        bool
-            true if adjacent
-        """
-
-        stop_class = set(["A-PCLI", "A-ADM1"])
-
-        def get_p47_adjacency_data(data):
-            p47se1 = []
-            for el in data.other.P47:
-                d = gazetteer.get_by_other_id(el, "wikidata")
-                if not d: continue
-                p47se1.append(d[0].id)
-            return p47se1
-
-        if self.is_adjacent_cache(se1, se2):
-            return False
-
-        if self.is_included_in(se1, se2) or self.is_included_in(se2, se1):
-            return False
-
-        data_se1, data_se2 = self.get_data(se1), self.get_data(se2)
-
-        if "P47" in data_se2.other and se1 in get_p47_adjacency_data(data_se2):
-            return True
-            # print("P47")
-        elif "P47" in data_se1.other and se2 in get_p47_adjacency_data(data_se1):
-            return True
-            # print("P47")
-
-        if collisionTwoSEBoundaries(se1, se2):
-            return True
-
-        if data_se1 and  data_se2 and "coord" in data_se1.other and "coord" in data_se2.other:
-            if Point(data_se1.coord.lon, data_se1.coord.lat).distance(
-                    Point(data_se2.coord.lon, data_se2.coord.lat)) < 1 and len(
-                set(data_se1.class_) & stop_class) < 1 and len(set(data_se2.class_) & stop_class) < 1:
-                return True
-        return False
 
     def get_inclusion_relationships(self):
         """
         Find all the inclusion relationships between the spatial entities declared in the current STR.
 
         """
-
         for se_ in tqdm(self.spatial_entities, desc="Extract Inclusion"):
-            inc_chain_P131 = get_inclusion_chain(se_, "P131")
-            inc_chain_P706 = get_inclusion_chain(se_, "P706")
-
-            inc_chain = inc_chain_P131
-            inc_chain.extend(inc_chain_P706)
-            inc_chain = set(inc_chain)
-
             for se2_ in self.spatial_entities:
-                if se2_ in inc_chain:
+                if se_ != se2_ and self.inc_rel_db.is_relation(se_,se2_):
                     self.add_inclusion_rel(se_, se2_)
 
     def get_adjacency_relationships(self):
@@ -533,21 +349,11 @@ class STR(object):
         Find all the adjacency relationships between the spatial entities declared in the current STR.
         """
 
-        data = {se: self.get_data(se) for se in self.spatial_entities}
-
         for se1 in tqdm(self.spatial_entities, desc="Extract Adjacency Relationship"):
-            data_se1 = data[se1]
             for se2 in self.spatial_entities:
-                if se1 == se2: continue
-                if se1 in self.adjacency_relationships:
-                    if se2 in self.adjacency_relationships[se1]:
-                        continue
-                if se2 in self.adjacency_relationships:
-                    if se1 in self.adjacency_relationships[se2]:
-                        continue
-                data_se2 = data[se2]
-                if self.is_adjacent(se1, se2, data_se1, data_se2):
-                    self.add_adjacency_rel(se1, se2)
+                if se1 != se2 and self.adj_rel_db.is_relation(se1, se2):
+                    self.add_adjacency_rel(se1,se2)
+
 
     def build(self, inc=True, adj=True, verbose=False):
         """
@@ -576,7 +382,6 @@ class STR(object):
         graph.add_nodes_from(nodes)
 
         if adj:
-            debut = time.time()
             self.get_adjacency_relationships()
             for se1 in self.adjacency_relationships:
                 for se2 in self.adjacency_relationships[se1]:
@@ -585,7 +390,6 @@ class STR(object):
                         graph.add_edge(se2, se1, key=0, color="green")
 
         if inc:
-            debut = time.time()
             self.get_inclusion_relationships()
             for se1 in self.inclusion_relationships:
                 for se2 in self.inclusion_relationships[se1]:
@@ -616,7 +420,7 @@ class STR(object):
         except:
             print("Error while saving STR to {0}".format(format))
 
-    def getUndirected(self):
+    def get_undirected(self,simple_graph=True):
         """
         Return the Undirected form of a STR graph.
 
@@ -625,8 +429,9 @@ class STR(object):
         networkx.Graph
             unidirected graph
         """
-
-        return nx.Graph(self.graph)
+        if simple_graph:
+            return  nx.Graph(self.graph)
+        return nx.MultiGraph(self.graph)
 
     def get_geo_data_of_se(self):
         """
@@ -762,7 +567,7 @@ class STR(object):
             Matplotlib figure instance
         """
 
-        import matplotlib.pyplot as plt
+
         world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
         base = world.plot(color='white', edgecolor='black', figsize=(16, 9))
         points = []
@@ -804,24 +609,6 @@ class STR(object):
         plt.show()
 
 
-# def to_Multipoints(x):
-#     """
-#     Return a polygon buffered representation for a set of point
-
-#     Parameters
-#     ----------
-#     x : pandas.Series
-#         coordinates columns
-
-#     Returns
-#     -------
-#     shapely.geometry.Polygon
-#         polygon
-#     """
-
-#     #print(x[["x","y"]].values)
-#     return Polygon([Point(z) for z in x[["x","y"]].values]).buffer(1)
-
 def to_Polygon(x):
     """
     Return a polygon buffered representation for a set of points.
-- 
GitLab