# coding = utf-8
import json

import numpy as np

from ..models.str import STR
from ..helpers.match_cache import MatchingCache
from ..helpers.relation_extraction import AdjacencyRelation, InclusionRelation


class AnnotationAutomatic(object):
    """
    Propose an automatic annotation of STR pairs to ease manual annotation.

    Each pair of STR is scored against four binary criteria (see
    ``criterion1`` to ``criterion4``); every criterion returns ``1`` when it
    is satisfied and ``0`` otherwise.

    Author : Jacques Fize
    """

    def __init__(self, dataset):
        """
        Build the annotator and load the relation tables it relies on.

        Parameters
        ----------
        dataset
            Value forwarded unchanged to ``MatchingCache``.
        """
        self.matching_cache = MatchingCache(dataset)
        self.adj_rel_db = AdjacencyRelation()
        self.inc_rel_db = InclusionRelation()
        # Paths are relative to the current working directory.
        self.inclusion = self._load_json("notebooks/inclusion.json")
        self.adjacency = self._load_json("notebooks/adjacency.json")

    @staticmethod
    def _load_json(path):
        """Parse the JSON file at *path*, closing the file handle afterwards."""
        with open(path) as json_file:
            return json.load(json_file)

    @staticmethod
    def _get_cluster(str_obj, str_id=None):
        """
        Return the clusters of *str_obj*, preferring the lookup by identifier.

        ``str_obj.get_cluster(str_id)`` is tried first; if it raises, the
        call is retried without argument. Only ``Exception`` subclasses
        trigger the fallback, so ``KeyboardInterrupt`` and ``SystemExit``
        still propagate.
        """
        try:
            return str_obj.get_cluster(str_id)
        except Exception:
            return str_obj.get_cluster()

    def all(self, str1, str2, id1=None, id2=None):
        """
        Evaluate the four criteria on a pair of STR.

        When both identifiers are given, the matching cache is queried first
        and a cached result is returned as is.

        Parameters
        ----------
        str1
            First STR.
        str2
            Second STR.
        id1
            Identifier of ``str1`` (optional).
        id2
            Identifier of ``str2`` (optional).

        Returns
        -------
        list
            The values of criterion 1 to 4, in that order.
        """
        # Compare against None so that an identifier equal to 0 still uses
        # the cache.
        if id1 is not None and id2 is not None:
            found, value = self.matching_cache.is_match(int(id1), int(id2))
            if found:
                return list(value)

        crit_ = [
            self.criterion1(str1, str2),
            self.criterion2(str1, str2),
            self.criterion3(str1, str2, id1, id2),
            self.criterion4(str1, str2, id1, id2),
        ]
        # NOTE(review): the result is stored even when id1/id2 are None, and
        # with the raw (non-int) identifiers -- confirm this is what
        # MatchingCache.add expects.
        self.matching_cache.add(id1, id2, *crit_)
        return crit_

    def criterion1(self, str1, str2):
        """
        Tell whether both STR have at least one graph node in common.

        Parameters
        ----------
        str1
            First STR.
        str2
            Second STR.

        Returns
        -------
        int
            1 if the node keys of both graphs intersect, 0 otherwise.
        """
        shared = set(str1.graph.nodes.keys()) & set(str2.graph.nodes.keys())
        return int(len(shared) > 0)

    def criterion2(self, str1: STR, str2: STR):
        """
        Tell whether two STR hold distinct, non-shared spatial entities that
        are linked by an inclusion or an adjacency relation.

        Entities that appear as graph nodes in both STR are ignored.

        Parameters
        ----------
        str1
            First STR.
        str2
            Second STR.

        Returns
        -------
        int
            1 as soon as one related pair is found, 0 otherwise.

        Raises
        ------
        KeyError
            If a tested pair is missing from the inclusion or adjacency
            table.
        """
        stop_en = set(str1.graph.nodes.keys()) & set(str2.graph.nodes.keys())
        for es in str1.spatial_entities:
            if es in stop_en:
                # Shared entity: no pair involving it can qualify.
                continue
            for es2 in str2.spatial_entities:
                if es2 in stop_en or es == es2:
                    continue
                # Inclusion is looked up first; adjacency only if it is falsy.
                if self.inclusion[es][es2] or self.adjacency[es][es2]:
                    return 1
        return 0

    def criterion3(self, str1: STR, str2: STR, id1=None, id2=None):
        """
        Tell whether at least one of the larger clusters of ``str1``
        intersects a cluster of ``str2``.

        Only the clusters of ``str1`` whose area is greater than or equal to
        the mean cluster area of ``str1`` are considered. According to the
        original author, clusters are built from spatial entities that are
        close to each other, using the Mean-Shift implementation of
        scikit-learn.

        Parameters
        ----------
        str1
            First STR.
        str2
            Second STR.
        id1
            Identifier of ``str1`` (optional).
        id2
            Identifier of ``str2`` (optional).

        Returns
        -------
        int
            1 if an intersection is found, 0 otherwise (including when one
            of the cluster tables has no "geometry" entry).
        """
        c1 = self._get_cluster(str1, id1)
        c2 = self._get_cluster(str2, id2)
        if ("geometry" not in c1) or ("geometry" not in c2):
            return 0

        # assign() works on copies, so the tables returned by get_cluster()
        # are left untouched.
        c1 = c1.assign(area_=c1.area).sort_values(by="area_", ascending=False)
        c2 = c2.assign(area_=c2.area).sort_values(by="area_", ascending=False)

        mean = np.mean(c1.area_)
        c1 = c1[c1.area_ >= mean]
        # NOTE(review): assuming c1/c2 are GeoDataFrames, intersects() is an
        # element-wise (index-aligned) test, not an all-pairs test -- confirm
        # this is the intended comparison.
        return int(c1.intersects(c2).any())

    def criterion4(self, str1, str2, id1=None, id2=None):
        """
        Tell whether every cluster of ``str1`` intersects its counterpart in
        ``str2``.

        Uses the same cluster tables as ``criterion3``.

        Parameters
        ----------
        str1
            First STR.
        str2
            Second STR.
        id1
            Identifier of ``str1`` (optional).
        id2
            Identifier of ``str2`` (optional).

        Returns
        -------
        int
            1 if all tested intersections hold, 0 otherwise (including when
            one of the cluster tables has no "geometry" entry).
        """
        c1 = self._get_cluster(str1, id1)
        c2 = self._get_cluster(str2, id2)
        if ("geometry" not in c1) or ("geometry" not in c2):
            return 0
        # NOTE(review): same element-wise caveat as in criterion3.
        return int(c1.intersects(c2).all())