Commit e7ec63d0 authored by Pokiros

Add graph transformations + modify graph viewer + add field in config + change gmatch4py + change count in disambiguator (geodict ID used instead of label)
parent 526352ea
Showing with 470 additions and 428 deletions
......@@ -3,5 +3,6 @@
"osm_boundaries_directory":"/Users/jacquesfize/install",
"core_nlp_URL":"http://localhost:9000",
"es_server_old":"http://192.168.1.15:9200/",
"es_server":"http://localhost:9200/"
"es_server":"http://localhost:9200/",
"database_json":"resources/database_graph_viewer.db"
}
\ No newline at end of file
......@@ -13,12 +13,16 @@ class PageRankDisambiguator(Disambiguator):
def disambiguate(self, ner_result, lang="en"):
count,se_ = self.extract_se_entities(ner_result)
new_count={}
selected_en = {}
for en in se_:
if label_exists(en, lang):
id_ = get_most_common_id(en, lang)
selected_en[id_] = en
new_count[id_]=count[en]
elif alias_exists(en,lang):
id_ = get_most_common_id_alias(en, lang)
selected_en[id_] = en
return count,selected_en
new_count[id_] = count[en]
return new_count,selected_en
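In effect, this hunk re-keys the returned counts by geodict ID rather than by surface label, matching the commit message. A hedged sketch of the before/after shapes (the IDs and labels below are invented, not from the corpus):

```python
# Old return value: counts keyed by the entity label.
count = {"Paris": 3, "Montpellier": 1}

# New return values: counts follow the disambiguated geodict IDs,
# and selected_en maps each ID back to its label.
new_count   = {"GD123": 3, "GD456": 1}
selected_en = {"GD123": "Paris", "GD456": "Montpellier"}
```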
# coding: utf-8
import glob
# Graph Edit Distance Algorithm Import
from gmatch4py.ged.geo_hed import GeoHED
from gmatch4py.ged.greedy_edit_distance import GreedyEditDistance
from gmatch4py.ged.hausdorff_edit_distance import HED
from progressbar import ProgressBar, Timer, Bar, ETA
......@@ -16,52 +11,74 @@ from gmatch4py.kernels.weisfeiler_lehman import *
from gmatch4py.kernels.weisfeiler_lehman_geo import *
from gmatch4py.kernels.weisfeiler_lehman_edge_geo import *
from gmatch4py.ged.bipartite_graph_matching_2 import BP_2
from gmatch4py.mcs import MCS
from pipeline import *
import glob,time
# Functions for output generation
def_temp=[36,-36]
temp=def_temp
max_temp=-30
dec=5
def getLocInfo(id_):
data=get_data(id_)
if 'coord' in data:
return [data["coord"]["lat"],data["coord"]["lon"]]
return [0,0]
global temp,dec
try:
data=get_data(id_)
if 'coord' in data:
return [data["coord"]["lat"],data["coord"]["lon"]]
else:
temp = [temp[0] , temp[1]+dec]
if temp[1] >= max_temp:
temp = [temp[0] +dec, def_temp[1]]
return temp
except:
pass
def get_associated_es(associated_es_data):
global temp
new_={}
temp=def_temp
for id_ in associated_es_data:
new_[id_]={"label":associated_es_data[id_],"coord":getLocInfo(id_)}
try:
new_[id_]={"label":get_data(id_)["en"],"coord":getLocInfo(id_)}
except:
new_[id_] = {"label": id_, "coord": getLocInfo(id_)}
return new_
def getEdges4Draw(associated_es,edges):
data={}
for es in associated_es:
data[es]=getLocInfo(es)
def getEdges4Draw(data,edges):
lines=[]
for ed in edges:
lines.append([data[ed[0]],data[ed[1]],ed[2]["color"]])
return lines
lines.append([data[ed[0]]["coord"],data[ed[1]]["coord"],ed[2]["color"]])
if lines[-1][-1] == "cyan":
lines[-1][-1] = "blue"
return lines
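For clarity, getEdges4Draw now takes the dictionary built by get_associated_es and emits one [source coord, target coord, color] triple per edge, remapping cyan to blue for display. A small invented example of the expected shapes:

```python
# Hypothetical input, mirroring the structure produced by get_associated_es above.
data = {
    "GD123": {"label": "Paris",       "coord": [48.85, 2.35]},
    "GD456": {"label": "Montpellier", "coord": [43.61, 3.88]},
}
edges = [("GD123", "GD456", {"color": "cyan"})]

getEdges4Draw(data, edges)
# -> [[[48.85, 2.35], [43.61, 3.88], "blue"]]
```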
# Similarity Function between graph and a set of graphs
grap_kernel_results=[]
graph_lookup={}
def compareMCS(graphs):
return MCS.compare(graphs)
# GED algorithm
def compareGED(id_,graphs):
return ApproximateGraphEditDistance.compare(graphs)[id_]
def compareGED(graphs):
return ApproximateGraphEditDistance.compare(graphs)
def compareBP2(graphs):
return BP_2.compare(graphs)
def compareBP2(id_,graphs):
return BP_2.compare(graphs)[id_]
def compareHED(graphs):
return HED.compare(graphs)
def compareHED(id_,graphs):
return HED.compare(graphs)[id_]
def compareGEOHED(graphs):
return GeoHED.compare(graphs)
def compareGreedy(id_,graphs):
return GreedyEditDistance.compare(graphs)[id_]
def compareGreedy(graphs):
return GreedyEditDistance.compare(graphs)
def compareWLSubTreeKernel(id_,graphs):
def compareWLSubTreeKernel(graphs):
global grap_kernel_results, graph_lookup
sc = np.zeros(len(graphs))
......@@ -74,9 +91,9 @@ def compareWLSubTreeKernel(id_,graphs):
grap_kernel_results=WeisfeleirLehmanKernel.compare(graphs_array,h=3)
return 1 - grap_kernel_results[id_]
return 1 - grap_kernel_results
def compareWLSubTreeKernelGeo(id_,graphs):
def compareWLSubTreeKernelGeo(graphs):
global grap_kernel_results, graph_lookup
sc = np.zeros(len(graphs))
......@@ -92,30 +109,32 @@ def compareWLSubTreeKernelGeo(id_,graphs):
grap_kernel_results=WeisfeleirLehmanKernelGEO.compare(graphs_array,h=3)
grap_kernel_results= np.nan_to_num(grap_kernel_results)
return 1-grap_kernel_results[id_]
return 1-grap_kernel_results
def compareWLSubTreeKernelEdgeGeo(id_,graphs):
def compareWLSubTreeKernelEdgeGeo(graphs):
global grap_kernel_results, graph_lookup
sc = np.zeros(len(graphs))
if len(grap_kernel_results)<1:
graphs_array = [None for i in range(max(graphs.keys()))]
graphs_array = [None for i in range(len(graphs))]
for i,g in graphs.items():
graphs_array[i]=g
grap_kernel_results=WeisfeleirLehmanKernelEdgeGeo.compare(graphs_array,h=3)
grap_kernel_results= np.nan_to_num(grap_kernel_results)
return 1-grap_kernel_results[id_]
return 1-grap_kernel_results
funcDict={
"MCS":compareMCS,
"GED":compareGED,
"BP2":compareBP2,
"HED":compareHED,
"GEOHED":compareGEOHED,
"GREEDY":compareGreedy,
"WLSUBTREE":compareWLSubTreeKernel,
"WLSUBTREEGEO":compareWLSubTreeKernelGeo,
......@@ -130,6 +149,7 @@ parser.add_argument("texts_dir")
parser.add_argument("graphs_dir")
parser.add_argument("metadata_fn")
parser.add_argument("-e","--evalEPI",action="store_true")
parser.add_argument("-a","--all",action="store_true")
parser.add_argument("-o","--output",help="Output Filename",default="GED")
args = parser.parse_args()
......@@ -137,8 +157,6 @@ args = parser.parse_args()
if not args.distance in funcDict.keys():
raise NotFoundDistance(args.distance,funcDict)
exit()
# Initialize Pipeline for Spatial Entities extraction and STR construction
# Load all the text from the corpus
texts=[]
......@@ -161,9 +179,8 @@ if not texts:
print("No text files were loaded !")
exit()
# Load graph data and associated spatial entities of each graph
# LOAD graph data and associated spatial entities of each graph
assC=json.load(open(args.metadata_fn))
associated_es,count_per_doc=assC[0],assC[1]
......@@ -172,17 +189,19 @@ for file in glob.glob(args.graphs_dir.rstrip("/")+"/*.gexf"):
id=int(re.findall("\d+",file)[0])
graphs[id]=nx.read_gexf(file)
# We take 50 documents chosen randomly. Then we test whether the top-10 returned documents are relevant !
if args.evalEPI:
selected_documents_=json.load(open("data/random_selected_doc.json"))
elif args.all:
selected_documents_=list(graphs.keys())
else:
selected_documents_ = []
ids=list(range(len(graphs)))
ids=[]
for i in range(len(graphs)):
if len(graphs[i])>1:
ids.append(i)
import random
random.shuffle(ids)
try:
......@@ -192,39 +211,38 @@ else:
# Generating Evaluation Output
top_ten_documents=[]
final_data={}
deb=time.time()
similarity_matrix = funcDict[args.distance](graphs)
print("Similarity Matrix Computed in {0} s.".format(time.time()-deb))
with ProgressBar(max_value=len(selected_documents_),widgets=[' [', Timer(), '] ',Bar(),' (', ETA(), ') ',]) as pg:
inc=0
for doc_s in selected_documents_:
if not len(graphs)>0:
pass
if not len(graphs[doc_s])>0:
continue
id_json=str(doc_s)
bow_score=funcDict[args.distance](doc_s,graphs)
bow_score=similarity_matrix[doc_s]
top_10_docs_score=np.sort(bow_score)[1:11].astype(float)
top_10_docs=np.argsort(bow_score)[1:11].astype(int)
final_data[id_json]={
"sp_entities":get_associated_es(associated_es[id_json]),
final_data[doc_s]={
"sp_entities":get_associated_es(graphs[doc_s].nodes()),
"text":texts[doc_s],
"edges":getEdges4Draw(associated_es[id_json],graphs[doc_s].edges(data=True))
}
final_data[id_json]["top_10"]=[]
final_data[doc_s]["edges"]=getEdges4Draw(final_data[doc_s]["sp_entities"],graphs[doc_s].edges(data=True))
#print(final_data[doc_s]["edges"])
final_data[doc_s]["top_10"]=[]
for d in range(len(top_10_docs)):
doc_data={}
doc_data["score"]=top_10_docs_score[d]
doc_data["id_txt"]=int(top_10_docs[d])
doc_data["text"]=texts[int(top_10_docs[d])]
doc_data["sp_entities"]=get_associated_es(associated_es[str(doc_data["id_txt"])])
doc_data["edges"]=getEdges4Draw(associated_es[str(doc_data["id_txt"])],graphs[doc_data["id_txt"]].edges(data=True))
doc_data["sp_entities"]=get_associated_es(graphs[doc_data["id_txt"]].nodes())
doc_data["edges"]=getEdges4Draw(doc_data["sp_entities"],graphs[doc_data["id_txt"]].edges(data=True))
doc_data["relevant"]=None
final_data[id_json]["top_10"].append(doc_data)
final_data[doc_s]["top_10"].append(doc_data)
inc+=1
pg.update(inc)
open("graph_viewer/evalTop10STR_{0}.json".format(args.distance),'w').write(json.dumps(final_data,indent=4))
open("gui_graph_viewer/evalTopJPT10STR_{0}.json".format(args.distance),'w').write(json.dumps(final_data,indent=4))
......@@ -35,6 +35,7 @@ pipeline= {
}
# Read Input Files
texts_=[]
......@@ -45,9 +46,12 @@ if os.path.exists(args.texts_input_dir):
exit()
for fn in files_:
try:
texts_.append(open(fn).read())
tex=open(fn).read()
lang = Detector(tex, quiet=True).language.code # run language detection here so encoding bugs surface early
texts_.append(tex)
except:
print("{0} could'nt be read !".format(fn))
print("{0} could'nt be read ! Add Lorem Ipsum instead".format(fn))
texts_.append("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.")
# If output Dir doesn't exists
......@@ -62,6 +66,7 @@ if not texts_:
data={}
n=0
for text in range(len(texts_)):
# try:
lang=Detector(texts_[text],quiet=True).language.code
......@@ -70,9 +75,10 @@ for text in range(len(texts_)):
if lang in pipeline:
data[lang].append(text)
else:
if not "en" in data:data["en"]=[] # Ca peut arriver :s :s :s !!!
data["en"].append(text)
# except:
# print("No Language Detected")
# except:
# n+=1 # encoding error
associated_es={}
count_per_doc={}
......@@ -91,11 +97,18 @@ with ProgressBar(max_value=len(texts_),widgets=[' [', Timer(), '] ',Bar(),' (',
else:
t=filter_nonprintable(texts_[id_doc])
a, b, c = pipeline[lang].parse(t)
list_gs.append(pipeline[lang].build(t).graph)
# Save Metadata
count_per_doc[id_doc] = a
associated_es[id_doc] = c
try:
a, b, c = pipeline[lang].parse(t)
list_gs.append(pipeline[lang].build(t).graph)
# Save Metadata
count_per_doc[id_doc] = a
associated_es[id_doc] = c
except: # NER Bug
count_per_doc[id_doc] = {}
associated_es[id_doc] = {}
g = nx.MultiDiGraph()
list_gs.append(g)
# Save Graph structure
nx.write_gexf(list_gs[-1], args.graphs_output_dir+"/{0}.gexf".format(id_doc))
i+=1
......
# coding: utf-8
import glob
from gmatch4py.ged.geo_bp2 import GeoBP2
# Graph Edit Distance Algorithm Import
from gmatch4py.ged.geo_ged import GeoGED
from gmatch4py.ged.geo_hed import GeoHED
from gmatch4py.ged.greedy_edit_distance import GreedyEditDistance
from gmatch4py.ged.hausdorff_edit_distance import HED
from progressbar import ProgressBar, Timer, Bar, ETA
# Disambiguator import
from disambiguator.geodict_gaurav import *
from gmatch4py.exception import NotFoundDistance
from gmatch4py.kernels.weisfeiler_lehman import *
from gmatch4py.kernels.weisfeiler_lehman_geo import *
from gmatch4py.kernels.weisfeiler_lehman_edge_geo import WeisfeleirLehmanKernelEdgeGeo
from pipeline import *
from pos_tagger.tagger import Tagger
# Similarity Function between graph and a set of graphs
grap_kernel_results=[]
graph_lookup={}
def compareGED(id_,graphs):
g=graphs[id_]
sc=np.zeros(len(graphs))
for id_,g2 in graphs.items():
score=ged.compare(g,g2)
sc[id_]=score
return sc
def compareGEOGED(id1,graphs):
g=graphs[id1]
sc=np.zeros(len(graphs))
for id_,g2 in graphs.items():
try:
if len(g2) >1:
gg=GeoGED(g,g2)
score=gg.distance()
sc[id_] = score
else:
sc[id_]=np.inf
except:
sc[id_] = np.inf
return sc
def compareBP2(id_,graphs):
bp2=BP_2()
g = graphs[id_]
sc = np.zeros(len(graphs))
for id_, g2 in graphs.items():
if len(g2) >0:
score = bp2.bp2(g, g2)
sc[id_] = score
else:
sc[id_] = np.inf
return sc
def compareSubTreeKernel(id_,graphs):
global grap_kernel_results, graph_lookup
sc = np.zeros(len(graphs))
if len(grap_kernel_results)<1:
graphs_array=[None for i in range(len(graphs))]
for i,g in graphs.items():
graphs_array[i]=g
grap_kernel_results=WeisfeleirLehmanKernel.compare(graphs_array,h=3)
return 1 - grap_kernel_results[id_]
def compareSubTreeKernelGeo(id_,graphs):
global grap_kernel_results, graph_lookup
sc = np.zeros(len(graphs))
if len(grap_kernel_results)<1:
graphs_array=[None for i in range(len(graphs))]
for i,g in graphs.items():
graphs_array[i]=g
grap_kernel_results=WeisfeleirLehmanKernelGEO.compare(graphs_array,h=3)
grap_kernel_results= np.nan_to_num(grap_kernel_results)
return 1-grap_kernel_results[id_]
def compareSubTreeKernelEdgeGeo(id_,graphs):
global grap_kernel_results, graph_lookup
sc = np.zeros(len(graphs))
if len(grap_kernel_results)<1:
graphs_array=[None for i in range(len(graphs))]
for i,g in graphs.items():
graphs_array[i]=g
grap_kernel_results=WeisfeleirLehmanKernelEdgeGeo.compare(graphs_array,h=3)
grap_kernel_results= np.nan_to_num(grap_kernel_results)
return 1-grap_kernel_results[id_]
def compareGEOBP2(id_,graphs):
bp2=GeoBP2()
g = graphs[id_]
sc = np.zeros(len(graphs))
for id_, g2 in graphs.items():
if len(g2) >0:
score = bp2.bp2(g, g2)
sc[id_] = score
else:
sc[id_] = np.inf
return sc
def compareHED(id_,graphs):
h=HED()
g = graphs[id_]
sc = np.zeros(len(graphs))
for id_, g2 in graphs.items():
if len(g2) >0:
score = h.hed(g, g2)
sc[id_] = score
else:
sc[id_]=np.inf
return sc
def compareGEOHED(id_,graphs):
h=GeoHED()
g = graphs[id_]
sc = np.zeros(len(graphs))
for id_, g2 in graphs.items():
if len(g2)>1:
score = h.hed(g, g2)
sc[id_] = score
else:
sc[id_] = np.inf
return sc
def compareGreedy(id_,graphs):
g = graphs[id_]
sc = np.zeros(len(graphs))
for id_, g2 in graphs.items():
h = GreedyEditDistance(g,g2)
score = h.distance()
sc[id_] = score
return sc
funcDict={
"GED":compareGED,
"GEOGED":compareGEOGED,
"BP2":compareBP2,
"GEOBP2":compareGEOBP2,
"HED":compareHED,
"GEOHED":compareGEOHED,
"GREEDY":compareGreedy,
"WLSUBTREE":compareSubTreeKernel,
"WLSUBTREEGEO":compareSubTreeKernelGeo,
"WLSUBTREEEDGEGEO":compareSubTreeKernelEdgeGeo
}
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("distance")
parser.add_argument("graphs_dir")
parser.add_argument("--ignore",help="Ignore Output",action="store_true")
parser.add_argument("-o","--output",help="Output Filename",default="GED")
args = parser.parse_args()
if not args.distance in funcDict.keys():
raise NotFoundDistance(args.distance,funcDict)
exit()
# Initialize Pipeline for Spatial Entities extraction and STR construction
pip=Pipeline(lang="english",tagger=Tagger(),ner=StanfordNER(lang="en"))
# Load all the text from the corpus
def get_text_data(directory):
"""
Load the Epidemiology corpus
"""
files = glob.glob(os.path.join(directory,"*.json.processed.json"))
texts={}
for filepath in files:
id_doc=int(re.findall("\d+",filepath)[-1])
data=json.load(open(filepath))["content"]
texts[id_doc]=data
return texts
__t = json.load(open("data/CorpusHeterogene21docs.txt"))
texts = {}
for i in range(len(__t)):
texts[i] = __t[i]
# Extract All spatial entities
if not os.path.exists("associated_and_count_JPT.json"):
pass
associated_es={}
count_per_doc={}
for id_,text in texts.items():
if text:
a,b,c=pip.parse(text)
count_per_doc[id_]=a
associated_es[id_]=c
else:
associated_es[id_]={}
count_per_doc[id_]={}
open("associated_and_count_JPT.json",'w').write(json.dumps([associated_es,count_per_doc],indent=4))
if not os.path.exists(args.graphs_dir):
pass
import networkx as nx
graphs={}
for t,text in texts.items():
if text:
graphs[t]=pip.buildSemSTR(text,win_size=7).graph
else:
graphs[t]=nx.MultiDiGraph()
os.mkdir(args.graphs_dir)
for t,g in graphs.items():
print(t)
nx.write_gexf(g,os.path.join(args.graphs_dir,"{0}.gexf".format(t)))
# LOAD graph data and associated spatial entities of each graph
assC=json.load(open("associated_and_count_JPT.json"))
associated_es,count_per_doc=assC[0],assC[1]
graphs={}
for file in glob.glob(args.graphs_dir.rstrip("/")+"/*.gexf"):
id=int(re.findall("\d+",file)[0])
graphs[id]=nx.read_gexf(file)
#print("TEST associated_es and graphs",ass,gra)
# We take 50 documents chosen randomly. Then we test whether the top-10 returned documents are relevant !
selected_documents_=range(len(graphs))
from gmatch4py.ged.algorithm import graph_edit_dist as ged
from gmatch4py.ged.bipartite_graph_matching_2 import BP_2
def getLocInfo(id_):
data=get_data(id_)
if 'coord' in data:
return [data["coord"]["lat"],data["coord"]["lon"]]
return [0,0]
def get_associated_es(associated_es_data):
new_={}
for id_ in associated_es_data:
new_[id_]={"label":associated_es_data[id_],"coord":getLocInfo(id_)}
return new_
def getEdges4Draw(associated_es,edges):
data={}
for es in associated_es:
data[es]=getLocInfo(es)
lines=[]
for ed in edges:
try:
lines.append([data[ed[0]],data[ed[1]],ed[2]["color"]])
except:
print(ed)
return lines
# Generating Evaluation Output
top_ten_documents=[]
final_data={}
import copy
with ProgressBar(max_value=len(selected_documents_),widgets=[' [', Timer(), '] ',Bar(),' (', ETA(), ') ',]) as pg:
inc=0
for doc_s in selected_documents_:
id_json=str(doc_s)
#print(id_json)
bow_score=funcDict[args.distance](doc_s,copy.deepcopy(graphs))
top_10_docs_score=np.sort(bow_score)[1:11].astype(float)
top_10_docs=np.argsort(bow_score)[1:11].astype(int)
#print(top_10_docs)
final_data[id_json]={
"sp_entities":get_associated_es(associated_es[id_json]),
"text":texts[doc_s],
"edges":getEdges4Draw(associated_es[id_json],graphs[doc_s].edges(data=True))
}
final_data[id_json]["top_10"]=[]
for d in range(len(top_10_docs)):
doc_data={}
doc_data["score"]=top_10_docs_score[d]
doc_data["id_txt"]=int(top_10_docs[d])
doc_data["text"]=texts[int(top_10_docs[d])]
doc_data["sp_entities"]=get_associated_es(associated_es[str(doc_data["id_txt"])])
doc_data["edges"]=getEdges4Draw(associated_es[str(doc_data["id_txt"])],graphs[doc_data["id_txt"]].edges(data=True))
doc_data["relevant"]=None
final_data[id_json]["top_10"].append(doc_data)
inc+=1
pg.update(inc)
if not args.ignore:
open("graph_viewer/evalTopJPT10STR_{0}.json".format(args.distance),'w').write(json.dumps(final_data,indent=4))
......@@ -6,7 +6,7 @@ Gmatch4py is a library dedicated to graph matching. Graph structure are stored i
* DeltaCon and DeltaCon0 (*debug needed*) [1]
* Vertex Ranking (*debug needed*) [2]
* Vertex Edge Overlap [2
* Vertex Edge Overlap [2]
* Graph kernels
* Random Walk Kernel (*debug needed*) [3]
* Geometrical
......@@ -21,7 +21,8 @@ Gmatch4py is a library dedicated to graph matching. Graph structure are stored i
* Approximated Graph Edit Distance
* Hausdorff Graph Edit Distance
* Bipartite Graph Edit Distance
* Greedy Edit Distance
* Greedy Edit Distance
* MCS [6]
## Publications associated
......@@ -31,7 +32,7 @@ Gmatch4py is a library dedicated to graph matching. Graph structure are stored i
* [3] Vishwanathan, S. V. N., Schraudolph, N. N., Kondor, R., & Borgwardt, K. M. (2010). Graph kernels. Journal of Machine Learning Research, 11(Apr), 1201-1242.
* [4] Shervashidze, N., Schweitzer, P., Leeuwen, E. J. V., Mehlhorn, K., & Borgwardt, K. M. (2011). Weisfeiler-lehman graph kernels. Journal of Machine Learning Research, 12(Sep), 2539-2561.
* [5] Fischer, A., Riesen, K., & Bunke, H. (2017). Improved quadratic time approximation of graph edit distance by combining Hausdorff matching and greedy assignment. Pattern Recognition Letters, 87, 55-62.
* [6] Bunke, H., & Shearer, K. (1998). A graph distance metric based on the maximal common subgraph. Pattern Recognition Letters, 19(3-4), 255-259.
## Authors
......@@ -39,5 +40,4 @@ Jacques Fize
## TODO
* Add MCS + Jaccard
* Debug algorithms with --> (*debug needed*)
\ No newline at end of file
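For readers of this README, a hedged usage sketch of the compare interface the algorithms above expose (the import paths are taken from this commit; the toy graphs and the assumption that each class offers a static compare method mirror how the evaluation scripts call them):

```python
import networkx as nx
from gmatch4py.mcs import MCS
from gmatch4py.ged.hausdorff_edit_distance import HED

g1 = nx.complete_graph(4)
g2 = nx.path_graph(4)

sim_matrix  = MCS.compare([g1, g2])   # 2 x 2 similarity matrix (1.0 on the diagonal)
dist_matrix = HED.compare([g1, g2])   # 2 x 2 Hausdorff edit distance matrix
```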
__version__ = "0.1"
import os
\ No newline at end of file
......@@ -12,6 +12,6 @@ class ApproximateGraphEditDistance():
for i in range(n):
for j in range(i,n):
comparison_matrix[i,j]= GraphEditDistance(listgs[i],listgs[j],False,node_del=c_del_node,node_ins=c_ins_node,edge_del=c_del_edge,edge_ins=c_ins_edge).distance()
comparison_matrix[j,i]= comparison_matrix[i,j]
comparison_matrix[j,i]= comparison_matrix[i,j] # Questionable : AGED is not a symmetric measure, so mirroring the value is only an approximation !
return comparison_matrix
\ No newline at end of file
......@@ -24,7 +24,19 @@ class GeoGED(GraphEditDistance):
else:
self.g2_info=_cache_g_info[",".join(g2.nodes())]
@staticmethod
def compare(listgs, c_del_node=1, c_del_edge=1, c_ins_node=1, c_ins_edge=1):
n = len(listgs)
comparison_matrix = np.zeros((n, n))
for i in range(n):
for j in range(i, n):
comparison_matrix[i, j] = GraphEditDistance(listgs[i], listgs[j], False, node_del=c_del_node,
node_ins=c_ins_node, edge_del=c_del_edge,
edge_ins=c_ins_edge).distance()
comparison_matrix[j, i] = comparison_matrix[i, j] # Questionable : AGED is not a symmetric measure, so mirroring the value is only an approximation !
return comparison_matrix
def insert_geo_distance(self,node2):
# If one nodes given, compute average distance
......
......@@ -11,6 +11,17 @@ class GeoHED(HED):
"""Constructor for GeoHED"""
HED.__init__(self,node_del, node_ins, edge_del, edge_ins)
@staticmethod
def compare(listgs, c_del_node=1, c_del_edge=1, c_ins_node=1, c_ins_edge=1):
n = len(listgs)
comparator = GeoHED(c_del_node, c_ins_node, c_del_edge, c_ins_edge)
comparison_matrix = np.zeros((n, n))
for i in range(n):
for j in range(i, n):
comparison_matrix[i, j] = comparator.hed(listgs[i], listgs[j])
comparison_matrix[j, i] = comparison_matrix[i, j]
return comparison_matrix
def geo_distance(self,g1,g2,node1,node2):
g1_info = get_nodes_geolocalization(g1)
......
......@@ -19,10 +19,14 @@ class GeometricRandomWalkKernel():
comparison_matrix=np.zeros((n,n))
for i in range(n):
for j in range(i,n):
if len(listgs[i]) <1 or len(listgs[j]) <1:
comparison_matrix[i, j] = 0
comparison_matrix[j, i] = 0
continue
direct_product_graph=nx.tensor_product(listgs[i],listgs[j])
Ax = nx.adjacency_matrix(direct_product_graph).todense()
try:
la = 1/ GeometricRandomWalkKernel.maxDegree(direct_product_graph) # lambda value
la = 1/ ((GeometricRandomWalkKernel.maxDegree(direct_product_graph)**2)+1) # lambda value, kept below 1/lambda_max(Ax) so the geometric series converges
except:
la= pow(10,-6) # fallback lambda
eps = pow(10,-10)
......@@ -31,21 +35,20 @@ class GeometricRandomWalkKernel():
x=I_vec.copy()
x_pre=np.zeros(Ax.shape[0])
c=0
while (np.linalg.norm(x-x_pre)) > eps:
if c > 100:
break
x_pre=x
x= I_vec + la*(np.multiply(Ax,x_pre))
x= I_vec + la*np.dot(Ax,x_pre.T)
c+=1
comparison_matrix[i,j]=np.sum(x)
comparison_matrix[j,i]=comparison_matrix[i,j]
print(comparison_matrix)
for i in range(n):
for j in range(i,n):
comparison_matrix[i,j] = (comparison_matrix[i,j]/np.sqrt(comparison_matrix[i,i]*comparison_matrix[j,j]))
if i != j:
comparison_matrix[i,j]/=100 # Why ?
comparison_matrix[j,i]=comparison_matrix[i,j]
return comparison_matrix
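For context on the changed lambda above: the geometric random walk kernel counts walks of all lengths on the direct product graph, and the fixed-point loop only converges when lambda stays below the reciprocal of the largest eigenvalue of the product adjacency matrix; since that eigenvalue is bounded by the maximum degree, 1/(max_degree^2 + 1) is a safe, if conservative, choice. A rough statement of the quantity the loop approximates (notation mine, not from the source):

```latex
k_\times(G_1, G_2) \;=\; \sum_{k \ge 0} \lambda^k \, \mathbf{1}^\top A_\times^k \mathbf{1}
\;=\; \mathbf{1}^\top (I - \lambda A_\times)^{-1} \mathbf{1},
\qquad \text{convergent for } \lambda < 1/\lambda_{\max}(A_\times).
```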
......@@ -58,13 +61,18 @@ class KStepRandomWalkKernel():
dmax = max(degree_sequence)
return dmax
@staticmethod
def compare(listgs,lambda_list=[1,2,3],k=3):
def compare(listgs,lambda_list=[1,1,1]):
k=len(lambda_list)
if not len(lambda_list) == k:
raise AttributeError
n = len(listgs)
comparison_matrix=np.zeros((n,n))
for i in range(n):
for j in range(i,n):
if len(listgs[i]) <1 or len(listgs[j]) <1:
comparison_matrix[i, j] = 0
comparison_matrix[j, i] = 0
continue
direct_product_graph=nx.tensor_product(listgs[i],listgs[j])
Ax = nx.adjacency_matrix(direct_product_graph).todense()
eps = pow(10,-10)
......@@ -75,13 +83,11 @@ class KStepRandomWalkKernel():
ax_pow *= Ax
sum_ += lambda_list[kk] * ax_pow
comparison_matrix[i, j] = np.sum(sum_)
comparison_matrix[i, j] = np.sum(sum_)/(len(listgs[i])**2 * len(listgs[j])**2)
comparison_matrix[j,i] = comparison_matrix[i,j]
for i in range(n):
for j in range(i,n):
comparison_matrix[i,j] = comparison_matrix[i,j]/np.sqrt(comparison_matrix[i,i]*comparison_matrix[j,j])
if i != j:
comparison_matrix[i,j]/=100 # Why ?
comparison_matrix[j,i]=comparison_matrix[i,j]
return comparison_matrix
\ No newline at end of file
......@@ -13,9 +13,10 @@ import numpy as np
import networkx as nx
import copy
class WeisfeleirLehmanKernelGEO(object):
__type__ = "sim"
__depreciated__=True
@staticmethod
def compare(graph_list,h=2,verbose=False):
......
# coding = utf-8
import networkx as nx
import numpy as np
class MCS():
"""
A graph distance metric based on the maximal common subgraph, H. Bunke and K. Shearer,
Pattern Recognition Letters, 1998
"""
@staticmethod
def compare(listgs):
n = len(listgs)
comparison_matrix = np.zeros((n, n))
for i in range(n):
for j in range(i, n):
g1 = listgs[i]
g2 = listgs[j]
comparison_matrix[i, j] = MCS.s_mcs(g1,g2)
comparison_matrix[j, i] = comparison_matrix[i, j]
return comparison_matrix
@staticmethod
def intersect(a, b):
return list(set(a) & set(b))
@staticmethod
def transform_edges(ed):
for e in range(len(ed)):
if "id" in ed[e][-1]:
del ed[e][-1]["id"]
return ed
@staticmethod
def intersect_edges(g1, g2):
ed1 = MCS.transform_edges(g1.edges(data=True))
ed2 = MCS.transform_edges(g2.edges(data=True))
inter_ed = []
for e1 in ed1:
for e2 in ed2:
if e1 == e2:
inter_ed.append(e1)
return inter_ed
@staticmethod
def intersect_nodes(g1, g2):
return MCS.intersect(g1.nodes(), g2.nodes())
@staticmethod
def maximum_common_subgraph(g1, g2):
"""
Extract maximum common subgraph
"""
res = nx.MultiDiGraph()
res.add_nodes_from(MCS.intersect_nodes(g1, g2))
res.add_edges_from(MCS.intersect_edges(g1, g2))
return res
@staticmethod
def s_mcs(g1, g2):
return len(MCS.maximum_common_subgraph(g1, g2)) / max(len(g1), len(g2))
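A minimal usage sketch for the MCS class added here (toy graphs; the expected values are worked out by hand from the formula above):

```python
import networkx as nx

g1 = nx.MultiDiGraph()
g1.add_edges_from([("A", "B"), ("B", "C")])
g2 = nx.MultiDiGraph()
g2.add_edges_from([("A", "B")])

# Shared nodes {A, B} give a common subgraph of size 2, so s_mcs = 2 / max(3, 2).
MCS.s_mcs(g1, g2)       # ~0.667
MCS.compare([g1, g2])   # [[1.0, 0.667], [0.667, 1.0]] (values rounded)
```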
......@@ -76,7 +76,7 @@ def get_distance_two_entity(n1,n2,info1,info2):
#print(n1,info1[n1]["fr"],info2[n2]["fr"])
score+=0.5
else:
score+=4
score+=1
#if set(info1[n1]["class"]) and info2[n2]["class"]:
# score-=1
......
# coding = utf-8
import os, json, re, datetime, random, uuid, glob
from flask import Flask,jsonify, render_template, url_for, flash, make_response, request, redirect, session, Markup, jsonify
app = Flask(__name__)
dataFiles=glob.glob("evalTop10STR_*")
data_={}
for fn in dataFiles:
data_[fn.replace("evalTop10STR_","").rstrip(".json")]=fn
print(data_.keys())
@app.route("/<gmmeasure>")
def index(gmmeasure="GED"):
if not gmmeasure in data_.keys():
gmmeasure="GED"
return render_template("index.html",data=json.dumps(json.load(open(data_[gmmeasure]))),measureAvailable=list(data_.keys()))
if __name__ == '__main__':
app.run("0.0.0.0",port=5000,debug=True)
\ No newline at end of file
File moved
{
"database_json":"../resources/database_graph_viewer.db"
}
\ No newline at end of file
# coding = utf-8
import json
class Configuration(object):
def __init__(self, data):
self.__dict__=data
for d in self.__dict__:
if isinstance(self.__dict__[d],dict):
self.__dict__[d]=Configuration(self.__dict__[d])
def __getitem__(self, item):
return self.__dict__[item]
config = Configuration(json.load(open("config/config.json")))
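A hedged sketch of how this wrapper behaves, assuming config/config.json holds the fields added earlier in this commit:

```python
config.es_server           # attribute access -> "http://localhost:9200/"
config["database_json"]    # item access works too, via __getitem__
# Nested dictionaries are themselves wrapped in Configuration objects,
# so deeper settings would read as config.section.key.
```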
# coding = utf-8
# -*- coding: utf-8 -*-
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column,Integer,String, Boolean, ForeignKey, Text
from sqlalchemy.orm import sessionmaker
from config.configuration import config
import bcrypt
engine = create_engine('sqlite:///'+config.database_json, echo=True)
Base = declarative_base()
class User(Base):
__tablename__="users"
id = Column(Integer, primary_key=True)
name = Column(String(80), unique=True)
email = Column(String(120), unique=True)
password = Column(String(120))
level = Column(Integer)
def __init__(self, name, email,password,level=1,sign_up=True):
self.name=name
self.email=email
self.password= password
if sign_up:self.password=bcrypt.hashpw(password, bcrypt.gensalt(14))
self.level=level
def check_password(self,password):
return self.password == bcrypt.hashpw(password.encode(), self.password)
def is_authenticated(self):
return True
def is_active(self):
return True
def is_anonymous(self):
return False
def get_id(self):
return str(self.id)
class Annotation(Base):
__tablename__="annotations"
id = Column(Integer, primary_key=True)
type_annotation = Column(String(120))
user_id = Column(Integer,ForeignKey('users.id'))
data=Column(Text)
finished = Column(Boolean)
def __init__(self, type_annotation,user_id,data,finished=False):
self.type_annotation=type_annotation
self.user_id=user_id
self.finished=finished
self.data=data
###################################################
# Database Population functions
###################################################
def add_users(session,data):
for d in range(len(data)):
line=data.iloc[[d]].values[0]
user=User(line[0],line[1],line[2].encode(),line[3])
session.add(user)
session.commit()
if __name__ == '__main__':
# Create tables (delete if exists)
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
# Initialize session
Session = sessionmaker(bind=engine)
session = Session()
# Load data
user_input=pd.read_csv("user.csv",sep=";")
# Populate the database
add_users(session,user_input)
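A small hedged sketch of how the User model above is meant to be used at signup and login time (names and credentials are invented; session is a SQLAlchemy session as created in the __main__ block):

```python
# Passwords are hashed only when sign_up=True, as in User.__init__ above.
user = User("alice", "alice@example.org", "s3cret".encode(), level=1, sign_up=True)
session.add(user)
session.commit()

# Later, at login:
stored = session.query(User).filter_by(email="alice@example.org").first()
stored.check_password("s3cret")   # True: bcrypt re-hashes with the stored salt
```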
# coding = utf-8
import os, json, re, datetime, random, uuid, glob
from flask import Flask, render_template, url_for, flash, make_response, request, redirect, session, Markup, jsonify
from flask_session import Session
from flask_login import LoginManager, login_user, logout_user, current_user, login_required
from db import *
app = Flask(__name__)
Sessiona = sessionmaker(bind=engine)
sql_session = Sessiona()
login_manager = LoginManager()
login_manager.init_app(app)
"""
Load results files
"""
dataFiles=glob.glob("evalTop10STR_*")
data_={}
for fn in dataFiles:
data_[fn.replace("evalTop10STR_","").rstrip(".json")]=fn
print("File Available",data_.keys())
@app.route("/")
@app.route("/<gmmeasure>")
@login_required
def index(gmmeasure="GED"):
"""
Home Route
:param gmmeasure:
:return:
"""
if not gmmeasure in data_.keys():
gmmeasure="GED"
return render_template("index.html",data=json.dumps(json.load(open(data_[gmmeasure]))),measureAvailable=list(data_.keys()),measure=gmmeasure)
@app.route("/about")
def about():
return render_template("about.html",measureAvailable=list(data_.keys()))
@app.route("/save")
@login_required
def save():
pass
###################################################
# User Login/Signup/Logout managment
###################################################
@app.route('/login', methods=['GET', 'POST'])
def login():
"""
User login
"""
# If already logged in
if current_user.is_authenticated:
return redirect("/")
# Login page render
if request.method == 'GET':
return render_template('login.html',measureAvailable=list(data_.keys()))
# Get necessary variable
email = request.form['email']
password = request.form['password']
registered_user = sql_session.query(User).filter_by(email=email).first()
# Error message
error = Markup(
'<strong>Email</strong> or <strong>Password</strong> is invalid')
# If no user found
if registered_user is None:
flash(Markup(error), 'error')
return redirect(url_for('login'))
# If password is incorrect
if not registered_user.check_password(password):
flash(error, 'error')
return redirect(url_for('login'))
# Logged the user
login_user(registered_user)
return redirect(request.args.get('next') or url_for('index'))
@app.route('/signup', methods=['GET', 'POST'])
@login_required
def signup():
"""
User signup
"""
# If already logged in
if current_user.level != 1:
return redirect("/")
# Login page render
if request.method == 'GET':
return render_template('signup.html',measureAvailable=list(data_.keys()))
# Get necessary variable
name = request.form['name']
email = request.form['email']
password = request.form['password']
password2 = request.form['password_2']
if name and email and password:
if password == password2:
user=User(name,email,password.encode(),1)
sql_session.add(user)
sql_session.commit()
flash(Markup('Account for {0} is created !'.format(name)), 'success')
else:
error = Markup('Indicate two identical password !')
flash(error, 'danger')
else:
error = Markup('<strong>Email</strong> or <strong>Password</strong> or <strong>Name</strong> is empty')
flash(error, 'danger')
return redirect("/signup")
@app.route('/logout')
def logout():
"""
Logout page
"""
logout_user()
session.clear()
return redirect("/")
###################################################
# Login Manager Functions Overrided
###################################################
@login_manager.user_loader
def load_user(id):
return sql_session.query(User).get(int(id))
@login_manager.unauthorized_handler
def unauthorized_handler():
return redirect("/login")
if __name__ == '__main__':
app.secret_key = os.urandom(24)
app.run("0.0.0.0",port=5000,debug=True)
\ No newline at end of file