An error occurred while loading the file. Please try again.
-
Fize Jacques authored5a936e7f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# coding: utf-8
import glob
import argparse, os, sys, re, json, logging
import datetime
import time
import numpy as np
import pandas as pd
import networkx as nx
from gmatch4py import *
from gmatch4py.helpers.reader import import_dir
from gmatch4py import GraphEditDistance as GED2
from gmatch4py.base import Base
#############
# FUNCTIONS #
#############
def _get_graphs(df, id_colsname, colsname):
    """
    Return a list of graphs indexed by graph id.

    Parameters
    ----------
    df : pandas.DataFrame
        input
    id_colsname : str
        name of the column that contains graph's ids
    colsname : str
        name of the column that contains the graphs; each cell is either a
        ``nx.Graph`` or an object exposing the graph through a ``.graph``
        attribute

    Returns
    -------
    list
        list of graphs of length ``max(id) + 1`` where position ``i`` holds
        the graph whose id is ``i``. Ids that do not appear in ``df`` are
        filled with their own empty ``nx.Graph``. An empty ``df`` yields an
        empty list.
    """
    # np.max raises on an empty column, so handle the no-row case explicitly.
    if len(df) == 0:
        return []

    max_id = int(np.max(df[id_colsname]))

    # Build one distinct empty graph per slot. ``[nx.Graph()] * n`` would put
    # the *same* object in every slot, so mutating one placeholder would
    # silently mutate all of them.
    graphs = [nx.Graph() for _ in range(max_id + 1)]

    # Only the two columns are needed, so iterate over them directly instead
    # of materialising a Series per row with iterrows().
    for graph_id, value in zip(df[id_colsname], df[colsname]):
        graphs[int(graph_id)] = value if isinstance(value, nx.Graph) else value.graph
    return graphs
#######################
# PARSE ARGUMENTS #
#######################
# Command-line interface of the script; the parsed values end up in ``args``.
parser = argparse.ArgumentParser()

# Path of the input file (loaded later with ``pd.read_pickle``).
parser.add_argument("input")

# Optional path to a JSON file; when it exists its content is handed to the
# comparators as the selection of graphs.
parser.add_argument("-s", "--selected", default="")

# Repeatable option: one dataframe column name per occurrence.
# It is required because the main program iterates over ``args.graphcol``;
# with ``action="append"`` and no occurrence the value would be ``None`` and
# the script would crash with a ``TypeError`` instead of a usage message.
parser.add_argument(
    "-g", "--graphcol",
    help="Type of graph you want to compare",
    action="append",
    required=True,
)

# Name of the dataframe column holding the graph ids.
parser.add_argument("-i", "--idcol", default="id_doc")

# Log file path; defaults to a name derived from the launch timestamp.
parser.add_argument(
    "-l", "--logfile",
    default="{0}.csv".format(datetime.datetime.now().strftime("%Y_%m_%d__%H_%M_%S")),
)

# Directory where the similarity matrices are saved.
parser.add_argument("-o", "--output", default="output/", help="Output Directory")

args = parser.parse_args()
#################
# MAIN PROGRAM #
#################
# Check input file existence
if not os.path.exists(args.input):
    raise FileNotFoundError("Input file doesn't exists ! {0}".format(args.input))

# LOAD INPUT FILE
# NOTE(review): unpickling can execute arbitrary code; only feed this script
# files coming from a trusted source.
df = pd.read_pickle(args.input)

# Check id_col
if args.idcol not in df:
    raise KeyError("Column Id with key = {0} does not exists ! ".format(args.idcol))

# Check graph column
for col in args.graphcol:
    if col not in df:
        raise KeyError("Graph Column with key = {0} does not exists ! ".format(col))

# IF SELECTED GRAPHS
# ``selected`` stays None unless a readable JSON file was given with -s.
selected = None
if args.selected and os.path.exists(args.selected):
    with open(args.selected) as selected_file:
        selected = json.load(selected_file)

# LOAD DATA for each type of graph
datas = {type_: _get_graphs(df, args.idcol, type_) for type_ in args.graphcol}

# OUTPUT FN
matrix_output_dir = args.output
# np.save does not create missing directories, so make sure the target exists.
if matrix_output_dir:
    os.makedirs(matrix_output_dir, exist_ok=True)

# One [algorithm name, graph column, elapsed seconds] entry per computed matrix.
output_text = []

for str_type in args.graphcol:
    graphs = datas[str_type]
    for class_ in [BagOfNodes, WeisfeleirLehmanKernel, GraphEditDistance, BP_2, HED,
                   GreedyEditDistance, Jaccard, MCS, VertexEdgeOverlap]:
        deb = time.time()
        print("Computing the Similarity Matrix for {0} and {1}".format(class_.__name__, str_type))

        # Each family of algorithms is instantiated with its own arguments.
        if class_ in (GraphEditDistance, BP_2, GreedyEditDistance, HED):
            comparator = class_(1, 1, 1, 1)
        elif class_ == WeisfeleirLehmanKernel:
            comparator = class_(h=2)
        else:
            comparator = class_()

        # COMPARE
        matrix = comparator.compare(graphs, selected)
        matrix = comparator.similarity(matrix)

        # OUTPUT FILENAME
        output_fn = "{0}/{1}_{2}.npy".format(
            matrix_output_dir.rstrip("/"),
            class_.__name__,
            str_type
        )

        # UPDATE LOG
        output_text.append([class_.__name__, str_type, time.time() - deb])

        # SAVE RESULT
        np.save(output_fn, matrix)
        print("Matrix Saved")

# Persist the collected timing log (not just the last output filename) and
# close the file deterministically.
with open(args.logfile, 'w') as log_file:
    log_file.write(json.dumps(output_text))
print("Done")