From 320a3742e8c78763b320c74a8a21ca1060603745 Mon Sep 17 00:00:00 2001 From: Fize Jacques <jacques.fize@cirad.fr> Date: Thu, 5 Jul 2018 18:16:54 +0200 Subject: [PATCH] - Gmatch4py become a independant module - General debug --- .gitignore | 1 + gmatch4py/__init__.py | 1 - gmatch4py/exception/__init__.py | 1 - gmatch4py/ged/__init__.py | 2 - gmatch4py/ged/algorithm/__init__.py | 0 gmatch4py/ged/graph/__init__.py | 1 - gmatch4py/kernels/__init__.py | 1 - gmatch4py_cython/README.md | 43 ---- gmatch4py_cython/__init__.py | 1 - gmatch4py_cython/gmatch4py/__init__.py | 1 - gmatch4py_cython/gmatch4py/bag_of_cliques.pyx | 168 --------------- gmatch4py_cython/gmatch4py/deltacon.pyx | 153 -------------- .../gmatch4py/exception/__init__.py | 1 - gmatch4py_cython/gmatch4py/ged/__init__.py | 2 - .../gmatch4py/ged/algorithm/__init__.py | 0 .../algorithm/abstract_graph_edit_dist.pyx | 118 ----------- .../ged/algorithm/edge_edit_dist.pyx | 29 --- .../ged/algorithm/graph_edit_dist.pyx | 72 ------- .../gmatch4py/ged/approximate_ged.pyx | 33 --- .../ged/bipartite_graph_matching_2.pyx | 161 --------------- .../gmatch4py/ged/graph/__init__.py | 1 - .../gmatch4py/ged/graph/__init__.pyx | 0 .../gmatch4py/ged/graph/edge_graph.pyx | 16 -- .../gmatch4py/ged/greedy_edit_distance.pyx | 55 ----- .../gmatch4py/ged/hausdorff_edit_distance.pyx | 171 ---------------- .../gmatch4py/graph_matching_algorithm.pyx | 37 ---- gmatch4py_cython/gmatch4py/jaccard.pyx | 95 --------- .../gmatch4py/kernels/__init__.py | 1 - .../gmatch4py/kernels/random_walk_kernel.pyx | 93 --------- .../kernels/shortest_path_kernel.pyx | 88 -------- .../gmatch4py/kernels/weisfeiler_lehman.pyx | 143 ------------- .../kernels/weisfeiler_lehman_edge.pyx | 192 ------------------ .../kernels/weisfeiler_lehman_geo.pyx | 165 --------------- gmatch4py_cython/gmatch4py/mcs.pyx | 74 ------- .../gmatch4py/vertex_edge_overlap.pyx | 78 ------- gmatch4py_cython/gmatch4py/vertex_ranking.pyx | 39 ---- gmatch4py_cython/setup.py | 48 ----- helpers/boundary.py | 2 +- helpers/collision_with_gazetteer_data.py | 2 +- models/node2vec.py | 8 +- models/str.py | 4 +- models/transformation/transform.py | 2 +- nlp/bow_se.py | 4 +- nlp/disambiguator/wikipedia_cooc.py | 4 +- notebooks/Eval.ipynb | 2 +- requirements.txt | 2 +- 46 files changed, 16 insertions(+), 2099 deletions(-) delete mode 100644 gmatch4py/__init__.py delete mode 100644 gmatch4py/exception/__init__.py delete mode 100644 gmatch4py/ged/__init__.py delete mode 100644 gmatch4py/ged/algorithm/__init__.py delete mode 100644 gmatch4py/ged/graph/__init__.py delete mode 100644 gmatch4py/kernels/__init__.py delete mode 100644 gmatch4py_cython/README.md delete mode 100644 gmatch4py_cython/__init__.py delete mode 100644 gmatch4py_cython/gmatch4py/__init__.py delete mode 100644 gmatch4py_cython/gmatch4py/bag_of_cliques.pyx delete mode 100644 gmatch4py_cython/gmatch4py/deltacon.pyx delete mode 100644 gmatch4py_cython/gmatch4py/exception/__init__.py delete mode 100644 gmatch4py_cython/gmatch4py/ged/__init__.py delete mode 100644 gmatch4py_cython/gmatch4py/ged/algorithm/__init__.py delete mode 100644 gmatch4py_cython/gmatch4py/ged/algorithm/abstract_graph_edit_dist.pyx delete mode 100644 gmatch4py_cython/gmatch4py/ged/algorithm/edge_edit_dist.pyx delete mode 100644 gmatch4py_cython/gmatch4py/ged/algorithm/graph_edit_dist.pyx delete mode 100644 gmatch4py_cython/gmatch4py/ged/approximate_ged.pyx delete mode 100644 gmatch4py_cython/gmatch4py/ged/bipartite_graph_matching_2.pyx delete mode 100644 gmatch4py_cython/gmatch4py/ged/graph/__init__.py delete mode 100644 gmatch4py_cython/gmatch4py/ged/graph/__init__.pyx delete mode 100644 gmatch4py_cython/gmatch4py/ged/graph/edge_graph.pyx delete mode 100644 gmatch4py_cython/gmatch4py/ged/greedy_edit_distance.pyx delete mode 100644 gmatch4py_cython/gmatch4py/ged/hausdorff_edit_distance.pyx delete mode 100644 gmatch4py_cython/gmatch4py/graph_matching_algorithm.pyx delete mode 100644 gmatch4py_cython/gmatch4py/jaccard.pyx delete mode 100644 gmatch4py_cython/gmatch4py/kernels/__init__.py delete mode 100644 gmatch4py_cython/gmatch4py/kernels/random_walk_kernel.pyx delete mode 100644 gmatch4py_cython/gmatch4py/kernels/shortest_path_kernel.pyx delete mode 100644 gmatch4py_cython/gmatch4py/kernels/weisfeiler_lehman.pyx delete mode 100644 gmatch4py_cython/gmatch4py/kernels/weisfeiler_lehman_edge.pyx delete mode 100644 gmatch4py_cython/gmatch4py/kernels/weisfeiler_lehman_geo.pyx delete mode 100644 gmatch4py_cython/gmatch4py/mcs.pyx delete mode 100644 gmatch4py_cython/gmatch4py/vertex_edge_overlap.pyx delete mode 100644 gmatch4py_cython/gmatch4py/vertex_ranking.pyx delete mode 100644 gmatch4py_cython/setup.py diff --git a/.gitignore b/.gitignore index 2fa705c..572be86 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ test* __pycache__/ /UNHCR_en.txt !/tests/ +.DS_Store diff --git a/gmatch4py/__init__.py b/gmatch4py/__init__.py deleted file mode 100644 index 950f635..0000000 --- a/gmatch4py/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# coding = utf-8 \ No newline at end of file diff --git a/gmatch4py/exception/__init__.py b/gmatch4py/exception/__init__.py deleted file mode 100644 index 950f635..0000000 --- a/gmatch4py/exception/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# coding = utf-8 \ No newline at end of file diff --git a/gmatch4py/ged/__init__.py b/gmatch4py/ged/__init__.py deleted file mode 100644 index e5c6c3c..0000000 --- a/gmatch4py/ged/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# coding = utf-8 - diff --git a/gmatch4py/ged/algorithm/__init__.py b/gmatch4py/ged/algorithm/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/gmatch4py/ged/graph/__init__.py b/gmatch4py/ged/graph/__init__.py deleted file mode 100644 index 950f635..0000000 --- a/gmatch4py/ged/graph/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# coding = utf-8 \ No newline at end of file diff --git a/gmatch4py/kernels/__init__.py b/gmatch4py/kernels/__init__.py deleted file mode 100644 index 950f635..0000000 --- a/gmatch4py/kernels/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# coding = utf-8 \ No newline at end of file diff --git a/gmatch4py_cython/README.md b/gmatch4py_cython/README.md deleted file mode 100644 index d30da9a..0000000 --- a/gmatch4py_cython/README.md +++ /dev/null @@ -1,43 +0,0 @@ -# Gmatch4py a graph matching library for Python - -Gmatch4py is a library dedicated to graph matching. Graph structure are stored in NetworkX.Graph objects. - -## List of algorithm - - * DeltaCon and DeltaCon0 (*debug needed*) [1] - * Vertex Ranking (*debug needed*) [2] - * Vertex Edge Overlap [2] - * Graph kernels - * Random Walk Kernel (*debug needed*) [3] - * Geometrical - * K-Step - * Shortest Path Kernel [3] - * Weisfeiler-Lehman Kernel [4] - * Subtree Kernel - * Edge Kernel - * Subtree Geo Kernel [new] - * Edge Geo Kernel [new] - * Graph Edit Distance [5] - * Approximated Graph Edit Distance - * Hausdorff Graph Edit Distance - * Bipartite Graph Edit Distance - * Greedy Edit Distance - * MCS [6] - - -## Publications associated - - * [1] Koutra, D., Vogelstein, J. T., & Faloutsos, C. (2013, May). Deltacon: A principled massive-graph similarity function. In Proceedings of the 2013 SIAM International Conference on Data Mining (pp. 162-170). Society for Industrial and Applied Mathematics. - * [2] Papadimitriou, P., Dasdan, A., & Garcia-Molina, H. (2010). Web graph similarity for anomaly detection. Journal of Internet Services and Applications, 1(1), 19-30. - * [3] Vishwanathan, S. V. N., Schraudolph, N. N., Kondor, R., & Borgwardt, K. M. (2010). Graph kernels. Journal of Machine Learning Research, 11(Apr), 1201-1242. - * [4] Shervashidze, N., Schweitzer, P., Leeuwen, E. J. V., Mehlhorn, K., & Borgwardt, K. M. (2011). Weisfeiler-lehman graph kernels. Journal of Machine Learning Research, 12(Sep), 2539-2561. - * [5] Fischer, A., Riesen, K., & Bunke, H. (2017). Improved quadratic time approximation of graph edit distance by combining Hausdorff matching and greedy assignment. Pattern Recognition Letters, 87, 55-62. - * [6] A graph distance metric based on the maximal common subgraph, H. Bunke and K. Shearer, Pattern Recognition Letters, 1998 - -## Authors - -Jacques Fize - -## TODO - - * Debug algorithms with --> (*debug needed*) \ No newline at end of file diff --git a/gmatch4py_cython/__init__.py b/gmatch4py_cython/__init__.py deleted file mode 100644 index 950f635..0000000 --- a/gmatch4py_cython/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# coding = utf-8 \ No newline at end of file diff --git a/gmatch4py_cython/gmatch4py/__init__.py b/gmatch4py_cython/gmatch4py/__init__.py deleted file mode 100644 index 950f635..0000000 --- a/gmatch4py_cython/gmatch4py/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# coding = utf-8 \ No newline at end of file diff --git a/gmatch4py_cython/gmatch4py/bag_of_cliques.pyx b/gmatch4py_cython/gmatch4py/bag_of_cliques.pyx deleted file mode 100644 index 14c22da..0000000 --- a/gmatch4py_cython/gmatch4py/bag_of_cliques.pyx +++ /dev/null @@ -1,168 +0,0 @@ -# coding = utf-8 - -import copy -from typing import Sequence - -import networkx as nx -import numpy as np -cimport numpy as np -import sys - -from networkit import graph -from networkit.clique import MaximalCliques - -def nx2nk(nxG, weightAttr=None): - """ - Convert a networkx.Graph to a NetworKit.Graph - :param weightAttr: the edge attribute which should be treated as the edge weight. - """ - - # map networkx node ids to consecutive numerical node ids - idmap = dict((id, u) for (id, u) in zip(nxG.nodes(), range(nxG.number_of_nodes()))) - z = max(idmap.values()) + 1 - # print("z = {0}".format(z)) - - if weightAttr is not None: - nkG = graph.Graph(z, weighted=True, directed=nxG.is_directed()) - for (u_, v_) in nxG.edges(): - u, v = idmap[u_], idmap[v_] - w = nxG[u_][v_][weightAttr] - nkG.addEdge(u, v, w) - else: - nkG = graph.Graph(z, directed=nxG.is_directed()) - for (u_, v_) in nxG.edges(): - u, v = idmap[u_], idmap[v_] - # print(u_, v_, u, v) - assert (u < z) - assert (v < z) - nkG.addEdge(u, v) - - assert (nkG.numberOfNodes() == nxG.number_of_nodes()) - assert (nkG.numberOfEdges() == nxG.number_of_edges()) - return nkG.removeSelfLoops(),idmap - -def getClique(nx_graph): - final_cliques=[] - if len(nx_graph) ==0: - return final_cliques - netkit_graph,idmap=nx2nk(nx_graph) - idmap={v:k for k,v in idmap.items()} - cliques=MaximalCliques(netkit_graph).run().getCliques() - for cl in cliques: - final_cliques.append(list(map(lambda x:idmap[x],cl))) - return final_cliques - -class BagOfCliques(): - - @staticmethod - def compare(graphs,selected): - b=BagOfCliques() - bog=b.getBagOfCliques(graphs).astype(np.float32) - #Compute cosine similarity - cdef int n=bog.shape[0] - cdef double[:,:] scores = np.zeros((n,n)) - cdef int i - for i in range(len(scores)): - if not i in selected: - continue - for j in range(i,len(scores)): - scores[i,j]=(np.dot(bog[i],bog[j]))/(np.sqrt(np.sum(bog[i]**2))*np.sqrt(np.sum(bog[j]**2))) # Can be computed in one line - scores[j,i]=scores[i,j] - return scores - - def getUniqueCliques(self,graphs): - """ - Return unique cliques from a population of graphs - :return: - """ - t = {} - c_ = 0 - cdef list clique_vocab = [] - cdef list cli_temp - cdef list cliques - cdef int len_graphs=len(graphs) - cdef int km= -1 - for g in graphs: - km+=1 - if not g: - continue - sys.stdout.write("\r{0}/{1} -- {2}".format(km,len_graphs,len(g))) - try: - cliques = list(getClique(nx.Graph(g))) - except: - #no clique found - print(nx.Graph(g).edges()) - cliques =[] - for clique in cliques: - - cli_temp = copy.deepcopy(clique) - new_clique = False - for i in range(len(clique)): - flag = False - v = None # vertex deleted - for vertex in cli_temp: - if vertex in t: - v = vertex - flag = True - - if not flag in t: - v = cli_temp[0] - t[v] = {} - new_clique = True - t = t[v] - cli_temp.remove(v) - - if new_clique: - c_ += 1 - clique_vocab.append(clique) - return clique_vocab - - - def clique2str(self,cliques): - return "".join(sorted(cliques)) - - def transform_clique_vocab(self,clique_vocab): - cdef dict new_vocab={} - cdef int len_voc=len(clique_vocab) - for c in range(len_voc): - print(c) - new_vocab[self.clique2str(clique_vocab[c])]=c - return new_vocab - - - def ifHaveMinor(self,clique, dict mapping): - """ - If a clique (minor) H belong to a graph G - :param H: - :return: - """ - if self.clique2str(clique) in mapping: - return 1 - return 0 - - - def getBagOfCliques(self,graphs ): - """ - - :param clique_vocab: - :return: - """ - cdef list clique_vocab=self.getUniqueCliques(graphs) - print("DONE") - cdef dict map_str_cliques=self.transform_clique_vocab(clique_vocab) - print("DONE2") - cdef int l_v=len(clique_vocab) - cdef np.ndarray boc = np.zeros((len(graphs), l_v)) - cdef np.ndarray vector - cdef list cliques - for g in range(len(graphs)): - sys.stdout.write("\r{0}/{1}".format(g,5552)) - gr = graphs[g] - vector = np.zeros(l_v) - cliques = list(getClique(nx.Graph(gr))) - for clique in cliques: - hash=self.clique2str(clique) - if hash in map_str_cliques: - vector[map_str_cliques[hash]] = 1 - boc[g] = vector - return boc \ No newline at end of file diff --git a/gmatch4py_cython/gmatch4py/deltacon.pyx b/gmatch4py_cython/gmatch4py/deltacon.pyx deleted file mode 100644 index 6749994..0000000 --- a/gmatch4py_cython/gmatch4py/deltacon.pyx +++ /dev/null @@ -1,153 +0,0 @@ -# coding = utf-8 - -import networkx as nx -import numpy as np -import scipy.sparse - - -class DeltaCon0(): - __type__ = "sim" - - @staticmethod - def compare(list_gs,selected): - n=len(list_gs) - - comparison_matrix = np.zeros((n,n)) - for i in range(n): - for j in range(i,n): - g1,g2=list_gs[i],list_gs[j] - f=True - if not list_gs[i] or not list_gs[j]: - f=False - elif len(list_gs[i])== 0 or len(list_gs[j]) == 0: - f=False - if selected: - if not i in selected: - f=False - if f: - # S1 - epsilon = 1/(1+DeltaCon0.maxDegree(g1)) - D, A = DeltaCon0.degreeAndAdjacencyMatrix(g1) - S1 = np.linalg.inv(np.identity(len(g1))+(epsilon**2)*D -epsilon*A) - - # S2 - D, A = DeltaCon0.degreeAndAdjacencyMatrix(g2) - epsilon = 1 / (1 + DeltaCon0.maxDegree(g2)) - S2 = np.linalg.inv(np.identity(len(g2))+(epsilon**2)*D -epsilon*A) - - - comparison_matrix[i,j] = 1/(1+DeltaCon0.rootED(S1,S2)) - comparison_matrix[j,i] = comparison_matrix[i,j] - else: - comparison_matrix[i, j] = 0. - comparison_matrix[j, i] = comparison_matrix[i, j] - - - return comparison_matrix - - @staticmethod - def rootED(S1,S2): - return np.sqrt(np.sum((S1-S2)**2)) # Long live numpy ! - - @staticmethod - def degreeAndAdjacencyMatrix(G): - """ - Return the Degree(D) and Adjacency Matrix(A) from a graph G. - Inspired of nx.laplacian_matrix(G,nodelist,weight) code proposed by networkx - :param G: - :return: - """ - A = nx.to_scipy_sparse_matrix(G, nodelist=G.nodes(), weight="weight", - format='csr') - n, m = A.shape - diags = A.sum(axis=1) - D = scipy.sparse.spdiags(diags.flatten(), [0], m, n, format='csr') - - return D, A - @staticmethod - def maxDegree(G): - degree_sequence = sorted(nx.degree(G).values(), reverse=True) # degree sequence - # print "Degree sequence", degree_sequence - dmax = max(degree_sequence) - return dmax - -class DeltaCon(): - __type__ = "sim" - - @staticmethod - def relabel_nodes(graph_list): - label_lookup = {} - label_counter = 0 - n= len(graph_list) - # label_lookup is an associative array, which will contain the - # mapping from multiset labels (strings) to short labels - # (integers) - for i in range(n): - nodes = graph_list[i].nodes() - - for j in range(len(nodes)): - if not (nodes[j] in label_lookup): - label_lookup[nodes[j]] = label_counter - label_counter += 1 - - graph_list[i] = nx.relabel_nodes(graph_list[i], label_lookup) - return graph_list - @staticmethod - def compare(list_gs, g=3): - n=len(list_gs) - list_gs=DeltaCon.relabel_nodes(list_gs) - comparison_matrix = np.zeros((n,n)) - for i in range(n): - for j in range(i,n): - g1,g2=list_gs[i],list_gs[j] - - V = g1.nodes() - V.extend(g2.nodes()) - V=np.unique(V) - - partitions=V.copy() - np.random.shuffle(partitions) - if len(partitions)< g: - partitions=np.array([partitions]) - else: - partitions=np.array_split(partitions,g) - partitions_e_1 = DeltaCon.partitions2e(partitions, g1.nodes()) - partitions_e_2 = DeltaCon.partitions2e(partitions, g2.nodes()) - S1,S2=[],[] - for k in range(len(partitions)): - s0k1,s0k2=partitions_e_1[k],partitions_e_2[k] - - # S1 - epsilon = 1/(1+DeltaCon0.maxDegree(g1)) - D, A = DeltaCon0.degreeAndAdjacencyMatrix(g1) - s1k = np.linalg.inv(np.identity(len(g1))+(epsilon**2)*D -epsilon*A) - s1k=np.linalg.solve(s1k,s0k1).tolist() - - # S2 - D, A = DeltaCon0.degreeAndAdjacencyMatrix(g2) - epsilon = 1 / (1 + DeltaCon0.maxDegree(g2)) - s2k= np.linalg.inv(np.identity(len(g2))+(epsilon**2)*D -epsilon*A) - s2k = np.linalg.solve(s2k, s0k2).tolist() - - - - S1.append(s1k) - S2.append(s2k) - - comparison_matrix[i,j] = 1/(1+DeltaCon0.rootED(np.array(S1),np.array(S2))) - comparison_matrix[j,i] = comparison_matrix[i,j] - - return comparison_matrix - - - @staticmethod - def partitions2e( partitions, V): - e = [ [] for i in range(len(partitions))] - for p in range(len(partitions)): - e[p] = [] - for i in range(len(V)): - if i in partitions[p]: - e[p].append(1.0) - else: - e[p].append(0.0) - return e \ No newline at end of file diff --git a/gmatch4py_cython/gmatch4py/exception/__init__.py b/gmatch4py_cython/gmatch4py/exception/__init__.py deleted file mode 100644 index 950f635..0000000 --- a/gmatch4py_cython/gmatch4py/exception/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# coding = utf-8 \ No newline at end of file diff --git a/gmatch4py_cython/gmatch4py/ged/__init__.py b/gmatch4py_cython/gmatch4py/ged/__init__.py deleted file mode 100644 index e5c6c3c..0000000 --- a/gmatch4py_cython/gmatch4py/ged/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# coding = utf-8 - diff --git a/gmatch4py_cython/gmatch4py/ged/algorithm/__init__.py b/gmatch4py_cython/gmatch4py/ged/algorithm/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/gmatch4py_cython/gmatch4py/ged/algorithm/abstract_graph_edit_dist.pyx b/gmatch4py_cython/gmatch4py/ged/algorithm/abstract_graph_edit_dist.pyx deleted file mode 100644 index 481ec69..0000000 --- a/gmatch4py_cython/gmatch4py/ged/algorithm/abstract_graph_edit_dist.pyx +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding: UTF-8 -*- -from __future__ import print_function - -import sys - -import numpy as np -from scipy.optimize import linear_sum_assignment -cimport numpy as np - - -class AbstractGraphEditDistance(object): - - - def __init__(self, g1, g2,debug=False,**kwargs): - self.g1 = g1 - self.g2 = g2 - self.debug=debug - - self.node_del = kwargs.get("node_del",1) - self.node_ins = kwargs.get("node_ins",1) - self.edge_del = kwargs.get("edge_del",1) - self.edge_ins = kwargs.get("edge_ins",1) - - - def distance(self): - opt_path = self.edit_costs() - if self.debug: - print("Edit path for ",str(self.__class__.__name__),"\n",opt_path) - return sum(opt_path) - - def print_operations(self,cost_matrix,row_ind,col_ind): - cdef list nodes1 = self.g1.nodes() - cdef list nodes2 = self.g2.nodes() - dn1 = self.g1.node - dn2 = self.g2.node - - cdef int n=len(nodes1) - cdef int m=len(nodes2) - cdef int x,y,i - for i in range(len(row_ind)): - y,x=row_ind[i],col_ind[i] - val=cost_matrix[row_ind[i]][col_ind[i]] - if x<m and y<n: - print("SUB {0} to {1} cost = {2}".format(dn1[nodes1[y]]["label"],dn2[nodes2[x]]["label"],val)) - elif x <m and y>=n: - print("ADD {0} cost = {1}".format(dn2[nodes2[y-n]]["label"],val)) - elif x>=m and y<n: - print("DEL {0} cost = {1}".format(dn1[nodes1[m-x]]["label"],val)) - - def edit_costs(self): - cdef np.ndarray cost_matrix = self.create_cost_matrix() - if self.debug: - np.set_printoptions(precision=3) - print("Cost Matrix for ",str(self.__class__.__name__),"\n",cost_matrix) - - row_ind,col_ind = linear_sum_assignment(cost_matrix) - if self.debug: - self.print_operations(cost_matrix,row_ind,col_ind) - cdef int f=len(row_ind) - return [cost_matrix[row_ind[i]][col_ind[i]] for i in range(f)] - - def create_cost_matrix(self): - """ - Creates a |N+M| X |N+M| cost matrix between all nodes in - graphs g1 and g2 - Each cost represents the cost of substituting, - deleting or inserting a node - The cost matrix consists of four regions: - - substitute | insert costs - ------------------------------- - delete | delete -> delete - - The delete -> delete region is filled with zeros - """ - cdef int n = len(self.g1) - cdef int m = len(self.g2) - cdef np.ndarray cost_matrix = np.zeros((n+m,n+m)) - #cost_matrix = [[0 for i in range(n + m)] for j in range(n + m)] - cdef list nodes1 = self.g1.nodes() - cdef list nodes2 = self.g2.nodes() - cdef int i,j - for i in range(n): - for j in range(m): - cost_matrix[i,j] = self.substitute_cost(nodes1[i], nodes2[j]) - - for i in range(m): - for j in range(m): - cost_matrix[i+n,j] = self.insert_cost(i, j, nodes2) - - for i in range(n): - for j in range(n): - cost_matrix[j,i+m] = self.delete_cost(i, j, nodes1) - - self.cost_matrix = cost_matrix - return cost_matrix - - def insert_cost(self, int i, int j): - raise NotImplementedError - - def delete_cost(self, int i, int j): - raise NotImplementedError - - def substitute_cost(self, nodes1, nodes2): - raise NotImplementedError - - def print_matrix(self): - print("cost matrix:") - print(self.g1.nodes()) - print(self.g2.nodes()) - print(np.array(self.create_cost_matrix())) - for column in self.create_cost_matrix(): - for row in column: - if row == sys.maxsize: - print ("inf\t") - else: - print ("%.2f\t" % float(row)) - print("") diff --git a/gmatch4py_cython/gmatch4py/ged/algorithm/edge_edit_dist.pyx b/gmatch4py_cython/gmatch4py/ged/algorithm/edge_edit_dist.pyx deleted file mode 100644 index 6f0c587..0000000 --- a/gmatch4py_cython/gmatch4py/ged/algorithm/edge_edit_dist.pyx +++ /dev/null @@ -1,29 +0,0 @@ -import sys - -from gmatch4py_old.ged.algorithm.abstract_graph_edit_dist import AbstractGraphEditDistance - - -class EdgeEditDistance(AbstractGraphEditDistance): - """ - Calculates the graph edit distance between two edges. - A node in this context is interpreted as a graph, - and edges are interpreted as nodes. - """ - - def __init__(self, g1, g2,**kwargs): - AbstractGraphEditDistance.__init__(self, g1, g2,**kwargs) - - def insert_cost(self, int i, int j, nodes2): - if i == j: - return self.edge_ins - return sys.maxsize - - def delete_cost(self, int i, int j, nodes1): - if i == j: - return self.edge_del - return sys.maxsize - - def substitute_cost(self, edge1, edge2): - if edge1 == edge2: - return 0. - return self.edge_del+self.edge_ins diff --git a/gmatch4py_cython/gmatch4py/ged/algorithm/graph_edit_dist.pyx b/gmatch4py_cython/gmatch4py/ged/algorithm/graph_edit_dist.pyx deleted file mode 100644 index 1dcd1e9..0000000 --- a/gmatch4py_cython/gmatch4py/ged/algorithm/graph_edit_dist.pyx +++ /dev/null @@ -1,72 +0,0 @@ -# -*- coding: UTF-8 -*- - -import sys - -import networkx as nx - -from gmatch4py_old.ged.algorithm.abstract_graph_edit_dist import AbstractGraphEditDistance -from gmatch4py_old.ged.algorithm.edge_edit_dist import EdgeEditDistance -from gmatch4py_old.ged.graph.edge_graph import EdgeGraph - - -def compare(g1, g2, print_details=False): - ged = GraphEditDistance(g1, g2,print_details) - return ged.distance() - - -class GraphEditDistance(AbstractGraphEditDistance): - - def __init__(self, g1, g2,debug=False,**kwargs): - AbstractGraphEditDistance.__init__(self, g1, g2,debug,**kwargs) - - def substitute_cost(self, node1, node2): - return self.relabel_cost(node1, node2) + self.edge_diff(node1, node2) - - def relabel_cost(self, node1, node2): - if node1 == node2: - edges1=set(self.get_edge_multigraph(self.g1,node1)) - edges2=set(self.get_edge_multigraph(self.g2,node2)) - return abs(len(edges2.difference(edges1))) # Take in account if there is a different number of edges - else: - return self.node_ins+self.node_del - - def delete_cost(self, int i, int j, nodes1): - if i == j: - return self.node_del+self.g1.degree(nodes1[i]) # Deleting a node implicate to delete in and out edges - return sys.maxsize - - def insert_cost(self, int i, int j, nodes2): - if i == j: - deg=self.g2.degree(nodes2[j]) - if isinstance(deg,dict):deg=0 - return self.node_ins+deg - else: - return sys.maxsize - - def get_edge_multigraph(self,g,node): - cdef list edges=[] - for id_,val in g.edge[node].items(): - if not 0 in val: - edges.append(str(id_) + val["color"]) - else: - for _,edge in val.items(): - edges.append(str(id_)+edge["color"]) - return edges - - def edge_diff(self, node1, node2): - cdef list edges1,edges2 - if isinstance(self.g1,nx.MultiDiGraph): - edges1 = self.get_edge_multigraph(self.g1,node1) - edges2 = self.get_edge_multigraph(self.g2,node2) - else: - edges1 = list(self.g1.edge[node1].keys()) - edges2 = list(self.g2.edge[node2].keys()) - if len(edges1) == 0 or len(edges2) == 0: - return max(len(edges1), len(edges2)) - - edit_edit_dist = EdgeEditDistance( - EdgeGraph(node1,edges1), - EdgeGraph(node2,edges2), - edge_del=self.edge_del,edge_ins=self.edge_ins,node_ins=self.node_ins,node_del=self.node_del - ) - return edit_edit_dist.distance() diff --git a/gmatch4py_cython/gmatch4py/ged/approximate_ged.pyx b/gmatch4py_cython/gmatch4py/ged/approximate_ged.pyx deleted file mode 100644 index 27ea437..0000000 --- a/gmatch4py_cython/gmatch4py/ged/approximate_ged.pyx +++ /dev/null @@ -1,33 +0,0 @@ -# coding = utf-8 - -import numpy as np - -from .algorithm.graph_edit_dist import GraphEditDistance -from cython.parallel import prange - -class ApproximateGraphEditDistance(): - __type__ = "dist" - - @staticmethod - def compare(listgs,selected,c_del_node=1,c_del_edge=1,c_ins_node=1,c_ins_edge=1): - cdef int n= len(listgs) - cdef double[:,:] comparison_matrix = np.zeros((n,n)) - cdef int i,j - for i in prange(n,nogil=True): - for j in range(i,n): - with gil: - f=True - if not listgs[i] or not listgs[j]: - f=False - elif len(listgs[i])== 0 or len(listgs[j]) == 0: - f=False - if selected: - if not i in selected: - f=False - - if f: - comparison_matrix[i][j] = GraphEditDistance(listgs[i],listgs[j],False,node_del=c_del_node,node_ins=c_ins_node,edge_del=c_del_edge,edge_ins=c_ins_edge).distance() - else: - comparison_matrix[i][j] = np.inf - comparison_matrix[j][i] = comparison_matrix[i][j] - return comparison_matrix \ No newline at end of file diff --git a/gmatch4py_cython/gmatch4py/ged/bipartite_graph_matching_2.pyx b/gmatch4py_cython/gmatch4py/ged/bipartite_graph_matching_2.pyx deleted file mode 100644 index d6d68e6..0000000 --- a/gmatch4py_cython/gmatch4py/ged/bipartite_graph_matching_2.pyx +++ /dev/null @@ -1,161 +0,0 @@ -# coding = utf-8 -import numpy as np -cimport numpy as np - -cdef class BP_2(): - """ - - """ - __type__="dist" - - cdef int node_del - cdef int node_ins - cdef int edge_del - cdef int edge_ins - - @staticmethod - def compare(listgs,selected, c_del_node=1, c_del_edge=1, c_ins_node=1, c_ins_edge=1): - cdef int n = len(listgs) - comparator = BP_2(c_del_node, c_ins_node, c_del_edge, c_ins_edge) - cdef np.ndarray comparison_matrix = np.zeros((n, n)) - for i in range(n): - for j in range(i, n): - f=True - if not listgs[i] or not listgs[j]: - f=False - elif len(listgs[i])== 0 or len(listgs[j]) == 0: - f=False - if selected: - if not i in selected: - f=False - if f: - comparison_matrix[i, j] = comparator.bp2(listgs[i], listgs[j]) - else: - comparison_matrix[i, j] = np.inf - comparison_matrix[j, i] = comparison_matrix[i, j] - - return comparison_matrix - - def __init__(self, node_del=1, node_ins=1, edge_del=1, edge_ins=1): - """Constructor for HED""" - self.node_del = node_del - self.node_ins = node_ins - self.edge_del = edge_del - self.edge_ins = edge_ins - - def bp2(self, g1, g2): - """ - Compute de Hausdorff Edit Distance - :param g1: first graph - :param g2: second graph - :return: - """ - return np.min(self.distance(self.psi(g1,g2)),self.distance(self.psi(g2,g1))) - - def distance(self,e): - return np.sum(e) - - cdef list psi(self,g1,g2): - cdef list psi_=[] - cdef list nodes1 = g1.nodes() - cdef list nodes2 = g2.nodes() - for u in nodes1: - v=None - for w in nodes2: - if 2*self.fuv(g1,g2,u,w) < self.fuv(g1,g2,u,None) + self.fuv(g1,g2,None,w)\ - and self.fuv(g1,g2,u,w) < self.fuv(g1,g2,u,v): - v=w - psi_.append(self.fuv(g1,g2,u,v)) - if u: - nodes1= list(set(nodes1).difference(set([u]))) - if v: - nodes2= list(set(nodes2).difference(set([v]))) - for v in nodes2: - psi_.append(self.fuv(g1,g2,None,v)) - return psi_ - - - cdef float fuv(self, g1, g2, n1, n2): - """ - Compute the Node Distance function - :param g1: first graph - :param g2: second graph - :param n1: node of the first graph - :param n2: node of the second graph - :return: - """ - if n2 == None: # Del - return self.node_del + ((self.edge_del / 2) * g1.degree(n1)) - if n1 == None: # Insert - return self.node_ins + ((self.edge_ins / 2) * g2.degree(n2)) - else: - if n1 == n2: - return 0. - return (self.node_del + self.node_ins + self.hed_edge(g1, g2, n1, n2)) / 2 - - cdef float hed_edge(self, g1, g2, n1, n2): - """ - Compute HEDistance between edges of n1 and n2, respectively in g1 and g2 - :param g1: first graph - :param g2: second graph - :param n1: node of the first graph - :param n2: node of the second graph - :return: - """ - return self.sum_gpq(g1, n1, g2, n2) + self.sum_gpq(g1, n1, g2, n2) - - cdef list get_edge_multigraph(self, g, node): - """ - Get list of edge around a node in a Multigraph - :param g: multigraph - :param node: node in the multigraph - :return: - """ - - cdef list originals_ = g.edges(node, data=True) - cdef int n= len(originals_) - if n == 0: - return [] - - cdef list edges = [""]*n - for i in range(n): - edge=originals_[i] - edges[i]=("{0}-{1}".format(edge[0],edge[1])) - return edges - - cdef float sum_gpq(self, g1, n1, g2, n2): - """ - Compute Nearest Neighbour Distance between edges around n1 in G1 and edges around n2 in G2 - :param g1: first graph - :param n1: node in the first graph - :param g2: second graph - :param n2: node in the second graph - :return: - """ - cdef list edges1 = self.get_edge_multigraph(g1, n1) - cdef list edges2 = self.get_edge_multigraph(g2, n2) - edges2.extend([None]) - cdef np.ndarray min_sum = np.zeros(len(edges1)) - for i in range(len(edges1)): - min_i = np.zeros(len(edges2)) - for j in range(len(edges2)): - min_i[j] = self.gpq(edges1[i], edges2[j]) - min_sum[i] = np.min(min_i) - return np.sum(min_sum) - - cdef float gpq(self, e1, e2): - """ - Compute the edge distance function - :param e1: edge1 - :param e2: edge2 - :return: - """ - if e2 == None: # Del - return self.edge_del - if e1 == None: # Insert - return self.edge_ins - else: - if e1 == e2: - return 0. - return (self.edge_del + self.edge_ins) / 2 - diff --git a/gmatch4py_cython/gmatch4py/ged/graph/__init__.py b/gmatch4py_cython/gmatch4py/ged/graph/__init__.py deleted file mode 100644 index 950f635..0000000 --- a/gmatch4py_cython/gmatch4py/ged/graph/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# coding = utf-8 \ No newline at end of file diff --git a/gmatch4py_cython/gmatch4py/ged/graph/__init__.pyx b/gmatch4py_cython/gmatch4py/ged/graph/__init__.pyx deleted file mode 100644 index e69de29..0000000 diff --git a/gmatch4py_cython/gmatch4py/ged/graph/edge_graph.pyx b/gmatch4py_cython/gmatch4py/ged/graph/edge_graph.pyx deleted file mode 100644 index 24b8bda..0000000 --- a/gmatch4py_cython/gmatch4py/ged/graph/edge_graph.pyx +++ /dev/null @@ -1,16 +0,0 @@ -# -*- coding: UTF-8 -*- - - -class EdgeGraph(): - - def __init__(self, init_node, nodes): - self.init_node=init_node - self.nodes_ = nodes - self.edge=nodes - def nodes(self): - return self.nodes_ - - def size(self): - return len(self.nodes) - def __len__(self): - return len(self.nodes_) diff --git a/gmatch4py_cython/gmatch4py/ged/greedy_edit_distance.pyx b/gmatch4py_cython/gmatch4py/ged/greedy_edit_distance.pyx deleted file mode 100644 index 7752030..0000000 --- a/gmatch4py_cython/gmatch4py/ged/greedy_edit_distance.pyx +++ /dev/null @@ -1,55 +0,0 @@ -# coding = utf-8 -import numpy as np - -from .algorithm.graph_edit_dist import GraphEditDistance -cimport numpy as np - -class GreedyEditDistance(GraphEditDistance): - """ - Implementation of the Greedy Edit Distance presented in : - - Improved quadratic time approximation of graph edit distance by Hausdorff matching and greedy assignement - Andreas Fischer, Kaspar Riesen, Horst Bunke - 2016 - """ - __type__ = "dist" - @staticmethod - def compare(listgs, selected, c_del_node=1, c_del_edge=1, c_ins_node=1, c_ins_edge=1): - cdef int n = len(listgs) - cdef np.ndarray comparison_matrix = np.zeros((n, n)) - for i in range(n): - for j in range(i, n): - f=True - if not listgs[i] or not listgs[j]: - f=False - elif len(listgs[i])== 0 or len(listgs[j]) == 0: - f=False - if selected: - if not i in selected: - f=False - if f: - comparison_matrix[i, j] = GreedyEditDistance(listgs[i], listgs[j],False, node_del=c_del_node, - node_ins=c_ins_node, edge_del=c_del_edge, - edge_ins=c_ins_edge).distance() - else: - comparison_matrix[i, j] = np.inf - comparison_matrix[j, i] = comparison_matrix[i, j] - - - return comparison_matrix - - def __init__(self,g1,g2,debug=False,**kwargs): - """Constructor for GreedyEditDistance""" - super().__init__(g1,g2,debug,**kwargs) - - - def edit_costs(self): - cdef np.ndarray cost_matrix=self.create_cost_matrix() - cdef np.ndarray cost_matrix_2=cost_matrix.copy() - cdef list psi=[] - for i in range(len(cost_matrix)): - phi_i=np.argmin((cost_matrix[i])) - cost_matrix=np.delete(cost_matrix,phi_i,1) - psi.append([i,phi_i+i]) #+i to compensate the previous column deletion - return [cost_matrix_2[psi[i][0]][psi[i][1]] for i in range(len(psi))] - diff --git a/gmatch4py_cython/gmatch4py/ged/hausdorff_edit_distance.pyx b/gmatch4py_cython/gmatch4py/ged/hausdorff_edit_distance.pyx deleted file mode 100644 index 06dc664..0000000 --- a/gmatch4py_cython/gmatch4py/ged/hausdorff_edit_distance.pyx +++ /dev/null @@ -1,171 +0,0 @@ -# coding = utf-8 - -import numpy as np -cimport numpy as np -#from libcpp.list cimport list as cpplist -cdef class HED: - """ - Implementation of Hausdorff Edit Distance described in - - Improved quadratic time approximation of graph edit distance by Hausdorff matching and greedy assignement - Andreas Fischer, Kaspar Riesen, Horst Bunke - 2016 - """ - - cdef int node_del - cdef int node_ins - cdef int edge_del - cdef int edge_ins - - __type__ = "dist" - @staticmethod - def compare(list listgs, selected, int c_del_node=1, int c_del_edge=1, int c_ins_node=1, int c_ins_edge=1): - cdef int n = len(listgs) - comparator = HED(c_del_node, c_ins_node, c_del_edge, c_ins_edge) - cdef np.ndarray comparison_matrix = np.zeros((n, n)) - for i in range(n): - for j in range(i, n): - f=True - if not listgs[i] or not listgs[j]: - f=False - elif len(listgs[i])== 0 or len(listgs[j]) == 0: - f=False - if selected: - if not i in selected: - f=False - if f: - comparison_matrix[i, j] = comparator.hed(listgs[i], listgs[j]) - else: - comparison_matrix[i, j] = np.inf - comparison_matrix[j, i] = comparison_matrix[i, j] - - return comparison_matrix - - - def __init__(self, int node_del=1, int node_ins=1, int edge_del=1, int edge_ins=1): - """Constructor for HED""" - self.node_del = node_del - self.node_ins = node_ins - self.edge_del = edge_del - self.edge_ins = edge_ins - - cpdef float hed(self, g1, g2): - """ - Compute de Hausdorff Edit Distance - :param g1: first graph - :param g2: second graph - :return: - """ - return self.sum_fuv(g1, g2) + self.sum_fuv(g2, g1) - - cdef float sum_fuv(self, g1, g2): - """ - Compute Nearest Neighbour Distance between G1 and G2 - :param g1: First Graph - :param g2: Second Graph - :return: - """ - cdef np.ndarray min_sum = np.zeros(len(g1)) - nodes1 = g1.nodes() - nodes2 = g2.nodes() - nodes2.extend([None]) - cdef np.ndarray min_i - for i in range(len(nodes1)): - min_i = np.zeros(len(nodes2)) - for j in range(len(nodes2)): - min_i[j] = self.fuv(g1, g2, nodes1[i], nodes2[j]) - min_sum[i] = np.min(min_i) - return np.sum(min_sum) - - cdef float fuv(self, g1, g2, n1, n2): - """ - Compute the Node Distance function - :param g1: first graph - :param g2: second graph - :param n1: node of the first graph - :param n2: node of the second graph - :return: - """ - if n2 == None: # Del - return self.node_del + ((self.edge_del / 2) * g1.degree(n1)) - if n1 == None: # Insert - return self.node_ins + ((self.edge_ins / 2) * g2.degree(n2)) - else: - if n1 == n2: - return 0 - return (self.node_del + self.node_ins + self.hed_edge(g1, g2, n1, n2)) / 2 - - cdef float hed_edge(self, g1, g2, n1, n2): - """ - Compute HEDistance between edges of n1 and n2, respectively in g1 and g2 - :param g1: first graph - :param g2: second graph - :param n1: node of the first graph - :param n2: node of the second graph - :return: - """ - return self.sum_gpq(g1, n1, g2, n2) + self.sum_gpq(g1, n1, g2, n2) - - cdef list get_edge_multigraph(self, g, node): - """ - Get list of edge around a node in a Multigraph - :param g: multigraph - :param node: node in the multigraph - :return: - """ - - cdef list originals_ = g.edges(node, data=True) - cdef int n= len(originals_) - if n == 0: - return [] - - - cdef list edges = [""]*n - for i in range(n): - edge=originals_[i] - edges[i]=("{0}-{1}".format(edge[0],edge[1])) - return edges - - cdef float sum_gpq(self, g1, n1, g2, n2): - """ - Compute Nearest Neighbour Distance between edges around n1 in G1 and edges around n2 in G2 - :param g1: first graph - :param n1: node in the first graph - :param g2: second graph - :param n2: node in the second graph - :return: - """ - - #if isinstance(g1, nx.MultiDiGraph): - cdef list edges1 = self.get_edge_multigraph(g1, n1) - cdef list edges2 = self.get_edge_multigraph(g2, n2) - - #else: - #edges1 = [str(n1 + "-" + ef) for ef in list(g1.edge[n1].keys())] - #edges2 = [str(n2 + "-" + ef) for ef in list(g2.edge[n2].keys())] - - cdef np.ndarray min_sum = np.zeros(len(edges1)) - edges2.extend([None]) - cdef np.ndarray min_i - for i in range(len(edges1)): - min_i = np.zeros(len(edges2)) - for j in range(len(edges2)): - min_i[j] = self.gpq(edges1[i], edges2[j]) - min_sum[i] = np.min(min_i) - return np.sum(min_sum) - - cdef float gpq(self, str e1, str e2): - """ - Compute the edge distance function - :param e1: edge1 - :param e2: edge2 - :return: - """ - if e2 == None: # Del - return self.edge_del - if e1 == None: # Insert - return self.edge_ins - else: - if e1 == e2: - return 0 - return (self.edge_del + self.edge_ins) / 2 diff --git a/gmatch4py_cython/gmatch4py/graph_matching_algorithm.pyx b/gmatch4py_cython/gmatch4py/graph_matching_algorithm.pyx deleted file mode 100644 index 5b2ed35..0000000 --- a/gmatch4py_cython/gmatch4py/graph_matching_algorithm.pyx +++ /dev/null @@ -1,37 +0,0 @@ -# coding = utf-8 -import numpy as np -cimport numpy as np - -cdef class GraphMatchingAlgorithm: - - def __init__(self,type="distance",max_value="inf"): - self.type=type - self.maxvalue=max_value - - def comparator(self,g1, g2, int c_del_node, int c_del_edge, int c_ins_node, int c_ins_edge,h): - pass - - def null_value(self): - if self.type == "distance": - return np.nan - else: # if similarity - return 0 - - def compare(self,list listgs, selected=None, int c_del_node=1, int c_del_edge=1, int c_ins_node=1, int c_ins_edge=1,int h=2): - n = len(listgs) - comparison_matrix = np.zeros((n, n)) - for i in range(n): - for j in range(i, n): - f=True - if not listgs[i] or not listgs[j]: - f=False - if selected: - if not i in selected: - f=False - if f: - comparison_matrix[i, j] = self.comparator(listgs[i], listgs[j],c_del_node=c_del_node, c_del_edge=c_del_edge, c_ins_node=c_ins_node, c_ins_edge=c_ins_edge,h=h) - else: - comparison_matrix[i, j] = self.null_value() - comparison_matrix[j, i] = comparison_matrix[i, j] - - return comparison_matrix diff --git a/gmatch4py_cython/gmatch4py/jaccard.pyx b/gmatch4py_cython/gmatch4py/jaccard.pyx deleted file mode 100644 index 894afc6..0000000 --- a/gmatch4py_cython/gmatch4py/jaccard.pyx +++ /dev/null @@ -1,95 +0,0 @@ -# coding = utf-8 - -# coding = utf-8 - -import numpy as np -cimport numpy as np - -def intersect(a, b): - return list(set(a) & set(b)) -class Jaccard(): - __type__ = "sim" - - - @staticmethod - def compare(listgs,selected): - cdef int n = len(listgs) - cdef np.ndarray comparison_matrix = np.zeros((n, n)) - cdef i=0 - cdef j=0 - for i in range(n): - for j in range(i,n): - g1 = listgs[i] - g2 = listgs[j] - f=True - if not listgs[i] or not listgs[j]: - f=False - elif len(listgs[i])== 0 or len(listgs[j]) == 0: - f=False - if selected: - if not i in selected: - f=False - if f: - inter_ver,inter_ed = Jaccard.intersect_graph(g1,g2) - un_ver,un_edg=Jaccard.union_nodes(g1,g2),Jaccard.union_edges(g1,g2) - if len(un_ver) == 0 or len(un_edg) == 0: - comparison_matrix[i, j] = 0. - else: - comparison_matrix[i,j]=(len(inter_ver)/len(un_ver))*(len(inter_ed)/len(un_edg)) - else: - comparison_matrix[i, j] = 0. - - comparison_matrix[j, i] = comparison_matrix[i, j] - - return comparison_matrix - - - @staticmethod - def intersect_edges(g1,g2): - cdef list ed1 = Jaccard.transform_edges(g1.edges(data=True)) - cdef list ed2 = Jaccard.transform_edges(g2.edges(data=True)) - cdef list inter_ed=[] - for e1 in ed1: - for e2 in ed2: - if e1 == e2: - inter_ed.append(e1) - return inter_ed - - @staticmethod - def union_nodes(g1, g2): - cdef set union=set([]) - for n in g1.nodes():union.add(n) - for n in g2.nodes(): union.add(n) - return union - - @staticmethod - def union_edges(g1, g2): - cdef list ed1 = Jaccard.transform_edges(g1.edges(data=True)) - cdef list ed2 = Jaccard.transform_edges(g2.edges(data=True)) - cdef list union = [] - cdef set register=set([]) - trans_=lambda x : "{0}-{1}:{2}".format(x[0],x[1],x[2]["color"]) - for e1 in ed1: - if not trans_(e1) in register: - union.append(e1) - register.add(trans_(e1)) - for e2 in ed2: - if not trans_(e2) in register: - union.append(e2) - register.add(trans_(e2)) - return union - @staticmethod - def intersect_nodes(g1,g2): - return intersect(g1.nodes(),g2.nodes()) - - @staticmethod - def intersect_graph(g1,g2): - return Jaccard.intersect_nodes(g1,g2),Jaccard.intersect_edges(g1,g2) - - @staticmethod - def transform_edges(ed): - for e in range(len(ed)): - if "id" in ed[e][-1]: - del ed[e][-1]["id"] - return ed - diff --git a/gmatch4py_cython/gmatch4py/kernels/__init__.py b/gmatch4py_cython/gmatch4py/kernels/__init__.py deleted file mode 100644 index 950f635..0000000 --- a/gmatch4py_cython/gmatch4py/kernels/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# coding = utf-8 \ No newline at end of file diff --git a/gmatch4py_cython/gmatch4py/kernels/random_walk_kernel.pyx b/gmatch4py_cython/gmatch4py/kernels/random_walk_kernel.pyx deleted file mode 100644 index d780eb0..0000000 --- a/gmatch4py_cython/gmatch4py/kernels/random_walk_kernel.pyx +++ /dev/null @@ -1,93 +0,0 @@ -# coding = utf-8 - -import networkx as nx -import numpy as np - -class GeometricRandomWalkKernel(): - __type__ = "sim" - @staticmethod - def maxDegree(G): - degree_sequence = sorted(nx.degree(G).values(), reverse=True) # degree sequence - - # print "Degree sequence", degree_sequence - dmax = max(degree_sequence) - return dmax - @staticmethod - def compare(listgs): - - n = len(listgs) - comparison_matrix=np.zeros((n,n)) - for i in range(n): - for j in range(i,n): - if len(listgs[i]) <1 or len(listgs[j]) <1: - comparison_matrix[i, j] = 0 - comparison_matrix[j, i] = 0 - continue - direct_product_graph=nx.tensor_product(listgs[i],listgs[j]) - Ax = nx.adjacency_matrix(direct_product_graph).todense() - try: - la = 1/ ((GeometricRandomWalkKernel.maxDegree(direct_product_graph)**2)+1) # lambda value - except: - la= pow(1,-6) - eps = pow(10,-10) - I=np.identity(Ax.shape[0]) - I_vec=np.ones(Ax.shape[0]) - x=I_vec.copy() - x_pre=np.zeros(Ax.shape[0]) - c=0 - - while (np.linalg.norm(x-x_pre)) > eps: - if c > 100: - break - x_pre=x - - x= I_vec + la*np.dot(Ax,x_pre.T) - c+=1 - comparison_matrix[i,j]=np.sum(x) - comparison_matrix[j,i]=comparison_matrix[i,j] - print(comparison_matrix) - for i in range(n): - for j in range(i,n): - comparison_matrix[i,j] = (comparison_matrix[i,j]/np.sqrt(comparison_matrix[i,i]*comparison_matrix[j,j])) - comparison_matrix[j,i]=comparison_matrix[i,j] - return comparison_matrix - -class KStepRandomWalkKernel(): - __type__ = "sim" - @staticmethod - def maxDegree(G): - degree_sequence = sorted(nx.degree(G).values(), reverse=True) # degree sequence - # print "Degree sequence", degree_sequence - dmax = max(degree_sequence) - return dmax - @staticmethod - def compare(listgs,lambda_list=[1,1,1]): - k=len(lambda_list) - if not len(lambda_list) == k: - raise AttributeError - n = len(listgs) - comparison_matrix=np.zeros((n,n)) - for i in range(n): - for j in range(i,n): - if len(listgs[i]) <1 or len(listgs[j]) <1: - comparison_matrix[i, j] = 0 - comparison_matrix[j, i] = 0 - continue - direct_product_graph=nx.tensor_product(listgs[i],listgs[j]) - Ax = nx.adjacency_matrix(direct_product_graph).todense() - eps = pow(10,-10) - I=np.identity(Ax.shape[0]) - ax_pow = I.copy() - sum_ = lambda_list[0] * I - for kk in range(1, k): - ax_pow *= Ax - sum_ += lambda_list[kk] * ax_pow - - comparison_matrix[i, j] = np.sum(sum_)/(len(listgs[i])**2 * len(listgs[j])**2) - comparison_matrix[j,i] = comparison_matrix[i,j] - - for i in range(n): - for j in range(i,n): - comparison_matrix[i,j] = comparison_matrix[i,j]/np.sqrt(comparison_matrix[i,i]*comparison_matrix[j,j]) - comparison_matrix[j,i]=comparison_matrix[i,j] - return comparison_matrix \ No newline at end of file diff --git a/gmatch4py_cython/gmatch4py/kernels/shortest_path_kernel.pyx b/gmatch4py_cython/gmatch4py/kernels/shortest_path_kernel.pyx deleted file mode 100644 index e7e7444..0000000 --- a/gmatch4py_cython/gmatch4py/kernels/shortest_path_kernel.pyx +++ /dev/null @@ -1,88 +0,0 @@ -# coding = utf-8 - -""" -Shortest-Path graph kernel. -Python implementation based on: "Shortest-path kernels on graphs", by -Borgwardt, K.M.; Kriegel, H.-P., in Data Mining, Fifth IEEE -International Conference on , vol., no., pp.8 pp.-, 27-30 Nov. 2005 -doi: 10.1109/ICDM.2005.132 -Author : Sandro Vega-Pons, Emanuele Olivetti -Modified by : Jacques Fize -""" - -import networkx as nx -import numpy as np - - -class ShortestPathGraphKernel: - """ - Shorthest path graph kernel. - """ - __type__ = "sim" - @staticmethod - def compare( g_1, g_2, verbose=False): - """Compute the kernel value (similarity) between two graphs. - Parameters - ---------- - g1 : networkx.Graph - First graph. - g2 : networkx.Graph - Second graph. - Returns - ------- - k : The similarity value between g1 and g2. - """ - # Diagonal superior matrix of the floyd warshall shortest - # paths: - fwm1 = np.array(nx.floyd_warshall_numpy(g_1)) - fwm1 = np.where(fwm1 == np.inf, 0, fwm1) - fwm1 = np.where(fwm1 == np.nan, 0, fwm1) - fwm1 = np.triu(fwm1, k=1) - bc1 = np.bincount(fwm1.reshape(-1).astype(int)) - - fwm2 = np.array(nx.floyd_warshall_numpy(g_2)) - fwm2 = np.where(fwm2 == np.inf, 0, fwm2) - fwm2 = np.where(fwm2 == np.nan, 0, fwm2) - fwm2 = np.triu(fwm2, k=1) - bc2 = np.bincount(fwm2.reshape(-1).astype(int)) - - # Copy into arrays with the same length the non-zero shortests - # paths: - v1 = np.zeros(max(len(bc1), len(bc2)) - 1) - v1[range(0, len(bc1)-1)] = bc1[1:] - - v2 = np.zeros(max(len(bc1), len(bc2)) - 1) - v2[range(0, len(bc2)-1)] = bc2[1:] - - return np.sum(v1 * v2) - - - @staticmethod - def compare_list(graph_list, verbose=False): - """Compute the all-pairs kernel values for a list of graphs. - This function can be used to directly compute the kernel - matrix for a list of graphs. The direct computation of the - kernel matrix is faster than the computation of all individual - pairwise kernel values. - Parameters - ---------- - graph_list: list - A list of graphs (list of networkx graphs) - Return - ------ - K: numpy.array, shape = (len(graph_list), len(graph_list)) - The similarity matrix of all graphs in graph_list. - """ - n = len(graph_list) - k = np.zeros((n, n)) - for i in range(n): - for j in range(i, n): - k[i, j] = ShortestPathGraphKernel.compare(graph_list[i], graph_list[j]) - k[j, i] = k[i, j] - - k_norm = np.zeros(k.shape) - for i in range(k.shape[0]): - for j in range(k.shape[1]): - k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j]) - - return k_norm \ No newline at end of file diff --git a/gmatch4py_cython/gmatch4py/kernels/weisfeiler_lehman.pyx b/gmatch4py_cython/gmatch4py/kernels/weisfeiler_lehman.pyx deleted file mode 100644 index 2731f0f..0000000 --- a/gmatch4py_cython/gmatch4py/kernels/weisfeiler_lehman.pyx +++ /dev/null @@ -1,143 +0,0 @@ -# coding = utf-8 - -"""Weisfeiler_Lehman graph kernel. - -Python implementation based on: "Weisfeiler-Lehman Graph Kernels", by: -Nino Shervashidze, Pascal Schweitzer, Erik J. van Leeuwen, Kurt -Mehlhorn, Karsten M. Borgwardt, JMLR, 2012. -http://jmlr.csail.mit.edu/papers/v12/shervashidze11a.html - -Author : Sandro Vega-Pons, Emanuele Olivetti -Source : https://github.com/emanuele/jstsp2015/blob/master/gk_weisfeiler_lehman.py -Modified by : Jacques Fizen -""" - -import copy - -import networkx as nx -import numpy as np -cimport numpy as np - - -class WeisfeleirLehmanKernel(object): - __type__ = "sim" - @staticmethod - def compare(graph_list,selected,h=2): - """Compute the all-pairs kernel values for a list of graphs. - This function can be used to directly compute the kernel - matrix for a list of graphs. The direct computation of the - kernel matrix is faster than the computation of all individual - pairwise kernel values. - Parameters - ---------- - graph_list: list - A list of graphs (list of networkx graphs) - h : interger - Number of iterations. - node_label : boolean - Whether to use original node labels. True for using node labels - saved in the attribute 'node_label'. False for using the node - degree of each node as node attribute. - Return - ------ - K: numpy.array, shape = (len(graph_list), len(graph_list)) - The similarity matrix of all graphs in graph_list. - """ - - cdef int n = len(graph_list) - cdef np.ndarray phi - cdef int n_nodes = 0 - cdef int n_max = 0 - cdef int i,j - # Compute adjacency lists and n_nodes, the total number of - # nodes in the dataset. - for i in range(n): - n_nodes += graph_list[i].number_of_nodes() - - # Computing the maximum number of nodes in the graphs. It - # will be used in the computation of vectorial - # representation. - if (n_max < graph_list[i].number_of_nodes()): - n_max = graph_list[i].number_of_nodes() - - phi = np.zeros((n_nodes, n), dtype=np.uint64) - - # INITIALIZATION: initialize the nodes labels for each graph - # with their labels or with degrees (for unlabeled graphs) - - cdef list labels = [0] * n - cdef dict label_lookup = {} - cdef int label_counter = 0 - - - # label_lookup is an associative array, which will contain the - # mapping from multiset labels (strings) to short labels - # (integers) - - cdef list nodes - for i in range(n): - nodes = graph_list[i].nodes() - # It is assumed that the graph has an attribute - # 'node_label' - labels[i] = np.zeros(len(nodes), dtype=np.int32) - - for j in range(len(nodes)): - if not (nodes[j] in label_lookup): - label_lookup[nodes[j]] = str(label_counter) - labels[i][j] = label_counter - label_counter += 1 - else: - labels[i][j] = label_lookup[nodes[j]] - # labels are associated to a natural number - # starting with 0. - - phi[labels[i][j], i] += 1 - - graph_list[i]=nx.relabel_nodes(graph_list[i],label_lookup) - - cdef np.ndarray[np.float64_t] k - k = np.dot(phi.transpose(), phi) - - # MAIN LOOP - cdef int it = 0 - - new_labels = copy.deepcopy(labels) # Can't work without it !!! - - while it < h: - # create an empty lookup table - label_lookup = {} - label_counter = 0 - - phi = np.zeros((n_nodes, n)) - for i in range(n): - nodes = graph_list[i].nodes() - for v in range(len(nodes)): - # form a multiset label of the node v of the i'th graph - # and convert it to a string - - long_label = [] - long_label.extend(nx.neighbors(graph_list[i],nodes[v])) - - long_label_string = "".join(long_label) - # if the multiset label has not yet occurred, add it to the - # lookup table and assign a number to it - if not (long_label_string in label_lookup): - label_lookup[long_label_string] = str(label_counter) - new_labels[i][v] = label_counter - label_counter += 1 - else: - new_labels[i][v] = label_lookup[long_label_string] - # fill the column for i'th graph in phi - aux = np.bincount(new_labels[i]) - phi[new_labels[i], i] += aux[new_labels[i]] - - k += np.dot(phi.transpose(), phi) - it = it + 1 - - # Compute the normalized version of the kernel - cdef np.ndarray[np.float64_t] k_norm = np.zeros((k.shape[0],k.shape[1])) - for i in range(k.shape[0]): - for j in range(k.shape[1]): - k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j]) - - return k_norm \ No newline at end of file diff --git a/gmatch4py_cython/gmatch4py/kernels/weisfeiler_lehman_edge.pyx b/gmatch4py_cython/gmatch4py/kernels/weisfeiler_lehman_edge.pyx deleted file mode 100644 index 367e099..0000000 --- a/gmatch4py_cython/gmatch4py/kernels/weisfeiler_lehman_edge.pyx +++ /dev/null @@ -1,192 +0,0 @@ -# coding = utf-8 - -# coding = utf-8# coding = utf-8 - -from helpers.gazeteer_helpers import get_data,get_data_by_wikidata_id - -# coding = utf-8 - -"""Weisfeiler_Lehman GEO graph kernel. - -""" - -import numpy as np -import networkx as nx -import copy - -class WeisfeleirLehmanKernelEdge(object): - __type__ = "sim" - - - @staticmethod - def compare(graph_list,h=3): - """Compute the all-pairs kernel values for a list of graphs. - This function can be used to directly compute the kernel - matrix for a list of graphs. The direct computation of the - kernel matrix is faster than the computation of all individual - pairwise kernel values. - Parameters - ---------- - graph_list: list - A list of graphs (list of networkx graphs) - h : interger - Number of iterations. - node_label : boolean - Whether to use original node labels. True for using node labels - saved in the attribute 'node_label'. False for using the node - degree of each node as node attribute. - Return - ------ - K: numpy.array, shape = (len(graph_list), len(graph_list)) - The similarity matrix of all graphs in graph_list. - """ - - n = len(graph_list) - k = [0] * (h + 1) - n_nodes = 0 - n_max = 0 - ams=[nx.adjacency_matrix(g).todense() for g in graph_list] - inclusion_dictionnary={} - - # Compute adjacency lists and n_nodes, the total number of - # nodes in the dataset. - for i in range(n): - n_nodes += graph_list[i].number_of_nodes() - - """ - Store Inclusion Informations - """ - for node in graph_list[i].nodes(): - graph_list[i].node[node]["id_GD"]=node - if not node in inclusion_dictionnary: - inc_list = [] - try: - inc_list = get_data(node)["inc_P131"] - except: - try: - inc_list=get_data_by_wikidata_id(get_data(node)["continent"])["id"] - except: - pass # No inclusion - if inc_list: - inc_list = inc_list if isinstance(inc_list,list) else [inc_list] - - inclusion_dictionnary[node]=inc_list[0] - for j in range(1,len(inc_list)): - if j+1 < len(inc_list): - inclusion_dictionnary[inc_list[j]]=inc_list[j+1] - - - - - # Computing the maximum number of nodes in the graphs. It - # will be used in the computation of vectorial - # representation. - if (n_max < graph_list[i].number_of_nodes()): - n_max = graph_list[i].number_of_nodes() - - phi = np.zeros((n_nodes, n), dtype=np.uint64) - #print(inclusion_dictionnary) - # INITIALIZATION: initialize the nodes labels for each graph - # with their labels or with degrees (for unlabeled graphs) - - labels = [0] * n - label_lookup = {} - label_counter = 0 - - # label_lookup is an associative array, which will contain the - # mapping from multiset labels (strings) to short labels - # (integers) - for i in range(n): - nodes = graph_list[i].nodes() - # It is assumed that the graph has an attribute - # 'node_label' - labels[i] = np.zeros(len(nodes), dtype=np.int32) - - for j in range(len(nodes)): - if not (nodes[j] in label_lookup): - label_lookup[nodes[j]] = str(label_counter) - labels[i][j] = label_counter - label_counter += 1 - else: - labels[i][j] = label_lookup[nodes[j]] - # labels are associated to a natural number - # starting with 0. - - phi[labels[i][j], i] += 1 - - graph_list[i]=nx.relabel_nodes(graph_list[i],label_lookup) - - L=label_counter - print("L1",L) - ed=np.zeros((np.int((L*(L+1))),n)) - # MAIN LOOP - it = 0 - new_labels = copy.deepcopy(labels) # Can't work without it !!! - - for i in range(n): - labels_aux = np.tile(new_labels[i].reshape(-1,1),len(new_labels[i])) - a=np.minimum(labels_aux,labels_aux.T) - b=np.maximum(labels_aux,np.transpose(labels_aux)) - I=np.triu((ams[i] !=0),1) - a_i=np.extract(I,a) - b_i = np.extract(I, b) - Ind=np.abs(np.multiply((a[I]-1),(2*L+2-a[I])/2+b[I]-a[I]+1).astype(int)) - minind=np.min(Ind) - aux=np.bincount(Ind) - ed[Ind,i]=aux[Ind] - - mask=np.sum(ed,1) !=0 - ed= ed[mask] - k=np.dot(ed.T,ed) - - it = 0 - new_labels = copy.deepcopy(new_labels) # Can't work without it !!! - - while it < h: - label_lookup={} - label_counter=0 - for i in range(n): - nodes = graph_list[i].nodes() - for v in range(len(nodes)): - # form a multiset label of the node v of the i'th graph - # and convert it to a string - - long_label = [] - long_label.extend(nx.neighbors(graph_list[i],nodes[v])) - - long_label_string = "".join(long_label) - # if the multiset label has not yet occurred, add it to the - # lookup table and assign a number to it - if not (long_label_string in label_lookup): - label_lookup[long_label_string] = str(label_counter) - new_labels[i][v] = label_counter - label_counter += 1 - else: - new_labels[i][v] = label_lookup[long_label_string] - - L = label_counter - print("L2",L) - ed = np.zeros((np.int((L * (L + 1))), n)) - for i in range(n): - labels_aux = np.tile(new_labels[i].reshape(-1, 1), len(new_labels[i])) - a = np.minimum(labels_aux, labels_aux.T) - b = np.maximum(labels_aux, np.transpose(labels_aux)) - I = np.triu((ams[i] != 0), 1) - a_i = np.extract(I, a) - b_i = np.extract(I, b) - Ind = np.abs(np.multiply((a[I] - 1), (2 * L + 2 - a[I]) / 2 + b[I] - a[I] + 1).astype(int)) - minind = np.min(Ind) - aux = np.bincount(Ind) - ed[Ind, i] = aux[Ind] - - mask = np.sum(ed, 1) != 0 - ed = ed[mask] - k += np.dot(ed.T, ed) - print(k) - it+=1 - k_norm = np.zeros(k.shape) - for i in range(k.shape[0]): - for j in range(k.shape[1]): - k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j]) - - return k_norm \ No newline at end of file diff --git a/gmatch4py_cython/gmatch4py/kernels/weisfeiler_lehman_geo.pyx b/gmatch4py_cython/gmatch4py/kernels/weisfeiler_lehman_geo.pyx deleted file mode 100644 index 59c2b1b..0000000 --- a/gmatch4py_cython/gmatch4py/kernels/weisfeiler_lehman_geo.pyx +++ /dev/null @@ -1,165 +0,0 @@ -# coding = utf-8 - -from helpers.gazeteer_helpers import get_data,get_data_by_wikidata_id - -# coding = utf-8 - -"""Weisfeiler_Lehman GEO graph kernel. - -""" - -import numpy as np -import networkx as nx -import copy - - -class WeisfeleirLehmanKernelGEO(object): - __type__ = "sim" - __depreciated__=True - - @staticmethod - def compare(graph_list,h=2,verbose=False): - """Compute the all-pairs kernel values for a list of graphs. - This function can be used to directly compute the kernel - matrix for a list of graphs. The direct computation of the - kernel matrix is faster than the computation of all individual - pairwise kernel values. - Parameters - ---------- - graph_list: list - A list of graphs (list of networkx graphs) - h : interger - Number of iterations. - node_label : boolean - Whether to use original node labels. True for using node labels - saved in the attribute 'node_label'. False for using the node - degree of each node as node attribute. - Return - ------ - K: numpy.array, shape = (len(graph_list), len(graph_list)) - The similarity matrix of all graphs in graph_list. - """ - - n = len(graph_list) - k = [0] * (h + 1) - n_nodes = 0 - n_max = 0 - - inclusion_dictionnary={} - - # Compute adjacency lists and n_nodes, the total number of - # nodes in the dataset. - for i in range(n): - n_nodes += graph_list[i].number_of_nodes() - - """ - Store Inclusion Informations - """ - for node in graph_list[i].nodes(): - graph_list[i].node[node]["id_GD"]=node - if not node in inclusion_dictionnary: - inc_list = [] - try: - inc_list = get_data(node)["inc_P131"] - except: - try: - inc_list=get_data_by_wikidata_id(get_data(node)["continent"])["id"] - except: - pass # No inclusion - if inc_list: - inc_list = inc_list if isinstance(inc_list,list) else [inc_list] - - inclusion_dictionnary[node]=inc_list[0] - for j in range(1,len(inc_list)): - if j+1 < len(inc_list): - inclusion_dictionnary[inc_list[j]]=inc_list[j+1] - - - - - # Computing the maximum number of nodes in the graphs. It - # will be used in the computation of vectorial - # representation. - if (n_max < graph_list[i].number_of_nodes()): - n_max = graph_list[i].number_of_nodes() - - phi = np.zeros((n_nodes, n), dtype=np.uint64) - if verbose: print(inclusion_dictionnary) - # INITIALIZATION: initialize the nodes labels for each graph - # with their labels or with degrees (for unlabeled graphs) - - labels = [0] * n - label_lookup = {} - label_counter = 0 - - # label_lookup is an associative array, which will contain the - # mapping from multiset labels (strings) to short labels - # (integers) - for i in range(n): - nodes = graph_list[i].nodes() - # It is assumed that the graph has an attribute - # 'node_label' - labels[i] = np.zeros(len(nodes), dtype=np.int32) - - for j in range(len(nodes)): - if not (nodes[j] in label_lookup): - label_lookup[nodes[j]] = str(label_counter) - labels[i][j] = label_counter - label_counter += 1 - else: - labels[i][j] = label_lookup[nodes[j]] - # labels are associated to a natural number - # starting with 0. - - phi[labels[i][j], i] += 1 - - graph_list[i]=nx.relabel_nodes(graph_list[i],label_lookup) - k = np.dot(phi.transpose(), phi).astype(np.float64) - - # MAIN LOOP - it = 0 - new_labels = copy.deepcopy(labels) # Can't work without it !!! - - while it < h: - # create an empty lookup table - label_lookup = {} - label_counter = 0 - - phi = np.zeros((n_nodes, n)) - for i in range(n): - nodes = graph_list[i].nodes() - for v in range(len(nodes)): - # form a multiset label of the node v of the i'th graph - # and convert it to a string - - id_GD = graph_list[i].node[nodes[v]]['id_GD'] - if id_GD in inclusion_dictionnary: - - long_label_string = inclusion_dictionnary[id_GD] - graph_list[i].node[nodes[v]]['id_GD']=inclusion_dictionnary[id_GD] - else: - long_label_string = id_GD - - - # if the multiset label has not yet occurred, add it to the - # lookup table and assign a number to it - if not (long_label_string in label_lookup): - label_lookup[long_label_string] = str(label_counter) - new_labels[i][v] = label_counter - label_counter += 1 - else: - new_labels[i][v] = label_lookup[long_label_string] - # fill the column for i'th graph in phi - aux = np.bincount(new_labels[i]) - phi[new_labels[i], i] += (1/(it+2))*aux[new_labels[i]] # +2 because it0 =0 - - k += np.dot(phi.transpose(), phi) - it = it + 1 - - # Compute the normalized version of the kernel - k_norm = np.zeros(k.shape) - for i in range(k.shape[0]): - for j in range(k.shape[1]): - k_norm[i, j] = k[i, j] / np.sqrt(k[i, i] * k[j, j]) - - return k_norm \ No newline at end of file diff --git a/gmatch4py_cython/gmatch4py/mcs.pyx b/gmatch4py_cython/gmatch4py/mcs.pyx deleted file mode 100644 index 93365ef..0000000 --- a/gmatch4py_cython/gmatch4py/mcs.pyx +++ /dev/null @@ -1,74 +0,0 @@ -# coding = utf-8 -import networkx as nx -import numpy as np -cimport numpy as np - -class MCS(): - """ - *A graph distance metric based on the maximal common subgraph, H. Bunke and K. Shearer, - Pattern Recognition Letters, 1998* - """ - @staticmethod - def compare(listgs,selected): - cdef int n = len(listgs) - cdef np.ndarray comparison_matrix = np.zeros((n, n)) - for i in range(n): - for j in range(i, n): - f=True - if not listgs[i] or not listgs[j]: - f=False - elif len(listgs[i])== 0 or len(listgs[j]) == 0: - f=False - if selected: - if not i in selected: - f=False - if f: - comparison_matrix[i, j] = MCS.s_mcs(listgs[i],listgs[j]) - else: - comparison_matrix[i, j] = 0. - comparison_matrix[j, i] = comparison_matrix[i, j] - return comparison_matrix - - - @staticmethod - def intersect(a, b): - return list(set(a) & set(b)) - - @staticmethod - def transform_edges(ed): - for e in range(len(ed)): - if "id" in ed[e][-1]: - del ed[e][-1]["id"] - return ed - - - @staticmethod - def intersect_edges(g1, g2): - cdef list ed1 = MCS.transform_edges(g1.edges(data=True)) - cdef list ed2 = MCS.transform_edges(g2.edges(data=True)) - inter_ed = [] - for e1 in ed1: - for e2 in ed2: - if e1 == e2: - inter_ed.append(e1) - return inter_ed - - @staticmethod - def intersect_nodes(g1, g2): - return MCS.intersect(g1.nodes(), g2.nodes()) - - @staticmethod - def maximum_common_subgraph(g1, g2): - """ - Extract maximum common subgraph - """ - res = nx.MultiDiGraph() - res.add_nodes_from(MCS.intersect_nodes(g1, g2)) - res.add_edges_from(MCS.intersect_edges(g1, g2)) - return res - - @staticmethod - def s_mcs(g1, g2): - - return len(MCS.maximum_common_subgraph(g1, g2)) / float(max(len(g1), len(g2))) - diff --git a/gmatch4py_cython/gmatch4py/vertex_edge_overlap.pyx b/gmatch4py_cython/gmatch4py/vertex_edge_overlap.pyx deleted file mode 100644 index 7888376..0000000 --- a/gmatch4py_cython/gmatch4py/vertex_edge_overlap.pyx +++ /dev/null @@ -1,78 +0,0 @@ -# coding = utf-8 - -import numpy as np -cimport numpy as np - - - -cdef list intersect(a, b): - return list(set(a) & set(b)) -class VertexEdgeOverlap(): - __type__ = "sim" - - """ - Vertex/Edge Overlap Algorithm - presented in Web graph similarity for anomaly detection, Journal of Internet Services and Applications, 2008 - by P. Papadimitriou, A. Dasdan and H.Gracia-Molina - - Code Author : Jacques Fize - """ - - @staticmethod - def compare(list listgs,selected): - n = len(listgs) - cdef np.ndarray comparison_matrix = np.zeros((n, n)) - cdef list inter_ver - cdef list inter_ed - cdef int denom - for i in range(n): - for j in range(i,n): - f=True - if not listgs[i] or not listgs[j]: - f=False - elif len(listgs[i])== 0 or len(listgs[j]) == 0: - f=False - if selected: - if not i in selected: - f=False - if f: - g1 = listgs[i] - g2 = listgs[j] - inter_ver,inter_ed = VertexEdgeOverlap.intersect_graph(g1,g2) - denom=len(g1)+len(g2)+len(g1.edges(data=True))+len(g2.edges(data=True)) - if denom == 0: - continue - comparison_matrix[i,j]=2*(len(inter_ver)+len(inter_ed))/denom # Data = True --> For nx.MultiDiGraph - else: - comparison_matrix[i, j] = 0. - comparison_matrix[j, i] = comparison_matrix[i, j] - return comparison_matrix - - - @staticmethod - def intersect_edges(g1,g2): - cdef list ed1 = VertexEdgeOverlap.transform_edges(g1.edges(data=True)) - cdef list ed2 = VertexEdgeOverlap.transform_edges(g2.edges(data=True)) - cdef list inter_ed=[] - for e1 in ed1: - for e2 in ed2: - if e1 == e2: - inter_ed.append(e1) - return inter_ed - - - @staticmethod - def intersect_nodes(g1,g2): - return intersect(g1.nodes(),g2.nodes()) - - @staticmethod - def intersect_graph(g1,g2): - return VertexEdgeOverlap.intersect_nodes(g1,g2),VertexEdgeOverlap.intersect_edges(g1,g2) - - @staticmethod - def transform_edges(ed): - for e in range(len(ed)): - if "id" in ed[e][-1]: - del ed[e][-1]["id"] - return ed - diff --git a/gmatch4py_cython/gmatch4py/vertex_ranking.pyx b/gmatch4py_cython/gmatch4py/vertex_ranking.pyx deleted file mode 100644 index 8f72a4d..0000000 --- a/gmatch4py_cython/gmatch4py/vertex_ranking.pyx +++ /dev/null @@ -1,39 +0,0 @@ -# coding = utf-8 - -import networkx as nx -import numpy as np -cimport numpy as np -from scipy.stats import spearmanr - - -def intersect(a, b): - return list(set(a) & set(b)) - -class VertexRanking(): - """ - Vertex Ranking - presented in Web graph similarity for anomaly detection, Journal of Internet Services and Applications, 2008 # Maybe not ?? - by P. Papadimitriou, A. Dasdan and H.Gracia-Molina - - Code Author : Jacques Fize - - """ - __type__ = "sim" - @staticmethod - def compare(listgs): - cdef int n = len(listgs) - cdef np.ndarray comparison_matrix = np.zeros((n,n)) - cdef list page_r=[nx.pagerank(nx.DiGraph(g)) for g in listgs] - cdef list node_intersection - cdef list X - cdef list Y - for i in range(n): - for j in range(i,n): - node_intersection=intersect(list(page_r[i].keys()),list(page_r[j].keys())) - X,Y=[],[] - for node in node_intersection: - X.append(page_r[i][node]) - Y.append(page_r[j][node]) - comparison_matrix[i,j] = spearmanr(X,Y)[0] - comparison_matrix[j,i] = comparison_matrix[i,j] - return comparison_matrix diff --git a/gmatch4py_cython/setup.py b/gmatch4py_cython/setup.py deleted file mode 100644 index 8f84375..0000000 --- a/gmatch4py_cython/setup.py +++ /dev/null @@ -1,48 +0,0 @@ -import sys, os -from distutils.core import setup -from distutils.extension import Extension - -# we'd better have Cython installed, or it's a no-go -try: - from Cython.Distutils import build_ext -except: - print("You don't seem to have Cython installed. Please get a") - print("copy from www.cython.org and install it") - sys.exit(1) - - -# scan the 'dvedit' directory for extension files, converting -# them to extension names in dotted notation -def scandir(dir, files=[]): - for file in os.listdir(dir): - path = os.path.join(dir, file) - if os.path.isfile(path) and path.endswith(".pyx"): - files.append(path.replace(os.path.sep, ".")[:-4]) - elif os.path.isdir(path): - scandir(path, files) - return files - - -# generate an Extension object from its dotted name -def makeExtension(extName): - extPath = extName.replace(".", os.path.sep)+".pyx" - return Extension( - extName, - [extPath], - language="c++", - extra_compile_args = ["-O3", "-Wall",'-std=c++11', '-v'], - ) - -# get the list of extensions -extNames = scandir("gmatch4py") - -# and build up the set of Extension objects -extensions = [makeExtension(name) for name in extNames] - -# finally, we can pass all this to distutils -setup( - name="gmatch4py_test", - packages=["gmatch4py", "gmatch4py.ged","gmatch4py.kernels"], - ext_modules=extensions, - cmdclass = {'build_ext': build_ext}, -) \ No newline at end of file diff --git a/helpers/boundary.py b/helpers/boundary.py index 28e8c82..0ea1d40 100644 --- a/helpers/boundary.py +++ b/helpers/boundary.py @@ -8,7 +8,7 @@ from helpers.gazeteer_helpers import get_data from scipy.spatial import ConvexHull from shapely.geometry import Polygon, Point, shape from config.configuration import config -from .collision_c import collide +from .collision import collide __collisions={} diff --git a/helpers/collision_with_gazetteer_data.py b/helpers/collision_with_gazetteer_data.py index 9cc8aff..8042d74 100644 --- a/helpers/collision_with_gazetteer_data.py +++ b/helpers/collision_with_gazetteer_data.py @@ -7,7 +7,7 @@ from shapely.geometry import Polygon, Point, shape from config.configuration import config from helpers.gazeteer_helpers import get_data -from .collision_c import collide +from .collision import collide __cache={} __cache_adjacency={} diff --git a/models/node2vec.py b/models/node2vec.py index e5d0414..b36e7e5 100644 --- a/models/node2vec.py +++ b/models/node2vec.py @@ -45,7 +45,7 @@ class Graph(): # sys.stdout.write("\r") G = self.G walks = [] - nodes = list(G.nodes()) + nodes = list(G.nodes) for walk_iter in range(num_walks): # sys.stdout.write( # '\rWalk iteration: {0}/{1}'.format(walk_iter + 1, num_walks)) @@ -86,7 +86,7 @@ class Graph(): is_directed = self.is_directed alias_nodes = {} - for node in G.nodes(): + for node in list(G.nodes): unnormalized_probs = [G[node][nbr]['weight'] for nbr in sorted(G.neighbors(node))] norm_const = sum(unnormalized_probs) @@ -98,10 +98,10 @@ class Graph(): triads = {} if is_directed: - for edge in G.edges(): + for edge in list(G.edges()): alias_edges[edge] = self.get_alias_edge(edge[0], edge[1]) else: - for edge in G.edges(): + for edge in list(G.edges()): alias_edges[edge] = self.get_alias_edge(edge[0], edge[1]) alias_edges[(edge[1], edge[0])] = self.get_alias_edge( edge[1], edge[0]) diff --git a/models/str.py b/models/str.py index e93613d..7b80075 100644 --- a/models/str.py +++ b/models/str.py @@ -61,7 +61,7 @@ class STR(object): sp_en = {} for nod in g: try: - sp_en[nod] = g.node[nod]["label"] + sp_en[nod] = g.nodes[nod]["label"] except KeyError: # If no label found, grab one from the geo-database data = get_data(nod) if data: @@ -157,7 +157,7 @@ class STR(object): warnings.warn("{0} doesn't exists in the geo database!".format(new_se)) self.graph = nx.relabel_nodes(self.graph, final_transform_map) for se_ in new_label: - self.graph.node[se_]["label"] = new_label[se_] + self.graph.nodes[se_]["label"] = new_label[se_] def update(self): """ diff --git a/models/transformation/transform.py b/models/transformation/transform.py index d643fb9..5839b63 100644 --- a/models/transformation/transform.py +++ b/models/transformation/transform.py @@ -172,7 +172,7 @@ class Expansion(Transformation): return [] def select_es(self,graph): - es = np.array(graph.nodes()) + es = np.array(list(graph.nodes)) score = [np.inf for i in range(len(es))] for e in range(len(es)): data = get_data(es[e]) diff --git a/nlp/bow_se.py b/nlp/bow_se.py index 43e67c5..90c32fd 100644 --- a/nlp/bow_se.py +++ b/nlp/bow_se.py @@ -46,7 +46,7 @@ class BOWSE(object): # Computing the maximum number of nodes in the graphs. It # will be used in the computation of vectorial # representation. - if (n_max < graph_list[i].number_of_nodes()): + if n_max < graph_list[i].number_of_nodes(): n_max = graph_list[i].number_of_nodes() phi = np.zeros((n_nodes, n), dtype=np.uint64) @@ -62,7 +62,7 @@ class BOWSE(object): # mapping from multiset labels (strings) to short labels # (integers) for i in range(n): - nodes = graph_list[i].nodes() + nodes = list(graph_list[i].nodes) # It is assumed that the graph has an attribute # 'node_label' labels[i] = np.zeros(len(nodes), dtype=np.int32) diff --git a/nlp/disambiguator/wikipedia_cooc.py b/nlp/disambiguator/wikipedia_cooc.py index c1b262a..45e865f 100644 --- a/nlp/disambiguator/wikipedia_cooc.py +++ b/nlp/disambiguator/wikipedia_cooc.py @@ -49,7 +49,7 @@ class WikipediaDisambiguator(Disambiguator): g = nx.Graph() possible_candidates = [] - betw_cand={} # indicate which toponym group a candidate belong to # maybe useless ... + betw_cand={} # indicate which toponym group a candidate belong to #w maybe useless ... group_candidate = {} #candidates per toponym for e in spat_en: cand = get_top_candidate(e, lang) @@ -78,7 +78,7 @@ class WikipediaDisambiguator(Disambiguator): prob = 0.0 if not cand == cand2: # take the lowest co-occurrency between two candidates - if (cand2, cand) in g.edges(): + if (cand2, cand) in list(g.edges): if g.edge[cand2][cand]["weight"] < prob: continue g.add_edge(cand, cand2, weight=prob) diff --git a/notebooks/Eval.ipynb b/notebooks/Eval.ipynb index acab54e..f5096da 100644 --- a/notebooks/Eval.ipynb +++ b/notebooks/Eval.ipynb @@ -82542,7 +82542,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.0" + "version": "3.6.5" }, "toc": { "nav_menu": {}, diff --git a/requirements.txt b/requirements.txt index eab9149..c8a121f 100755 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ Flask_Session==0.3.1 Shapely==1.5.17.post1 matplotlib==2.0.2 termcolor==1.1.0 -networkx==1.9 +networkx==2.1 requests==2.18.4 numpy==1.14.0 gensim==1.0.1 -- GitLab