Commit 35a27dee by Fize Jacques

### DEBUG + ADD real test

parent 7b844b13
 ... ... @@ -7,7 +7,7 @@ notifications: install: - pip install -q cython numpy networkx scipy scikit-learn pandas - python setup.py build_ext --inplace - pip install . script: - pytest test/test.py \ No newline at end of file
 ... ... @@ -28,7 +28,7 @@ cd GMatch4py In `GMatch4py`, algorithms manipulate `networkx.Graph`, a complete graph model that comes with a large spectrum of parsers to load your graph from various inputs: `*.graphml,*.gexf,..` (check [here](https://networkx.github.io/documentation/stable/reference/readwrite/index.html) to see all the accepted formats) ### Use Gmatch4py ### Use GMatch4py If you want to use algorithms like *graph edit distances*, here is an example: ```python ... ... @@ -44,7 +44,7 @@ g1=nx.complete_bipartite_graph(5,4) g2=nx.complete_bipartite_graph(6,4) ``` All graph matching algorithms in `Gmatch4py work this way: All graph matching algorithms in `Gmatch4py` work this way: * Each algorithm is associated with an object, each object having its specific parameters. In this case, the parameters are the edit costs (delete a vertex, add a vertex, ...) * Each object is associated with a `compare()` function with two parameters. The first parameter is **a list of the graphs** you want to **compare**, i.e. measure the distance/similarity (depends on the algorithm). Then, you can specify a sample of graphs to be compared to all the other graphs. To this end, the second parameter should be **a list containing the indices** of these graphs (based on the first parameter list). If you would rather compute the distance/similarity **between all graphs**, just use the `None` value. ... ... @@ -68,7 +68,16 @@ ged.similarity(result) ged.distance(result) ``` ## Exploit nodes and edges attributes In this latest version, we added the possibility to exploit graph attributes! To do so, the `base.Base` class is extended with the `set_attr_graph_used(node_attr,edge_attr)` method. ```python import networkx as nx import gmatch4py as gm ged = gm.GraphEditDistance(1,1,1,1) ged.set_attr_graph_used("theme","color") # Edge colors and node themes attributes will be used. ``` ## List of algorithms ... ... 
@@ -115,6 +124,6 @@ each code is associated with a reference to the original.** ## TODO List * Debug algorithms --> Random Walk Kernel * Debug algorithms --> Random Walk Kernel, Deltacon * Optimize algorithms --> Vertex Ranking * Write the documentation :runner: \ No newline at end of file
 ... ... @@ -22,7 +22,7 @@ cdef class Graph: for id1 in G.adj: for id2 in G.adj[id1]: for id3 in G.adj[id1][id2]: G._adj[id1][id2][id3]["id"]=i G._adj[id1][id2][id3]["id"]=str(i) i+=1 self.is_edge_attr = True edge_attr_key = "id" ... ... @@ -91,7 +91,7 @@ cdef class Graph: if self.is_edge_attr: if self.is_directed: in_edge=list(G.in_edges(n,data=True)) out_edge=list(G.in_edges(n,data=True)) out_edge=list(G.out_edges(n,data=True)) for n1,n2,attr_dict in in_edge: self.degree_per_attr[attr_dict[self.edge_attr_key]][n]["in"]+=1 self.degree_per_attr_weighted[attr_dict[self.edge_attr_key]][n]["in"]+=1*(attr_dict["weight"] if "weight" in attr_dict else 1 ) ... ... @@ -169,10 +169,10 @@ cdef class Graph: return "_".join(sorted([n1,attr_value])) cpdef str hash_edge_attr(self,str n1,str n2, str attr_value): if not self.is_directed: if self.is_directed: return "_".join([n1,n2,attr_value]) ed=sorted([n1,n2]) ed.extend(attr_value) ed.extend([attr_value]) return "_".join(ed) ## EXIST FUNCTION ... ...
 ... ... @@ -58,6 +58,7 @@ this_directory = path.abspath(path.dirname(__file__)) with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f: long_description = f.read() requirements=["numpy","networkx","scipy",'scikit-learn','tqdm','pandas'] setup( name="GMatch4py", author="Jacques Fize", ... ... @@ -68,9 +69,9 @@ setup( packages=["gmatch4py","gmatch4py.helpers"], ext_modules=extensions, cmdclass={'build_ext': build_ext}, setup_requires=["numpy","networkx","scipy",'scikit-learn'], install_requires=["numpy","networkx","scipy",'scikit-learn'], version="0.2.4.2beta", setup_requires=requirements, install_requires=requirements, version="0.2.4.3beta", classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", ... ...
 # Timing smoke test for the GMatch4py comparators (collected by pytest).
def test_mesure():
    """Run every comparator once on random trees and record its run time.

    Builds ``max_`` random trees of ``size_g`` nodes, then, for each corpus
    size produced by ``range(50, max_, 50)``, instantiates each algorithm,
    calls ``compare(graphs, None)`` and stores
    ``[algorithm name, corpus size, elapsed seconds]``.

    Side effects:
        Prints one row per (algorithm, size) pair and writes all rows to
        ``new_gmatch4py_res_<max_>graphs_<size_g>size.csv`` in the current
        working directory.

    Raises:
        AssertionError: if a comparator returns a result whose length does
            not match the number of graphs it was given.
    """
    import time

    import gmatch4py as gm
    import networkx as nx
    import pandas as pd
    from tqdm import tqdm

    max_ = 100
    size_g = 10
    graphs_all = [nx.random_tree(size_g) for _ in range(max_)]

    # These algorithms are constructed from four edit costs.
    edit_cost_classes = (
        gm.GraphEditDistance,
        gm.BP_2,
        gm.GreedyEditDistance,
        gm.HED,
    )
    algorithm_classes = [
        gm.BagOfNodes,
        gm.WeisfeleirLehmanKernel,
        gm.GraphEditDistance,
        gm.GreedyEditDistance,
        gm.HED,
        gm.BP_2,
        gm.Jaccard,
        gm.MCS,
        gm.VertexEdgeOverlap,
    ]

    result_compiled = []
    # With max_ = 100 this range only yields 50, i.e. a single corpus size.
    for size_ in tqdm(range(50, max_, 50)):
        graphs = graphs_all[:size_]
        for class_ in algorithm_classes:
            # perf_counter is monotonic, unlike time.time(), so the measured
            # interval cannot be skewed by a system clock adjustment.
            deb = time.perf_counter()
            if class_ in edit_cost_classes:
                comparator = class_(1, 1, 1, 1)
            elif class_ == gm.WeisfeleirLehmanKernel:
                comparator = class_(h=2)
            else:
                comparator = class_()
            matrix = comparator.compare(graphs, None)
            # Measure once so the printed and the stored value are identical.
            elapsed = time.perf_counter() - deb

            # Passing None compares all graphs, so one row per input graph is
            # expected. NOTE(review): confirm this holds for every comparator.
            assert len(matrix) == len(graphs), class_.__name__

            row = [class_.__name__, size_, elapsed]
            print(row)
            result_compiled.append(row)

    df = pd.DataFrame(
        result_compiled, columns="algorithm size_data time_exec_s".split()
    )
    df.to_csv("new_gmatch4py_res_{0}graphs_{1}size.csv".format(max_, size_g))
 # Smoke script: build each GMatch4py comparator and call compare() once.
from gmatch4py import *
import networkx as nx

# Shared input corpus: ten random trees of ten nodes each.
graphs = [nx.random_tree(10) for _ in range(10)]

comparator = None
for class_ in [
    BagOfNodes,
    WeisfeleirLehmanKernel,
    GraphEditDistance,
    BP_2,
    GreedyEditDistance,
    HED,
    Jaccard,
    MCS,
    VertexEdgeOverlap,
]:
    print(class_)
    if class_ == WeisfeleirLehmanKernel:
        # The kernel is built with h=2.
        comparator = class_(h=2)
    elif class_ in (GraphEditDistance, BP_2, GreedyEditDistance, HED):
        # These algorithms take four edit costs; all are set to 1.
        comparator = class_(1, 1, 1, 1)
    else:
        comparator = class_()
    # NOTE(review): an empty selection list is passed here; confirm whether
    # None (compare all graphs) was intended.
    matrix = comparator.compare(graphs, [])