bipartite_graph_matching_2.pyx 6.67 KiB
# coding = utf-8
import numpy as np
cimport numpy as np
from ..base cimport Base
from cython.parallel cimport prange,parallel
from ..helpers.general import parsenx2graph
cimport cython

cdef class BP_2(Base):


    cdef int node_del
    cdef int node_ins
    cdef int edge_del
    cdef int edge_ins

    def __init__(self, int node_del=1, int node_ins=1, int edge_del=1, int edge_ins=1):
        """
        BP_2 Constructor

        Parameters
        ----------
        node_del :int
            Node deletion cost
        node_ins : int
            Node insertion cost
        edge_del : int
            Edge Deletion cost
        edge_ins : int
            Edge Insertion cost
        """
        Base.__init__(self,1,False)
        self.node_del = node_del
        self.node_ins = node_ins
        self.edge_del = edge_del
        self.edge_ins = edge_ins


    @cython.boundscheck(False)
    cpdef np.ndarray compare(self,list listgs, list selected):
        cdef int n = len(listgs)
        cdef list new_gs=parsenx2graph(listgs)
        cdef double[:,:] comparison_matrix = np.zeros((n, n))
        cdef double[:] selected_test = self.get_selected_array(selected,n)
        cdef int i,j
        cdef long[:] n_nodes = np.array([g.size() for g in new_gs])
        cdef long[:] n_edges = np.array([g.density() for g in new_gs])

        with nogil, parallel(num_threads=self.cpu_count):
            for i in prange(n,schedule='static'):
                for j in range(i,n):
                    if  n_nodes[i] > 0 and n_nodes[j] > 0  and selected_test[i] == 1:
                        with gil:
                            comparison_matrix[i, j] = self.bp2(new_gs[i], new_gs[j])
                    else:
                        comparison_matrix[i, j] = 0
                    comparison_matrix[j, i] = comparison_matrix[i, j]

        return np.array(comparison_matrix)


    cdef double bp2(self, g1, g2):
        """
        Compute the BP2 similarity value between two `networkx.Graph`
        
        Parameters
        ----------
        g1 : gmatch4py.Graph
            First Graph
        g2 : gmatch4py.Graph
            Second Graph

        Returns
        -------
        float 
            similarity value
        """
        return np.min([self.distance_bp2(self.psi(g1,g2)),self.distance_bp2(self.psi(g2,g1))])

    cdef double distance_bp2(self,e):
        """
        Return the distance based on the edit path found.
        Parameters
        ----------
        e : list
            Contains the edit path costs

        Returns
        -------
        double
            Return sum of the costs from the edit path
        """
        return np.sum(e)

    cdef list psi(self,g1,g2):
        """
        Return the optimal edit path :math:`\psi` based on BP2 algorithm.
        
        
        Parameters
        ----------
        g1 : networkx.Graph
            First Graph
        g2 : networkx.Graph
            Second Graph

        Returns
        -------
        list
            list containing costs from the optimal edit path
        """
        cdef list psi_=[]
        cdef list nodes1 = list(g1.nodes())
        cdef list nodes2 = list(g2.nodes())
        for u in nodes1:
            v=None
            for w in nodes2:
                if 2*self.fuv(g1,g2,u,w) < self.fuv(g1,g2,u,None) + self.fuv(g1,g2,None,w)\
                     and self.fuv(g1,g2,u,w) < self.fuv(g1,g2,u,v):
                    v=w
                psi_.append(self.fuv(g1,g2,u,v))
            if u:
                nodes1= list(set(nodes1).difference(set([u])))
            if v:
                nodes2= list(set(nodes2).difference(set([v])))
        for v in nodes2:
            psi_.append(self.fuv(g1,g2,None,v))
        return  psi_



    cdef float fuv(self, g1, g2, str n1, str n2):
        """
        Compute the Node Distance function
        Parameters
        ----------
        g1 : gmatch4py.Graph
            First graph
        g2 : gmatch4py.Graph
            Second graph
        n1 : int or str
            identifier of the first node
        n2 : int or str
            identifier of the second node

        Returns
        -------
        float
            node distance
        """
        if n2 == None:  # Del
            return self.node_del + ((self.edge_del / 2.) * g1.degree(n1))
        if n1 == None:  # Insert
            return self.node_ins + ((self.edge_ins / 2.) * g2.degree(n2))
        else:
            if n1 == n2:
                return 0
            return (self.node_del + self.node_ins + self.hed_edge(g1, g2, n1, n2)) / 2

    cdef float hed_edge(self, g1, g2, str n1, str n2):
        """
        Compute HEDistance between edges of n1 and n2, respectively in g1 and g2
        Parameters
        ----------
        g1 : gmatch4py.Graph
            First graph
        g2 : gmatch4py.Graph
            Second graph
        n1 : int or str
            identifier of the first node
        n2 : int or str
            identifier of the second node

        Returns
        -------
        float
            HEDistance between g1 and g2
        """
        return self.sum_gpq(g1, n1, g2, n2) + self.sum_gpq(g1, n1, g2, n2)


    cdef float sum_gpq(self, g1, str n1, g2, str n2):
        """
        Compute Nearest Neighbour Distance between edges around n1 in G1  and edges around n2 in G2
        Parameters
        ----------
        g1 : gmatch4py.Graph
            First graph
        g2 : gmatch4py.Graph
            Second graph
        n1 : int or str
            identifier of the first node
        n2 : int or str
            identifier of the second node

        Returns
        -------
        float
            Nearest Neighbour Distance
        """

        #if isinstance(g1, nx.MultiDiGraph):
        cdef list edges1 = g1.get_edges_no(n1) if n1 else []
        cdef list edges2 = g2.get_edges_no(n2) if n2 else []

        cdef np.ndarray min_sum = np.zeros(len(edges1))
        edges2.extend([None])
        cdef np.ndarray min_i
        for i in range(len(edges1)):
            min_i = np.zeros(len(edges2))
            for j in range(len(edges2)):
                min_i[j] = self.gpq(edges1[i], edges2[j])
            min_sum[i] = np.min(min_i)
        return np.sum(min_sum)

    cdef float gpq(self, str e1, str e2):
        """
        Compute the edge distance function
        Parameters
        ----------
        e1 : str
            first edge identifier
        e2
            second edge indentifier
        Returns
        -------
        float
            edge distance
        """

        if e2 == None:  # Del
            return self.edge_del
        if e1 == None:  # Insert
            return self.edge_ins
        else:
            if e1 == e2:
                return 0
            return (self.edge_del + self.edge_ins) / 2.