From 85eb7d5c8fecbbf3129aec75a929c1cb58777bee Mon Sep 17 00:00:00 2001 From: domyang Date: Mon, 27 Apr 2020 23:39:26 -0700 Subject: [PATCH 01/12] Adding equivalence property to Graph class --- uclasm/equivalence/__init__.py | 2 + .../equivalence/equivalence_data_structure.py | 206 ++++++++++++++++ uclasm/equivalence/partition_sparse.py | 225 ++++++++++++++++++ 3 files changed, 433 insertions(+) create mode 100644 uclasm/equivalence/__init__.py create mode 100644 uclasm/equivalence/equivalence_data_structure.py create mode 100644 uclasm/equivalence/partition_sparse.py diff --git a/uclasm/equivalence/__init__.py b/uclasm/equivalence/__init__.py new file mode 100644 index 0000000..8d629ff --- /dev/null +++ b/uclasm/equivalence/__init__.py @@ -0,0 +1,2 @@ +from .equivalence_data_structure import Equivalence +from .partition_sparse import bfs_partition_graph diff --git a/uclasm/equivalence/equivalence_data_structure.py b/uclasm/equivalence/equivalence_data_structure.py new file mode 100644 index 0000000..9809471 --- /dev/null +++ b/uclasm/equivalence/equivalence_data_structure.py @@ -0,0 +1,206 @@ +""" +Implementation of an equivalence data structure for partitioning +vertices into equivalence classes. From Pattis' ICS46 lecture's note: +https://www.ics.uci.edu/~pattis/ICS-46/lectures/notes/equivalence.txt +by TimNg +""" + +from collections import defaultdict +from copy import deepcopy + +class Equivalence: + """ + Equivalence class data structure for efficient equivalence classes + computation. This is implemented using the disjoint sets data structure. + See https://en.wikipedia.org/wiki/Disjoint-set_data_structure for a basic + overview of the data structure. + + Internally, this class maintains a dictionary containing the mapping + of every element to its parent (which may be itself) called parent_map. + Searches for which class an element is in amounts to traversing through + the parents until it finds an element which is its own parent, and then + this is the representative element. + """ + def __init__(self, starting_set = set()): + """ + Starting_set is a set of node to start with. + """ + # dictionary of parents where each key is mapped to its parent + # start with everything maps to itself + self.parent_map = {a: a for a in starting_set} + # dictionary of root: size_of_tree + # also serves as something to store the roots + self.root_size_map = {a : 1 for a in starting_set} + + ### METHODS ### + def add_singleton(self, new_value): + """ + Add a new value as its own equiv class + """ + # Check first that it is not already in a member + error_msg = ("Equivalence.add_singleton: {} is already " + + "in the equiv class!".format(new_value)) + assert new_value not in self.parent_map, error_msg + + self.parent_map[new_value] = new_value + self.root_size_map[new_value] = 1 + + def merge_classes_of(self, a , b): + """ + Merge the equivalence classes of a and b together + by setting each parent's map to point to the same root. + """ + assert a in self.parent_map, "Value " + str(a) + " does not exist." + assert b in self.parent_map, "Value " + str(b) + " does not exist." + + # get the roots while compressing the tree + root_of_a = self.compress_to_root(a) + root_of_b = self.compress_to_root(b) + + if (root_of_a == root_of_b): + return # they are already in the same equivalence class + + # find the "big" root and the "small" root + if self.root_size_map[root_of_a] < self.root_size_map[root_of_b]: + small_root, big_root = root_of_a, root_of_b + else: + small_root, big_root = root_of_b, root_of_a + + # now we change the root of the smaller size map to the bigger one + # then we update the size of the new equiv class and then remove + # the smaller one as a root + self.parent_map[small_root] = big_root + self.root_size_map[big_root] += self.root_size_map[small_root] + + # remove the small_root from the roots dict + del self.root_size_map[small_root] + + def merge_set(self, set_to_merge): + """ + Given a set of elements in Equivalence, merge them together + """ + for a in set_to_merge: + assert a in self.parent_map, "Value " + str(a) + " does not exist." + one_elem = next(iter(set_to_merge)) + for a in set_to_merge: + self.merge_classes_of(one_elem, a) + + def partition(self, equivalence_relation: 'function', *args, **kwargs): + """ + Given an equivalence_relation function compute the appropriate + equivalence classes of the elements in the Equivalence data structure. + Users have to make sure of semantic and care should be taken to make + sure the equivalence_relation passed is an actual equiv relation, + otherwise the output will be undefined. + --> Checking in_same_class(a,b) will be the same as using + equivalence_relation(a,b) + This uses a divide and conquer approach on the root_size_map dictionary. + """ + classes = [] + for elem in self.root_size_map: + found = False + for c in classes: + # if these two classes are equivalence then we add them + if equivalence_relation(next(iter(c)), elem, *args, *kwargs): + c.add(i) + found = True + break + if not found: # new class + classes.append(set([elem])) + for c in classes: + self.merge_set(c) + return classes + + ### QUERIES ### + def in_same_class(self, a, b) -> bool: + """ + return a bool indicating if a and b are in the same classes + """ + assert a in self.parent_map, str(a) + " does not exist." + assert b in self.parent_map, str(b) + " does not exist." + root_of_a = self.compress_to_root(a) + root_of_b = self.compress_to_root(b) + return root_of_a == root_of_b + + def __len__(self): + """ + Returns the number of elements in the equiv class + """ + return len(self.parent_map) + + def class_count(self): + """ + return the number of unique equiv classes + """ + return len(self.root_size_map) + + @property + def classes(self): + """ + This function returns a dictionary whose values are the equivalence + classes and whose keys are the representatives of the classes. + """ + answer_map = defaultdict(set) + for elem in self.parent_map: + class_rep = self.representative(elem) + answer_map[class_rep].add(elem) + return answer_map + + def get_class(self, elem): + return self.classes[self.representative(elem)] + + def non_trivial_classes(self): + """ + This function returns a dictionary whose values are the equivalence + classes and whose keys are the representatives of the classes. + The dictionary will only contain classes which have more than one + element. + """ + return {rep: class_ for (rep, class_) in self.classes.items() + if len(class_) > 1} + + def __str__(self): + return "Equiv Classes: {}\nParent_map: {}\nRoot_size_map: {}".format( + self.classes, self.parent_map, self.root_size_map) + + def __getitem__(self, key): + """ + Return the equivalence class for the passed in key. + """ + rep = self.representative(key) + return self.classes[rep] + + def compress_to_root(self, elem) -> 'root of a': + """ + Returns the root of elem, and on the way, set the roots of the + parents of elem to be the root of elem + """ + assert elem in self.parent_map, str(elem) + " does not exist." + parents_of_elem = set() # set to store the parents of elem + curr_val = elem # current value + + # We traverse up through the ancestors of elem and keep track + # of the elements that we see so that we can compress the path + while curr_val != self.parent_map[curr_val]: + parents_of_elem.add(curr_val) + curr_val = self.parent_map[curr_val] + + # curr_val is now the root + # now we set all the parents of elem to the root + for parent in parents_of_elem: + self.parent_map[parent] = curr_val + return curr_val + + def representative(self, elem): + """ + Return the representative of elem's equivalence class. + This performs path compression in the process of finding the + representative. + """ + return self.compress_to_root(elem) + + def copy(self): + """ + Return a deep copy of self + """ + return deepcopy(self) diff --git a/uclasm/equivalence/partition_sparse.py b/uclasm/equivalence/partition_sparse.py new file mode 100644 index 0000000..406e34e --- /dev/null +++ b/uclasm/equivalence/partition_sparse.py @@ -0,0 +1,225 @@ +""" +Implementation of functions to partition vertices according to permutation equivalency +for the DARPA Multichannel Directed Multigraph Subgraph Matching Project +by TimNg +""" + +from .equivalence_data_structure import Equivalence +import numpy as np +from queue import Queue + +def array_equal(mat1, mat2): + """ + Check if two sparse matrices are equal. + """ + return (mat1 != mat2).nnz==0 + +def permutation_relation(u, v, adj_mat_csr, + adj_mat_csc, row_nnz, col_nnz): + """ + We compare two vertices: a and b to see if they are equivalence + under permutation. This can be done by checking if certain pieces + of their values in the adj_matrix are the same (see notes) + + Args: + u (int): The index of the first vertex + v (int): The index of the second vertex + adj_mat_csr (sparse_matrix): A csr representation of the adjacency + matrix + adj_mat_csc (sparse_matrix): A csc representation of the adjacency + matrix + row_nnz (array): An array of the number nonzero count of each row + col_nnz (array): An array of the number nonzero count of each col + """ + # Trivial Case + if u == v: + return True + + u, v = (u ,v) if u < v else (v, u) + + # If they both have no neighbors, they are equivalent. + if (col_nnz[u] == 0 and col_nnz[v] == 0 and + row_nnz[u] == 0 and row_nnz[v] == 0): + return True + + n = adj_mat_csr.shape[0] + + if adj_mat_csr[u,u] != adj_mat_csr[v,v]: + return False + if adj_mat_csr[u,v] != adj_mat_csr[v,u]: + return False + + # We do this check to avoid checking subarrays if we don't have to. + if row_nnz[u] != 0: + # check the rows (3 pieces), maybe there are better ways to do this + if not (array_equal(adj_mat_csr[u,0:u], adj_mat_csr[v,0:u]) and + array_equal(adj_mat_csr[u,u+1:v], adj_mat_csr[v,u+1:v]) and + array_equal(adj_mat_csr[u,v+1:n], adj_mat_csr[v,v+1:n])): + return False + # check the columns + if col_nnz[u] != 0: + if not (array_equal(adj_mat_csc[0:u,u], adj_mat_csc[0:u,v]) and + array_equal(adj_mat_csc[u+1:v,u], adj_mat_csc[u+1:v,v]) and + array_equal(adj_mat_csc[v+1:n,u], adj_mat_csc[v+1:n,v])): + return False + # We have passed all the test if we reach here. + return True + + +def partition_vertices(adj_matrix: '2d numpy array') -> [{'equiv-vertices'}]: + """ + Given an adj_matrix (directed multigraph) we partition the vertices + (by indices) in terms of the permutation relation. + """ + assert adj_matrix.shape[0] == adj_matrix.shape[1], "Must be square matrix!" + # initialize an equivalence relation fill with nodes + nodes = range(0, adj_matrix.shape[0]) + adj_matrix_csc = adj_matrix.tocsc() + row_nnz = adj_matrix.getnnz(1) + col_nnz = adj_matrix_csc.getnnz(0) + vertices_partition = Equivalence(nodes) + # partition into equivalence classes + vertices_partition.partition(permutation_relation, adj_matrix, + adj_matrix_csc, row_nnz, col_nnz) + return vertices_partition + + +def equivalent(u, v, adj_mat_csrs, adj_mat_cscs, row_nnzs, col_nnzs): + """ + Check if vertex u is equivalent to vertex v. This does this by checking + if the respective entries of the adjacency matrices match. This does this + with respect to all channels. + + Args: + u (int): The index of the first vertex + v (int): The index of the second vertex + adj_mat_csrs (dict): A dictionary from channels to csr representations + of the matrices + adj_mat_cscs (dict): A dictionary from channels to csc representations + of the matrices + row_nnzs (dict): A dictionary from channels to an array of the + number nonzero count of each row + col_nnzs (dict): A dictionary from channels to an array of the + number nonzero count of each column + + Returns: + bool: A flag indicating if u is equivalent to v + """ + if u == v: + return True + + # We do preliminary checks to rule out as many equivalences as possible + # because the later checks are expensive + # We first check that the degrees match + for channel in adj_mat_csrs: + row_nnz = row_nnzs[channel] + col_nnz = col_nnzs[channel] + if row_nnz[u] != row_nnz[v] or col_nnz[u] != col_nnz[v]: + return False + + for channel in adj_mat_csrs: + if not permutation_relation(u, v, adj_mat_csrs[channel], + adj_mat_cscs[channel], row_nnzs[channel], col_nnzs[channel]): + return False + return True + + +def partition_neighbors(neighbors, adj_mat_csrs, adj_mat_cscs, + row_nnzs, col_nnzs): + """ + Construct the equivalence classes of the passed in vertices. + + Args: + neighbors (list): A list of the indices of the neighbors + adj_mat_csrs (dict): A dictionary from channels to csr representations + of the matrices + adj_mat_cscs (dict): A dictionary from channels to csc representations + of the matrices + row_nnzs (dict): A dictionary from channels to an array of the + number nonzero count of each row + col_nnzs (dict): A dictionary from channels to an array of the + number nonzero count of each column + """ + eq_classes = [[neighbors[0]]] + for neighbor in neighbors[1:]: + # See if a neighbor is in one of the already computed eq classes + for eq_class in eq_classes: + if equivalent(neighbor, eq_class[0], adj_mat_csrs, adj_mat_cscs, + row_nnzs, col_nnzs): + eq_class.append(neighbor) + break + else: + # Otherwise it is in its own class + eq_classes.append([neighbor]) + + return eq_classes + + +def bfs_partition_graph(graph): + """ + This performs a breadth first search approach to computing the equivalence + classes. This function works by performing a breadth first search of the + vertices; At each step it forms equivalence classes of the neighbors + (No vertex that is not a neighbor can be equivalent to any of the neighbors + so there is no need to check outside the neighbors) + In the BFS, we also only need to visit one representative of each + equivalence class. + + It then needs to do a last check to find which class the inital vertex is + in. + + Args: + graph (Graph): A graph + Returns: + Equivalence: The equivalence structure of the graph + """ + # We create a queue to perform a Breadth First Search + queue = Queue() + visited = np.zeros(graph.n_nodes, dtype=np.bool) + visited[0] = True + all_eq_classes = [] + # We start at vertex 0 + queue.put(0) + + # We create these beforehand so as to only compute them once + adj_mat_csrs = graph.adjs + adj_mat_cscs = [adj_mat.tocsc() for adj_mat in adj_mat_csrs] + row_nnzs = {adj_mat.getnnz(1) for adj_mat in adj_mat_csrs} + col_nnzs = {adj_mat.getnnz(0) for adj_mat in adj_mat_cscs} + + while True: + # If out of vertices to visit, exit loop + if queue.empty(): + break + curr_vertex = queue.get() + + neighbors = graph.sym_composite_adj[curr_vertex].nonzero()[1] + neighbors = neighbors[~visited[neighbors]] + visited[neighbors] = True + if len(neighbors) == 0: + continue + eq_classes = partition_neighbors(neighbors, adj_mat_csrs, adj_mat_cscs, + row_nnzs, col_nnzs) + all_eq_classes.extend(eq_classes) + # For each equivalence class, we put a representative + for eq_class in eq_classes: + queue.put(eq_class[0]) + + # We still need to find which equivalence class 0 is in + for eq_class in all_eq_classes: + if equivalent(0, eq_class[0], adj_mat_csrs, adj_mat_cscs, + row_nnzs, col_nnzs): + eq_class.append(0) + break + else: + # 0 is in its own class + all_eq_classes.append([0]) + + # Construct the Equivalence object. + equivalence = Equivalence(list(range(graph.n_nodes))) + for eq_class in all_eq_classes: + x = eq_class[0] + for y in eq_class[1:]: + equivalence.merge_classes_of(x, y) + + return equivalence From 9958b042a2869410f526fce8fe0d28a6481effbc Mon Sep 17 00:00:00 2001 From: domyang Date: Tue, 28 Apr 2020 01:19:52 -0700 Subject: [PATCH 02/12] Tests for equivalence --- tests/test_equivalence.py | 58 ++++++++++++++++++++++++++ uclasm/equivalence/partition_sparse.py | 16 +++---- 2 files changed, 66 insertions(+), 8 deletions(-) create mode 100644 tests/test_equivalence.py diff --git a/tests/test_equivalence.py b/tests/test_equivalence.py new file mode 100644 index 0000000..22b10b4 --- /dev/null +++ b/tests/test_equivalence.py @@ -0,0 +1,58 @@ +""" +Tests for structural equivalence. +""" +import pytest +import networkx as nx + +from uclasm.graph import from_networkx_graph + + +@pytest.fixture +def star_graph_5(): + """ + Creates the star graph S5, the graph with 1 central node and 5 spokes. + The node with index 0 should be the central node. + """ + star_graph = nx.star_graph(5) + return from_networkx_graph(star_graph) + + +@pytest.fixture +def complete_graph_5(): + """ + Creates the complete graph K5, the graph with 5 nodes each connected to + every other node. + """ + complete_graph = nx.complete_graph(5) + return from_networkx_graph(complete_graph) + + +@pytest.fixture +def path_graph_5(): + """ + Creates the path graph P5, the path graph with 5 nodes. + """ + path_graph = nx.path_graph(5) + return from_networkx_graph(path_graph) + + +def test_star_graph_equivalence(star_graph_5): + equiv = star_graph_5.equivalence_classes + assert len(equiv.classes) == 2 + assert equiv.classes[0] == {0} + rep_1 = equiv.representative(1) + assert equiv.classes[rep_1] == {1,2,3,4,5} + + +def test_complete_graph_equivalence(complete_graph_5): + equiv = complete_graph_5.equivalence_classes + assert len(equiv.classes) == 1 + rep = equiv.representative(0) + assert equiv.classes[rep] == {0,1,2,3,4} + + +def test_path_graph_equivalence(path_graph_5): + equiv = path_graph_5.equivalence_classes + assert len(equiv.classes) == 5 + for i in range(5): + assert equiv.classes[i] == {i} diff --git a/uclasm/equivalence/partition_sparse.py b/uclasm/equivalence/partition_sparse.py index 406e34e..c26fc1d 100644 --- a/uclasm/equivalence/partition_sparse.py +++ b/uclasm/equivalence/partition_sparse.py @@ -111,15 +111,15 @@ def equivalent(u, v, adj_mat_csrs, adj_mat_cscs, row_nnzs, col_nnzs): # We do preliminary checks to rule out as many equivalences as possible # because the later checks are expensive # We first check that the degrees match - for channel in adj_mat_csrs: - row_nnz = row_nnzs[channel] - col_nnz = col_nnzs[channel] + for i in range(len(adj_mat_csrs)): + row_nnz = row_nnzs[i] + col_nnz = col_nnzs[i] if row_nnz[u] != row_nnz[v] or col_nnz[u] != col_nnz[v]: return False - for channel in adj_mat_csrs: - if not permutation_relation(u, v, adj_mat_csrs[channel], - adj_mat_cscs[channel], row_nnzs[channel], col_nnzs[channel]): + for i in range(len(adj_mat_csrs)): + if not permutation_relation(u, v, adj_mat_csrs[i], + adj_mat_cscs[i], row_nnzs[i], col_nnzs[i]): return False return True @@ -184,8 +184,8 @@ def bfs_partition_graph(graph): # We create these beforehand so as to only compute them once adj_mat_csrs = graph.adjs adj_mat_cscs = [adj_mat.tocsc() for adj_mat in adj_mat_csrs] - row_nnzs = {adj_mat.getnnz(1) for adj_mat in adj_mat_csrs} - col_nnzs = {adj_mat.getnnz(0) for adj_mat in adj_mat_cscs} + row_nnzs = [adj_mat.getnnz(1) for adj_mat in adj_mat_csrs] + col_nnzs = [adj_mat.getnnz(0) for adj_mat in adj_mat_cscs] while True: # If out of vertices to visit, exit loop From 01f5393a81ca030a4389442dfa882972b290ff7a Mon Sep 17 00:00:00 2001 From: domyang Date: Tue, 28 Apr 2020 01:20:24 -0700 Subject: [PATCH 03/12] Utility for reading networkx style graphs --- uclasm/graph.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/uclasm/graph.py b/uclasm/graph.py index 935e5a2..f2db88e 100644 --- a/uclasm/graph.py +++ b/uclasm/graph.py @@ -6,6 +6,7 @@ import sys from .utils import index_map, one_hot +from .equivalence import bfs_partition_graph # functools.cached_property was introduced in python 3.8. # https://docs.python.org/3/library/functools.html#functools.cached_property @@ -213,6 +214,14 @@ def in_out_degrees(self): deglist = [self.in_degrees, self.out_degrees] return np.concatenate(deglist, axis=1).astype(np.single) + @cached_property + def equivalence_classes(self): + """Equivalence: The equivalence structure of the graph + + Computes the template equivalence + """ + return bfs_partition_graph(self) + def loopless_subgraph(self): """Get a subgraph containing no self-edges. @@ -328,3 +337,25 @@ def node_cover(self): # TODO: Remove any nodes from the node cover which are not necessary. return np.array(cover) + + +def from_networkx_graph(nx_graph): + """ + This will convert a networkx style graph into a uclasm style Graph. + Currently this does not preserve node or edge labels. It just copies + over the adjacency structure. + + Parameters + ---------- + nx_graph : networkx.Graph + """ + + # TODO: Add the ability to port over node and edge labels + try: + import networkx as nx + except ImportError as e: + print("This function requires networkx to be installed") + raise(e) + + adj = nx.to_scipy_sparse_matrix(nx_graph) + return Graph([adj]) From 6cdd2e34e33ba3c85bac25f908c2056b30d681a3 Mon Sep 17 00:00:00 2001 From: domyang Date: Tue, 28 Apr 2020 14:07:04 -0700 Subject: [PATCH 04/12] Add test for SI when world graph is complete --- tests/test_counting.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/test_counting.py b/tests/test_counting.py index 41b0dc1..564a183 100644 --- a/tests/test_counting.py +++ b/tests/test_counting.py @@ -7,6 +7,7 @@ import numpy as np import pandas as pd +import networkx as nx from scipy.sparse import csr_matrix @pytest.fixture @@ -54,6 +55,7 @@ def smp_star(): smp = MatchingProblem(tmplt, world) return smp + @pytest.fixture def smp_node_cover(): """Create a subgraph matching problem requiring the use of node cover.""" @@ -178,3 +180,21 @@ def test_count_isomorphisms_overlapping_cands(self, smp_overlapping_cands): assert np.sum(smp_overlapping_cands.candidates()) == 7 count = count_isomorphisms(smp_overlapping_cands, verbose=True) assert count == 6 + + def test_count_isomorphisms_random_all_matches(self): + """ + Perform a test where every possible mapping of template to world nodes + is a match. + """ + random_adj = np.random.random((5, 5)) + # No self loops + np.fill_diagonal(random_adj, 0) + t_graph = Graph([csr_matrix(random_adj)]) + + complete_graph = nx.complete_graph(5) + w_graph = uclasm.from_networkx_graph(complete_graph) + + smp = MatchingProblem(t_graph, w_graph) + count = count_isomorphisms(smp, verbose=True) + + assert count == 120 # 5 factorial From eccadb34ee8855975b0f260c0844751f2cb278a1 Mon Sep 17 00:00:00 2001 From: domyang Date: Tue, 28 Apr 2020 14:07:52 -0700 Subject: [PATCH 05/12] Function for computing an Equivalence data structure given a partition --- uclasm/equivalence/equivalence_data_structure.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/uclasm/equivalence/equivalence_data_structure.py b/uclasm/equivalence/equivalence_data_structure.py index 9809471..fb01eb1 100644 --- a/uclasm/equivalence/equivalence_data_structure.py +++ b/uclasm/equivalence/equivalence_data_structure.py @@ -204,3 +204,18 @@ def copy(self): Return a deep copy of self """ return deepcopy(self) + + +def equivalence_from_partition(partition): + """ + Given a partition of some set, create an Equivalence object where two + elements are equivalent if they are in the same cell in the partition. + This isn't as efficient as it could be, but it suffices for now. + """ + elems = [x for xs in partition for x in xs] + + equiv = Equivalence(elems) + for xs in partition: + equiv.merge_set(xs) + + return equiv From de40fcfe043f5b0357193b015eeb0bf1ee5553a3 Mon Sep 17 00:00:00 2001 From: domyang Date: Tue, 28 Apr 2020 14:08:22 -0700 Subject: [PATCH 06/12] Update imports --- uclasm/equivalence/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uclasm/equivalence/__init__.py b/uclasm/equivalence/__init__.py index 8d629ff..1f1d628 100644 --- a/uclasm/equivalence/__init__.py +++ b/uclasm/equivalence/__init__.py @@ -1,2 +1,2 @@ -from .equivalence_data_structure import Equivalence +from .equivalence_data_structure import Equivalence, equivalence_from_partition from .partition_sparse import bfs_partition_graph From 0d87aeb7c40c5ac662eec80d7e1aba0aed3ba0de Mon Sep 17 00:00:00 2001 From: domyang Date: Tue, 28 Apr 2020 14:08:39 -0700 Subject: [PATCH 07/12] Removing unnecessary code --- uclasm/graph.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/uclasm/graph.py b/uclasm/graph.py index f2db88e..a260a7f 100644 --- a/uclasm/graph.py +++ b/uclasm/graph.py @@ -3,6 +3,7 @@ import scipy.sparse as sparse import numpy as np import pandas as pd +import networkx as nx import sys from .utils import index_map, one_hot @@ -351,11 +352,5 @@ def from_networkx_graph(nx_graph): """ # TODO: Add the ability to port over node and edge labels - try: - import networkx as nx - except ImportError as e: - print("This function requires networkx to be installed") - raise(e) - adj = nx.to_scipy_sparse_matrix(nx_graph) return Graph([adj]) From 2242f863bd9169d0506ce2f8f351e0689ea492d3 Mon Sep 17 00:00:00 2001 From: domyang Date: Tue, 28 Apr 2020 14:10:04 -0700 Subject: [PATCH 08/12] Adding ability to count using template equivalence classes --- uclasm/counting/alldiffs.py | 179 +++++++++++++++++++++++++++----- uclasm/counting/isomorphisms.py | 20 ++-- 2 files changed, 167 insertions(+), 32 deletions(-) diff --git a/uclasm/counting/alldiffs.py b/uclasm/counting/alldiffs.py index 8f72c20..a0a028c 100644 --- a/uclasm/counting/alldiffs.py +++ b/uclasm/counting/alldiffs.py @@ -1,46 +1,167 @@ -from ..utils import invert, values_map_to_same_key +""" +alldiffs.py + +Routine for computing solutions to the all different problem +""" + +from math import factorial + import numpy as np -from functools import reduce +from scipy.special import comb -def recursive_alldiff_counter(tnode_to_eqids, eq_class_sizes): - # If no more tnodes to assign - if len(tnode_to_eqids) == 0: - return 1 +from ..utils import invert, values_map_to_same_key +from ..equivalence import equivalence_from_partition + +# Right now we are using a global variable to keep track of solution count +num_solutions = 0 - count = 0 - # This tnode will now be assigned to one of the eq_classes +def count_solutions(matching, smp, w_classes, tmplt_equivalence=False): + """ + This function will compute the number of solutions for a given matching. + + Parameters + ---------- + matching : dict + The matching for which we want to compute solutions + smp : MatchingProblem + The matching problem context + w_classes : list + A list of sets of world nodes that are equivalent + tmplt_equivalence : bool + A flag indicating whether to use template equivalence + + Returns + ------- + int : The number of solutions that can be generated using template and + world equivalence + """ + count = 1 + + w_class_sizes = [len(w_class) for w_class in w_classes] + if tmplt_equivalence: + t_classes = smp.tmplt.equivalence_classes + + for t_class in t_classes.classes().values(): + # We can permute members of template equivalence classes freely + # So we multiply by the factorial of the size of each class + count *= factorial(t_class) + + # Then we need to determine within each template equivalence class + # how many template nodes are assigned to the same world equivalence + # class. Once we do this, we need to choose which elements of the + # world equivalence class to assign to these template nodes. + w_class_counts = {} + for tnode in t_class: + wnode = matching[tnode] + # If class_id is None after this loop, then the tnode was + # assigned prior to calling the alldiffs problem. It will be + # assigned to only one node and not a class. + class_id = None + # Determine which equivalence class it is in + for i, w_class in enumerate(w_classes): + if wnode in w_class: + class_id = i + break + + w_class_counts[class_id] = w_class_counts.get(class_id, 0) + 1 + + for w_class_id in w_class_counts: + if w_class_id is None: + continue + + # Choose which elements of the w_class to assign to the + # members of the template class + count *= comb(w_class_sizes[w_class_id], + w_class_counts[w_class_id], exact=True) + + w_class_sizes[w_class_id] -= w_class_counts[w_class_id] + + else: + for tnode in matching: + wnode = matching[tnode] + class_id = None + # Determine which equivalence class it is in + for i, w_class in enumerate(w_classes): + if wnode in w_class: + class_id = i + break + if class_id is None: + continue + + count *= w_class_sizes[class_id] + w_class_sizes[class_id] -= 1 + + return count + + +def recursive_alldiff_counter(tnode_to_eqids, w_classes, w_class_sizes, smp, + matching, tmplt_equivalence=False): + """ + Recursive routine for computing the number of ways to assign template + nodes to world nodes such that they are all different. + + Parameters + ---------- + tnode_to_eqids : dict + Mapping from template node to a number representing a class of + equivalence nodes + w_classes : list + A partition of world nodes into equivalence classes + w_class_sizes : list + A list of the number of currently unassigned world nodes for each + equivalence class + smp : SubgraphMatchingProblem + The current matching problem + matching : dict + The current assignment of template nodes to world nodes, matching[i] + is the index of the world node that template node i is assigned to + tmplt_equivalence : bool + A flag indicating whether we are using template equivalence + """ + + # We need to count the number of solutions the current matching represents + if len(tnode_to_eqids) == 0: + sol_count = count_solutions(matching, smp, w_classes, tmplt_equivalence) + global num_solutions + num_solutions += sol_count + return + + + # This tnode will now be assigned to one of the w_classes tnode, eqids = tnode_to_eqids.popitem() # We try mapping the tnode to each equivalence class of world nodes for eqid in eqids: # TODO: break out this inner loop into its own function for clarity? - n_cands = eq_class_sizes[eqid] + n_cands = w_class_sizes[eqid] # If no candidates left in this equivalence class, continue if n_cands == 0: continue # We decrement the amount of available candidates in the eq class. - eq_class_sizes[eqid] -= 1 + w_class_sizes[eqid] -= 1 - # Count the number of ways to assign the rest of candidates - n_ways_to_assign_rest = recursive_alldiff_counter(tnode_to_eqids, eq_class_sizes) + # Assign tnode to an arbitrary element of the equivalence class + eq_class = list(w_classes[eqid]) + matching[tnode] = eq_class[w_class_sizes[eqid]] - # We have n_cands possible ways to assign the current tnode to the - # current equivalence class, so we multiply by n_cands - count += n_cands * n_ways_to_assign_rest + # Assign the rest + recursive_alldiff_counter(tnode_to_eqids, w_classes, w_class_sizes, + smp, matching, tmplt_equivalence) - # Unmatch tnode from equivalence class - eq_class_sizes[eqid] += 1 + # Unmatch tnode + matching[tnode] = -1 + # Restore the world node to the class its from + w_class_sizes[eqid] += 1 tnode_to_eqids[tnode] = eqids - return count - def get_equivalence_classes(tnode_to_cands): - """Get equivalence classes of cands which are candidates for the same nodes. + """Get equivalence classes of cands which are candidates for the same + template nodes. Parameters ---------- @@ -50,7 +171,8 @@ def get_equivalence_classes(tnode_to_cands): Returns ------- tnode_to_eqids : dict - Mapping from template node to the equivalence classes to which its candidates belong. + Mapping from template node to the equivalence classes to which its + candidates belong. eq_classes : list List of the equivalence classes """ @@ -65,6 +187,7 @@ def get_equivalence_classes(tnode_to_cands): # Each cand_set here is an equivalence class tnode_set_to_eq_class = values_map_to_same_key(cand_to_tnode_set) eq_classes = list(tnode_set_to_eq_class.values()) + w_equivalence = equivalence_from_partition(eq_classes) cand_to_eqids = { cand: [eqid for eqid, eq_class in enumerate(eq_classes) if cand in eq_class] @@ -83,15 +206,20 @@ def get_equivalence_classes(tnode_to_cands): return tnode_to_eqids, eq_classes -def count_alldiffs(tnode_to_cands): +def count_alldiffs(tnode_to_cands, smp, matching, tmplt_equivalence=False): """ tnode_to_cands: dict(item, list) + smp: SubgraphMatchingProblem + matching: dict + tmplt_equivalence: bool Count the number of ways to assign template nodes to candidates without using any candidate for more than one template node. i.e. count solutions to the alldiff problem, where the nodeiables are the keys of tnode_to_cands, and the domains are the values. """ + global num_solutions + num_solutions = 0 # TODO: can this function be vectorized? # TODO: does scipy have a solver for this already? @@ -99,11 +227,14 @@ def count_alldiffs(tnode_to_cands): # Check if any template node has no candidates if any(len(cands)==0 for cands in tnode_to_cands.values()): return 0 + tnode_to_eqids, eq_classes = get_equivalence_classes(tnode_to_cands) # Alternatively, list(map(len, eq_classes)) eq_class_sizes = [len(eq_class) for eq_class in eq_classes] - count = recursive_alldiff_counter(tnode_to_eqids, eq_class_sizes) + recursive_alldiff_counter(tnode_to_eqids, eq_classes, + eq_class_sizes, smp, matching, tmplt_equivalence) + + return num_solutions - return count diff --git a/uclasm/counting/isomorphisms.py b/uclasm/counting/isomorphisms.py index 096d069..b6055a5 100644 --- a/uclasm/counting/isomorphisms.py +++ b/uclasm/counting/isomorphisms.py @@ -46,8 +46,9 @@ def recursive_isomorphism_counter(smp, matching, *, ---------- smp : MatchingProblem A subgraph matching problem - matching : list - A list of tuples which designate what each template vertex is matched to + matching : np.array + An array keeping track of which template vertex is mapped to which + world vertex. unspec_cover : np.array Array of the indices of the nodes with more than 1 candidate verbose : bool @@ -73,9 +74,9 @@ def recursive_isomorphism_counter(smp, matching, *, # can skip straight to counting solutions to the alldiff constraint problem if len(unspec_cover) == 0: # Elimination filter is not needed here and would be a waste of time - node_to_cands = {node: smp.world.nodes[candidates[idx]] + node_to_cands = {idx: smp.world.nodes[candidates[idx]] for idx, node in enumerate(smp.tmplt.nodes)} - return count_alldiffs(node_to_cands) + return count_alldiffs(node_to_cands, smp, matching) # Since the node cover is not empty, we first choose some valid # assignment of the unspecified nodes one at a time until the remaining @@ -90,7 +91,7 @@ def recursive_isomorphism_counter(smp, matching, *, # candidates_copy[node_idx] = one_hot(cand_idx, world.n_nodes) smp_copy.add_match(node_idx, cand_idx) - matching.append((node_idx, cand_idx)) + matching[node_idx] = cand_idx # Remove matched node from the unspecified list new_unspec_cover = unspec_cover[:node_idx] + unspec_cover[node_idx+1:] @@ -101,7 +102,7 @@ def recursive_isomorphism_counter(smp, matching, *, init_changed_cands=one_hot(node_idx, smp.tmplt.n_nodes)) # Unmatch template vertex - matching.pop() + matching[node_idx] = -1 # TODO: more useful progress summary if verbose: @@ -144,12 +145,15 @@ def count_isomorphisms(smp, *, verbose=True, The number of isomorphisms """ - matching = [] + # This dict keeps track of what nodes have been matched + # If a value is -1, it means that the template vertex with that index + # is currently unmatched. + matching = {node: -1 for node in smp.tmplt.nodes} candidates = smp.candidates() spec_nodes = np.where(candidates.sum(axis=1) == 1)[0] for t_vert in spec_nodes: w_vert = np.where(candidates[t_vert,:])[0][0] - matching.append((t_vert, w_vert)) + matching[t_vert] = w_vert unspec_nodes = np.where(candidates.sum(axis=1) > 1)[0] tmplt_subgraph = smp.tmplt.node_subgraph(unspec_nodes) From 37dde415955964b01ee4c345b4758abaa6a4de47 Mon Sep 17 00:00:00 2001 From: domyang Date: Tue, 28 Apr 2020 14:10:22 -0700 Subject: [PATCH 09/12] Adding equivalence package to __init__.py --- uclasm/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/uclasm/__init__.py b/uclasm/__init__.py index ff93597..094f2a2 100644 --- a/uclasm/__init__.py +++ b/uclasm/__init__.py @@ -7,3 +7,4 @@ from .utils import * from .matching import * from .interface import * +from .equivalence import * From e7f50c425aeb444d2de8d70f29ef93127c0a33e3 Mon Sep 17 00:00:00 2001 From: domyang Date: Tue, 28 Apr 2020 14:25:57 -0700 Subject: [PATCH 10/12] Maintaining old version of all different code for use outside of SI context --- uclasm/counting/alldiffs.py | 99 +++++++++++++++++++++++++++++++++---- 1 file changed, 90 insertions(+), 9 deletions(-) diff --git a/uclasm/counting/alldiffs.py b/uclasm/counting/alldiffs.py index a0a028c..e66fad3 100644 --- a/uclasm/counting/alldiffs.py +++ b/uclasm/counting/alldiffs.py @@ -95,11 +95,12 @@ def count_solutions(matching, smp, w_classes, tmplt_equivalence=False): return count -def recursive_alldiff_counter(tnode_to_eqids, w_classes, w_class_sizes, smp, - matching, tmplt_equivalence=False): +def recursive_alldiff_counter_SMP(tnode_to_eqids, w_classes, w_class_sizes, + smp, matching, tmplt_equivalence=False): """ Recursive routine for computing the number of ways to assign template - nodes to world nodes such that they are all different. + nodes to world nodes such that they are all different. This version is + intended for use with the subgraph matching problem. Parameters ---------- @@ -206,17 +207,20 @@ def get_equivalence_classes(tnode_to_cands): return tnode_to_eqids, eq_classes -def count_alldiffs(tnode_to_cands, smp, matching, tmplt_equivalence=False): - """ - tnode_to_cands: dict(item, list) - smp: SubgraphMatchingProblem - matching: dict - tmplt_equivalence: bool +def count_alldiffs_SMP(tnode_to_cands, smp, matching, tmplt_equivalence=False): + """ Count the number of ways to assign template nodes to candidates without using any candidate for more than one template node. i.e. count solutions to the alldiff problem, where the nodeiables are the keys of tnode_to_cands, and the domains are the values. + + Parameters + ---------- + tnode_to_cands: dict(item, list) + smp: SubgraphMatchingProblem + matching: dict + tmplt_equivalence: bool """ global num_solutions num_solutions = 0 @@ -238,3 +242,80 @@ def count_alldiffs(tnode_to_cands, smp, matching, tmplt_equivalence=False): return num_solutions + +def recursive_alldiff_counter(tnode_to_eqids, w_class_sizes): + """ + Parameters + ---------- + tnode_to_eqids : dict + Mapping from template node to a number representing a class of + equivalence nodes + w_class_sizes : list + A list of the number of currently unassigned world nodes for each + equivalence class + + Returns + ------- + The number of solutions to the all different problem + """ + + # If no more tnodes to assign + if len(tnode_to_eqids) == 0: + return 1 + + count = 0 + + # This tnode will now be assigned to one of the w_classes + tnode, eqids = tnode_to_eqids.popitem() + + # We try mapping the tnode to each equivalence class of world nodes + for eqid in eqids: + # TODO: break out this inner loop into its own function for clarity? + n_cands = w_class_sizes[eqid] + + # If no candidates left in this equivalence class, continue + if n_cands == 0: + continue + + # We decrement the amount of available candidates in the eq class. + w_class_sizes[eqid] -= 1 + + # Count the number of ways to assign the rest of candidates + n_ways_to_assign_rest = recursive_alldiff_counter(tnode_to_eqids, w_class_sizes) + + # We have n_cands possible ways to assign the current tnode to the + # current equivalence class, so we multiply by n_cands + count += n_cands * n_ways_to_assign_rest + + # Unmatch tnode from equivalence class + w_class_sizes[eqid] += 1 + + tnode_to_eqids[tnode] = eqids + + return count + + +def count_alldiffs(tnode_to_cands): + """ + tnode_to_cands: dict(item, list) + + Count the number of ways to assign template nodes to candidates without + using any candidate for more than one template node. + i.e. count solutions to the alldiff problem, where the nodeiables + are the keys of tnode_to_cands, and the domains are the values. + """ + + # TODO: can this function be vectorized? + # TODO: does scipy have a solver for this already? + + # Check if any template node has no candidates + if any(len(cands)==0 for cands in tnode_to_cands.values()): + return 0 + tnode_to_eqids, eq_classes = get_equivalence_classes(tnode_to_cands) + + # Alternatively, list(map(len, eq_classes)) + eq_class_sizes = [len(eq_class) for eq_class in eq_classes] + + count = recursive_alldiff_counter(tnode_to_eqids, eq_class_sizes) + + return count From d377b721b6f620c3966460434bcf573a082f9bc2 Mon Sep 17 00:00:00 2001 From: domyang Date: Tue, 28 Apr 2020 14:55:49 -0700 Subject: [PATCH 11/12] Additional counting test --- tests/test_counting.py | 45 +++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/tests/test_counting.py b/tests/test_counting.py index 564a183..17f7f80 100644 --- a/tests/test_counting.py +++ b/tests/test_counting.py @@ -186,15 +186,38 @@ def test_count_isomorphisms_random_all_matches(self): Perform a test where every possible mapping of template to world nodes is a match. """ - random_adj = np.random.random((5, 5)) - # No self loops - np.fill_diagonal(random_adj, 0) - t_graph = Graph([csr_matrix(random_adj)]) - - complete_graph = nx.complete_graph(5) - w_graph = uclasm.from_networkx_graph(complete_graph) + for i in range(10): + random_adj = np.random.random((5, 5)) + # No self loops + np.fill_diagonal(random_adj, 0) + t_graph = Graph([csr_matrix(random_adj)]) + + complete_graph = nx.complete_graph(5) + w_graph = uclasm.from_networkx_graph(complete_graph) + + smp = MatchingProblem(t_graph, w_graph) + + count = count_isomorphisms(smp, verbose=True) + assert count == 120 # 5 factorial + + eq_count = count_isomorphisms(smp, verbose=True, tmplt_equivalence=True) + assert count == 120 + + def test_count_isomorphisms_random_no_matches(self): + for i in range(10): + random_adj = np.random.random((5, 5)) + # No self loops + np.fill_diagonal(random_adj, 0) + t_graph = Graph([csr_matrix(random_adj)]) + + empty_adj = csr_matrix(np.zeros((5,5))) + w_graph = Graph([empty_adj]) + + smp = MatchingProblem(t_graph, w_graph) + + count = count_isomorphisms(smp, verbose=True) + assert count == 0 + + eq_count = count_isomorphisms(smp, verbose=True, tmplt_equivalence=True) + assert count == 0 - smp = MatchingProblem(t_graph, w_graph) - count = count_isomorphisms(smp, verbose=True) - - assert count == 120 # 5 factorial From 47530f978b4012bde0cc47a46f921e8296a3adad Mon Sep 17 00:00:00 2001 From: domyang Date: Tue, 28 Apr 2020 14:56:32 -0700 Subject: [PATCH 12/12] Changing name of SMP alldiffs to count_alldiffs_SMP --- uclasm/counting/alldiffs.py | 6 +++--- uclasm/counting/isomorphisms.py | 18 ++++-------------- 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/uclasm/counting/alldiffs.py b/uclasm/counting/alldiffs.py index e66fad3..5e7174a 100644 --- a/uclasm/counting/alldiffs.py +++ b/uclasm/counting/alldiffs.py @@ -149,8 +149,8 @@ def recursive_alldiff_counter_SMP(tnode_to_eqids, w_classes, w_class_sizes, matching[tnode] = eq_class[w_class_sizes[eqid]] # Assign the rest - recursive_alldiff_counter(tnode_to_eqids, w_classes, w_class_sizes, - smp, matching, tmplt_equivalence) + recursive_alldiff_counter_SMP(tnode_to_eqids, w_classes, w_class_sizes, + smp, matching, tmplt_equivalence) # Unmatch tnode matching[tnode] = -1 @@ -237,7 +237,7 @@ def count_alldiffs_SMP(tnode_to_cands, smp, matching, tmplt_equivalence=False): # Alternatively, list(map(len, eq_classes)) eq_class_sizes = [len(eq_class) for eq_class in eq_classes] - recursive_alldiff_counter(tnode_to_eqids, eq_classes, + recursive_alldiff_counter_SMP(tnode_to_eqids, eq_classes, eq_class_sizes, smp, matching, tmplt_equivalence) return num_solutions diff --git a/uclasm/counting/isomorphisms.py b/uclasm/counting/isomorphisms.py index b6055a5..9387914 100644 --- a/uclasm/counting/isomorphisms.py +++ b/uclasm/counting/isomorphisms.py @@ -1,6 +1,6 @@ from ..matching.search.search_utils import iterate_to_convergence from ..utils import invert, values_map_to_same_key, one_hot -from .alldiffs import count_alldiffs +from .alldiffs import count_alldiffs_SMP import numpy as np import pandas as pd from functools import reduce @@ -37,8 +37,7 @@ def pick_minimum_domain_vertex(candidates): return t_vert def recursive_isomorphism_counter(smp, matching, *, - unspec_cover, verbose, init_changed_cands, tmplt_equivalence=False, - world_equivalence=False): + unspec_cover, verbose, init_changed_cands, tmplt_equivalence=False): """ Recursive routine for solving subgraph isomorphism. @@ -59,8 +58,6 @@ def recursive_isomorphism_counter(smp, matching, *, this will be all zeros tmplt_equivalence : bool Flag indicating whether to use template equivalence - world_equivalence : bool - Flag indicating whether to use world equivalence Returns ------- int @@ -76,7 +73,7 @@ def recursive_isomorphism_counter(smp, matching, *, # Elimination filter is not needed here and would be a waste of time node_to_cands = {idx: smp.world.nodes[candidates[idx]] for idx, node in enumerate(smp.tmplt.nodes)} - return count_alldiffs(node_to_cands, smp, matching) + return count_alldiffs_SMP(node_to_cands, smp, matching) # Since the node cover is not empty, we first choose some valid # assignment of the unspecified nodes one at a time until the remaining @@ -104,11 +101,6 @@ def recursive_isomorphism_counter(smp, matching, *, # Unmatch template vertex matching[node_idx] = -1 - # TODO: more useful progress summary - if verbose: - print("depth {}: {} of {}".format(len(unspec_cover), i, - len(cand_idxs)), n_isomorphisms) - # If we are using template equivalence, we can mark for all equivalent # template vertices that cand_idx cannot be a cannot be a candidate. if tmplt_equivalence: @@ -119,7 +111,7 @@ def recursive_isomorphism_counter(smp, matching, *, def count_isomorphisms(smp, *, verbose=True, - tmplt_equivalence=False, world_equivalence=False): + tmplt_equivalence=False): """ Counts the number of ways to assign template nodes to world nodes such that edges between template nodes also appear between the corresponding world @@ -137,8 +129,6 @@ def count_isomorphisms(smp, *, verbose=True, Flag for verbose output tmplt_equivalence : bool Flag indicating whether to use template equivalence - world_equivalence : bool - Flag indicating whether to use world equivalence Returns ------- int