diff --git a/tests/test_counting.py b/tests/test_counting.py index 41b0dc1..17f7f80 100644 --- a/tests/test_counting.py +++ b/tests/test_counting.py @@ -7,6 +7,7 @@ import numpy as np import pandas as pd +import networkx as nx from scipy.sparse import csr_matrix @pytest.fixture @@ -54,6 +55,7 @@ def smp_star(): smp = MatchingProblem(tmplt, world) return smp + @pytest.fixture def smp_node_cover(): """Create a subgraph matching problem requiring the use of node cover.""" @@ -178,3 +180,44 @@ def test_count_isomorphisms_overlapping_cands(self, smp_overlapping_cands): assert np.sum(smp_overlapping_cands.candidates()) == 7 count = count_isomorphisms(smp_overlapping_cands, verbose=True) assert count == 6 + + def test_count_isomorphisms_random_all_matches(self): + """ + Perform a test where every possible mapping of template to world nodes + is a match. + """ + for i in range(10): + random_adj = np.random.random((5, 5)) + # No self loops + np.fill_diagonal(random_adj, 0) + t_graph = Graph([csr_matrix(random_adj)]) + + complete_graph = nx.complete_graph(5) + w_graph = uclasm.from_networkx_graph(complete_graph) + + smp = MatchingProblem(t_graph, w_graph) + + count = count_isomorphisms(smp, verbose=True) + assert count == 120 # 5 factorial + + eq_count = count_isomorphisms(smp, verbose=True, tmplt_equivalence=True) + assert count == 120 + + def test_count_isomorphisms_random_no_matches(self): + for i in range(10): + random_adj = np.random.random((5, 5)) + # No self loops + np.fill_diagonal(random_adj, 0) + t_graph = Graph([csr_matrix(random_adj)]) + + empty_adj = csr_matrix(np.zeros((5,5))) + w_graph = Graph([empty_adj]) + + smp = MatchingProblem(t_graph, w_graph) + + count = count_isomorphisms(smp, verbose=True) + assert count == 0 + + eq_count = count_isomorphisms(smp, verbose=True, tmplt_equivalence=True) + assert count == 0 + diff --git a/tests/test_equivalence.py b/tests/test_equivalence.py new file mode 100644 index 0000000..22b10b4 --- /dev/null +++ b/tests/test_equivalence.py @@ -0,0 +1,58 @@ +""" +Tests for structural equivalence. +""" +import pytest +import networkx as nx + +from uclasm.graph import from_networkx_graph + + +@pytest.fixture +def star_graph_5(): + """ + Creates the star graph S5, the graph with 1 central node and 5 spokes. + The node with index 0 should be the central node. + """ + star_graph = nx.star_graph(5) + return from_networkx_graph(star_graph) + + +@pytest.fixture +def complete_graph_5(): + """ + Creates the complete graph K5, the graph with 5 nodes each connected to + every other node. + """ + complete_graph = nx.complete_graph(5) + return from_networkx_graph(complete_graph) + + +@pytest.fixture +def path_graph_5(): + """ + Creates the path graph P5, the path graph with 5 nodes. + """ + path_graph = nx.path_graph(5) + return from_networkx_graph(path_graph) + + +def test_star_graph_equivalence(star_graph_5): + equiv = star_graph_5.equivalence_classes + assert len(equiv.classes) == 2 + assert equiv.classes[0] == {0} + rep_1 = equiv.representative(1) + assert equiv.classes[rep_1] == {1,2,3,4,5} + + +def test_complete_graph_equivalence(complete_graph_5): + equiv = complete_graph_5.equivalence_classes + assert len(equiv.classes) == 1 + rep = equiv.representative(0) + assert equiv.classes[rep] == {0,1,2,3,4} + + +def test_path_graph_equivalence(path_graph_5): + equiv = path_graph_5.equivalence_classes + assert len(equiv.classes) == 5 + for i in range(5): + assert equiv.classes[i] == {i} diff --git a/uclasm/__init__.py b/uclasm/__init__.py index ff93597..094f2a2 100644 --- a/uclasm/__init__.py +++ b/uclasm/__init__.py @@ -7,3 +7,4 @@ from .utils import * from .matching import * from .interface import * +from .equivalence import * diff --git a/uclasm/counting/alldiffs.py b/uclasm/counting/alldiffs.py index 8f72c20..5e7174a 100644 --- a/uclasm/counting/alldiffs.py +++ b/uclasm/counting/alldiffs.py @@ -1,46 +1,168 @@ -from ..utils import invert, values_map_to_same_key +""" +alldiffs.py + +Routine for computing solutions to the all different problem +""" + +from math import factorial + import numpy as np -from functools import reduce +from scipy.special import comb -def recursive_alldiff_counter(tnode_to_eqids, eq_class_sizes): - # If no more tnodes to assign - if len(tnode_to_eqids) == 0: - return 1 +from ..utils import invert, values_map_to_same_key +from ..equivalence import equivalence_from_partition - count = 0 +# Right now we are using a global variable to keep track of solution count +num_solutions = 0 + + +def count_solutions(matching, smp, w_classes, tmplt_equivalence=False): + """ + This function will compute the number of solutions for a given matching. + + Parameters + ---------- + matching : dict + The matching for which we want to compute solutions + smp : MatchingProblem + The matching problem context + w_classes : list + A list of sets of world nodes that are equivalent + tmplt_equivalence : bool + A flag indicating whether to use template equivalence + + Returns + ------- + int : The number of solutions that can be generated using template and + world equivalence + """ + count = 1 + + w_class_sizes = [len(w_class) for w_class in w_classes] + if tmplt_equivalence: + t_classes = smp.tmplt.equivalence_classes + + for t_class in t_classes.classes().values(): + # We can permute members of template equivalence classes freely + # So we multiply by the factorial of the size of each class + count *= factorial(t_class) + + # Then we need to determine within each template equivalence class + # how many template nodes are assigned to the same world equivalence + # class. Once we do this, we need to choose which elements of the + # world equivalence class to assign to these template nodes. + w_class_counts = {} + for tnode in t_class: + wnode = matching[tnode] + # If class_id is None after this loop, then the tnode was + # assigned prior to calling the alldiffs problem. It will be + # assigned to only one node and not a class. + class_id = None + # Determine which equivalence class it is in + for i, w_class in enumerate(w_classes): + if wnode in w_class: + class_id = i + break + + w_class_counts[class_id] = w_class_counts.get(class_id, 0) + 1 + + for w_class_id in w_class_counts: + if w_class_id is None: + continue + + # Choose which elements of the w_class to assign to the + # members of the template class + count *= comb(w_class_sizes[w_class_id], + w_class_counts[w_class_id], exact=True) + + w_class_sizes[w_class_id] -= w_class_counts[w_class_id] + + else: + for tnode in matching: + wnode = matching[tnode] + class_id = None + # Determine which equivalence class it is in + for i, w_class in enumerate(w_classes): + if wnode in w_class: + class_id = i + break + if class_id is None: + continue + + count *= w_class_sizes[class_id] + w_class_sizes[class_id] -= 1 + + return count - # This tnode will now be assigned to one of the eq_classes + +def recursive_alldiff_counter_SMP(tnode_to_eqids, w_classes, w_class_sizes, + smp, matching, tmplt_equivalence=False): + """ + Recursive routine for computing the number of ways to assign template + nodes to world nodes such that they are all different. This version is + intended for use with the subgraph matching problem. + + Parameters + ---------- + tnode_to_eqids : dict + Mapping from template node to a number representing a class of + equivalence nodes + w_classes : list + A partition of world nodes into equivalence classes + w_class_sizes : list + A list of the number of currently unassigned world nodes for each + equivalence class + smp : SubgraphMatchingProblem + The current matching problem + matching : dict + The current assignment of template nodes to world nodes, matching[i] + is the index of the world node that template node i is assigned to + tmplt_equivalence : bool + A flag indicating whether we are using template equivalence + """ + + # We need to count the number of solutions the current matching represents + if len(tnode_to_eqids) == 0: + sol_count = count_solutions(matching, smp, w_classes, tmplt_equivalence) + global num_solutions + num_solutions += sol_count + return + + + # This tnode will now be assigned to one of the w_classes tnode, eqids = tnode_to_eqids.popitem() # We try mapping the tnode to each equivalence class of world nodes for eqid in eqids: # TODO: break out this inner loop into its own function for clarity? - n_cands = eq_class_sizes[eqid] + n_cands = w_class_sizes[eqid] # If no candidates left in this equivalence class, continue if n_cands == 0: continue # We decrement the amount of available candidates in the eq class. - eq_class_sizes[eqid] -= 1 + w_class_sizes[eqid] -= 1 - # Count the number of ways to assign the rest of candidates - n_ways_to_assign_rest = recursive_alldiff_counter(tnode_to_eqids, eq_class_sizes) + # Assign tnode to an arbitrary element of the equivalence class + eq_class = list(w_classes[eqid]) + matching[tnode] = eq_class[w_class_sizes[eqid]] - # We have n_cands possible ways to assign the current tnode to the - # current equivalence class, so we multiply by n_cands - count += n_cands * n_ways_to_assign_rest + # Assign the rest + recursive_alldiff_counter_SMP(tnode_to_eqids, w_classes, w_class_sizes, + smp, matching, tmplt_equivalence) - # Unmatch tnode from equivalence class - eq_class_sizes[eqid] += 1 + # Unmatch tnode + matching[tnode] = -1 + # Restore the world node to the class its from + w_class_sizes[eqid] += 1 tnode_to_eqids[tnode] = eqids - return count - def get_equivalence_classes(tnode_to_cands): - """Get equivalence classes of cands which are candidates for the same nodes. + """Get equivalence classes of cands which are candidates for the same + template nodes. Parameters ---------- @@ -50,7 +172,8 @@ def get_equivalence_classes(tnode_to_cands): Returns ------- tnode_to_eqids : dict - Mapping from template node to the equivalence classes to which its candidates belong. + Mapping from template node to the equivalence classes to which its + candidates belong. eq_classes : list List of the equivalence classes """ @@ -65,6 +188,7 @@ def get_equivalence_classes(tnode_to_cands): # Each cand_set here is an equivalence class tnode_set_to_eq_class = values_map_to_same_key(cand_to_tnode_set) eq_classes = list(tnode_set_to_eq_class.values()) + w_equivalence = equivalence_from_partition(eq_classes) cand_to_eqids = { cand: [eqid for eqid, eq_class in enumerate(eq_classes) if cand in eq_class] @@ -83,6 +207,94 @@ def get_equivalence_classes(tnode_to_cands): return tnode_to_eqids, eq_classes + +def count_alldiffs_SMP(tnode_to_cands, smp, matching, tmplt_equivalence=False): + """ + Count the number of ways to assign template nodes to candidates without + using any candidate for more than one template node. + i.e. count solutions to the alldiff problem, where the nodeiables + are the keys of tnode_to_cands, and the domains are the values. + + Parameters + ---------- + tnode_to_cands: dict(item, list) + smp: SubgraphMatchingProblem + matching: dict + tmplt_equivalence: bool + """ + global num_solutions + num_solutions = 0 + + # TODO: can this function be vectorized? + # TODO: does scipy have a solver for this already? + + # Check if any template node has no candidates + if any(len(cands)==0 for cands in tnode_to_cands.values()): + return 0 + + tnode_to_eqids, eq_classes = get_equivalence_classes(tnode_to_cands) + + # Alternatively, list(map(len, eq_classes)) + eq_class_sizes = [len(eq_class) for eq_class in eq_classes] + + recursive_alldiff_counter_SMP(tnode_to_eqids, eq_classes, + eq_class_sizes, smp, matching, tmplt_equivalence) + + return num_solutions + + +def recursive_alldiff_counter(tnode_to_eqids, w_class_sizes): + """ + Parameters + ---------- + tnode_to_eqids : dict + Mapping from template node to a number representing a class of + equivalence nodes + w_class_sizes : list + A list of the number of currently unassigned world nodes for each + equivalence class + + Returns + ------- + The number of solutions to the all different problem + """ + + # If no more tnodes to assign + if len(tnode_to_eqids) == 0: + return 1 + + count = 0 + + # This tnode will now be assigned to one of the w_classes + tnode, eqids = tnode_to_eqids.popitem() + + # We try mapping the tnode to each equivalence class of world nodes + for eqid in eqids: + # TODO: break out this inner loop into its own function for clarity? + n_cands = w_class_sizes[eqid] + + # If no candidates left in this equivalence class, continue + if n_cands == 0: + continue + + # We decrement the amount of available candidates in the eq class. + w_class_sizes[eqid] -= 1 + + # Count the number of ways to assign the rest of candidates + n_ways_to_assign_rest = recursive_alldiff_counter(tnode_to_eqids, w_class_sizes) + + # We have n_cands possible ways to assign the current tnode to the + # current equivalence class, so we multiply by n_cands + count += n_cands * n_ways_to_assign_rest + + # Unmatch tnode from equivalence class + w_class_sizes[eqid] += 1 + + tnode_to_eqids[tnode] = eqids + + return count + + def count_alldiffs(tnode_to_cands): """ tnode_to_cands: dict(item, list) diff --git a/uclasm/counting/isomorphisms.py b/uclasm/counting/isomorphisms.py index 096d069..9387914 100644 --- a/uclasm/counting/isomorphisms.py +++ b/uclasm/counting/isomorphisms.py @@ -1,6 +1,6 @@ from ..matching.search.search_utils import iterate_to_convergence from ..utils import invert, values_map_to_same_key, one_hot -from .alldiffs import count_alldiffs +from .alldiffs import count_alldiffs_SMP import numpy as np import pandas as pd from functools import reduce @@ -37,8 +37,7 @@ def pick_minimum_domain_vertex(candidates): return t_vert def recursive_isomorphism_counter(smp, matching, *, - unspec_cover, verbose, init_changed_cands, tmplt_equivalence=False, - world_equivalence=False): + unspec_cover, verbose, init_changed_cands, tmplt_equivalence=False): """ Recursive routine for solving subgraph isomorphism. @@ -46,8 +45,9 @@ def recursive_isomorphism_counter(smp, matching, *, ---------- smp : MatchingProblem A subgraph matching problem - matching : list - A list of tuples which designate what each template vertex is matched to + matching : np.array + An array keeping track of which template vertex is mapped to which + world vertex. unspec_cover : np.array Array of the indices of the nodes with more than 1 candidate verbose : bool @@ -58,8 +58,6 @@ def recursive_isomorphism_counter(smp, matching, *, this will be all zeros tmplt_equivalence : bool Flag indicating whether to use template equivalence - world_equivalence : bool - Flag indicating whether to use world equivalence Returns ------- int @@ -73,9 +71,9 @@ def recursive_isomorphism_counter(smp, matching, *, # can skip straight to counting solutions to the alldiff constraint problem if len(unspec_cover) == 0: # Elimination filter is not needed here and would be a waste of time - node_to_cands = {node: smp.world.nodes[candidates[idx]] + node_to_cands = {idx: smp.world.nodes[candidates[idx]] for idx, node in enumerate(smp.tmplt.nodes)} - return count_alldiffs(node_to_cands) + return count_alldiffs_SMP(node_to_cands, smp, matching) # Since the node cover is not empty, we first choose some valid # assignment of the unspecified nodes one at a time until the remaining @@ -90,7 +88,7 @@ def recursive_isomorphism_counter(smp, matching, *, # candidates_copy[node_idx] = one_hot(cand_idx, world.n_nodes) smp_copy.add_match(node_idx, cand_idx) - matching.append((node_idx, cand_idx)) + matching[node_idx] = cand_idx # Remove matched node from the unspecified list new_unspec_cover = unspec_cover[:node_idx] + unspec_cover[node_idx+1:] @@ -101,12 +99,7 @@ def recursive_isomorphism_counter(smp, matching, *, init_changed_cands=one_hot(node_idx, smp.tmplt.n_nodes)) # Unmatch template vertex - matching.pop() - - # TODO: more useful progress summary - if verbose: - print("depth {}: {} of {}".format(len(unspec_cover), i, - len(cand_idxs)), n_isomorphisms) + matching[node_idx] = -1 # If we are using template equivalence, we can mark for all equivalent # template vertices that cand_idx cannot be a cannot be a candidate. @@ -118,7 +111,7 @@ def recursive_isomorphism_counter(smp, matching, *, def count_isomorphisms(smp, *, verbose=True, - tmplt_equivalence=False, world_equivalence=False): + tmplt_equivalence=False): """ Counts the number of ways to assign template nodes to world nodes such that edges between template nodes also appear between the corresponding world @@ -136,20 +129,21 @@ def count_isomorphisms(smp, *, verbose=True, Flag for verbose output tmplt_equivalence : bool Flag indicating whether to use template equivalence - world_equivalence : bool - Flag indicating whether to use world equivalence Returns ------- int The number of isomorphisms """ - matching = [] + # This dict keeps track of what nodes have been matched + # If a value is -1, it means that the template vertex with that index + # is currently unmatched. + matching = {node: -1 for node in smp.tmplt.nodes} candidates = smp.candidates() spec_nodes = np.where(candidates.sum(axis=1) == 1)[0] for t_vert in spec_nodes: w_vert = np.where(candidates[t_vert,:])[0][0] - matching.append((t_vert, w_vert)) + matching[t_vert] = w_vert unspec_nodes = np.where(candidates.sum(axis=1) > 1)[0] tmplt_subgraph = smp.tmplt.node_subgraph(unspec_nodes) diff --git a/uclasm/equivalence/__init__.py b/uclasm/equivalence/__init__.py new file mode 100644 index 0000000..1f1d628 --- /dev/null +++ b/uclasm/equivalence/__init__.py @@ -0,0 +1,2 @@ +from .equivalence_data_structure import Equivalence, equivalence_from_partition +from .partition_sparse import bfs_partition_graph diff --git a/uclasm/equivalence/equivalence_data_structure.py b/uclasm/equivalence/equivalence_data_structure.py new file mode 100644 index 0000000..fb01eb1 --- /dev/null +++ b/uclasm/equivalence/equivalence_data_structure.py @@ -0,0 +1,221 @@ +""" +Implementation of an equivalence data structure for partitioning +vertices into equivalence classes. From Pattis' ICS46 lecture's note: +https://www.ics.uci.edu/~pattis/ICS-46/lectures/notes/equivalence.txt +by TimNg +""" + +from collections import defaultdict +from copy import deepcopy + +class Equivalence: + """ + Equivalence class data structure for efficient equivalence classes + computation. This is implemented using the disjoint sets data structure. + See https://en.wikipedia.org/wiki/Disjoint-set_data_structure for a basic + overview of the data structure. + + Internally, this class maintains a dictionary containing the mapping + of every element to its parent (which may be itself) called parent_map. + Searches for which class an element is in amounts to traversing through + the parents until it finds an element which is its own parent, and then + this is the representative element. + """ + def __init__(self, starting_set = set()): + """ + Starting_set is a set of node to start with. + """ + # dictionary of parents where each key is mapped to its parent + # start with everything maps to itself + self.parent_map = {a: a for a in starting_set} + # dictionary of root: size_of_tree + # also serves as something to store the roots + self.root_size_map = {a : 1 for a in starting_set} + + ### METHODS ### + def add_singleton(self, new_value): + """ + Add a new value as its own equiv class + """ + # Check first that it is not already in a member + error_msg = ("Equivalence.add_singleton: {} is already " + + "in the equiv class!".format(new_value)) + assert new_value not in self.parent_map, error_msg + + self.parent_map[new_value] = new_value + self.root_size_map[new_value] = 1 + + def merge_classes_of(self, a , b): + """ + Merge the equivalence classes of a and b together + by setting each parent's map to point to the same root. + """ + assert a in self.parent_map, "Value " + str(a) + " does not exist." + assert b in self.parent_map, "Value " + str(b) + " does not exist." + + # get the roots while compressing the tree + root_of_a = self.compress_to_root(a) + root_of_b = self.compress_to_root(b) + + if (root_of_a == root_of_b): + return # they are already in the same equivalence class + + # find the "big" root and the "small" root + if self.root_size_map[root_of_a] < self.root_size_map[root_of_b]: + small_root, big_root = root_of_a, root_of_b + else: + small_root, big_root = root_of_b, root_of_a + + # now we change the root of the smaller size map to the bigger one + # then we update the size of the new equiv class and then remove + # the smaller one as a root + self.parent_map[small_root] = big_root + self.root_size_map[big_root] += self.root_size_map[small_root] + + # remove the small_root from the roots dict + del self.root_size_map[small_root] + + def merge_set(self, set_to_merge): + """ + Given a set of elements in Equivalence, merge them together + """ + for a in set_to_merge: + assert a in self.parent_map, "Value " + str(a) + " does not exist." + one_elem = next(iter(set_to_merge)) + for a in set_to_merge: + self.merge_classes_of(one_elem, a) + + def partition(self, equivalence_relation: 'function', *args, **kwargs): + """ + Given an equivalence_relation function compute the appropriate + equivalence classes of the elements in the Equivalence data structure. + Users have to make sure of semantic and care should be taken to make + sure the equivalence_relation passed is an actual equiv relation, + otherwise the output will be undefined. + --> Checking in_same_class(a,b) will be the same as using + equivalence_relation(a,b) + This uses a divide and conquer approach on the root_size_map dictionary. + """ + classes = [] + for elem in self.root_size_map: + found = False + for c in classes: + # if these two classes are equivalence then we add them + if equivalence_relation(next(iter(c)), elem, *args, *kwargs): + c.add(i) + found = True + break + if not found: # new class + classes.append(set([elem])) + for c in classes: + self.merge_set(c) + return classes + + ### QUERIES ### + def in_same_class(self, a, b) -> bool: + """ + return a bool indicating if a and b are in the same classes + """ + assert a in self.parent_map, str(a) + " does not exist." + assert b in self.parent_map, str(b) + " does not exist." + root_of_a = self.compress_to_root(a) + root_of_b = self.compress_to_root(b) + return root_of_a == root_of_b + + def __len__(self): + """ + Returns the number of elements in the equiv class + """ + return len(self.parent_map) + + def class_count(self): + """ + return the number of unique equiv classes + """ + return len(self.root_size_map) + + @property + def classes(self): + """ + This function returns a dictionary whose values are the equivalence + classes and whose keys are the representatives of the classes. + """ + answer_map = defaultdict(set) + for elem in self.parent_map: + class_rep = self.representative(elem) + answer_map[class_rep].add(elem) + return answer_map + + def get_class(self, elem): + return self.classes[self.representative(elem)] + + def non_trivial_classes(self): + """ + This function returns a dictionary whose values are the equivalence + classes and whose keys are the representatives of the classes. + The dictionary will only contain classes which have more than one + element. + """ + return {rep: class_ for (rep, class_) in self.classes.items() + if len(class_) > 1} + + def __str__(self): + return "Equiv Classes: {}\nParent_map: {}\nRoot_size_map: {}".format( + self.classes, self.parent_map, self.root_size_map) + + def __getitem__(self, key): + """ + Return the equivalence class for the passed in key. + """ + rep = self.representative(key) + return self.classes[rep] + + def compress_to_root(self, elem) -> 'root of a': + """ + Returns the root of elem, and on the way, set the roots of the + parents of elem to be the root of elem + """ + assert elem in self.parent_map, str(elem) + " does not exist." + parents_of_elem = set() # set to store the parents of elem + curr_val = elem # current value + + # We traverse up through the ancestors of elem and keep track + # of the elements that we see so that we can compress the path + while curr_val != self.parent_map[curr_val]: + parents_of_elem.add(curr_val) + curr_val = self.parent_map[curr_val] + + # curr_val is now the root + # now we set all the parents of elem to the root + for parent in parents_of_elem: + self.parent_map[parent] = curr_val + return curr_val + + def representative(self, elem): + """ + Return the representative of elem's equivalence class. + This performs path compression in the process of finding the + representative. + """ + return self.compress_to_root(elem) + + def copy(self): + """ + Return a deep copy of self + """ + return deepcopy(self) + + +def equivalence_from_partition(partition): + """ + Given a partition of some set, create an Equivalence object where two + elements are equivalent if they are in the same cell in the partition. + This isn't as efficient as it could be, but it suffices for now. + """ + elems = [x for xs in partition for x in xs] + + equiv = Equivalence(elems) + for xs in partition: + equiv.merge_set(xs) + + return equiv diff --git a/uclasm/equivalence/partition_sparse.py b/uclasm/equivalence/partition_sparse.py new file mode 100644 index 0000000..c26fc1d --- /dev/null +++ b/uclasm/equivalence/partition_sparse.py @@ -0,0 +1,225 @@ +""" +Implementation of functions to partition vertices according to permutation equivalency +for the DARPA Multichannel Directed Multigraph Subgraph Matching Project +by TimNg +""" + +from .equivalence_data_structure import Equivalence +import numpy as np +from queue import Queue + +def array_equal(mat1, mat2): + """ + Check if two sparse matrices are equal. + """ + return (mat1 != mat2).nnz==0 + +def permutation_relation(u, v, adj_mat_csr, + adj_mat_csc, row_nnz, col_nnz): + """ + We compare two vertices: a and b to see if they are equivalence + under permutation. This can be done by checking if certain pieces + of their values in the adj_matrix are the same (see notes) + + Args: + u (int): The index of the first vertex + v (int): The index of the second vertex + adj_mat_csr (sparse_matrix): A csr representation of the adjacency + matrix + adj_mat_csc (sparse_matrix): A csc representation of the adjacency + matrix + row_nnz (array): An array of the number nonzero count of each row + col_nnz (array): An array of the number nonzero count of each col + """ + # Trivial Case + if u == v: + return True + + u, v = (u ,v) if u < v else (v, u) + + # If they both have no neighbors, they are equivalent. + if (col_nnz[u] == 0 and col_nnz[v] == 0 and + row_nnz[u] == 0 and row_nnz[v] == 0): + return True + + n = adj_mat_csr.shape[0] + + if adj_mat_csr[u,u] != adj_mat_csr[v,v]: + return False + if adj_mat_csr[u,v] != adj_mat_csr[v,u]: + return False + + # We do this check to avoid checking subarrays if we don't have to. + if row_nnz[u] != 0: + # check the rows (3 pieces), maybe there are better ways to do this + if not (array_equal(adj_mat_csr[u,0:u], adj_mat_csr[v,0:u]) and + array_equal(adj_mat_csr[u,u+1:v], adj_mat_csr[v,u+1:v]) and + array_equal(adj_mat_csr[u,v+1:n], adj_mat_csr[v,v+1:n])): + return False + # check the columns + if col_nnz[u] != 0: + if not (array_equal(adj_mat_csc[0:u,u], adj_mat_csc[0:u,v]) and + array_equal(adj_mat_csc[u+1:v,u], adj_mat_csc[u+1:v,v]) and + array_equal(adj_mat_csc[v+1:n,u], adj_mat_csc[v+1:n,v])): + return False + # We have passed all the test if we reach here. + return True + + +def partition_vertices(adj_matrix: '2d numpy array') -> [{'equiv-vertices'}]: + """ + Given an adj_matrix (directed multigraph) we partition the vertices + (by indices) in terms of the permutation relation. + """ + assert adj_matrix.shape[0] == adj_matrix.shape[1], "Must be square matrix!" + # initialize an equivalence relation fill with nodes + nodes = range(0, adj_matrix.shape[0]) + adj_matrix_csc = adj_matrix.tocsc() + row_nnz = adj_matrix.getnnz(1) + col_nnz = adj_matrix_csc.getnnz(0) + vertices_partition = Equivalence(nodes) + # partition into equivalence classes + vertices_partition.partition(permutation_relation, adj_matrix, + adj_matrix_csc, row_nnz, col_nnz) + return vertices_partition + + +def equivalent(u, v, adj_mat_csrs, adj_mat_cscs, row_nnzs, col_nnzs): + """ + Check if vertex u is equivalent to vertex v. This does this by checking + if the respective entries of the adjacency matrices match. This does this + with respect to all channels. + + Args: + u (int): The index of the first vertex + v (int): The index of the second vertex + adj_mat_csrs (dict): A dictionary from channels to csr representations + of the matrices + adj_mat_cscs (dict): A dictionary from channels to csc representations + of the matrices + row_nnzs (dict): A dictionary from channels to an array of the + number nonzero count of each row + col_nnzs (dict): A dictionary from channels to an array of the + number nonzero count of each column + + Returns: + bool: A flag indicating if u is equivalent to v + """ + if u == v: + return True + + # We do preliminary checks to rule out as many equivalences as possible + # because the later checks are expensive + # We first check that the degrees match + for i in range(len(adj_mat_csrs)): + row_nnz = row_nnzs[i] + col_nnz = col_nnzs[i] + if row_nnz[u] != row_nnz[v] or col_nnz[u] != col_nnz[v]: + return False + + for i in range(len(adj_mat_csrs)): + if not permutation_relation(u, v, adj_mat_csrs[i], + adj_mat_cscs[i], row_nnzs[i], col_nnzs[i]): + return False + return True + + +def partition_neighbors(neighbors, adj_mat_csrs, adj_mat_cscs, + row_nnzs, col_nnzs): + """ + Construct the equivalence classes of the passed in vertices. + + Args: + neighbors (list): A list of the indices of the neighbors + adj_mat_csrs (dict): A dictionary from channels to csr representations + of the matrices + adj_mat_cscs (dict): A dictionary from channels to csc representations + of the matrices + row_nnzs (dict): A dictionary from channels to an array of the + number nonzero count of each row + col_nnzs (dict): A dictionary from channels to an array of the + number nonzero count of each column + """ + eq_classes = [[neighbors[0]]] + for neighbor in neighbors[1:]: + # See if a neighbor is in one of the already computed eq classes + for eq_class in eq_classes: + if equivalent(neighbor, eq_class[0], adj_mat_csrs, adj_mat_cscs, + row_nnzs, col_nnzs): + eq_class.append(neighbor) + break + else: + # Otherwise it is in its own class + eq_classes.append([neighbor]) + + return eq_classes + + +def bfs_partition_graph(graph): + """ + This performs a breadth first search approach to computing the equivalence + classes. This function works by performing a breadth first search of the + vertices; At each step it forms equivalence classes of the neighbors + (No vertex that is not a neighbor can be equivalent to any of the neighbors + so there is no need to check outside the neighbors) + In the BFS, we also only need to visit one representative of each + equivalence class. + + It then needs to do a last check to find which class the inital vertex is + in. + + Args: + graph (Graph): A graph + Returns: + Equivalence: The equivalence structure of the graph + """ + # We create a queue to perform a Breadth First Search + queue = Queue() + visited = np.zeros(graph.n_nodes, dtype=np.bool) + visited[0] = True + all_eq_classes = [] + # We start at vertex 0 + queue.put(0) + + # We create these beforehand so as to only compute them once + adj_mat_csrs = graph.adjs + adj_mat_cscs = [adj_mat.tocsc() for adj_mat in adj_mat_csrs] + row_nnzs = [adj_mat.getnnz(1) for adj_mat in adj_mat_csrs] + col_nnzs = [adj_mat.getnnz(0) for adj_mat in adj_mat_cscs] + + while True: + # If out of vertices to visit, exit loop + if queue.empty(): + break + curr_vertex = queue.get() + + neighbors = graph.sym_composite_adj[curr_vertex].nonzero()[1] + neighbors = neighbors[~visited[neighbors]] + visited[neighbors] = True + if len(neighbors) == 0: + continue + eq_classes = partition_neighbors(neighbors, adj_mat_csrs, adj_mat_cscs, + row_nnzs, col_nnzs) + all_eq_classes.extend(eq_classes) + # For each equivalence class, we put a representative + for eq_class in eq_classes: + queue.put(eq_class[0]) + + # We still need to find which equivalence class 0 is in + for eq_class in all_eq_classes: + if equivalent(0, eq_class[0], adj_mat_csrs, adj_mat_cscs, + row_nnzs, col_nnzs): + eq_class.append(0) + break + else: + # 0 is in its own class + all_eq_classes.append([0]) + + # Construct the Equivalence object. + equivalence = Equivalence(list(range(graph.n_nodes))) + for eq_class in all_eq_classes: + x = eq_class[0] + for y in eq_class[1:]: + equivalence.merge_classes_of(x, y) + + return equivalence diff --git a/uclasm/graph.py b/uclasm/graph.py index 935e5a2..a260a7f 100644 --- a/uclasm/graph.py +++ b/uclasm/graph.py @@ -3,9 +3,11 @@ import scipy.sparse as sparse import numpy as np import pandas as pd +import networkx as nx import sys from .utils import index_map, one_hot +from .equivalence import bfs_partition_graph # functools.cached_property was introduced in python 3.8. # https://docs.python.org/3/library/functools.html#functools.cached_property @@ -213,6 +215,14 @@ def in_out_degrees(self): deglist = [self.in_degrees, self.out_degrees] return np.concatenate(deglist, axis=1).astype(np.single) + @cached_property + def equivalence_classes(self): + """Equivalence: The equivalence structure of the graph + + Computes the template equivalence + """ + return bfs_partition_graph(self) + def loopless_subgraph(self): """Get a subgraph containing no self-edges. @@ -328,3 +338,19 @@ def node_cover(self): # TODO: Remove any nodes from the node cover which are not necessary. return np.array(cover) + + +def from_networkx_graph(nx_graph): + """ + This will convert a networkx style graph into a uclasm style Graph. + Currently this does not preserve node or edge labels. It just copies + over the adjacency structure. + + Parameters + ---------- + nx_graph : networkx.Graph + """ + + # TODO: Add the ability to port over node and edge labels + adj = nx.to_scipy_sparse_matrix(nx_graph) + return Graph([adj])