From eea3e2a405f9167ac004d2bec12139323824483e Mon Sep 17 00:00:00 2001 From: CHERRY-ui8 <2693275288@qq.com> Date: Mon, 5 Jan 2026 17:49:44 +0800 Subject: [PATCH 1/5] fix: add special edge handling in bfs, dfs and base partitioner --- graphgen/bases/base_partitioner.py | 7 ++++++- graphgen/models/partitioner/bfs_partitioner.py | 13 ++++++++----- graphgen/models/partitioner/dfs_partitioner.py | 12 +++++++++--- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/graphgen/bases/base_partitioner.py b/graphgen/bases/base_partitioner.py index d948e3a7..048e80e3 100644 --- a/graphgen/bases/base_partitioner.py +++ b/graphgen/bases/base_partitioner.py @@ -39,7 +39,12 @@ def community2batch( if node_data: nodes_data.append((node, node_data)) edges_data = [] - for u, v in edges: + for edge in edges: + # Filter out self-loops and invalid edges + if isinstance(edge, (tuple, list)) and len(edge) == 2: + u, v = edge[0], edge[1] + else: + continue edge_data = g.get_edge(u, v) if edge_data: edges_data.append((u, v, edge_data)) diff --git a/graphgen/models/partitioner/bfs_partitioner.py b/graphgen/models/partitioner/bfs_partitioner.py index 994e08e8..d7bde5aa 100644 --- a/graphgen/models/partitioner/bfs_partitioner.py +++ b/graphgen/models/partitioner/bfs_partitioner.py @@ -43,7 +43,7 @@ def partition( continue comm_n: List[str] = [] - comm_e: List[tuple[str, str]] = [] + comm_e: List[frozenset[str]] = [] queue: deque[tuple[str, Any]] = deque([(kind, seed)]) cnt = 0 @@ -63,9 +63,7 @@ def partition( if it in used_e: continue used_e.add(it) - - u, v = it - comm_e.append((u, v)) + comm_e.append(it) cnt += 1 # push nodes that are not visited for n in it: @@ -73,4 +71,9 @@ def partition( queue.append((NODE_UNIT, n)) if comm_n or comm_e: - yield Community(id=seed, nodes=comm_n, edges=comm_e) + # Filter out self-loops and invalid edges + valid_edges = [ + tuple(edge) for edge in comm_e + if isinstance(edge, frozenset) and len(edge) == 2 + ] + yield Community(id=seed, nodes=comm_n, edges=valid_edges) diff --git a/graphgen/models/partitioner/dfs_partitioner.py b/graphgen/models/partitioner/dfs_partitioner.py index 4d93ad7f..5369a914 100644 --- a/graphgen/models/partitioner/dfs_partitioner.py +++ b/graphgen/models/partitioner/dfs_partitioner.py @@ -42,7 +42,8 @@ def partition( ): continue - comm_n, comm_e = [], [] + comm_n: List[str] = [] + comm_e: List[frozenset[str]] = [] stack = [(kind, seed)] cnt = 0 @@ -63,7 +64,7 @@ def partition( if it in used_e: continue used_e.add(it) - comm_e.append(tuple(it)) + comm_e.append(it) cnt += 1 # push neighboring nodes for n in it: @@ -71,4 +72,9 @@ def partition( stack.append((NODE_UNIT, n)) if comm_n or comm_e: - yield Community(id=seed, nodes=comm_n, edges=comm_e) + # Filter out self-loops and invalid edges + valid_edges = [ + tuple(edge) for edge in comm_e + if isinstance(edge, frozenset) and len(edge) == 2 + ] + yield Community(id=seed, nodes=comm_n, edges=valid_edges) From c534f94c40858e801574a7306f2a233ecc8e3ec5 Mon Sep 17 00:00:00 2001 From: CHERRY-ui8 <2693275288@qq.com> Date: Mon, 5 Jan 2026 19:28:35 +0800 Subject: [PATCH 2/5] refactor: extract a helper method in the base class --- graphgen/bases/base_partitioner.py | 8 ++++++++ graphgen/models/partitioner/bfs_partitioner.py | 6 +----- graphgen/models/partitioner/dfs_partitioner.py | 6 +----- graphgen/models/partitioner/ece_partitioner.py | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/graphgen/bases/base_partitioner.py b/graphgen/bases/base_partitioner.py index 048e80e3..326eb1b6 100644 --- a/graphgen/bases/base_partitioner.py +++ b/graphgen/bases/base_partitioner.py @@ -19,6 +19,13 @@ def partition( :return: List of communities """ + @staticmethod + def _filter_valid_edges(edges: List[Any]) -> List[tuple[str, str]]: + return [ + tuple(edge) for edge in edges + if isinstance(edge, frozenset) and len(edge) == 2 + ] + @staticmethod def community2batch( comm: Community, g: BaseGraphStorage @@ -44,6 +51,7 @@ def community2batch( if isinstance(edge, (tuple, list)) and len(edge) == 2: u, v = edge[0], edge[1] else: + # Skip invalid edge format (e.g., self-loops or malformed edges) continue edge_data = g.get_edge(u, v) if edge_data: diff --git a/graphgen/models/partitioner/bfs_partitioner.py b/graphgen/models/partitioner/bfs_partitioner.py index d7bde5aa..f50a032e 100644 --- a/graphgen/models/partitioner/bfs_partitioner.py +++ b/graphgen/models/partitioner/bfs_partitioner.py @@ -71,9 +71,5 @@ def partition( queue.append((NODE_UNIT, n)) if comm_n or comm_e: - # Filter out self-loops and invalid edges - valid_edges = [ - tuple(edge) for edge in comm_e - if isinstance(edge, frozenset) and len(edge) == 2 - ] + valid_edges = self._filter_valid_edges(comm_e) yield Community(id=seed, nodes=comm_n, edges=valid_edges) diff --git a/graphgen/models/partitioner/dfs_partitioner.py b/graphgen/models/partitioner/dfs_partitioner.py index 5369a914..f4189bc5 100644 --- a/graphgen/models/partitioner/dfs_partitioner.py +++ b/graphgen/models/partitioner/dfs_partitioner.py @@ -72,9 +72,5 @@ def partition( stack.append((NODE_UNIT, n)) if comm_n or comm_e: - # Filter out self-loops and invalid edges - valid_edges = [ - tuple(edge) for edge in comm_e - if isinstance(edge, frozenset) and len(edge) == 2 - ] + valid_edges = self._filter_valid_edges(comm_e) yield Community(id=seed, nodes=comm_n, edges=valid_edges) diff --git a/graphgen/models/partitioner/ece_partitioner.py b/graphgen/models/partitioner/ece_partitioner.py index af3af7c7..7f29608d 100644 --- a/graphgen/models/partitioner/ece_partitioner.py +++ b/graphgen/models/partitioner/ece_partitioner.py @@ -142,7 +142,7 @@ def _add_unit(u): return Community( id=seed_unit[1], nodes=list(community_nodes.keys()), - edges=[tuple(edge) for edge in community_edges if isinstance(edge, frozenset) and len(edge)==2], + edges=self._filter_valid_edges(list(community_edges.keys())), ) for unit in tqdm(all_units, desc="ECE partition"): From 4b55d10c397f239230bd1b82fc92700c8dc2d80e Mon Sep 17 00:00:00 2001 From: CHERRY-ui8 <2693275288@qq.com> Date: Mon, 5 Jan 2026 19:33:11 +0800 Subject: [PATCH 3/5] fix: add List import to dfs_partitioner --- graphgen/models/partitioner/dfs_partitioner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphgen/models/partitioner/dfs_partitioner.py b/graphgen/models/partitioner/dfs_partitioner.py index f4189bc5..7eeebfa2 100644 --- a/graphgen/models/partitioner/dfs_partitioner.py +++ b/graphgen/models/partitioner/dfs_partitioner.py @@ -1,6 +1,6 @@ import random from collections.abc import Iterable -from typing import Any +from typing import Any, List from graphgen.bases import BaseGraphStorage, BasePartitioner from graphgen.bases.datatypes import Community From 33e0afbd3dab7324c8f57d9cd2f7f8c731ef32de Mon Sep 17 00:00:00 2001 From: chenzihong <522023320011@smail.nju.edu.cn> Date: Thu, 8 Jan 2026 22:44:46 +0800 Subject: [PATCH 4/5] fix: align edge type --- graphgen/bases/base_partitioner.py | 34 ++++---- .../models/partitioner/bfs_partitioner.py | 7 +- .../models/partitioner/dfs_partitioner.py | 7 +- .../models/partitioner/ece_partitioner.py | 2 +- graphgen/utils/help_nltk.py | 4 +- tests/integration_tests/test_engine.py | 78 ------------------- 6 files changed, 22 insertions(+), 110 deletions(-) delete mode 100644 tests/integration_tests/test_engine.py diff --git a/graphgen/bases/base_partitioner.py b/graphgen/bases/base_partitioner.py index 326eb1b6..384c9e4e 100644 --- a/graphgen/bases/base_partitioner.py +++ b/graphgen/bases/base_partitioner.py @@ -19,13 +19,6 @@ def partition( :return: List of communities """ - @staticmethod - def _filter_valid_edges(edges: List[Any]) -> List[tuple[str, str]]: - return [ - tuple(edge) for edge in edges - if isinstance(edge, frozenset) and len(edge) == 2 - ] - @staticmethod def community2batch( comm: Community, g: BaseGraphStorage @@ -48,18 +41,15 @@ def community2batch( edges_data = [] for edge in edges: # Filter out self-loops and invalid edges - if isinstance(edge, (tuple, list)) and len(edge) == 2: - u, v = edge[0], edge[1] - else: - # Skip invalid edge format (e.g., self-loops or malformed edges) + if not isinstance(edge, tuple) or len(edge) != 2: + continue + u, v = edge + if u == v: continue - edge_data = g.get_edge(u, v) + + edge_data = g.get_edge(u, v) or g.get_edge(v, u) if edge_data: edges_data.append((u, v, edge_data)) - else: - edge_data = g.get_edge(v, u) - if edge_data: - edges_data.append((v, u, edge_data)) return nodes_data, edges_data @staticmethod @@ -74,9 +64,11 @@ def _build_adjacency_list( """ adj: dict[str, List[str]] = {n[0]: [] for n in nodes} edge_set: set[tuple[str, str]] = set() - for e in edges: - adj[e[0]].append(e[1]) - adj[e[1]].append(e[0]) - edge_set.add((e[0], e[1])) - edge_set.add((e[1], e[0])) + for u, v, _ in edges: + if u == v: + continue + adj[u].append(v) + adj[v].append(u) + edge_set.add((u, v)) + edge_set.add((v, u)) return adj, edge_set diff --git a/graphgen/models/partitioner/bfs_partitioner.py b/graphgen/models/partitioner/bfs_partitioner.py index f50a032e..a00ad76d 100644 --- a/graphgen/models/partitioner/bfs_partitioner.py +++ b/graphgen/models/partitioner/bfs_partitioner.py @@ -43,7 +43,7 @@ def partition( continue comm_n: List[str] = [] - comm_e: List[frozenset[str]] = [] + comm_e: List[tuple[str, str]] = [] queue: deque[tuple[str, Any]] = deque([(kind, seed)]) cnt = 0 @@ -63,7 +63,7 @@ def partition( if it in used_e: continue used_e.add(it) - comm_e.append(it) + comm_e.append(tuple(sorted(it))) cnt += 1 # push nodes that are not visited for n in it: @@ -71,5 +71,4 @@ def partition( queue.append((NODE_UNIT, n)) if comm_n or comm_e: - valid_edges = self._filter_valid_edges(comm_e) - yield Community(id=seed, nodes=comm_n, edges=valid_edges) + yield Community(id=seed, nodes=comm_n, edges=comm_e) diff --git a/graphgen/models/partitioner/dfs_partitioner.py b/graphgen/models/partitioner/dfs_partitioner.py index 7eeebfa2..fa2786e6 100644 --- a/graphgen/models/partitioner/dfs_partitioner.py +++ b/graphgen/models/partitioner/dfs_partitioner.py @@ -43,7 +43,7 @@ def partition( continue comm_n: List[str] = [] - comm_e: List[frozenset[str]] = [] + comm_e: List[tuple[str, str]] = [] stack = [(kind, seed)] cnt = 0 @@ -64,7 +64,7 @@ def partition( if it in used_e: continue used_e.add(it) - comm_e.append(it) + comm_e.append(tuple(sorted(it))) cnt += 1 # push neighboring nodes for n in it: @@ -72,5 +72,4 @@ def partition( stack.append((NODE_UNIT, n)) if comm_n or comm_e: - valid_edges = self._filter_valid_edges(comm_e) - yield Community(id=seed, nodes=comm_n, edges=valid_edges) + yield Community(id=seed, nodes=comm_n, edges=comm_e) diff --git a/graphgen/models/partitioner/ece_partitioner.py b/graphgen/models/partitioner/ece_partitioner.py index 7f29608d..c2611be3 100644 --- a/graphgen/models/partitioner/ece_partitioner.py +++ b/graphgen/models/partitioner/ece_partitioner.py @@ -142,7 +142,7 @@ def _add_unit(u): return Community( id=seed_unit[1], nodes=list(community_nodes.keys()), - edges=self._filter_valid_edges(list(community_edges.keys())), + edges=[tuple(sorted(e)) for e in community_edges] ) for unit in tqdm(all_units, desc="ECE partition"): diff --git a/graphgen/utils/help_nltk.py b/graphgen/utils/help_nltk.py index c7d5e301..2605a584 100644 --- a/graphgen/utils/help_nltk.py +++ b/graphgen/utils/help_nltk.py @@ -3,13 +3,13 @@ from typing import Dict, List, Final, Optional import warnings import nltk -import jieba - warnings.filterwarnings( "ignore", category=UserWarning, module=r"jieba\._compat" ) +import jieba # pylint: disable=import-error + class NLTKHelper: """ diff --git a/tests/integration_tests/test_engine.py b/tests/integration_tests/test_engine.py deleted file mode 100644 index 6a389e42..00000000 --- a/tests/integration_tests/test_engine.py +++ /dev/null @@ -1,78 +0,0 @@ -import pytest - -from graphgen.engine import Context, Engine, op - -engine = Engine(max_workers=2) - - -def test_simple_dag(capsys): - """Verify the DAG A->B/C->D execution results and print order.""" - ctx = Context() - - @op("A") - def op_a(self, ctx): - print("Running A") - ctx.set("A", 1) - - @op("B", deps=["A"]) - def op_b(self, ctx): - print("Running B") - ctx.set("B", ctx.get("A") + 1) - - @op("C", deps=["A"]) - def op_c(self, ctx): - print("Running C") - ctx.set("C", ctx.get("A") + 2) - - @op("D", deps=["B", "C"]) - def op_d(self, ctx): - print("Running D") - ctx.set("D", ctx.get("B") + ctx.get("C")) - - # Explicitly list the nodes to run; avoid relying on globals(). - ops = [op_a, op_b, op_c, op_d] - engine.run(ops, ctx) - - # Assert final results. - assert ctx["A"] == 1 - assert ctx["B"] == 2 - assert ctx["C"] == 3 - assert ctx["D"] == 5 - - # Assert print order: A must run before B and C; D must run after B and C. - captured = capsys.readouterr().out.strip().splitlines() - assert "Running A" in captured - assert "Running B" in captured - assert "Running C" in captured - assert "Running D" in captured - - a_idx = next(i for i, line in enumerate(captured) if "Running A" in line) - b_idx = next(i for i, line in enumerate(captured) if "Running B" in line) - c_idx = next(i for i, line in enumerate(captured) if "Running C" in line) - d_idx = next(i for i, line in enumerate(captured) if "Running D" in line) - - assert a_idx < b_idx - assert a_idx < c_idx - assert d_idx > b_idx - assert d_idx > c_idx - - -def test_cyclic_detection(): - """A cyclic dependency should raise ValueError.""" - ctx = Context() - - @op("X", deps=["Y"]) - def op_x(self, ctx): - pass - - @op("Y", deps=["X"]) - def op_y(self, ctx): - pass - - ops = [op_x, op_y] - with pytest.raises(ValueError, match="Cyclic dependencies"): - engine.run(ops, ctx) - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) From b7c675cb2250f17f6f275ab042d1368f420eb2d9 Mon Sep 17 00:00:00 2001 From: chenzihong <522023320011@smail.nju.edu.cn> Date: Thu, 8 Jan 2026 22:51:33 +0800 Subject: [PATCH 5/5] fix: fix lint error --- graphgen/utils/help_nltk.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/graphgen/utils/help_nltk.py b/graphgen/utils/help_nltk.py index 2605a584..86d55e5f 100644 --- a/graphgen/utils/help_nltk.py +++ b/graphgen/utils/help_nltk.py @@ -8,7 +8,8 @@ category=UserWarning, module=r"jieba\._compat" ) -import jieba # pylint: disable=import-error +# pylint: disable=wrong-import-position +import jieba class NLTKHelper: