From eea3e2a405f9167ac004d2bec12139323824483e Mon Sep 17 00:00:00 2001
From: CHERRY-ui8 <2693275288@qq.com>
Date: Mon, 5 Jan 2026 17:49:44 +0800
Subject: [PATCH 1/5] fix: add special edge handling in bfs, dfs and base
 partitioner

---
 graphgen/bases/base_partitioner.py             |  7 ++++++-
 graphgen/models/partitioner/bfs_partitioner.py | 13 ++++++++-----
 graphgen/models/partitioner/dfs_partitioner.py | 12 +++++++++---
 3 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/graphgen/bases/base_partitioner.py b/graphgen/bases/base_partitioner.py
index d948e3a7..048e80e3 100644
--- a/graphgen/bases/base_partitioner.py
+++ b/graphgen/bases/base_partitioner.py
@@ -39,7 +39,12 @@ def community2batch(
             if node_data:
                 nodes_data.append((node, node_data))
         edges_data = []
-        for u, v in edges:
+        for edge in edges:
+            # Filter out self-loops and invalid edges
+            if isinstance(edge, (tuple, list)) and len(edge) == 2:
+                u, v = edge[0], edge[1]
+            else:
+                continue
             edge_data = g.get_edge(u, v)
             if edge_data:
                 edges_data.append((u, v, edge_data))
diff --git a/graphgen/models/partitioner/bfs_partitioner.py b/graphgen/models/partitioner/bfs_partitioner.py
index 994e08e8..d7bde5aa 100644
--- a/graphgen/models/partitioner/bfs_partitioner.py
+++ b/graphgen/models/partitioner/bfs_partitioner.py
@@ -43,7 +43,7 @@ def partition(
                 continue
 
             comm_n: List[str] = []
-            comm_e: List[tuple[str, str]] = []
+            comm_e: List[frozenset[str]] = []
             queue: deque[tuple[str, Any]] = deque([(kind, seed)])
             cnt = 0
 
@@ -63,9 +63,7 @@ def partition(
                     if it in used_e:
                         continue
                     used_e.add(it)
-
-                    u, v = it
-                    comm_e.append((u, v))
+                    comm_e.append(it)
                     cnt += 1
                     # push nodes that are not visited
                     for n in it:
@@ -73,4 +71,9 @@ def partition(
                             queue.append((NODE_UNIT, n))
 
             if comm_n or comm_e:
-                yield Community(id=seed, nodes=comm_n, edges=comm_e)
+                # Filter out self-loops and invalid edges
+                valid_edges = [
+                    tuple(edge) for edge in comm_e
+                    if isinstance(edge, frozenset) and len(edge) == 2
+                ]
+                yield Community(id=seed, nodes=comm_n, edges=valid_edges)
diff --git a/graphgen/models/partitioner/dfs_partitioner.py b/graphgen/models/partitioner/dfs_partitioner.py
index 4d93ad7f..5369a914 100644
--- a/graphgen/models/partitioner/dfs_partitioner.py
+++ b/graphgen/models/partitioner/dfs_partitioner.py
@@ -42,7 +42,8 @@ def partition(
             ):
                 continue
 
-            comm_n, comm_e = [], []
+            comm_n: List[str] = []
+            comm_e: List[frozenset[str]] = []
             stack = [(kind, seed)]
             cnt = 0
 
@@ -63,7 +64,7 @@ def partition(
                     if it in used_e:
                         continue
                     used_e.add(it)
-                    comm_e.append(tuple(it))
+                    comm_e.append(it)
                     cnt += 1
                     # push neighboring nodes
                     for n in it:
@@ -71,4 +72,9 @@ def partition(
                             stack.append((NODE_UNIT, n))
 
             if comm_n or comm_e:
-                yield Community(id=seed, nodes=comm_n, edges=comm_e)
+                # Filter out self-loops and invalid edges
+                valid_edges = [
+                    tuple(edge) for edge in comm_e
+                    if isinstance(edge, frozenset) and len(edge) == 2
+                ]
+                yield Community(id=seed, nodes=comm_n, edges=valid_edges)

From c534f94c40858e801574a7306f2a233ecc8e3ec5 Mon Sep 17 00:00:00 2001
From: CHERRY-ui8 <2693275288@qq.com>
Date: Mon, 5 Jan 2026 19:28:35 +0800
Subject: [PATCH 2/5] refactor: extract a helper method in the base class

---
 graphgen/bases/base_partitioner.py             | 8 ++++++++
 graphgen/models/partitioner/bfs_partitioner.py | 6 +-----
 graphgen/models/partitioner/dfs_partitioner.py | 6 +-----
 graphgen/models/partitioner/ece_partitioner.py | 2 +-
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/graphgen/bases/base_partitioner.py b/graphgen/bases/base_partitioner.py
index 048e80e3..326eb1b6 100644
--- a/graphgen/bases/base_partitioner.py
+++ b/graphgen/bases/base_partitioner.py
@@ -19,6 +19,13 @@ def partition(
         :return: List of communities
         """
 
+    @staticmethod
+    def _filter_valid_edges(edges: List[Any]) -> List[tuple[str, str]]:
+        return [
+            tuple(edge) for edge in edges
+            if isinstance(edge, frozenset) and len(edge) == 2
+        ]
+
     @staticmethod
     def community2batch(
         comm: Community, g: BaseGraphStorage
@@ -44,6 +51,7 @@ def community2batch(
             if isinstance(edge, (tuple, list)) and len(edge) == 2:
                 u, v = edge[0], edge[1]
             else:
+                # Skip invalid edge format (e.g., self-loops or malformed edges)
                 continue
             edge_data = g.get_edge(u, v)
             if edge_data:
diff --git a/graphgen/models/partitioner/bfs_partitioner.py b/graphgen/models/partitioner/bfs_partitioner.py
index d7bde5aa..f50a032e 100644
--- a/graphgen/models/partitioner/bfs_partitioner.py
+++ b/graphgen/models/partitioner/bfs_partitioner.py
@@ -71,9 +71,5 @@ def partition(
                             queue.append((NODE_UNIT, n))
 
             if comm_n or comm_e:
-                # Filter out self-loops and invalid edges
-                valid_edges = [
-                    tuple(edge) for edge in comm_e
-                    if isinstance(edge, frozenset) and len(edge) == 2
-                ]
+                valid_edges = self._filter_valid_edges(comm_e)
                 yield Community(id=seed, nodes=comm_n, edges=valid_edges)
diff --git a/graphgen/models/partitioner/dfs_partitioner.py b/graphgen/models/partitioner/dfs_partitioner.py
index 5369a914..f4189bc5 100644
--- a/graphgen/models/partitioner/dfs_partitioner.py
+++ b/graphgen/models/partitioner/dfs_partitioner.py
@@ -72,9 +72,5 @@ def partition(
                             stack.append((NODE_UNIT, n))
 
             if comm_n or comm_e:
-                # Filter out self-loops and invalid edges
-                valid_edges = [
-                    tuple(edge) for edge in comm_e
-                    if isinstance(edge, frozenset) and len(edge) == 2
-                ]
+                valid_edges = self._filter_valid_edges(comm_e)
                 yield Community(id=seed, nodes=comm_n, edges=valid_edges)
diff --git a/graphgen/models/partitioner/ece_partitioner.py b/graphgen/models/partitioner/ece_partitioner.py
index af3af7c7..7f29608d 100644
--- a/graphgen/models/partitioner/ece_partitioner.py
+++ b/graphgen/models/partitioner/ece_partitioner.py
@@ -142,7 +142,7 @@ def _add_unit(u):
             return Community(
                 id=seed_unit[1],
                 nodes=list(community_nodes.keys()),
-                edges=[tuple(edge) for edge in community_edges if isinstance(edge, frozenset) and len(edge)==2],
+                edges=self._filter_valid_edges(list(community_edges.keys())),
             )
 
         for unit in tqdm(all_units, desc="ECE partition"):

From 4b55d10c397f239230bd1b82fc92700c8dc2d80e Mon Sep 17 00:00:00 2001
From: CHERRY-ui8 <2693275288@qq.com>
Date: Mon, 5 Jan 2026 19:33:11 +0800
Subject: [PATCH 3/5] fix: add List import to dfs_partitioner

---
 graphgen/models/partitioner/dfs_partitioner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/graphgen/models/partitioner/dfs_partitioner.py b/graphgen/models/partitioner/dfs_partitioner.py
index f4189bc5..7eeebfa2 100644
--- a/graphgen/models/partitioner/dfs_partitioner.py
+++ b/graphgen/models/partitioner/dfs_partitioner.py
@@ -1,6 +1,6 @@
 import random
 from collections.abc import Iterable
-from typing import Any
+from typing import Any, List
 
 from graphgen.bases import BaseGraphStorage, BasePartitioner
 from graphgen.bases.datatypes import Community

From 33e0afbd3dab7324c8f57d9cd2f7f8c731ef32de Mon Sep 17 00:00:00 2001
From: chenzihong <522023320011@smail.nju.edu.cn>
Date: Thu, 8 Jan 2026 22:44:46 +0800
Subject: [PATCH 4/5] fix: align edge type to sorted (u, v) tuples across partitioners

---
 graphgen/bases/base_partitioner.py            | 34 ++++----
 .../models/partitioner/bfs_partitioner.py     |  7 +-
 .../models/partitioner/dfs_partitioner.py     |  7 +-
 .../models/partitioner/ece_partitioner.py     |  2 +-
 graphgen/utils/help_nltk.py                   |  4 +-
 tests/integration_tests/test_engine.py        | 78 -------------------
 6 files changed, 22 insertions(+), 110 deletions(-)
 delete mode 100644 tests/integration_tests/test_engine.py

diff --git a/graphgen/bases/base_partitioner.py b/graphgen/bases/base_partitioner.py
index 326eb1b6..384c9e4e 100644
--- a/graphgen/bases/base_partitioner.py
+++ b/graphgen/bases/base_partitioner.py
@@ -19,13 +19,6 @@ def partition(
         :return: List of communities
         """
 
-    @staticmethod
-    def _filter_valid_edges(edges: List[Any]) -> List[tuple[str, str]]:
-        return [
-            tuple(edge) for edge in edges
-            if isinstance(edge, frozenset) and len(edge) == 2
-        ]
-
     @staticmethod
     def community2batch(
         comm: Community, g: BaseGraphStorage
@@ -48,18 +41,15 @@ def community2batch(
         edges_data = []
         for edge in edges:
             # Filter out self-loops and invalid edges
-            if isinstance(edge, (tuple, list)) and len(edge) == 2:
-                u, v = edge[0], edge[1]
-            else:
-                # Skip invalid edge format (e.g., self-loops or malformed edges)
+            if not isinstance(edge, tuple) or len(edge) != 2:
+                continue
+            u, v = edge
+            if u == v:
                 continue
-            edge_data = g.get_edge(u, v)
+
+            edge_data = g.get_edge(u, v) or g.get_edge(v, u)
             if edge_data:
                 edges_data.append((u, v, edge_data))
-            else:
-                edge_data = g.get_edge(v, u)
-                if edge_data:
-                    edges_data.append((v, u, edge_data))
         return nodes_data, edges_data
 
     @staticmethod
@@ -74,9 +64,11 @@ def _build_adjacency_list(
         """
         adj: dict[str, List[str]] = {n[0]: [] for n in nodes}
         edge_set: set[tuple[str, str]] = set()
-        for e in edges:
-            adj[e[0]].append(e[1])
-            adj[e[1]].append(e[0])
-            edge_set.add((e[0], e[1]))
-            edge_set.add((e[1], e[0]))
+        for u, v, _ in edges:
+            if u == v:
+                continue
+            adj[u].append(v)
+            adj[v].append(u)
+            edge_set.add((u, v))
+            edge_set.add((v, u))
         return adj, edge_set
diff --git a/graphgen/models/partitioner/bfs_partitioner.py b/graphgen/models/partitioner/bfs_partitioner.py
index f50a032e..a00ad76d 100644
--- a/graphgen/models/partitioner/bfs_partitioner.py
+++ b/graphgen/models/partitioner/bfs_partitioner.py
@@ -43,7 +43,7 @@ def partition(
                 continue
 
             comm_n: List[str] = []
-            comm_e: List[frozenset[str]] = []
+            comm_e: List[tuple[str, str]] = []
             queue: deque[tuple[str, Any]] = deque([(kind, seed)])
             cnt = 0
 
@@ -63,7 +63,7 @@ def partition(
                     if it in used_e:
                         continue
                     used_e.add(it)
-                    comm_e.append(it)
+                    comm_e.append(tuple(sorted(it)))
                     cnt += 1
                     # push nodes that are not visited
                     for n in it:
@@ -71,5 +71,4 @@ def partition(
                             queue.append((NODE_UNIT, n))
 
             if comm_n or comm_e:
-                valid_edges = self._filter_valid_edges(comm_e)
-                yield Community(id=seed, nodes=comm_n, edges=valid_edges)
+                yield Community(id=seed, nodes=comm_n, edges=comm_e)
diff --git a/graphgen/models/partitioner/dfs_partitioner.py b/graphgen/models/partitioner/dfs_partitioner.py
index 7eeebfa2..fa2786e6 100644
--- a/graphgen/models/partitioner/dfs_partitioner.py
+++ b/graphgen/models/partitioner/dfs_partitioner.py
@@ -43,7 +43,7 @@ def partition(
                 continue
 
             comm_n: List[str] = []
-            comm_e: List[frozenset[str]] = []
+            comm_e: List[tuple[str, str]] = []
             stack = [(kind, seed)]
             cnt = 0
 
@@ -64,7 +64,7 @@ def partition(
                     if it in used_e:
                         continue
                     used_e.add(it)
-                    comm_e.append(it)
+                    comm_e.append(tuple(sorted(it)))
                     cnt += 1
                     # push neighboring nodes
                     for n in it:
@@ -72,5 +72,4 @@ def partition(
                             stack.append((NODE_UNIT, n))
 
             if comm_n or comm_e:
-                valid_edges = self._filter_valid_edges(comm_e)
-                yield Community(id=seed, nodes=comm_n, edges=valid_edges)
+                yield Community(id=seed, nodes=comm_n, edges=comm_e)
diff --git a/graphgen/models/partitioner/ece_partitioner.py b/graphgen/models/partitioner/ece_partitioner.py
index 7f29608d..c2611be3 100644
--- a/graphgen/models/partitioner/ece_partitioner.py
+++ b/graphgen/models/partitioner/ece_partitioner.py
@@ -142,7 +142,7 @@ def _add_unit(u):
             return Community(
                 id=seed_unit[1],
                 nodes=list(community_nodes.keys()),
-                edges=self._filter_valid_edges(list(community_edges.keys())),
+                edges=[tuple(sorted(e)) for e in community_edges]
             )
 
         for unit in tqdm(all_units, desc="ECE partition"):
diff --git a/graphgen/utils/help_nltk.py b/graphgen/utils/help_nltk.py
index c7d5e301..2605a584 100644
--- a/graphgen/utils/help_nltk.py
+++ b/graphgen/utils/help_nltk.py
@@ -3,13 +3,13 @@
 from typing import Dict, List, Final, Optional
 import warnings
 import nltk
-import jieba
-
 warnings.filterwarnings(
     "ignore",
     category=UserWarning,
     module=r"jieba\._compat"
 )
+import jieba # pylint: disable=import-error
+
 
 class NLTKHelper:
     """
diff --git a/tests/integration_tests/test_engine.py b/tests/integration_tests/test_engine.py
deleted file mode 100644
index 6a389e42..00000000
--- a/tests/integration_tests/test_engine.py
+++ /dev/null
@@ -1,78 +0,0 @@
-import pytest
-
-from graphgen.engine import Context, Engine, op
-
-engine = Engine(max_workers=2)
-
-
-def test_simple_dag(capsys):
-    """Verify the DAG A->B/C->D execution results and print order."""
-    ctx = Context()
-
-    @op("A")
-    def op_a(self, ctx):
-        print("Running A")
-        ctx.set("A", 1)
-
-    @op("B", deps=["A"])
-    def op_b(self, ctx):
-        print("Running B")
-        ctx.set("B", ctx.get("A") + 1)
-
-    @op("C", deps=["A"])
-    def op_c(self, ctx):
-        print("Running C")
-        ctx.set("C", ctx.get("A") + 2)
-
-    @op("D", deps=["B", "C"])
-    def op_d(self, ctx):
-        print("Running D")
-        ctx.set("D", ctx.get("B") + ctx.get("C"))
-
-    # Explicitly list the nodes to run; avoid relying on globals().
-    ops = [op_a, op_b, op_c, op_d]
-    engine.run(ops, ctx)
-
-    # Assert final results.
-    assert ctx["A"] == 1
-    assert ctx["B"] == 2
-    assert ctx["C"] == 3
-    assert ctx["D"] == 5
-
-    # Assert print order: A must run before B and C; D must run after B and C.
-    captured = capsys.readouterr().out.strip().splitlines()
-    assert "Running A" in captured
-    assert "Running B" in captured
-    assert "Running C" in captured
-    assert "Running D" in captured
-
-    a_idx = next(i for i, line in enumerate(captured) if "Running A" in line)
-    b_idx = next(i for i, line in enumerate(captured) if "Running B" in line)
-    c_idx = next(i for i, line in enumerate(captured) if "Running C" in line)
-    d_idx = next(i for i, line in enumerate(captured) if "Running D" in line)
-
-    assert a_idx < b_idx
-    assert a_idx < c_idx
-    assert d_idx > b_idx
-    assert d_idx > c_idx
-
-
-def test_cyclic_detection():
-    """A cyclic dependency should raise ValueError."""
-    ctx = Context()
-
-    @op("X", deps=["Y"])
-    def op_x(self, ctx):
-        pass
-
-    @op("Y", deps=["X"])
-    def op_y(self, ctx):
-        pass
-
-    ops = [op_x, op_y]
-    with pytest.raises(ValueError, match="Cyclic dependencies"):
-        engine.run(ops, ctx)
-
-
-if __name__ == "__main__":
-    pytest.main([__file__, "-v"])

From b7c675cb2250f17f6f275ab042d1368f420eb2d9 Mon Sep 17 00:00:00 2001
From: chenzihong <522023320011@smail.nju.edu.cn>
Date: Thu, 8 Jan 2026 22:51:33 +0800
Subject: [PATCH 5/5] fix: silence pylint wrong-import-position for the jieba import

---
 graphgen/utils/help_nltk.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/graphgen/utils/help_nltk.py b/graphgen/utils/help_nltk.py
index 2605a584..86d55e5f 100644
--- a/graphgen/utils/help_nltk.py
+++ b/graphgen/utils/help_nltk.py
@@ -8,7 +8,8 @@
     category=UserWarning,
     module=r"jieba\._compat"
 )
-import jieba # pylint: disable=import-error
+# pylint: disable=wrong-import-position
+import jieba
 
 
 class NLTKHelper: