synaptic-memory/src/synaptic/mcp/server.py at main · PlateerLab/synaptic-memory · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""Synaptic Memory MCP Server — expose knowledge graph as MCP tools.

Usage:
    synaptic-mcp                          # stdio (default, for Claude Code)
    synaptic-mcp --db ./knowledge.db      # custom DB path
    synaptic-mcp --dsn postgresql://...   # PostgreSQL backend
"""

from __future__ import annotations

import asyncio
import logging
import sys
from typing import Any

from mcp.server.fastmcp import FastMCP

from synaptic.mcp import __version__

# Logger shared by the helpers and tools in this module.
logger = logging.getLogger("synaptic.mcp")

# FastMCP application object; every ``@server.tool()`` function below is
# registered on it.
server = FastMCP(
    "Synaptic Memory",
    dependencies=["aiosqlite"],
)

# Module-level state (initialized on first tool call)
# `_graph`, `_backend` and `_embedder` are assigned by `_ensure_graph()`;
# `_tracker` is assigned by `_ensure_tracker()`.
_graph: Any = None
_backend: Any = None
_embedder: Any = None
_tracker: Any = None
# Backend selection: a non-empty `_dsn` selects the PostgreSQL backend,
# otherwise the SQLite backend is opened at `_db_path`.
# NOTE(review): these configuration values are presumably overwritten by
# the command-line entry point before the first tool call — confirm.
_db_path: str = "knowledge.db"
_dsn: str = ""
_source_dsn: str = ""  # Default source DB for CDC sync tools (optional)
# A non-empty `_embed_url` makes `_ensure_graph()` create an
# OpenAI-compatible embedding provider using `_embed_model`.
_embed_url: str = ""
_embed_model: str = "default"
# Vector cascade tuning — see synaptic.search.HybridSearch docstring
# for the per-embedder cosine distribution guide. None means "use
# the package default" (DEFAULT_VECTOR_MIN_COSINE / RELATIVE_DROP),
# which is also overridable via the SYNAPTIC_VECTOR_* env vars.
_vector_min_cosine: float | None = None
_vector_relative_drop: float | None = None

# Lazy initialisation under concurrent tool calls — FastMCP dispatches
# tool invocations on a shared asyncio loop, so two tools firing at
# once during the first turn both hit `_graph is None` and can race
# into two SynapticGraph constructions. The lock serialises the
# initialisation path; once `_graph` is set, the fast path needs no
# locking.
# The lock object itself is created on demand by `_get_init_lock()`,
# so this slot stays None until the first initialisation attempt.
_graph_init_lock: asyncio.Lock | None = None


def _get_init_lock() -> asyncio.Lock:
    """Return the shared initialisation lock, creating it on first request.

    The lock is stored in the module-level ``_graph_init_lock`` slot so
    every caller receives the same ``asyncio.Lock`` instance.
    """
    global _graph_init_lock
    lock = _graph_init_lock
    if lock is None:
        # First request: build the lock and remember it for later callers.
        lock = _graph_init_lock = asyncio.Lock()
    return lock


async def _ensure_graph() -> Any:
    """Return the shared SynapticGraph, constructing it on the first call.

    Construction picks the storage backend (PostgreSQL when ``_dsn`` is
    set, SQLite at ``_db_path`` otherwise), connects it, optionally
    creates an embedding provider, and wires the graph together. The
    result is cached in the module-level ``_graph``; ``_backend`` and
    ``_embedder`` are published as module globals as well.

    Returns:
        The initialised graph instance.
    """
    global _graph, _backend, _embedder

    # Lock-free fast path: `_graph` is assigned exactly once in this
    # function and nothing in this module resets it afterwards.
    if _graph is not None:
        return _graph

    async with _get_init_lock():
        # Re-check under the lock: a coroutine that was ahead of us in
        # the queue may already have finished the construction.
        if _graph is None:
            from synaptic.extensions.chunk_entity_index import ChunkEntityIndex
            from synaptic.extensions.phrase_extractor import PhraseExtractor
            from synaptic.extensions.tagger_regex import RegexTagExtractor
            from synaptic.graph import SynapticGraph
            from synaptic.ontology import build_agent_ontology

            if not _dsn:
                from synaptic.backends.sqlite import SQLiteBackend

                _backend = SQLiteBackend(_db_path)
            else:
                from synaptic.backends.postgresql import PostgreSQLBackend

                _backend = PostgreSQLBackend(_dsn)

            await _backend.connect()

            # Optional auto-embedding against an OpenAI-compatible endpoint.
            if _embed_url:
                from synaptic.extensions.embedder import OpenAIEmbeddingProvider

                _embedder = OpenAIEmbeddingProvider(api_base=_embed_url, model=_embed_model)
                logger.info("Embedder configured: %s (model=%s)", _embed_url, _embed_model)

            # Cross-document bridging needs BOTH of the following:
            #
            #   * PhraseExtractor — turns salient phrases of each chunk
            #     into one ENTITY hub per unique phrase; chunks sharing a
            #     phrase CONTAINS-edge into the same hub, which links
            #     otherwise unrelated documents.
            #
            #   * ChunkEntityIndex — the bidirectional registry the search
            #     pipeline (PPR, HybridReranker, GraphExpander) uses to
            #     move between a chunk and its phrase hubs.
            #
            # With either one missing, ingesting N files yields N isolated
            # clusters and search quietly degrades to FTS over disjoint
            # files.
            _graph = SynapticGraph(
                _backend,
                tag_extractor=RegexTagExtractor(),
                ontology=build_agent_ontology(),
                embedder=_embedder,
                chunk_entity_index=ChunkEntityIndex(),
                phrase_extractor=PhraseExtractor(),
                vector_min_cosine=_vector_min_cosine,
                vector_relative_drop=_vector_relative_drop,
            )

            embedder_state = "off" if _embedder is None else "on"
            min_cos_label = "default" if _vector_min_cosine is None else _vector_min_cosine
            rel_drop_label = "default" if _vector_relative_drop is None else _vector_relative_drop
            logger.info(
                "Knowledge graph initialized "
                "(backend=%s, embedder=%s, phrase_extractor=on, vector_min_cos=%s, vector_rel_drop=%s)",
                type(_backend).__name__,
                embedder_state,
                min_cos_label,
                rel_drop_label,
            )
        return _graph


async def _ensure_tracker() -> Any:
    """Lazy-initialize the ActivityTracker (concurrency-safe).

    The graph is resolved *before* the initialisation lock is taken.
    ``_ensure_graph()`` acquires the same lock when the graph has not
    been built yet, and ``asyncio.Lock`` is not reentrant, so awaiting
    it while already holding the lock would block forever whenever an
    agent tool is the very first tool invoked.

    Returns:
        The shared ActivityTracker bound to the shared graph.
    """
    global _tracker

    # Fast path: the tracker is only ever assigned once.
    if _tracker is not None:
        return _tracker

    # Must happen outside the lock — see the docstring.
    graph = await _ensure_graph()

    async with _get_init_lock():
        # Double-checked: another coroutine may have created the tracker
        # while we were waiting for the graph or for the lock.
        if _tracker is None:
            from synaptic.activity import ActivityTracker

            _tracker = ActivityTracker(graph)
        return _tracker


# --- Tools ---


@server.tool()
async def knowledge_search(
    query: str,
    limit: int = 10,
) -> dict[str, Any]:
    """Search the knowledge graph for lessons, decisions, patterns, and past outcomes.

    Use this to find relevant company knowledge before starting a task.
    Supports Korean and English queries with synonym expansion.

    Routes through :class:`EvidenceSearch` (the same engine that
    backs ``agent_search`` / ``agent_deep_search``) so semantic
    queries that share no surface words with the source documents
    still surface relevant hits. Replaces the legacy
    ``graph.search()`` / ``HybridSearch`` path whose hardcoded
    ``cos >= 0.45`` cutoff silently dropped most semantic-only
    matches on OpenAI v3 / Cohere / MiniLM embedders.

    Args:
        query: Search query (Korean or English)
        limit: Maximum number of results to return
    """
    graph = await _ensure_graph()

    # Imported lazily so test fixtures and the legacy `graph.search`
    # codepath do not depend on the modern pipeline.
    from synaptic.extensions.evidence_search import EvidenceSearch

    def _as_hit(ev: Any) -> dict[str, Any]:
        """Flatten one evidence item into the JSON-friendly hit record."""
        node = ev.node
        return {
            "id": node.id,
            "kind": str(node.kind),
            "title": node.title,
            "content": node.content[:500],  # cap the payload per hit
            "tags": node.tags,
            "level": str(node.level),
            "score": round(ev.score, 3),
            "reason": ev.reason,
            "category": ev.category,
        }

    # Over-fetch FTS seeds (at least 20, otherwise 3x the limit) so the
    # reranker has a richer pool — matters most with an embedder wired up.
    seed_limit = max(20, limit * 3)
    engine = EvidenceSearch(backend=graph.backend, embedder=_embedder)
    outcome = await engine.search(query, k=limit, fts_seed_limit=seed_limit)

    if not outcome.evidence:
        return {
            "success": True,
            "message": "No knowledge found for this query.",
            "results": [],
        }

    hits = [_as_hit(ev) for ev in outcome.evidence]
    anchors = {
        "categories": list(outcome.anchors.categories),
        "entities": list(outcome.anchors.entities),
    }
    return {
        "success": True,
        "results": hits,
        "total_candidates": len(outcome.scored),
        "search_time_ms": round(outcome.elapsed_ms, 1),
        "anchors": anchors,
    }


@server.tool()
async def knowledge_add(
    title: str,
    content: str,
    kind: str = "concept",
    tags: str = "",
    source: str = "",
) -> dict[str, Any]:
    """Add a new knowledge node to the graph.

    Args:
        title: Node title (concise summary)
        content: Full content/description
        kind: Node type — concept, entity, lesson, decision, rule, artifact, agent, task, sprint
        tags: Comma-separated tags (e.g. "deploy,ci/cd,automation")
        source: Origin of this knowledge (e.g. "sprint:123", "manual")
    """
    from synaptic.models import NodeKind

    graph = await _ensure_graph()

    # Translate the textual kind into the enum; unknown values are
    # reported back together with the accepted ones.
    try:
        resolved_kind = NodeKind(kind)
    except ValueError:
        allowed = ", ".join(NodeKind)
        return {"success": False, "message": f"Invalid kind: {kind}. Use: {allowed}"}

    # An empty `tags` string means "no tags supplied" (None); otherwise
    # split on commas and drop blank pieces.
    parsed_tags = None
    if tags:
        parsed_tags = [piece for piece in (raw.strip() for raw in tags.split(",")) if piece]

    created = await graph.add(
        title=title,
        content=content,
        kind=resolved_kind,
        tags=parsed_tags,
        source=source,
    )

    response: dict[str, Any] = {"success": True}
    response["node_id"] = created.id
    response["title"] = created.title
    response["kind"] = str(created.kind)
    response["tags"] = created.tags
    return response


@server.tool()
async def knowledge_link(
    source_id: str,
    target_id: str,
    kind: str = "related",
    weight: float = 1.0,
) -> dict[str, Any]:
    """Create a link between two knowledge nodes.

    Args:
        source_id: Source node ID
        target_id: Target node ID
        kind: Edge type (related/caused/learned_from/depends_on/produced/contradicts/supersedes)
        weight: Connection strength (0.0 to 5.0)
    """
    from synaptic.models import EdgeKind

    graph = await _ensure_graph()

    # Resolve the textual edge kind; unknown values are reported back
    # together with the accepted ones.
    try:
        resolved_kind = EdgeKind(kind)
    except ValueError:
        allowed = ", ".join(EdgeKind)
        return {"success": False, "message": f"Invalid kind: {kind}. Use: {allowed}"}

    created = await graph.link(source_id, target_id, kind=resolved_kind, weight=weight)

    response: dict[str, Any] = {"success": True}
    response["edge_id"] = created.id
    response["source_id"] = created.source_id
    response["target_id"] = created.target_id
    response["kind"] = str(created.kind)
    response["weight"] = created.weight
    return response


@server.tool()
async def knowledge_reinforce(
    node_ids: str,
    success: bool = True,
) -> dict[str, Any]:
    """Reinforce knowledge nodes after use (Hebbian learning).

    Strengthens connections between co-activated nodes on success,
    weakens them on failure.

    Args:
        node_ids: Comma-separated node IDs to reinforce
        success: True if the knowledge was useful, False if not
    """
    graph = await _ensure_graph()

    # Split the comma-separated list and discard blank entries.
    targets = [piece for piece in (raw.strip() for raw in node_ids.split(",")) if piece]
    if len(targets) == 0:
        return {"success": False, "message": "No node IDs provided"}

    await graph.reinforce(targets, success=success)

    outcome = "failure"
    if success:
        outcome = "success"
    return {
        "success": True,
        "reinforced": len(targets),
        "outcome": outcome,
    }


@server.tool()
async def knowledge_stats() -> dict[str, Any]:
    """Get knowledge graph statistics — node counts by kind and level, cache stats."""
    graph = await _ensure_graph()
    stats = await graph.stats()

    # Start from the success flag, then overlay every statistic the
    # graph reported (a statistic named "success" would override it).
    response: dict[str, Any] = {"success": True}
    response.update(stats.items())
    return response


@server.tool()
async def knowledge_snapshot(
    max_entities: int = 5_000,
    top_phrase_hubs: int = 15,
    top_categories: int = 30,
    include_sample_queries: bool = True,
) -> dict[str, Any]:
    """Generate a markdown snapshot of the graph — for agent priming.

    Returns a compact human-readable summary the agent can read at the
    start of a session to skip the usual cold-start exploration turns
    (probing categories / tables / entities). Sections covered:

    - Scale (documents, chunks, phrase hubs, structured rows, edges)
    - Categories (with doc counts) — usable as ``deep_search(category=)``
    - Top phrase hubs (mention-ranked) — likely good search anchors
    - Tables (structured data) — for ``filter/aggregate/join`` tools
    - Edge types (sampled) — for ``follow``
    - Sample queries — 1-3 illustrative tool invocations

    All stats are computed from direct backend reads — no LLM calls.

    Args:
        max_entities: Cap on entity scan (default 5000). Higher = more
            accurate phrase-hub ranking on large corpora, slower.
        top_phrase_hubs: How many phrase hubs to surface (default 15).
        top_categories: How many categories to list (default 30).
        include_sample_queries: Append a "Sample queries" section with
            1-3 hint invocations derived from the corpus shape.
    """
    graph = await _ensure_graph()
    from synaptic.snapshot import generate_snapshot

    # Map this tool's parameter names onto the generator's keyword names.
    options: dict[str, Any] = {
        "max_entities_scanned": max_entities,
        "top_n_phrase_hubs": top_phrase_hubs,
        "top_n_categories": top_categories,
        "include_sample_queries": include_sample_queries,
    }
    # The generator reads from the storage backend, not the graph facade.
    markdown = await generate_snapshot(graph._backend, **options)

    return {
        "success": True,
        "format": "markdown",
        "snapshot": markdown,
        "length": len(markdown),
    }


@server.tool()
async def knowledge_export(
    output_format: str = "markdown",
) -> dict[str, Any]:
    """Export the knowledge graph.

    The format name is matched case-insensitively and ignoring
    surrounding whitespace. Any value other than "json" falls back to
    markdown, and the ``format`` field of the response always names the
    format that was actually produced.

    Args:
        output_format: Export format — "markdown" or "json"
    """
    graph = await _ensure_graph()

    requested = output_format.strip().lower()
    if requested == "json":
        effective_format = "json"
        content = await graph.export_json()
    else:
        # Unknown names keep the historical markdown fallback, but the
        # response no longer echoes a label that does not match `content`.
        effective_format = "markdown"
        content = await graph.export_markdown()

    return {"success": True, "format": effective_format, "content": content}


@server.tool()
async def knowledge_consolidate() -> dict[str, Any]:
    """Run memory consolidation — expire old L0 nodes, promote accessed ones.

    L0 (72h TTL) → L1 (accessed 3+) → L2 (accessed 10+) → L3 (permanent, 80%+ success rate).
    Also runs vitality decay and edge pruning.
    """
    graph = await _ensure_graph()

    # Three maintenance passes, run strictly in this order.
    consolidation = await graph.consolidate()
    decayed_count = await graph.decay()
    pruned_count = await graph.prune()

    report: dict[str, Any] = {"success": True}
    report["nodes_promoted"] = len(consolidation.nodes_updated)
    report["nodes_created"] = len(consolidation.nodes_created)
    report["vitality_decayed"] = decayed_count
    report["edges_pruned"] = pruned_count
    return report


@server.tool()
async def knowledge_backfill(
    scope: str = "all",
    batch_size: int = 64,
    max_nodes: int | None = None,
) -> dict[str, Any]:
    """Repair existing nodes that are missing embeddings or phrase hubs.

    Use this when a graph was ingested *before* an embedder was
    wired up, or before the MCP server's PhraseExtractor wiring fix
    in v0.14.3 — both situations leave the graph half-built and
    silently degrade search. ``backfill`` walks the graph in place,
    fills the missing signals, and is safe to run repeatedly
    (idempotent: every pass that finds nothing to fix returns
    ``{embeddings_filled: 0, phrases_linked: 0}``).

    Args:
        scope: One of:
          - ``"all"`` (default) — run both embedding and phrase backfill.
          - ``"embeddings"`` — only fill empty embeddings. Requires
            an embedder; no-op without one.
          - ``"phrases"`` — only re-run phrase extraction on nodes
            that have no outgoing CONTAINS edge. Requires a phrase
            extractor; no-op without one.
        batch_size: Embedding batch size (passed to
            ``embedder.embed_batch``). Phrase extraction is per-node
            so this only affects the embedding pass.
        max_nodes: Optional cap on nodes scanned, useful for
            incremental progress on very large graphs. ``None``
            (default) processes every node.
    """
    graph = await _ensure_graph()

    # Decide which passes the requested scope enables.
    want_embeddings = scope == "all" or scope == "embeddings"
    want_phrases = scope == "all" or scope == "phrases"
    if not (want_embeddings or want_phrases):
        # Neither pass selected means the scope name was not recognised.
        return {
            "success": False,
            "error": (f"unknown scope {scope!r} — expected one of 'all', 'embeddings', 'phrases'"),
        }

    outcome = await graph.backfill(
        embeddings=want_embeddings,
        phrases=want_phrases,
        batch_size=batch_size,
        max_nodes=max_nodes,
    )

    report: dict[str, Any] = {"success": True, "scope": scope}
    report["scanned"] = outcome.scanned
    report["embeddings_filled"] = outcome.embeddings_filled
    report["phrases_linked"] = outcome.phrases_linked
    report["skipped_no_text"] = outcome.skipped_no_text
    report["elapsed_ms"] = round(outcome.elapsed_ms, 1)
    report["errors"] = outcome.errors
    return report


# --- Ingest Tools ---


@server.tool()
async def knowledge_add_document(
    title: str,
    content: str,
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
    tags: str = "",
    source: str = "",
) -> dict[str, Any]:
    """Add a long document to the graph with automatic chunking.

    Short documents become a single node; long documents are split at
    sentence boundaries and connected with NEXT_CHUNK edges so the
    search pipeline can surface context around a hit.

    Args:
        title: Document title (becomes the node title and a chunk prefix).
        content: Full document text.
        chunk_size: Max characters per chunk (default 1000).
        chunk_overlap: Overlapping characters between adjacent chunks (default 200).
        tags: Comma-separated tags.
        source: Origin identifier (e.g. "manual:admin-guide", "url:https://...").
    """
    graph = await _ensure_graph()

    # An empty `tags` string means "no tags supplied" (None); otherwise
    # split on commas and drop blank pieces.
    parsed_tags = None
    if tags:
        parsed_tags = [piece for piece in (raw.strip() for raw in tags.split(",")) if piece]

    created = await graph.add_document(
        title=title,
        content=content,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        tags=parsed_tags,
        source=source,
    )

    # Report the first chunk's id, or None when nothing was created.
    first_node_id = None
    if created:
        first_node_id = created[0].id
    return {
        "success": True,
        "title": title,
        "chunks": len(created),
        "first_node_id": first_node_id,
    }


@server.tool()
async def knowledge_add_table(
    table_name: str,
    columns: list[dict[str, str]],
    rows: list[dict[str, Any]],
    primary_key: str = "id",
    foreign_keys: dict[str, list[str]] | None = None,
    tags: str = "",
    source: str = "",
) -> dict[str, Any]:
    """Ingest a structured table into the graph.

    Each row becomes an ENTITY node and foreign keys become RELATED
    edges to the referenced table's rows. The table schema is
    auto-registered in the ontology so downstream filter / aggregate /
    join tools work immediately.

    Args:
        table_name: Logical table name (used for ontology type + node titles).
        columns: Column definitions, e.g. ``[{"name": "id", "type": "int"}, ...]``.
        rows: Row data, e.g. ``[{"id": 1, "name": "..."}, ...]``.
        primary_key: Primary key column (default "id").
        foreign_keys: Mapping ``{"col": ["target_table", "target_col"]}``.
            JSON-friendly shape — ``["target_table", "target_col"]`` is
            converted to a tuple internally. A one-element list
            ``["target_table"]`` targets that table's ``"id"`` column.
            An empty list is rejected with an error response.
        tags: Comma-separated tags.
        source: Origin identifier.
    """
    graph = await _ensure_graph()
    tag_list = [t.strip() for t in tags.split(",") if t.strip()] if tags else None

    fk_map: dict[str, tuple[str, str]] | None = None
    if foreign_keys:
        fk_map = {}
        for col, target in foreign_keys.items():
            if not target:
                # Previously this raised IndexError on `target[0]`;
                # report the malformed entry instead of crashing.
                return {
                    "success": False,
                    "error": (
                        f"foreign_keys[{col!r}] is empty — expected "
                        "[target_table, target_col]"
                    ),
                }
            # The target column defaults to "id" when only the table is given.
            target_col = target[1] if len(target) >= 2 else "id"
            fk_map[col] = (target[0], target_col)

    nodes = await graph.add_table(
        table_name,
        columns,
        rows,
        primary_key=primary_key,
        foreign_keys=fk_map,
        tags=tag_list,
        source=source,
    )
    return {
        "success": True,
        "table_name": table_name,
        "rows_ingested": len(nodes),
        "fk_count": len(fk_map) if fk_map else 0,
    }


@server.tool()
async def knowledge_add_chunks(
    chunks: list[dict[str, Any]],
    default_source: str = "",
) -> dict[str, Any]:
    """Ingest pre-chunked content (BYO-chunker workflow).

    Use when you have already split a document with your own parser
    (LangChain, Unstructured, custom OCR, ...) and want to hand the
    chunks directly to the graph. Each chunk dict should contain:

    - ``title`` (required): Node title for the chunk.
    - ``content`` (required): Chunk text.
    - ``tags`` (optional): List of tag strings.
    - ``source`` (optional): Per-chunk source identifier. Falls back
      to ``default_source`` when omitted.
    - ``properties`` (optional): Extra string→string metadata.
    """
    graph = await _ensure_graph()

    created_ids: list[str] = []
    problems: list[str] = []
    for position, item in enumerate(chunks):
        chunk_title = item.get("title")
        chunk_body = item.get("content")
        if not (chunk_title and chunk_body):
            # Record the problem and keep going with the remaining chunks.
            problems.append(f"chunk[{position}]: missing title or content")
            continue

        created = await graph.add(
            title=chunk_title,
            content=chunk_body,
            tags=item.get("tags"),
            source=item.get("source") or default_source,
            properties=item.get("properties"),
        )
        created_ids.append(created.id)

    # The response stays "success": True even when some chunks were
    # rejected; the rejected ones are listed under "errors".
    return {
        "success": True,
        "chunks_added": len(created_ids),
        "errors": problems,
        "first_node_id": created_ids[0] if created_ids else None,
    }


def _inspect_path(path: str) -> dict[str, Any]:
    """Describe a filesystem path so the ingest tool can pick a route.

    This is deliberately a plain synchronous function: the filesystem
    access lives here rather than in the async tool body (ruff
    ASYNC230/ASYNC240).

    Args:
        path: Filesystem path to examine.

    Returns:
        ``{"exists": False}`` when nothing is at ``path``; otherwise a
        dict with ``exists``, ``is_file``, the lower-cased ``suffix``,
        the ``stem`` and the path rendered as a string.
    """
    from pathlib import Path

    candidate = Path(path)
    description: dict[str, Any] = {"exists": candidate.exists()}
    if description["exists"]:
        description["is_file"] = candidate.is_file()
        description["suffix"] = candidate.suffix.lower()
        description["stem"] = candidate.stem
        description["path"] = str(candidate)
    return description


def _read_csv_rows(path: str) -> list[dict[str, str]]:
    """Sync helper: read a CSV file into a list of dict rows.

    The first line supplies the column names (``csv.DictReader``).

    Args:
        path: Path of the CSV file to read.

    Returns:
        One dict per data row, in file order; empty for an empty file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    import csv
    from pathlib import Path

    # ``utf-8-sig`` drops a leading byte-order mark (common in
    # spreadsheet exports) so the first column name is not prefixed with
    # U+FEFF; files without a BOM decode exactly as with plain UTF-8.
    # ``newline=""`` is what the csv module requires so that line breaks
    # embedded in quoted fields are preserved as written.
    with Path(path).open(encoding="utf-8-sig", newline="") as f:
        return [dict(r) for r in csv.DictReader(f)]


def _read_jsonl_records(path: str) -> list[dict[str, Any]]:
    """Sync helper: read a JSONL file into a list of record dicts.

    Reading is best-effort: blank lines, lines that are not valid JSON,
    and lines whose JSON value is not an object are skipped, so every
    returned item is a dict as the return type promises.

    Args:
        path: Path of the JSONL file to read.

    Returns:
        The JSON objects found in the file, in file order.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    import json
    from pathlib import Path

    records: list[dict[str, Any]] = []
    # ``utf-8-sig`` strips a leading byte-order mark; with plain UTF-8
    # the BOM stays glued to the first line, which then fails to parse
    # and the first record is silently lost.
    with Path(path).open(encoding="utf-8-sig") as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                continue
            try:
                parsed = json.loads(stripped)
            except json.JSONDecodeError:
                # Deliberately tolerant: one bad line must not abort the file.
                continue
            # Arrays, numbers, strings etc. are valid JSON but not records.
            if isinstance(parsed, dict):
                records.append(parsed)
    return records


def _read_text_file(path: str) -> str:
    """Return the whole file at ``path`` decoded as UTF-8.

    Bytes that are not valid UTF-8 are substituted with the replacement
    character instead of raising, so decoding itself never fails.
    """
    from pathlib import Path

    with Path(path).open(mode="r", encoding="utf-8", errors="replace") as handle:
        return handle.read()


@server.tool()
async def knowledge_ingest_path(
    path: str,
    source: str = "",
) -> dict[str, Any]:
    """Ingest a file from the local filesystem into the *current* graph.

    Handles CSV, JSONL, and plain text files. For directories or
    office files (PDF/DOCX/...), use ``SynapticGraph.from_data()``
    from a CLI script and point synaptic-mcp at the resulting ``.db``.

    The MCP server and the MCP client must share a filesystem for
    this tool to be useful.

    A file that cannot be read or decoded yields
    ``{"success": False, "error": ...}`` for every supported format.

    Args:
        path: Absolute filesystem path.
        source: Source identifier attached to every new node.
    """
    graph = await _ensure_graph()
    info = _inspect_path(path)
    if not info["exists"]:
        return {"success": False, "error": f"path not found: {path}"}

    if not info["is_file"]:
        # Something exists at the path but it is not a regular file.
        return {
            "success": False,
            "error": (
                "directory ingest not supported from MCP yet — "
                "run a CLI job with SynapticGraph.from_data() and point "
                "synaptic-mcp at the resulting .db file"
            ),
        }

    file_path = info["path"]
    # Nodes are attributed to the caller's source, else to the file path.
    node_source = source or file_path

    if info["suffix"] == ".csv":
        import csv

        from synaptic.extensions.table_ingester import TableIngester

        try:
            rows = _read_csv_rows(file_path)
        except (OSError, UnicodeDecodeError, csv.Error) as exc:
            # Same error shape as the plain-text branch below.
            return {"success": False, "error": f"cannot read {path}: {exc}"}
        if not rows:
            return {"success": True, "format": "csv", "rows": 0}
        # CSV carries no type information, so every column is a string.
        columns = [{"name": k, "type": "str"} for k in rows[0]]
        ingester = TableIngester()
        nodes = await ingester.ingest(
            graph,
            info["stem"],
            columns,
            rows,
            source=node_source,
        )
        return {
            "success": True,
            "format": "csv",
            "table_name": info["stem"],
            "rows": len(nodes),
        }

    if info["suffix"] == ".jsonl":
        try:
            records = _read_jsonl_records(file_path)
        except (OSError, UnicodeDecodeError) as exc:
            return {"success": False, "error": f"cannot read {path}: {exc}"}
        count = 0
        for obj in records:
            if not isinstance(obj, dict):
                # Valid JSON that is not an object has no title/content.
                continue
            title = obj.get("title") or obj.get("id") or f"doc-{count}"
            content = obj.get("content") or obj.get("text") or ""
            if not content:
                continue
            await graph.add_document(
                title=str(title),
                content=str(content),
                source=node_source,
            )
            count += 1
        return {"success": True, "format": "jsonl", "documents": count}

    # Every other regular file is treated as plain text.
    try:
        text = _read_text_file(file_path)
    except (OSError, UnicodeDecodeError) as exc:
        return {"success": False, "error": f"cannot read {path}: {exc}"}
    if not text.strip():
        return {"success": True, "format": "text", "documents": 0}
    nodes = await graph.add_document(
        title=info["stem"],
        content=text,
        source=node_source,
    )
    return {
        "success": True,
        "format": "text",
        "title": info["stem"],
        "chunks": len(nodes),
    }


@server.tool()
async def knowledge_remove(node_id: str) -> dict[str, Any]:
    """Delete a single node and cascade-remove its edges.

    Use when a node was ingested incorrectly or is stale. Bulk
    deletion is intentionally not exposed — for large cleanups
    drop the graph file and re-ingest.
    """
    graph = await _ensure_graph()
    # The graph's own result is passed through as the success flag.
    was_removed = await graph.remove(node_id)
    return dict(success=was_removed, node_id=node_id)


@server.tool()
async def knowledge_sync_from_database(
    connection_string: str = "",
    tables: list[str] | None = None,
) -> dict[str, Any]:
    """Incrementally sync the graph with a live database (CDC).

    First call on a fresh graph seeds the sync state and does a
    deterministic full load; subsequent calls read only rows whose
    change column advanced past the last watermark (or whose row
    hash changed, for tables without an ``updated_at``-style
    column). Tables without a primary key in the source schema are
    skipped with a clear error entry.

    Args:
        connection_string: Source database DSN. Falls back to
            ``--source-dsn`` passed on the command line when omitted.
            Supports ``sqlite://``, ``postgresql://``, ``mysql://``.
        tables: Optional allow-list of table names. Empty / null
            means sync every table in the source schema.
    """
    graph = await _ensure_graph()

    # An explicit argument wins over the module-level default DSN.
    source_dsn = connection_string if connection_string else _source_dsn
    if not source_dsn:
        return {
            "success": False,
            "error": (
                "no source DSN — either pass connection_string or start "
                "synaptic-mcp with --source-dsn"
            ),
        }

    def _table_report(entry: Any) -> dict[str, Any]:
        """Flatten one per-table sync result into a plain dict."""
        return {
            "table": entry.table,
            "strategy": entry.strategy,
            "added": entry.added,
            "updated": entry.updated,
            "deleted": entry.deleted,
            "fk_edges_added": entry.fk_edges_added,
            "fk_edges_removed": entry.fk_edges_removed,
            "error": entry.error,
        }

    outcome = await graph.sync_from_database(source_dsn, tables=tables)

    report: dict[str, Any] = {"success": True}
    report["added"] = outcome.added
    report["updated"] = outcome.updated
    report["deleted"] = outcome.deleted
    report["elapsed_ms"] = round(outcome.elapsed_ms, 1)
    report["tables"] = [_table_report(entry) for entry in outcome.tables]
    return report


# --- Agent Workflow Tools ---


@server.tool()
async def agent_start_session(
    agent_id: str = "",
    description: str = "",
) -> dict[str, Any]:
    """Start an agent work session. All subsequent actions can be linked to this session.

    Args:
        agent_id: Identifier for the agent (e.g. "claude-code", "deploy-bot")
        description: What this session is about
    """
    tracker = await _ensure_tracker()
    opened = await tracker.start_session(agent_id=agent_id, description=description)
    # Echo the caller's agent_id next to the newly issued session id.
    return dict(success=True, session_id=opened.id, agent_id=agent_id)


@server.tool()
async def agent_log_action(
    session_id: str,
    tool_name: str,
    result: str = "",
    parameters: str = "",
    success: bool = True,
    duration_ms: float = 0.0,
) -> dict[str, Any]:
    """Log a tool call or action within an agent session.

    Args:
        session_id: Session ID from agent_start_session
        tool_name: Name of the tool that was called
        result: Summary of the tool's output
        parameters: JSON string of parameters passed to the tool
        success: Whether the tool call succeeded
        duration_ms: How long the tool call took in milliseconds

    Returns:
        ``{"success": True, "node_id": ..., "tool_name": ...}`` once the
        call has been logged, or ``{"success": False, "error": ...}``
        when ``parameters`` is not valid JSON (nothing is logged then).
    """
    import json as _json

    tracker = await _ensure_tracker()

    # ``parameters`` is free-form caller text: an empty string means
    # "no parameters", and malformed JSON is reported as a structured
    # error instead of escaping as an unhandled exception.
    params = None
    if parameters:
        try:
            params = _json.loads(parameters)
        except _json.JSONDecodeError as exc:
            return {
                "success": False,
                "error": f"parameters is not valid JSON: {exc}",
            }

    node = await tracker.log_tool_call(
        session_id,
        tool_name=tool_name,
        parameters=params,
        result=result,
        success=success,
        duration_ms=duration_ms,
    )
    return {
        "success": True,
        "node_id": node.id,
        "tool_name": tool_name,
    }


@server.tool()
async def agent_record_decision(
    session_id: str,
    title: str,
    rationale: str,
    alternatives: str = "",
    context_node_ids: str = "",
) -> dict[str, Any]:
    """Record a decision made by the agent with rationale and considered alternatives.

    Args:
        session_id: Session ID from agent_start_session
        title: What was decided
        rationale: Why this choice was made
        alternatives: Comma-separated list of alternatives that were considered
        context_node_ids: Comma-separated IDs of related knowledge nodes
    """

    def _csv_items(raw: str) -> list[str] | None:
        # Empty input is passed on as None; any other input yields its
        # trimmed, non-blank comma-separated entries (possibly an empty list).
        if not raw:
            return None
        trimmed = (piece.strip() for piece in raw.split(","))
        return [piece for piece in trimmed if piece]

    tracker = await _ensure_tracker()
    decision = await tracker.record_decision(
        session_id,
        title=title,
        rationale=rationale,
        alternatives=_csv_items(alternatives),
        context_node_ids=_csv_items(context_node_ids),
    )
    return {"success": True, "decision_id": decision.id, "title": title}


@server.tool()
async def agent_record_outcome(
    decision_id: str,
    title: str,
    content: str,
    success: bool = True,
) -> dict[str, Any]:
    """Record the outcome of a previous decision. Triggers Hebbian learning.

    Args:
        decision_id: ID of the decision this outcome relates to
        title: Short summary of the outcome
        content: Detailed description of what happened
        success: Whether the outcome was positive
    """
    outcome_tracker = await _ensure_tracker()
    recorded = await outcome_tracker.record_outcome(
        decision_id,
        title=title,
        content=content,
        success=success,
    )

    # Map the success flag onto the label reported back to the caller.
    if success:
        label = "success"
    else:
        label = "failure"

    response: dict[str, Any] = {"success": True}
    response["outcome_id"] = recorded.id
    response["decision_id"] = decision_id
    response["outcome"] = label
    return response


# --- Semantic Search Tools ---


@server.tool()
async def agent_find_similar(
    query: str,
    intent: str = "general",
    context_tags: str = "",
    limit: int = 10,
) -> dict[str, Any]:
    """Search knowledge with agent-aware intent for smarter results.

    Intents:
    - similar_decisions: find past decisions on similar problems
    - past_failures: find what went wrong before
    - related_rules: find governing rules and constraints
    - reasoning_chain: follow decision → outcome → lesson paths
    - context_explore: explore neighborhood of a topic
    - general: standard hybrid search

    Args:
        query: Search query (Korean or English)
        intent: Search intent (see above)
        context_tags: Comma-separated tags for context-aware ranking
        limit: Maximum results
    """
    graph = await _ensure_graph()
    tags = [t.strip() for t in context_tags.split(",") if t.strip()] if context_tags else None

    try:
        result = await graph.agent_search(