diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index 76d3bfe..e67423d 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -2,7 +2,17 @@
"permissions": {
"allow": [
"Bash(git push -u origin claude/determined-volhard)",
- "Bash(git add .)"
+ "Bash(git add .)",
+ "Bash(git add -A)",
+ "Bash(git commit:*)",
+ "Bash(git push)",
+ "Bash(python3 -m pytest tests/ -q)",
+ "Bash(pip3 install pytest -q)",
+ "Bash(python3 -m venv /tmp/wisdom-test-venv)",
+ "Bash(/tmp/wisdom-test-venv/bin/pip install pytest -q)",
+ "Bash(/tmp/wisdom-test-venv/bin/python -m pytest tests/ -q)",
+ "Bash(chmod +x /Users/chinkeonglam/wisdomGraph/.claude/worktrees/determined-volhard/ci.sh)",
+ "Bash(git rm -r --cached tests/__pycache__ wisdom/__pycache__)"
]
}
}
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..b08bee6
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,83 @@
+name: CI
+
+on:
+  push:
+    branches: ["main", "claude/**"]
+    tags: ["v*"]
+  pull_request:
+    branches: ["main"]
+
+# Least privilege by default; the publish job opts in to OIDC below.
+permissions:
+  contents: read
+
+jobs:
+  # Run the unit-test suite on every interpreter in the matrix.
+  test:
+    name: Unit tests (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # Quoted so YAML does not read 3.10 as the float 3.1.
+        python-version: ["3.10", "3.11", "3.12"]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: pip install pytest
+
+      - name: Run unit tests
+        run: python -m pytest tests/ -v --tb=short
+
+  # Build the wheel and sdist once and hand them on as the "dist" artifact.
+  package:
+    name: Build package
+    runs-on: ubuntu-latest
+    needs: test
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install build tools
+        run: pip install build
+
+      - name: Build wheel + sdist
+        run: python -m build
+
+      - name: Upload dist artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/
+          if-no-files-found: error
+
+  # Tag pushes only: publish exactly the files the package job built.
+  publish:
+    name: Publish to PyPI
+    runs-on: ubuntu-latest
+    needs: [test, package]
+    if: startsWith(github.ref, 'refs/tags/v')
+    environment: pypi
+    permissions:
+      id-token: write  # OIDC token for PyPI trusted publishing
+
+    steps:
+      - name: Download dist artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: dist
+          path: dist/
+
+      - name: Publish to PyPI (trusted publishing)
+        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..66625e6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,19 @@
+# Python bytecode and test caches
+__pycache__/
+*.pyc
+*.pyo
+.pytest_cache/
+
+# Virtual environments
+.venv/
+venv/
+
+# Packaging output
+*.egg-info/
+dist/
+build/
+
+# Generated locally; not part of the source tree
+wisdom-out/
+.wisdom/
+*.tmp
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..8d4a8c8
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 wisdomGraph contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index 00a02ec..adeff31 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,7 @@
[English](README.md) | [简体中文](README.zh-CN.md)
+[![PyPI](https://img.shields.io/pypi/v/wisdomgraph)](https://pypi.org/project/wisdomgraph/)
[](LICENSE)
[](https://neo4j.com)
[](https://claude.ai/code)
diff --git a/README.zh-CN.md b/README.zh-CN.md
new file mode 100644
index 0000000..a8fa69f
--- /dev/null
+++ b/README.zh-CN.md
@@ -0,0 +1,250 @@
+# wisdomGraph
+
+[English](README.md) | [简体中文](README.zh-CN.md)
+
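+[![PyPI](https://img.shields.io/pypi/v/wisdomgraph)](https://pypi.org/project/wisdomgraph/)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
+[![Neo4j](https://img.shields.io/badge/Neo4j-graph%20database-blue)](https://neo4j.com)
+[![Claude Code](https://img.shields.io/badge/Claude%20Code-skill-orange)](https://claude.ai/code)
+[![OpenClaw](https://img.shields.io/badge/OpenClaw-skill-purple)](https://openclaw.ai)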
+
+> **graphify 给你快照。wisdomGraph 给你复利增长的记忆。**
+
+在 Claude Code 或 OpenClaw 中输入 `/wisdom`。把你的代码库、笔记、论文、对话喂给它 —— 每次运行都会**合并**进一个活跃的 Neo4j 图谱。图谱不会重置,只会积累。事实变成模式,模式变成洞察,洞察变成智慧。
+
+```
+/wisdom . # 将当前项目吸收进智慧图谱
+/wisdom ask "我所有项目中有哪些反复出现的模式?"
+/wisdom reflect # 启动 DIKW 晋升,形成智慧闭环
+```
+
+---
+
+## 相较于 graphify 的质变
+
+graphify 在其定位上做得很好:把一个文件夹变成知识图谱快照。跑一次,生成 `graph.json` 和 `GRAPH_REPORT.md`,读完,下次会话从头开始。
+
+wisdomGraph 做的是根本不同的事。
+
+| | graphify | wisdomGraph |
+|---|---|---|
+| **存储** | `graph.json` 文件(每个项目独立) | Neo4j(持久化,跨所有项目) |
+| **节点类型** | 扁平(代码实体、概念) | DIKW 分层:知识 / 经验 / 洞察 / 智慧 |
+| **每次运行** | 快照,覆盖写入 | MERGE —— 每次运行都在扩张图谱 |
+| **查询方式** | 读取 GRAPH_REPORT.md | 运行时实时 Cypher 遍历 |
+| **记忆** | 每次会话重置 | 跨会话、跨项目、跨月份积累 |
+| **推理** | Leiden 社区检测(拓扑) | 图路径遍历 + DIKW 层次 |
+| **反馈闭环** | 无 | 智慧 → 知识(神经可塑性) |
+| **数据库** | 不需要 | Neo4j Aura(免费)或 DozerDB Docker |
+
+这个差异不是量变,而是质变。graphify 把代码库压缩成可读报告;wisdomGraph 构建的是一套人工认识论 —— 能记忆、能关联、能成长。
+
+---
+
+## DIKW 金字塔,工程化落地
+
+人类专家不是把事实平铺存储的,他们按层次组织经验:
+
+```
+智慧(Wisdom) ← 从模式中提炼出的可执行原则
+ ↑
+洞察(Insight) ← 从多次经验中发现的规律
+ ↑
+经验(Experience)← 有上下文的事件、决策与结果
+ ↑
+知识(Knowledge) ← 已验证的事实、文档行为、提取的结构
+```
+
+wisdomGraph 中每个节点都带有 `tier` 标签。图谱的拓扑结构**就是**认知架构本身。当你提问时,Cypher 沿层级向上遍历 —— 不是关键词匹配扁平文本,而是跨越亲历经验的推理。
+
+反馈闭环至关重要:当某个智慧节点被查询并确认有效时,它会强化连接的知识节点。图谱在学习什么重要。
+
+---
+
+## 安装
+
+**环境要求:** Python 3.10+ 以及以下之一:[Claude Code](https://claude.ai/code)、[OpenClaw](https://openclaw.ai)
+
+**加上以下之一:** [Neo4j Aura 免费版](https://neo4j.com/cloud/platform/aura-graph-database/)(云端,无需安装)或 [DozerDB](https://dozerdb.org)(本地 Docker,含 APOC)
+
+```bash
+pip install wisdomgraph && wisdom install
+```
+
+### 方案 A — Neo4j Aura(零基础设施,推荐个人用户)
+
+1. 在 [neo4j.com/cloud/aura](https://neo4j.com/cloud/aura) 注册免费账号
+2. 创建一个免费的 AuraDB 实例,复制连接 URI 和密码
+3. 运行:
+
+```bash
+wisdom connect bolt+s://xxxxxxxx.databases.neo4j.io --user neo4j --password <你的密码>
+```
+
+免费额度:20 万节点,够用好几年。
+
+### 方案 B — DozerDB 本地 Docker(完全掌控,含 APOC)
+
+```bash
+wisdom docker up # 拉取 graphstack/dozerdb:5.26.3.0 并启动
+wisdom connect bolt://localhost:7687 --user neo4j --password password
+```
+
+打开 [localhost:7474](http://localhost:7474) —— Neo4j Browser 是你俯瞰智慧图谱的可视化窗口。
+
+---
+
+## 平台支持
+
+| 平台 | 安装命令 |
+|------|---------|
+| Claude Code (Linux/Mac) | `wisdom install` |
+| Claude Code (Windows) | `wisdom install --platform windows` |
+| OpenClaw | `wisdom install --platform claw` |
+
+然后打开你的 AI 编程助手,输入:
+
+```
+/wisdom .
+```
+
+---
+
+## 使用方式
+
+```
+/wisdom # 吸收当前目录
+/wisdom ./raw # 吸收指定文件夹
+/wisdom ./raw --mode deep # 激进模式,提取更多 INFERRED 边
+/wisdom ./raw --update # 只重新吸收变更文件,MERGE 进图谱
+
+/wisdom add https://arxiv.org/abs/1706.03762 # 吸收一篇论文
+/wisdom add https://x.com/... # 吸收一条推文
+/wisdom add https://... --author "姓名" # 标注来源作者
+
+/wisdom ask "我所有项目中有哪些反复出现的模式?"
+/wisdom ask "我对认证流程了解多少?"
+/wisdom ask "从 attention 到 optimizer 的路径是什么?"
+/wisdom ask "..." --tier wisdom # 只遍历智慧层节点
+
+/wisdom reflect # 运行 DIKW 晋升:知识→经验→洞察→智慧
+/wisdom reflect --project ./raw # 只对该语料库进行反思
+
+/wisdom path "DigestAuth" "OAuth" # 两个概念之间的最短路径
+/wisdom explain "CausalSelfAttention" # 某节点的完整 DIKW 上下文
+/wisdom god-nodes # 所有项目中连接度最高的概念
+
+/wisdom export --cypher # 导出为 Cypher 语句
+/wisdom export --json # 导出 graph.json(与 graphify 兼容)
+/wisdom export --obsidian # 导出 Obsidian 知识库
+
+/wisdom status # 各层节点统计
+/wisdom purge --project ./raw # 删除单个语料库的节点,不影响其他
+```
+
+---
+
+## 智慧如何复利积累
+
+**第 1 次运行** —— 吸收你的 auth 库:
+```
+知识:JWT、session token、cookie flags、PKCE flow
+经验:(暂无 —— 只有一个来源)
+```
+
+**第 2 次运行** —— 吸收另一个项目的 auth:
+```
+知识:JWT、PKCE —— MERGE 去重,增加来源链接
+经验:两个不同实现,检测到相同模式
+洞察:JWT + PKCE 是你工作中收敛的模式
+```
+
+**第 3 次运行** —— `/wisdom reflect`:
+```
+智慧:"API 用无状态 JWT,浏览器端用 PKCE flow。
+ 这个模式在 3 个项目中落地,从未出过问题。"
+```
+
+**第 4 次运行** —— `/wisdom ask "新服务的认证方案怎么定?"`:
+```
+遍历路径:知识 → 经验 → 洞察 → 智慧
+返回结果:你自己经过实战验证的原则,根植于你真实的代码历史
+```
+
+这不是 RAG,不是摘要,而是图谱遍历你积累的经验,把**你自己的智慧还给你**。
+
+---
+
+## 图谱 Schema
+
+```cypher
+// DIKW 节点标签
+(:Knowledge {id, label, content, source_file, confidence, timestamp, project})
+(:Experience {id, label, content, context, outcome, timestamp, project})
+(:Insight {id, label, content, pattern_strength, source_count, timestamp})
+(:Wisdom {id, label, principle, confidence, reinforcement_count, timestamp})
+
+// 关系类型
+(Knowledge)-[:GROUNDS]->(Experience)
+(Experience)-[:REVEALS]->(Insight)
+(Insight)-[:CRYSTALLIZES_INTO]->(Wisdom)
+(Wisdom)-[:REINFORCES]->(Knowledge) // 反馈闭环 —— 图谱在学习
+
+(Knowledge)-[:SEMANTICALLY_SIMILAR_TO]->(Knowledge)
+(Insight)-[:CONTRADICTS]->(Insight) // 张力浮现,需要反思
+(any)-[:SOURCED_FROM]->(Source {uri, author, ingested_at})
+```
+
+置信度沿图谱向上流动。8 个经验支撑的洞察比 2 个支撑的模式强度更高。智慧节点追踪 `reinforcement_count` —— 遍历确认该原则有效的次数。
+
+---
+
+## 你能得到什么
+
+**跨项目神节点** —— 跨越*所有*项目和语料库的核心概念,而不仅是单个仓库的。
+
+**矛盾检测** —— 两个洞察方向相反时,以 `CONTRADICTS` 边的形式浮现。图谱展示冲突,由你解决,形成更好的智慧。
+
+**时间衰减** —— 节点带时间戳。长时间未被强化的旧知识会被标记。图谱优雅地老化,如同专家的记忆。
+
+**完整溯源链** —— 每个节点关联到其 `Source`。`/wisdom explain "节点名"` 返回完整 DIKW 路径:事实 → 上下文 → 模式 → 原则。
+
+---
+
+## 部署方案对比
+
+| | Aura 免费版 | DozerDB 本地 |
+|---|---|---|
+| **配置** | 3 步点击 + URI | 1 条 docker 命令 |
+| **费用** | 免费(20 万节点) | 永久免费 |
+| **APOC** | 可用 | 内置 |
+| **数据位置** | Neo4j 云端 | 你自己的机器 |
+| **可视化** | neo4j.com 控制台 | localhost:7474 |
+| **适合** | 快速上手、个人用户 | 团队、离线、完全掌控 |
+
+---
+
+## 隐私说明
+
+wisdomGraph 将文件内容发送给你的 AI 编程助手的底层模型 API 进行语义提取 —— Anthropic(Claude Code)或你所在平台使用的任何模型。代码文件通过 tree-sitter AST 在本地处理,不会发送到外部。所有图谱数据存储在*你的* Neo4j 实例中(Aura 或本地)。无遥测、无使用追踪、无任何形式的数据分析。
+
+---
+
+## 技术栈
+
+Neo4j(Aura 或 DozerDB)+ tree-sitter + APOC。语义提取通过 Claude(Claude Code)或你平台的模型完成。图数据库就是智能层 —— 遍历、路径查找和社区检测通过 Neo4j GDS(图数据科学库)原生 Cypher 运行。
+
+---
+
+
+## 贡献指南
+
+**工作示例**是最有说服力的贡献。在真实的多项目语料库上跑 `/wisdom`,让它反思几轮,记录涌现出哪些智慧节点、是否与你的直觉吻合。提交到 `worked/{slug}/`。
+
+**Schema 提案** —— 如果你有捕捉当前 Schema 遗漏语义的关系类型,欢迎提 issue,附上 Cypher 模式和工作示例。
+
+**DIKW 晋升启发式** —— 更好的知识→经验→洞察→智慧晋升提示词或规则。晋升逻辑是系统的核心。
+
+详见 [ARCHITECTURE.md](ARCHITECTURE.md) 了解完整流水线设计、Cypher Schema 和如何扩展 DIKW 层次。
+
+
diff --git a/ci.sh b/ci.sh
new file mode 100755
index 0000000..255005f
--- /dev/null
+++ b/ci.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+# wisdomGraph CI — unit tests (no Neo4j required)
+#
+# Creates a local .venv, installs pytest into it, and runs the tests/ suite.
+# Can be invoked from any directory.
+set -euo pipefail
+
+# Resolve paths relative to this script so .venv and tests/ are always found.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+echo "==> Creating venv"
+python3 -m venv .venv
+source .venv/bin/activate
+
+echo "==> Installing dependencies"
+pip install pytest -q
+
+echo "==> Running tests"
+python -m pytest tests/ -q
+
+echo "==> All tests passed"
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..2850504
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,67 @@
+[build-system]
+# The PEP 639 metadata below (SPDX `license` string + `license-files`)
+# is only understood by setuptools 77 or newer.
+requires = ["setuptools>=77.0.3"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "wisdomgraph"
+version = "0.1.0"
+description = "Accumulative Neo4j-native DIKW wisdom memory for AI coding assistants (Claude Code, OpenClaw)"
+readme = "README.md"
+license = "MIT"
+license-files = ["LICENSE"]
+keywords = [
+    "claude", "claude-code", "openclaw", "neo4j", "knowledge-graph",
+    "graphrag", "dikw", "wisdom", "memory", "accumulative", "llm",
+    "skill", "agent-memory", "dozerdb",
+]
+requires-python = ">=3.10"
+dependencies = [
+    "neo4j>=5.0",
+]
+
+[project.urls]
+Homepage = "https://github.com/cklam12345/wisdomGraph"
+Repository = "https://github.com/cklam12345/wisdomGraph"
+Issues = "https://github.com/cklam12345/wisdomGraph/issues"
+
+[project.optional-dependencies]
+# tree-sitter core plus one grammar package per language.
+ast = [
+    "tree-sitter",
+    "tree-sitter-python",
+    "tree-sitter-javascript",
+    "tree-sitter-typescript",
+    "tree-sitter-go",
+    "tree-sitter-rust",
+    "tree-sitter-java",
+    "tree-sitter-c",
+    "tree-sitter-cpp",
+    "tree-sitter-ruby",
+    "tree-sitter-c-sharp",
+    "tree-sitter-kotlin",
+    "tree-sitter-scala",
+    "tree-sitter-php",
+    "tree-sitter-swift",
+    "tree-sitter-lua",
+    "tree-sitter-zig",
+    "tree-sitter-powershell",
+    "tree-sitter-elixir",
+    "tree-sitter-objc",
+]
+pdf = ["pypdf", "html2text"]
+office = ["python-docx", "openpyxl"]
+# Union of the extras above, expressed by self-reference so the lists
+# cannot drift apart (needs pip 21.2+ to resolve).
+all = ["wisdomgraph[ast,pdf,office]"]
+
+[project.scripts]
+wisdom = "wisdom.__main__:main"
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["wisdom*"]
+
+[tool.setuptools.package-data]
+wisdom = ["skill.md", "skill-claw.md", "skill-windows.md"]
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_cache.py b/tests/test_cache.py
new file mode 100644
index 0000000..957d955
--- /dev/null
+++ b/tests/test_cache.py
@@ -0,0 +1,74 @@
+"""Tests for wisdom/cache.py"""
+import pytest
+from pathlib import Path
+from wisdom.cache import file_hash, load_cached, save_cached, check_cache, save_extractions
+
+
+def test_file_hash_consistent(tmp_path):
+    """Hashing an unchanged file twice gives the same value."""
+    target = tmp_path / "test.py"
+    target.write_text("hello world")
+    first = file_hash(target)
+    second = file_hash(target)
+    assert first == second
+
+
+def test_file_hash_changes_on_content(tmp_path):
+    """Rewriting the file's content changes its hash."""
+    target = tmp_path / "test.py"
+    target.write_text("version 1")
+    before = file_hash(target)
+    target.write_text("version 2")
+    after = file_hash(target)
+    assert before != after
+
+
+def test_load_cached_miss(tmp_path):
+    """A file that was never cached loads as None."""
+    target = tmp_path / "test.py"
+    target.write_text("hello")
+    cached = load_cached(target, root=tmp_path)
+    assert cached is None
+
+
+def test_save_and_load_cached(tmp_path):
+    """A saved extraction is returned unchanged by load_cached."""
+    target = tmp_path / "test.py"
+    target.write_text("def foo(): pass")
+    extraction = {"nodes": [{"id": "foo", "label": "foo"}], "edges": []}
+    save_cached(target, extraction, root=tmp_path)
+    assert load_cached(target, root=tmp_path) == extraction
+
+
+def test_cache_invalidated_on_change(tmp_path):
+    """Editing the file after caching turns the entry into a miss."""
+    target = tmp_path / "test.py"
+    target.write_text("original")
+    save_cached(target, {"nodes": [], "edges": []}, root=tmp_path)
+    target.write_text("changed")
+    assert load_cached(target, root=tmp_path) is None
+
+
+def test_check_cache_splits(tmp_path):
+    """check_cache separates cached paths from uncached ones."""
+    hit = tmp_path / "cached.py"
+    hit.write_text("x = 1")
+    miss = tmp_path / "uncached.py"
+    miss.write_text("y = 2")
+
+    extraction = {"nodes": [{"id": "x"}], "edges": [], "source_file": str(hit)}
+    save_cached(hit, extraction, root=tmp_path)
+
+    cached, uncached = check_cache([str(hit), str(miss)], root=tmp_path)
+    assert len(cached) == 1
+    assert str(miss) in uncached
+
+
+def test_save_extractions(tmp_path):
+    """save_extractions reports one entry written and makes it loadable."""
+    src = tmp_path / "src.py"
+    src.write_text("code")
+    extraction = {"nodes": [{"id": "n1"}], "edges": [], "source_file": str(src)}
+    written = save_extractions([extraction], root=tmp_path)
+    assert written == 1
+    assert load_cached(src, root=tmp_path) is not None
diff --git a/tests/test_classify.py b/tests/test_classify.py
new file mode 100644
index 0000000..d21aaa4
--- /dev/null
+++ b/tests/test_classify.py
@@ -0,0 +1,114 @@
+"""Tests for wisdom/classify.py"""
+from wisdom.classify import classify_nodes, build_dikw_edges, promote_experiences
+
+
+def _node(id, label, tier=None, **kwargs):
+    """Build a node dict; tier is added only when truthy, extras are merged last."""
+    tier_field = {"tier": tier} if tier else {}
+    return {"id": id, "label": label, **tier_field, **kwargs}
+
+
+def _edge(src, tgt, relation="calls", conf_tag="EXTRACTED"):
+    """Build an edge dict with source, target, relation and confidence_tag keys."""
+    return dict(source=src, target=tgt, relation=relation, confidence_tag=conf_tag)
+
+
+# ── classify_nodes ────────────────────────────────────────────────────────────
+
+def test_default_tier_is_knowledge():
+    """A node with no tier and no edges is classified as knowledge."""
+    classified = classify_nodes([_node("n1", "MyFunction")], [], project="proj-a")
+    assert classified[0]["tier"] == "knowledge"
+
+
+def test_explicit_tier_respected():
+    """A tier already present on the node is left as it is."""
+    classified = classify_nodes([_node("w1", "Use JWT", tier="wisdom")], [], project="proj-a")
+    assert classified[0]["tier"] == "wisdom"
+
+
+def test_insight_heuristic_3_similarity_edges():
+    """Three similarity/relatedness edges out of one node classify it as insight."""
+    relations = ("semantically_similar_to", "semantically_similar_to", "conceptually_related_to")
+    edges = [_edge("n1", f"n{i}", rel) for i, rel in enumerate(relations, start=2)]
+    classified = classify_nodes([_node("n1", "Auth")], edges, project="proj-a")
+    assert classified[0]["tier"] == "insight"
+
+
+def test_project_injected():
+    """The project argument is copied onto each returned node."""
+    classified = classify_nodes([_node("n1", "Foo")], [], project="my-project")
+    assert classified[0]["project"] == "my-project"
+
+
+def test_confidence_set_for_extracted():
+    """EXTRACTED nodes get a confidence of exactly 1.0."""
+    tagged = [_node("n1", "Foo", confidence_tag="EXTRACTED")]
+    classified = classify_nodes(tagged, [], project="p")
+    assert classified[0]["confidence"] == 1.0
+
+
+def test_confidence_lower_for_inferred():
+    """INFERRED nodes get a confidence below 1.0."""
+    tagged = [_node("n1", "Foo", confidence_tag="INFERRED")]
+    classified = classify_nodes(tagged, [], project="p")
+    assert classified[0]["confidence"] < 1.0
+
+
+# ── build_dikw_edges ─────────────────────────────────────────────────────────
+
+def test_grounds_edge_added_for_k_to_e():
+    """A knowledge -> experience edge produces a GROUNDS relation."""
+    nodes = [
+        _node("k1", "JWT", tier="knowledge"),
+        _node("e1", "JWT Context", tier="experience"),
+    ]
+    produced = build_dikw_edges(nodes, [_edge("k1", "e1", "uses")])
+    assert "GROUNDS" in [e["relation"] for e in produced]
+
+
+def test_reveals_edge_for_e_to_i():
+    """An experience -> insight edge produces a REVEALS relation."""
+    nodes = [
+        _node("e1", "Pattern", tier="experience"),
+        _node("i1", "Auth Insight", tier="insight"),
+    ]
+    produced = build_dikw_edges(nodes, [_edge("e1", "i1", "semantically_similar_to")])
+    assert "REVEALS" in [e["relation"] for e in produced]
+
+
+def test_no_duplicate_dikw_edges():
+    """Two parallel edges between the same pair yield a single GROUNDS edge."""
+    nodes = [
+        _node("k1", "A", tier="knowledge"),
+        _node("e1", "B", tier="experience"),
+    ]
+    parallel = [_edge("k1", "e1", "uses"), _edge("k1", "e1", "calls")]
+    produced = build_dikw_edges(nodes, parallel)
+    grounds = [e for e in produced if e["relation"] == "GROUNDS"]
+    assert len(grounds) == 1
+
+
+def test_same_tier_no_dikw_edge():
+    """An edge between two knowledge nodes must not yield a k1 -> k2 GROUNDS edge."""
+    nodes = [_node("k1", "A", tier="knowledge"), _node("k2", "B", tier="knowledge")]
+    produced = build_dikw_edges(nodes, [_edge("k1", "k2", "calls")])
+    offending = [
+        e for e in produced
+        if e["source"] == "k1" and e["target"] == "k2" and e["relation"] == "GROUNDS"
+    ]
+    assert not offending
+
+
+# ── promote_experiences ───────────────────────────────────────────────────────
+
+def test_promotes_knowledge_when_in_existing_projects():
+    """A knowledge node whose id is listed in existing_projects becomes experience."""
+    promoted = promote_experiences([_node("k1", "JWT", tier="knowledge")], existing_projects=["k1"])
+    assert promoted[0]["tier"] == "experience"
+
+
+def test_no_promotion_when_not_in_existing():
+    """A knowledge node whose id is not listed stays knowledge."""
+    promoted = promote_experiences([_node("k1", "JWT", tier="knowledge")], existing_projects=["other_id"])
+    assert promoted[0]["tier"] == "knowledge"
diff --git a/tests/test_detect.py b/tests/test_detect.py
new file mode 100644
index 0000000..65f49aa
--- /dev/null
+++ b/tests/test_detect.py
@@ -0,0 +1,116 @@
+"""Tests for wisdom/detect.py"""
+import pytest
+from pathlib import Path
+from wisdom.detect import classify_file, detect, FileType, _looks_like_paper
+
+
+def test_classify_python():
+    """A .py path is classified as code."""
+    kind = classify_file(Path("main.py"))
+    assert kind == FileType.CODE
+
+
+def test_classify_typescript():
+    """A .tsx path is classified as code."""
+    kind = classify_file(Path("app.tsx"))
+    assert kind == FileType.CODE
+
+
+def test_classify_markdown():
+    """A .md path is classified as a document."""
+    kind = classify_file(Path("README.md"))
+    assert kind == FileType.DOCUMENT
+
+
+def test_classify_pdf():
+    """A .pdf path is classified as a paper."""
+    kind = classify_file(Path("paper.pdf"))
+    assert kind == FileType.PAPER
+
+
+def test_classify_image():
+    """A .png path is classified as an image."""
+    kind = classify_file(Path("diagram.png"))
+    assert kind == FileType.IMAGE
+
+
+def test_classify_unknown():
+    """An unrecognized extension yields None."""
+    kind = classify_file(Path("file.xyz"))
+    assert kind is None
+
+
+def test_classify_docx():
+    """A .docx path is classified as a document."""
+    kind = classify_file(Path("report.docx"))
+    assert kind == FileType.DOCUMENT
+
+
+def test_detect_finds_files(tmp_path):
+    """One code, one document and one image file are each found and bucketed."""
+    (tmp_path / "main.py").write_text("def foo(): pass")
+    (tmp_path / "README.md").write_text("# Hello")
+    (tmp_path / "diagram.png").write_bytes(b"\x89PNG\r\n")
+
+    report = detect(tmp_path)
+    assert report["total_files"] == 3
+    for bucket in ("code", "document", "image"):
+        assert len(report["files"][bucket]) == 1
+
+
+def test_detect_skips_hidden_files(tmp_path):
+    """A dot-file next to a code file is not counted."""
+    (tmp_path / ".env").write_text("SECRET=abc")
+    (tmp_path / "main.py").write_text("x = 1")
+    report = detect(tmp_path)
+    # .env starts with '.' so it is skipped silently (not in skipped_sensitive)
+    assert report["total_files"] == 1
+
+
+def test_detect_skips_sensitive_non_hidden(tmp_path):
+    """A non-hidden file with a sensitive name is reported in skipped_sensitive."""
+    (tmp_path / "credentials.json").write_text('{"key": "secret"}')
+    (tmp_path / "main.py").write_text("x = 1")
+    report = detect(tmp_path)
+    assert report["total_files"] == 1
+    assert any("credentials" in s for s in report["skipped_sensitive"])
+
+
+def test_detect_skips_node_modules(tmp_path):
+    """Files under node_modules/ are not counted."""
+    modules_dir = tmp_path / "node_modules"
+    modules_dir.mkdir()
+    (modules_dir / "lib.js").write_text("module.exports = {}")
+    (tmp_path / "app.js").write_text("const x = 1")
+    report = detect(tmp_path)
+    assert report["total_files"] == 1
+
+
+def test_detect_wisdomignore(tmp_path):
+    """A directory listed in .wisdomignore is excluded from the count."""
+    (tmp_path / ".wisdomignore").write_text("vendor/\n")
+    vendor_dir = tmp_path / "vendor"
+    vendor_dir.mkdir()
+    (vendor_dir / "lib.py").write_text("pass")
+    (tmp_path / "main.py").write_text("pass")
+    report = detect(tmp_path)
+    assert report["total_files"] == 1
+
+
+def test_looks_like_paper_positive(tmp_path):
+    """Markdown with an abstract, citations and an arXiv id is flagged as a paper."""
+    paper = tmp_path / "paper.md"
+    paper.write_text(
+        "Abstract: We propose a new method.\n"
+        "See [1] for details. arXiv:1706.03762\n"
+        "From the literature, we know that [2]\n"
+        "This is a preprint submitted to proceedings.\n"
+    )
+    assert _looks_like_paper(paper) is True
+
+
+def test_looks_like_paper_negative(tmp_path):
+    """Ordinary notes are not flagged as a paper."""
+    notes = tmp_path / "notes.md"
+    notes.write_text("# My notes\nTodo list for today.")
+    assert _looks_like_paper(notes) is False
diff --git a/tests/test_ingest.py b/tests/test_ingest.py
new file mode 100644
index 0000000..ccdd846
--- /dev/null
+++ b/tests/test_ingest.py
@@ -0,0 +1,52 @@
+"""Tests for wisdom/ingest.py — HTML stripping and timestamp helpers (no network calls)."""
+import pytest
+from wisdom.ingest import _html_to_text, _utcnow
+
+
+def test_html_to_text_strips_tags():
+    """Markup is removed while the text between the tags is kept."""
+    html = "<p>Hello <b>world</b></p>"
+    result = _html_to_text(html)
+    assert "<p>" not in result
+    assert "<b>" not in result
+    assert "Hello" in result
+    assert "world" in result
+
+
+def test_html_to_text_strips_script():
+    """The body of a <script> element is dropped; surrounding text is kept."""
+    html = "<script>alert('xss')</script>Content"
+    result = _html_to_text(html)
+    assert "alert" not in result
+    assert "Content" in result
+
+
+def test_html_to_text_strips_style():
+    """The body of a <style> element is dropped; surrounding text is kept."""
+    html = "<style>body { color: red; }</style><p>Text</p>"
+    result = _html_to_text(html)
+    assert "color" not in result
+    assert "Text" in result
+
+
+def test_html_to_text_decodes_entities():
+    """Named and numeric character references are decoded to their characters."""
+    html = "&amp; &lt;tag&gt; &#39;quote&#39;"
+    result = _html_to_text(html)
+    assert "&" in result
+    assert "<tag>" in result
+    assert "'" in result
+
+
+def test_html_to_text_normalizes_whitespace():
+    """Runs of spaces are collapsed, so no double space survives."""
+    html = "  lots   of    spaces  "
+    result = _html_to_text(html)
+    assert "  " not in result
+
+
+def test_utcnow_returns_iso_string():
+    """The timestamp is ISO-8601 with a 'T' separator and an explicit UTC suffix."""
+    ts = _utcnow()
+    assert "T" in ts
+    assert ts.endswith("+00:00") or ts.endswith("Z")
diff --git a/tests/test_security.py b/tests/test_security.py
new file mode 100644
index 0000000..a77e6c7
--- /dev/null
+++ b/tests/test_security.py
@@ -0,0 +1,74 @@
+"""Tests for wisdom/security.py"""
+import pytest
+from pathlib import Path
+from wisdom.security import validate_url, sanitize_label, is_sensitive_path, validate_graph_path
+
+
+def test_validate_url_http():
+ assert validate_url("http://example.com/page") == "http://example.com/page"
+
+
+def test_validate_url_https():
+ assert validate_url(" https://arxiv.org/abs/1706.03762 ") == "https://arxiv.org/abs/1706.03762"
+
+
+def test_validate_url_rejects_file():
+ with pytest.raises(ValueError, match="file"):
+ validate_url("file:///etc/passwd")
+
+
+def test_validate_url_rejects_ftp():
+ with pytest.raises(ValueError, match="ftp"):
+ validate_url("ftp://example.com")
+
+
+def test_validate_url_rejects_no_host():
+ with pytest.raises(ValueError):
+ validate_url("https://")
+
+
+def test_sanitize_label_strips_control():
+ assert "\x00" not in sanitize_label("hello\x00world")
+ assert "\x1f" not in sanitize_label("foo\x1fbar")
+
+
+def test_sanitize_label_caps_length():
+ long = "a" * 500
+ assert len(sanitize_label(long)) <= 256
+
+
+def test_sanitize_label_html_escapes():
+ result = sanitize_label('')
+ assert "