diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 76d3bfe..e67423d 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -2,7 +2,17 @@ "permissions": { "allow": [ "Bash(git push -u origin claude/determined-volhard)", - "Bash(git add .)" + "Bash(git add .)", + "Bash(git add -A)", + "Bash(git commit:*)", + "Bash(git push)", + "Bash(python3 -m pytest tests/ -q)", + "Bash(pip3 install pytest -q)", + "Bash(python3 -m venv /tmp/wisdom-test-venv)", + "Bash(/tmp/wisdom-test-venv/bin/pip install pytest -q)", + "Bash(/tmp/wisdom-test-venv/bin/python -m pytest tests/ -q)", + "Bash(chmod +x /Users/chinkeonglam/wisdomGraph/.claude/worktrees/determined-volhard/ci.sh)", + "Bash(git rm -r --cached tests/__pycache__ wisdom/__pycache__)" ] } } diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..b08bee6 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,81 @@ +name: CI + +on: + push: + branches: ["main", "claude/**"] + tags: ["v*"] + pull_request: + branches: ["main"] + +jobs: + test: + name: Unit tests (Python ${{ matrix.python-version }}) + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: pip install pytest + + - name: Run unit tests + run: python -m pytest tests/ -v --tb=short + + package: + name: Build package + runs-on: ubuntu-latest + needs: test + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install build tools + run: pip install build + + - name: Build wheel + sdist + run: python -m build + + - name: Upload dist artifacts + uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/ + + publish: + name: Publish 
to PyPI + runs-on: ubuntu-latest + needs: [test, package] + if: startsWith(github.ref, 'refs/tags/v') + environment: pypi + permissions: + id-token: write + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install build tools + run: pip install build + + - name: Build wheel + sdist + run: python -m build + + - name: Publish to PyPI (trusted publishing) + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..66625e6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +__pycache__/ +*.pyc +*.pyo +.venv/ +venv/ +*.egg-info/ +dist/ +build/ +wisdom-out/ +.wisdom/ +*.tmp diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..8d4a8c8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 wisdomGraph contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md index 00a02ec..adeff31 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ [English](README.md) | [简体中文](README.zh-CN.md) +[![PyPI](https://img.shields.io/pypi/v/wisdomgraph)](https://pypi.org/project/wisdomgraph/) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) [![Neo4j](https://img.shields.io/badge/Neo4j-native-008CC1?logo=neo4j)](https://neo4j.com) [![Claude Code](https://img.shields.io/badge/Claude%20Code-skill-blueviolet)](https://claude.ai/code) diff --git a/README.zh-CN.md b/README.zh-CN.md new file mode 100644 index 0000000..a8fa69f --- /dev/null +++ b/README.zh-CN.md @@ -0,0 +1,250 @@ +# wisdomGraph + +[English](README.md) | [简体中文](README.zh-CN.md) + +[![PyPI](https://img.shields.io/pypi/v/wisdomgraph)](https://pypi.org/project/wisdomgraph/) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) +[![Neo4j](https://img.shields.io/badge/Neo4j-native-008CC1?logo=neo4j)](https://neo4j.com) +[![Claude Code](https://img.shields.io/badge/Claude%20Code-skill-blueviolet)](https://claude.ai/code) +[![OpenClaw](https://img.shields.io/badge/OpenClaw-skill-orange)](https://openclaw.ai) + +> **graphify 给你快照。wisdomGraph 给你复利增长的记忆。** + +在 Claude Code 或 OpenClaw 中输入 `/wisdom`。把你的代码库、笔记、论文、对话喂给它 —— 每次运行都会**合并**进一个活跃的 Neo4j 图谱。图谱不会重置,只会积累。事实变成模式,模式变成洞察,洞察变成智慧。 + +``` +/wisdom . # 将当前项目吸收进智慧图谱 +/wisdom ask "我所有项目中有哪些反复出现的模式?" 
+/wisdom reflect # 启动 DIKW 晋升,形成智慧闭环 +``` + +--- + +## 相较于 graphify 的质变 + +graphify 在其定位上做得很好:把一个文件夹变成知识图谱快照。跑一次,生成 `graph.json` 和 `GRAPH_REPORT.md`,读完,下次会话从头开始。 + +wisdomGraph 做的是根本不同的事。 + +| | graphify | wisdomGraph | +|---|---|---| +| **存储** | `graph.json` 文件(每个项目独立) | Neo4j(持久化,跨所有项目) | +| **节点类型** | 扁平(代码实体、概念) | DIKW 分层:知识 / 经验 / 洞察 / 智慧 | +| **每次运行** | 快照,覆盖写入 | MERGE —— 每次运行都在扩张图谱 | +| **查询方式** | 读取 GRAPH_REPORT.md | 运行时实时 Cypher 遍历 | +| **记忆** | 每次会话重置 | 跨会话、跨项目、跨月份积累 | +| **推理** | Leiden 社区检测(拓扑) | 图路径遍历 + DIKW 层次 | +| **反馈闭环** | 无 | 智慧 → 知识(神经可塑性) | +| **数据库** | 不需要 | Neo4j Aura(免费)或 DozerDB Docker | + +这个差异不是量变,而是质变。graphify 把代码库压缩成可读报告;wisdomGraph 构建的是一套人工认识论 —— 能记忆、能关联、能成长。 + +--- + +## DIKW 金字塔,工程化落地 + +人类专家不是把事实平铺存储的,他们按层次组织经验: + +``` +智慧(Wisdom) ← 从模式中提炼出的可执行原则 + ↑ +洞察(Insight) ← 从多次经验中发现的规律 + ↑ +经验(Experience)← 有上下文的事件、决策与结果 + ↑ +知识(Knowledge) ← 已验证的事实、文档行为、提取的结构 +``` + +wisdomGraph 中每个节点都带有 `tier` 标签。图谱的拓扑结构**就是**认知架构本身。当你提问时,Cypher 沿层级向上遍历 —— 不是关键词匹配扁平文本,而是跨越亲历经验的推理。 + +反馈闭环至关重要:当某个智慧节点被查询并确认有效时,它会强化连接的知识节点。图谱在学习什么重要。 + +--- + +## 安装 + +**环境要求:** Python 3.10+ 以及以下之一:[Claude Code](https://claude.ai/code)、[OpenClaw](https://openclaw.ai) + +**加上以下之一:** [Neo4j Aura 免费版](https://neo4j.com/cloud/platform/aura-graph-database/)(云端,无需安装)或 [DozerDB](https://dozerdb.org)(本地 Docker,含 APOC) + +```bash +pip install wisdomgraph && wisdom install +``` + +### 方案 A — Neo4j Aura(零基础设施,推荐个人用户) + +1. 在 [neo4j.com/cloud/aura](https://neo4j.com/cloud/aura) 注册免费账号 +2. 创建一个免费的 AuraDB 实例,复制连接 URI 和密码 +3. 
运行: + +```bash +wisdom connect bolt+s://xxxxxxxx.databases.neo4j.io --user neo4j --password <你的密码> +``` + +免费额度:20 万节点,够用好几年。 + +### 方案 B — DozerDB 本地 Docker(完全掌控,含 APOC) + +```bash +wisdom docker up # 拉取 graphstack/dozerdb:5.26.3.0 并启动 +wisdom connect bolt://localhost:7687 --user neo4j --password password +``` + +打开 [localhost:7474](http://localhost:7474) —— Neo4j Browser 是你俯瞰智慧图谱的可视化窗口。 + +--- + +## 平台支持 + +| 平台 | 安装命令 | +|------|---------| +| Claude Code (Linux/Mac) | `wisdom install` | +| Claude Code (Windows) | `wisdom install --platform windows` | +| OpenClaw | `wisdom install --platform claw` | + +然后打开你的 AI 编程助手,输入: + +``` +/wisdom . +``` + +--- + +## 使用方式 + +``` +/wisdom # 吸收当前目录 +/wisdom ./raw # 吸收指定文件夹 +/wisdom ./raw --mode deep # 激进模式,提取更多 INFERRED 边 +/wisdom ./raw --update # 只重新吸收变更文件,MERGE 进图谱 + +/wisdom add https://arxiv.org/abs/1706.03762 # 吸收一篇论文 +/wisdom add https://x.com/... # 吸收一条推文 +/wisdom add https://... --author "姓名" # 标注来源作者 + +/wisdom ask "我所有项目中有哪些反复出现的模式?" +/wisdom ask "我对认证流程了解多少?" +/wisdom ask "从 attention 到 optimizer 的路径是什么?" +/wisdom ask "..." 
--tier wisdom # 只遍历智慧层节点 + +/wisdom reflect # 运行 DIKW 晋升:知识→经验→洞察→智慧 +/wisdom reflect --project ./raw # 只对该语料库进行反思 + +/wisdom path "DigestAuth" "OAuth" # 两个概念之间的最短路径 +/wisdom explain "CausalSelfAttention" # 某节点的完整 DIKW 上下文 +/wisdom god-nodes # 所有项目中连接度最高的概念 + +/wisdom export --cypher # 导出为 Cypher 语句 +/wisdom export --json # 导出 graph.json(与 graphify 兼容) +/wisdom export --obsidian # 导出 Obsidian 知识库 + +/wisdom status # 各层节点统计 +/wisdom purge --project ./raw # 删除单个语料库的节点,不影响其他 +``` + +--- + +## 智慧如何复利积累 + +**第 1 次运行** —— 吸收你的 auth 库: +``` +知识:JWT、session token、cookie flags、PKCE flow +经验:(暂无 —— 只有一个来源) +``` + +**第 2 次运行** —— 吸收另一个项目的 auth: +``` +知识:JWT、PKCE —— MERGE 去重,增加来源链接 +经验:两个不同实现,检测到相同模式 +洞察:JWT + PKCE 是你工作中收敛的模式 +``` + +**第 3 次运行** —— `/wisdom reflect`: +``` +智慧:"API 用无状态 JWT,浏览器端用 PKCE flow。 + 这个模式在 3 个项目中落地,从未出过问题。" +``` + +**第 4 次运行** —— `/wisdom ask "新服务的认证方案怎么定?"`: +``` +遍历路径:知识 → 经验 → 洞察 → 智慧 +返回结果:你自己经过实战验证的原则,根植于你真实的代码历史 +``` + +这不是 RAG,不是摘要,而是图谱遍历你积累的经验,把**你自己的智慧还给你**。 + +--- + +## 图谱 Schema + +```cypher +// DIKW 节点标签 +(:Knowledge {id, label, content, source_file, confidence, timestamp, project}) +(:Experience {id, label, content, context, outcome, timestamp, project}) +(:Insight {id, label, content, pattern_strength, source_count, timestamp}) +(:Wisdom {id, label, principle, confidence, reinforcement_count, timestamp}) + +// 关系类型 +(Knowledge)-[:GROUNDS]->(Experience) +(Experience)-[:REVEALS]->(Insight) +(Insight)-[:CRYSTALLIZES_INTO]->(Wisdom) +(Wisdom)-[:REINFORCES]->(Knowledge) // 反馈闭环 —— 图谱在学习 + +(Knowledge)-[:SEMANTICALLY_SIMILAR_TO]->(Knowledge) +(Insight)-[:CONTRADICTS]->(Insight) // 张力浮现,需要反思 +(any)-[:SOURCED_FROM]->(Source {uri, author, ingested_at}) +``` + +置信度沿图谱向上流动。8 个经验支撑的洞察比 2 个支撑的模式强度更高。智慧节点追踪 `reinforcement_count` —— 遍历确认该原则有效的次数。 + +--- + +## 你能得到什么 + +**跨项目神节点** —— 跨越*所有*项目和语料库的核心概念,而不仅是单个仓库的。 + +**矛盾检测** —— 两个洞察方向相反时,以 `CONTRADICTS` 边的形式浮现。图谱展示冲突,由你解决,形成更好的智慧。 + +**时间衰减** —— 节点带时间戳。长时间未被强化的旧知识会被标记。图谱优雅地老化,如同专家的记忆。 + +**完整溯源链** —— 
每个节点关联到其 `Source`。`/wisdom explain "节点名"` 返回完整 DIKW 路径:事实 → 上下文 → 模式 → 原则。 + +--- + +## 部署方案对比 + +| | Aura 免费版 | DozerDB 本地 | +|---|---|---| +| **配置** | 3 步点击 + URI | 1 条 docker 命令 | +| **费用** | 免费(20 万节点) | 永久免费 | +| **APOC** | 可用 | 内置 | +| **数据位置** | Neo4j 云端 | 你自己的机器 | +| **可视化** | neo4j.com 控制台 | localhost:7474 | +| **适合** | 快速上手、个人用户 | 团队、离线、完全掌控 | + +--- + +## 隐私说明 + +wisdomGraph 将文件内容发送给你的 AI 编程助手的底层模型 API 进行语义提取 —— Anthropic(Claude Code)或你所在平台使用的任何模型。代码文件通过 tree-sitter AST 在本地处理,不会发送到外部。所有图谱数据存储在*你的* Neo4j 实例中(Aura 或本地)。无遥测、无使用追踪、无任何形式的数据分析。 + +--- + +## 技术栈 + +Neo4j(Aura 或 DozerDB)+ tree-sitter + APOC。语义提取通过 Claude(Claude Code)或你平台的模型完成。图数据库就是智能层 —— 遍历、路径查找和社区检测通过 Neo4j GDS(图数据科学库)原生 Cypher 运行。 + +--- + +<details>
+<summary>贡献指南</summary> + +**工作示例**是最有说服力的贡献。在真实的多项目语料库上跑 `/wisdom`,让它反思几轮,记录涌现出哪些智慧节点、是否与你的直觉吻合。提交到 `worked/{slug}/`。 + +**Schema 提案** —— 如果你有捕捉当前 Schema 遗漏语义的关系类型,欢迎提 issue,附上 Cypher 模式和工作示例。 + +**DIKW 晋升启发式** —— 更好的知识→经验→洞察→智慧晋升提示词或规则。晋升逻辑是系统的核心。 + +详见 [ARCHITECTURE.md](ARCHITECTURE.md) 了解完整流水线设计、Cypher Schema 和如何扩展 DIKW 层次。 + +</details>
diff --git a/ci.sh b/ci.sh new file mode 100755 index 0000000..255005f --- /dev/null +++ b/ci.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# wisdomGraph CI — unit tests (no Neo4j required) +set -euo pipefail + +echo "==> Creating venv" +python3 -m venv .venv +source .venv/bin/activate + +echo "==> Installing dependencies" +pip install pytest -q + +echo "==> Running tests" +python -m pytest tests/ -q + +echo "==> All tests passed" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..2850504 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,87 @@ +[build-system] +requires = ["setuptools>=68"] +build-backend = "setuptools.build_meta" + +[project] +name = "wisdomgraph" +version = "0.1.0" +description = "Accumulative Neo4j-native DIKW wisdom memory for AI coding assistants (Claude Code, OpenClaw)" +readme = "README.md" +license = "MIT" +license-files = ["LICENSE"] +keywords = [ + "claude", "claude-code", "openclaw", "neo4j", "knowledge-graph", + "graphrag", "dikw", "wisdom", "memory", "accumulative", "llm", + "skill", "agent-memory", "dozerdb", +] +requires-python = ">=3.10" +dependencies = [ + "neo4j>=5.0", +] + +[project.urls] +Homepage = "https://github.com/cklam12345/wisdomGraph" +Repository = "https://github.com/cklam12345/wisdomGraph" +Issues = "https://github.com/cklam12345/wisdomGraph/issues" + +[project.optional-dependencies] +ast = [ + "tree-sitter", + "tree-sitter-python", + "tree-sitter-javascript", + "tree-sitter-typescript", + "tree-sitter-go", + "tree-sitter-rust", + "tree-sitter-java", + "tree-sitter-c", + "tree-sitter-cpp", + "tree-sitter-ruby", + "tree-sitter-c-sharp", + "tree-sitter-kotlin", + "tree-sitter-scala", + "tree-sitter-php", + "tree-sitter-swift", + "tree-sitter-lua", + "tree-sitter-zig", + "tree-sitter-powershell", + "tree-sitter-elixir", + "tree-sitter-objc", +] +pdf = ["pypdf", "html2text"] +office = ["python-docx", "openpyxl"] +all = [ + "tree-sitter", + "tree-sitter-python", + "tree-sitter-javascript", + 
"tree-sitter-typescript", + "tree-sitter-go", + "tree-sitter-rust", + "tree-sitter-java", + "tree-sitter-c", + "tree-sitter-cpp", + "tree-sitter-ruby", + "tree-sitter-c-sharp", + "tree-sitter-kotlin", + "tree-sitter-scala", + "tree-sitter-php", + "tree-sitter-swift", + "tree-sitter-lua", + "tree-sitter-zig", + "tree-sitter-powershell", + "tree-sitter-elixir", + "tree-sitter-objc", + "pypdf", + "html2text", + "python-docx", + "openpyxl", +] + +[project.scripts] +wisdom = "wisdom.__main__:main" + +[tool.setuptools.packages.find] +where = ["."] +include = ["wisdom*"] + +[tool.setuptools.package-data] +wisdom = ["skill.md", "skill-claw.md", "skill-windows.md"] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_cache.py b/tests/test_cache.py new file mode 100644 index 0000000..957d955 --- /dev/null +++ b/tests/test_cache.py @@ -0,0 +1,68 @@ +"""Tests for wisdom/cache.py""" +import pytest +from pathlib import Path +from wisdom.cache import file_hash, load_cached, save_cached, check_cache, save_extractions + + +def test_file_hash_consistent(tmp_path): + f = tmp_path / "test.py" + f.write_text("hello world") + h1 = file_hash(f) + h2 = file_hash(f) + assert h1 == h2 + + +def test_file_hash_changes_on_content(tmp_path): + f = tmp_path / "test.py" + f.write_text("version 1") + h1 = file_hash(f) + f.write_text("version 2") + h2 = file_hash(f) + assert h1 != h2 + + +def test_load_cached_miss(tmp_path): + f = tmp_path / "test.py" + f.write_text("hello") + assert load_cached(f, root=tmp_path) is None + + +def test_save_and_load_cached(tmp_path): + f = tmp_path / "test.py" + f.write_text("def foo(): pass") + data = {"nodes": [{"id": "foo", "label": "foo"}], "edges": []} + save_cached(f, data, root=tmp_path) + result = load_cached(f, root=tmp_path) + assert result == data + + +def test_cache_invalidated_on_change(tmp_path): + f = tmp_path / "test.py" + f.write_text("original") + data = {"nodes": [], "edges": []} + 
save_cached(f, data, root=tmp_path) + f.write_text("changed") + assert load_cached(f, root=tmp_path) is None + + +def test_check_cache_splits(tmp_path): + f1 = tmp_path / "cached.py" + f1.write_text("x = 1") + f2 = tmp_path / "uncached.py" + f2.write_text("y = 2") + + data = {"nodes": [{"id": "x"}], "edges": [], "source_file": str(f1)} + save_cached(f1, data, root=tmp_path) + + cached, uncached = check_cache([str(f1), str(f2)], root=tmp_path) + assert len(cached) == 1 + assert str(f2) in uncached + + +def test_save_extractions(tmp_path): + f = tmp_path / "src.py" + f.write_text("code") + ext = {"nodes": [{"id": "n1"}], "edges": [], "source_file": str(f)} + count = save_extractions([ext], root=tmp_path) + assert count == 1 + assert load_cached(f, root=tmp_path) is not None diff --git a/tests/test_classify.py b/tests/test_classify.py new file mode 100644 index 0000000..d21aaa4 --- /dev/null +++ b/tests/test_classify.py @@ -0,0 +1,113 @@ +"""Tests for wisdom/classify.py""" +from wisdom.classify import classify_nodes, build_dikw_edges, promote_experiences + + +def _node(id, label, tier=None, **kwargs): + n = {"id": id, "label": label} + if tier: + n["tier"] = tier + n.update(kwargs) + return n + + +def _edge(src, tgt, relation="calls", conf_tag="EXTRACTED"): + return {"source": src, "target": tgt, "relation": relation, "confidence_tag": conf_tag} + + +# ── classify_nodes ──────────────────────────────────────────────────────────── + +def test_default_tier_is_knowledge(): + nodes = [_node("n1", "MyFunction")] + result = classify_nodes(nodes, [], project="proj-a") + assert result[0]["tier"] == "knowledge" + + +def test_explicit_tier_respected(): + nodes = [_node("w1", "Use JWT", tier="wisdom")] + result = classify_nodes(nodes, [], project="proj-a") + assert result[0]["tier"] == "wisdom" + + +def test_insight_heuristic_3_similarity_edges(): + nodes = [_node("n1", "Auth")] + edges = [ + _edge("n1", "n2", "semantically_similar_to"), + _edge("n1", "n3", 
"semantically_similar_to"), + _edge("n1", "n4", "conceptually_related_to"), + ] + result = classify_nodes(nodes, edges, project="proj-a") + assert result[0]["tier"] == "insight" + + +def test_project_injected(): + nodes = [_node("n1", "Foo")] + result = classify_nodes(nodes, [], project="my-project") + assert result[0]["project"] == "my-project" + + +def test_confidence_set_for_extracted(): + nodes = [_node("n1", "Foo", confidence_tag="EXTRACTED")] + result = classify_nodes(nodes, [], project="p") + assert result[0]["confidence"] == 1.0 + + +def test_confidence_lower_for_inferred(): + nodes = [_node("n1", "Foo", confidence_tag="INFERRED")] + result = classify_nodes(nodes, [], project="p") + assert result[0]["confidence"] < 1.0 + + +# ── build_dikw_edges ───────────────────────────────────────────────────────── + +def test_grounds_edge_added_for_k_to_e(): + nodes = [ + _node("k1", "JWT", tier="knowledge"), + _node("e1", "JWT Context", tier="experience"), + ] + edges = [_edge("k1", "e1", "uses")] + result = build_dikw_edges(nodes, edges) + relations = [e["relation"] for e in result] + assert "GROUNDS" in relations + + +def test_reveals_edge_for_e_to_i(): + nodes = [ + _node("e1", "Pattern", tier="experience"), + _node("i1", "Auth Insight", tier="insight"), + ] + edges = [_edge("e1", "i1", "semantically_similar_to")] + result = build_dikw_edges(nodes, edges) + relations = [e["relation"] for e in result] + assert "REVEALS" in relations + + +def test_no_duplicate_dikw_edges(): + nodes = [ + _node("k1", "A", tier="knowledge"), + _node("e1", "B", tier="experience"), + ] + edges = [_edge("k1", "e1", "uses"), _edge("k1", "e1", "calls")] + result = build_dikw_edges(nodes, edges) + grounds_count = sum(1 for e in result if e["relation"] == "GROUNDS") + assert grounds_count == 1 + + +def test_same_tier_no_dikw_edge(): + nodes = [_node("k1", "A", tier="knowledge"), _node("k2", "B", tier="knowledge")] + edges = [_edge("k1", "k2", "calls")] + result = build_dikw_edges(nodes, 
edges) + assert all(e["relation"] != "GROUNDS" for e in result if e["source"] == "k1" and e["target"] == "k2" and e["relation"] not in ("calls",)) + + +# ── promote_experiences ─────────────────────────────────────────────────────── + +def test_promotes_knowledge_when_in_existing_projects(): + nodes = [_node("k1", "JWT", tier="knowledge")] + result = promote_experiences(nodes, existing_projects=["k1"]) + assert result[0]["tier"] == "experience" + + +def test_no_promotion_when_not_in_existing(): + nodes = [_node("k1", "JWT", tier="knowledge")] + result = promote_experiences(nodes, existing_projects=["other_id"]) + assert result[0]["tier"] == "knowledge" diff --git a/tests/test_detect.py b/tests/test_detect.py new file mode 100644 index 0000000..65f49aa --- /dev/null +++ b/tests/test_detect.py @@ -0,0 +1,98 @@ +"""Tests for wisdom/detect.py""" +import pytest +from pathlib import Path +from wisdom.detect import classify_file, detect, FileType, _looks_like_paper + + +def test_classify_python(): + assert classify_file(Path("main.py")) == FileType.CODE + + +def test_classify_typescript(): + assert classify_file(Path("app.tsx")) == FileType.CODE + + +def test_classify_markdown(): + assert classify_file(Path("README.md")) == FileType.DOCUMENT + + +def test_classify_pdf(): + assert classify_file(Path("paper.pdf")) == FileType.PAPER + + +def test_classify_image(): + assert classify_file(Path("diagram.png")) == FileType.IMAGE + + +def test_classify_unknown(): + assert classify_file(Path("file.xyz")) is None + + +def test_classify_docx(): + assert classify_file(Path("report.docx")) == FileType.DOCUMENT + + +def test_detect_finds_files(tmp_path): + (tmp_path / "main.py").write_text("def foo(): pass") + (tmp_path / "README.md").write_text("# Hello") + (tmp_path / "diagram.png").write_bytes(b"\x89PNG\r\n") + + result = detect(tmp_path) + assert result["total_files"] == 3 + assert len(result["files"]["code"]) == 1 + assert len(result["files"]["document"]) == 1 + assert 
len(result["files"]["image"]) == 1 + + +def test_detect_skips_hidden_files(tmp_path): + (tmp_path / ".env").write_text("SECRET=abc") + (tmp_path / "main.py").write_text("x = 1") + result = detect(tmp_path) + # .env starts with '.' so it is skipped silently (not in skipped_sensitive) + assert result["total_files"] == 1 + + +def test_detect_skips_sensitive_non_hidden(tmp_path): + # A non-hidden file with a sensitive name should appear in skipped_sensitive + (tmp_path / "credentials.json").write_text('{"key": "secret"}') + (tmp_path / "main.py").write_text("x = 1") + result = detect(tmp_path) + assert result["total_files"] == 1 + assert any("credentials" in s for s in result["skipped_sensitive"]) + + +def test_detect_skips_node_modules(tmp_path): + nm = tmp_path / "node_modules" + nm.mkdir() + (nm / "lib.js").write_text("module.exports = {}") + (tmp_path / "app.js").write_text("const x = 1") + result = detect(tmp_path) + assert result["total_files"] == 1 + + +def test_detect_wisdomignore(tmp_path): + ignore = tmp_path / ".wisdomignore" + ignore.write_text("vendor/\n") + vendor = tmp_path / "vendor" + vendor.mkdir() + (vendor / "lib.py").write_text("pass") + (tmp_path / "main.py").write_text("pass") + result = detect(tmp_path) + assert result["total_files"] == 1 + + +def test_looks_like_paper_positive(tmp_path): + paper = tmp_path / "paper.md" + paper.write_text( + "Abstract: We propose a new method.\n" + "See [1] for details. 
arXiv:1706.03762\n" + "From the literature, we know that [2]\n" + "This is a preprint submitted to proceedings.\n" + ) + assert _looks_like_paper(paper) is True + + +def test_looks_like_paper_negative(tmp_path): + normal = tmp_path / "notes.md" + normal.write_text("# My notes\nTodo list for today.") + assert _looks_like_paper(normal) is False diff --git a/tests/test_ingest.py b/tests/test_ingest.py new file mode 100644 index 0000000..ccdd846 --- /dev/null +++ b/tests/test_ingest.py @@ -0,0 +1,46 @@ +"""Tests for wisdom/ingest.py — URL validation and HTML stripping (no network calls).""" +import pytest +from wisdom.ingest import _html_to_text, _utcnow + + +def test_html_to_text_strips_tags(): + html = "

Hello world

" + result = _html_to_text(html) + assert "

" not in result + assert "" not in result + assert "Hello" in result + assert "world" in result + + +def test_html_to_text_strips_script(): + html = "Content" + result = _html_to_text(html) + assert "alert" not in result + assert "Content" in result + + +def test_html_to_text_strips_style(): + html = "

Text

" + result = _html_to_text(html) + assert "color" not in result + assert "Text" in result + + +def test_html_to_text_decodes_entities(): + html = "& <tag>   'quote'" + result = _html_to_text(html) + assert "&" in result + assert "" in result + assert "'" in result + + +def test_html_to_text_normalizes_whitespace(): + html = " lots of spaces " + result = _html_to_text(html) + assert " " not in result + + +def test_utcnow_returns_iso_string(): + ts = _utcnow() + assert "T" in ts + assert ts.endswith("+00:00") or ts.endswith("Z") or "+" in ts diff --git a/tests/test_security.py b/tests/test_security.py new file mode 100644 index 0000000..a77e6c7 --- /dev/null +++ b/tests/test_security.py @@ -0,0 +1,74 @@ +"""Tests for wisdom/security.py""" +import pytest +from pathlib import Path +from wisdom.security import validate_url, sanitize_label, is_sensitive_path, validate_graph_path + + +def test_validate_url_http(): + assert validate_url("http://example.com/page") == "http://example.com/page" + + +def test_validate_url_https(): + assert validate_url(" https://arxiv.org/abs/1706.03762 ") == "https://arxiv.org/abs/1706.03762" + + +def test_validate_url_rejects_file(): + with pytest.raises(ValueError, match="file"): + validate_url("file:///etc/passwd") + + +def test_validate_url_rejects_ftp(): + with pytest.raises(ValueError, match="ftp"): + validate_url("ftp://example.com") + + +def test_validate_url_rejects_no_host(): + with pytest.raises(ValueError): + validate_url("https://") + + +def test_sanitize_label_strips_control(): + assert "\x00" not in sanitize_label("hello\x00world") + assert "\x1f" not in sanitize_label("foo\x1fbar") + + +def test_sanitize_label_caps_length(): + long = "a" * 500 + assert len(sanitize_label(long)) <= 256 + + +def test_sanitize_label_html_escapes(): + result = sanitize_label('') + assert "