diff --git a/CHECKLIST.md b/CHECKLIST.md new file mode 100644 index 0000000..ab8c1da --- /dev/null +++ b/CHECKLIST.md @@ -0,0 +1,13 @@ +# Demo1 CHECKLIST + +## 过关前自检 + +- [ ] `CLIAgent()` 可以正常创建 +- [ ] 初始状态是 `IDLE` +- [ ] `process()` 对普通输入返回字符串 +- [ ] `process()` 对空输入不会崩 +- [ ] 数学输入能触发计算逻辑 +- [ ] `reset()` 后状态恢复为 `IDLE` +- [ ] `python demo1_cli_agent/main.py` 能启动 +- [ ] `pytest demo1_cli_agent/tests/test_agent.py -q` 通过 +- [ ] push 到 `demo1-starter` 后,Actions 通过 diff --git a/FAQ.md b/FAQ.md new file mode 100644 index 0000000..ad37b63 --- /dev/null +++ b/FAQ.md @@ -0,0 +1,27 @@ +# Demo1 FAQ + +## 1. 为什么 `process()` 一运行就报错? + +先确认你是不是还保留了 `NotImplementedError`。 +这一关最常见的问题就是只改了一部分逻辑,但忘了去掉占位异常。 + +## 2. 数学输入一定要做很完整吗? + +不用。 +先满足测试里最基本的表达式场景,再逐步扩展。 + +## 3. 为什么测试强调 `process()` 返回字符串? + +因为这一关先考察“接口稳定性”。 +哪怕还不够智能,也要保证外部调用方始终拿到字符串结果。 + +## 4. `reset()` 到底要做什么? + +至少两件事: + +- 清空上下文 +- 把状态恢复为 `IDLE` + +## 5. 怎么排查多轮对话相关问题? + +先打印或观察你保存的 history 结构是否真的在追加,而不是每次都被覆盖。 diff --git a/HINTS.md b/HINTS.md new file mode 100644 index 0000000..b793eec --- /dev/null +++ b/HINTS.md @@ -0,0 +1,22 @@ +# Demo1 HINTS + +只给思路,不给答案。 + +## 你可以先想清楚的点 + +- `process()` 的最低要求不是“聪明”,而是“稳定返回字符串” +- 测试最关心的是接口行为,不是类设计得多复杂 +- 问候、数学、空输入,这三类先处理好,其他输入再统一兜底 + +## 容易卡住的地方 + +- 计算逻辑不需要一开始就支持很复杂的表达式 +- `reset()` 不只是清空历史,还要让 `state` 回到 `IDLE` +- 不要让 `process()` 在空字符串时抛异常 + +## 实现顺序建议 + +1. 先让类能实例化 +2. 再让 `process()` 返回固定字符串 +3. 再逐步加意图分支 +4. 最后补 CLI 循环 diff --git a/README.md b/README.md index 77b381d..1a7d1f9 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,32 @@ Agent · ML · FastAPI · RAG · SSE · CI Unlock Flow

+## 当前分支 + +你现在位于 `demo1-starter`。 + +- 这个分支是第一关学习骨架,不是完整答案 +- 目标是自己实现 `demo1_cli_agent/` 里的核心逻辑 +- 当前关卡通过后,push 到 `demo1-starter` 会自动解锁下一关 +- 完整参考实现保留在 `main` 分支 + +## 学习入口 + +- 先读 [TODO.md](TODO.md) +- 卡住时看 [HINTS.md](HINTS.md) +- 易错点和排查看 [FAQ.md](FAQ.md) +- 提交前对照 [CHECKLIST.md](CHECKLIST.md) +- 完成后回看 [REFLECTION.md](REFLECTION.md) +- 再看 `demo1_cli_agent/tests/test_agent.py` +- 然后补 `demo1_cli_agent/agent.py` 和 `demo1_cli_agent/main.py` + +## 建议实现步骤 + +1. 先让 `CLIAgent` 能被正常实例化。 +2. 让 `process()` 对任何输入都返回字符串。 +3. 加上简单问候和计算逻辑。 +4. 最后补 `reset()` 和 CLI 循环。 + ## 快速导航 - [在线演示](#在线演示) diff --git a/REFLECTION.md b/REFLECTION.md new file mode 100644 index 0000000..a7b9859 --- /dev/null +++ b/REFLECTION.md @@ -0,0 +1,13 @@ +# Demo1 REFLECTION + +学完这一关,你应该能说清楚这些事: + +- 一个最小可用的 Agent 至少需要输入、状态、上下文和输出 +- 状态机为什么适合描述 Agent 的执行过程 +- 为什么即使没有 LLM,也能先实现一个规则版 Agent +- 为什么测试优先关注接口稳定性,而不是复杂能力 + +如果你已经完成本关,说明你已经具备: + +- 实现一个最小 CLI Agent 的能力 +- 为后续任务型 Agent 打基础的能力 diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..bbf7047 --- /dev/null +++ b/TODO.md @@ -0,0 +1,30 @@ +# Demo1 TODO + +当前目标:完成一个最基础的 CLI Agent。 + +## 你需要实现的文件 + +- `demo1_cli_agent/agent.py` +- `demo1_cli_agent/main.py` + +## 建议实现步骤 + +1. 在 `CLIAgent.__init__` 中维护 `state` 和对话历史。 +2. 在 `process()` 中处理空输入,保证始终返回字符串。 +3. 加入简单意图识别:问候、计算、时间查询。 +4. 为数学表达式实现一个最小可用的计算逻辑。 +5. 在 `reset()` 中恢复 `IDLE` 状态并清空上下文。 +6. 在 `main.py` 中补一个最基本的 CLI 循环。 + +## 完成标准 + +- `pytest demo1_cli_agent/tests/test_agent.py -q` 通过 +- 推送到 `demo1-starter` 后,GitHub Actions 成功运行 +- Issues 页面出现 “Demo2 已解锁” + +## 卡住时看哪里 + +- 当前分支的 `README.md` +- `docs/demo_specs.md` +- `docs/interview_qa.md` +- 完整答案在 `main` 分支 diff --git a/demo1_cli_agent/agent.py b/demo1_cli_agent/agent.py index 248b312..cd02857 100644 --- a/demo1_cli_agent/agent.py +++ b/demo1_cli_agent/agent.py @@ -1,98 +1,24 @@ -from __future__ import annotations - from collections import deque -from datetime import datetime -from typing import Deque, Dict, List -import ast -import operator -import re - - -class SafeCalculator: - _bin_ops = { - ast.Add: operator.add, - ast.Sub: operator.sub, - ast.Mult: operator.mul, - ast.Div: operator.truediv, - ast.Pow: operator.pow, - ast.Mod: operator.mod, - } - _unary_ops = { - ast.UAdd: operator.pos, - ast.USub: operator.neg, - } - - def evaluate(self, expression: str) -> float: - tree = ast.parse(expression, mode="eval") - return float(self._eval_node(tree.body)) - - def _eval_node(self, node: ast.AST) -> float: - if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)): - return float(node.value) - if isinstance(node, ast.BinOp) and type(node.op) in self._bin_ops: - left = self._eval_node(node.left) - right = self._eval_node(node.right) - return self._bin_ops[type(node.op)](left, right) - if isinstance(node, ast.UnaryOp) and type(node.op) in self._unary_ops: - operand = self._eval_node(node.operand) - return self._unary_ops[type(node.op)](operand) - raise ValueError("Unsupported expression") class CLIAgent: + """Demo1 starter: implement the basic CLI agent yourself.""" + def __init__(self, max_context_len: int = 10): self.max_context_len = max_context_len self.state = "IDLE" - self._history: Deque[Dict[str, str]] = deque(maxlen=max_context_len) - self._calculator = SafeCalculator() + self.history = deque(maxlen=max_context_len) def process(self, user_input: str) -> str: - cleaned = (user_input or "").strip() - self.state = "THINKING" - self._history.append({"role": "user", "content": cleaned}) - - if not cleaned: - reply = "可以继续发我一个问题,或者让我帮你算一道题。" - elif self._looks_like_math(cleaned): - self.state = "ACTING" - reply = self._handle_math(cleaned) - elif any(token in cleaned.lower() for token in ["时间", "日期", "date", "time"]): - self.state = "ACTING" - reply = f"当前时间是 {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}。" - elif any(token in cleaned.lower() for token in ["你好", "hi", "hello"]): - reply = "你好,我是这个学习项目里的 CLI Agent,可以陪你聊天,也能做简单计算。" - elif cleaned.lower() in {"exit", "quit", "bye"}: - reply = "本轮会话先到这里,随时可以继续。" - else: - reply = "我理解到这是一个普通问题。目前我支持基础对话、时间查询和简单数学计算。" - - self.state = "DONE" - self._history.append({"role": "assistant", "content": reply}) - return reply + """ + TODO: + 1. update agent state + 2. record conversation history + 3. route simple intents such as greeting / math / datetime + 4. return a string reply instead of raising + """ + raise NotImplementedError("Implement CLIAgent.process for Demo1") def reset(self) -> None: - self._history.clear() - self.state = "IDLE" - - def _looks_like_math(self, text: str) -> bool: - if "计算" in text: - return True - return bool(re.search(r"\d+\s*[\+\-\*/%]\s*\d+", text)) - - def _handle_math(self, text: str) -> str: - match = re.search(r"([-+*/%().\d\s]+)", text) - expression = match.group(1).strip() if match else "" - try: - result = self._calculator.evaluate(expression) - except Exception: - return "这道题我没能正确解析,你可以换成更标准的表达式,例如 1 + 1。" - - if result.is_integer(): - result_text = str(int(result)) - else: - result_text = f"{result:.4f}".rstrip("0").rstrip(".") - return f"计算结果是 {result_text}。" - - @property - def history(self) -> List[Dict[str, str]]: - return list(self._history) + """TODO: clear context and set state back to IDLE.""" + raise NotImplementedError("Implement CLIAgent.reset for Demo1") diff --git a/demo1_cli_agent/main.py b/demo1_cli_agent/main.py index 8ec3987..710f135 100644 --- a/demo1_cli_agent/main.py +++ b/demo1_cli_agent/main.py @@ -2,14 +2,14 @@ def main() -> None: + """ + Demo1 starter: + - instantiate CLIAgent + - build a simple REPL loop + - support exit / quit commands + """ agent = CLIAgent() - print("CLI Agent 已启动,输入 exit 结束。") - while True: - user_input = input(">>> ").strip() - if user_input.lower() in {"exit", "quit"}: - print("Bye.") - break - print(agent.process(user_input)) + print("Demo1 starter loaded. Please implement the CLI loop in main.py.") if __name__ == "__main__": diff --git a/demo2_task_agent/priority_queue.py b/demo2_task_agent/priority_queue.py deleted file mode 100644 index 0a0e411..0000000 --- a/demo2_task_agent/priority_queue.py +++ /dev/null @@ -1,23 +0,0 @@ -from __future__ import annotations - -import heapq -from typing import List, Tuple - - -class PriorityQueue: - _priority_rank = {"high": 0, "mid": 1, "medium": 1, "low": 2} - - def __init__(self) -> None: - self._heap: List[Tuple[int, int, dict]] = [] - self._counter = 0 - - def push(self, task: dict) -> None: - rank = self._priority_rank.get(task.get("priority", "low"), 2) - heapq.heappush(self._heap, (rank, self._counter, task)) - self._counter += 1 - - def pop(self) -> dict: - return heapq.heappop(self._heap)[2] - - def peek(self) -> dict: - return self._heap[0][2] diff --git a/demo2_task_agent/task_store.py b/demo2_task_agent/task_store.py deleted file mode 100644 index 4cc42d6..0000000 --- a/demo2_task_agent/task_store.py +++ /dev/null @@ -1,40 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass, asdict -from typing import Dict, List, Optional -from uuid import uuid4 - - -@dataclass -class Task: - id: str - title: str - priority: str - due_date: str - - -class TaskStore: - def __init__(self) -> None: - self._tasks: Dict[str, Task] = {} - - def add(self, title: str, priority: str, due_date: str) -> dict: - task = Task(id=str(uuid4()), title=title, priority=priority, due_date=due_date) - self._tasks[task.id] = task - return asdict(task) - - def get_all(self) -> List[dict]: - return [asdict(task) for task in self._tasks.values()] - - def get(self, task_id: str) -> Optional[dict]: - task = self._tasks.get(task_id) - return asdict(task) if task else None - - def update(self, task_id: str, **kwargs) -> dict: - task = self._tasks[task_id] - for field in ("title", "priority", "due_date"): - if field in kwargs and kwargs[field] is not None: - setattr(task, field, kwargs[field]) - return asdict(task) - - def delete(self, task_id: str) -> bool: - return self._tasks.pop(task_id, None) is not None diff --git a/demo2_task_agent/tool_registry.py b/demo2_task_agent/tool_registry.py deleted file mode 100644 index 41f6da7..0000000 --- a/demo2_task_agent/tool_registry.py +++ /dev/null @@ -1,40 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import Any, Callable, Dict - - -class ToolRegistryError(Exception): - pass - - -class PermissionDeniedError(ToolRegistryError): - pass - - -class ToolNotFoundError(ToolRegistryError): - pass - - -@dataclass -class ToolSpec: - func: Callable[..., Any] - permission: str - - -class ToolRegistry: - _permission_order = {"read": 1, "write": 2, "admin": 3} - - def __init__(self) -> None: - self._tools: Dict[str, ToolSpec] = {} - - def register(self, name: str, func: Callable[..., Any], permission: str) -> None: - self._tools[name] = ToolSpec(func=func, permission=permission) - - def call(self, name: str, args: Dict[str, Any], ctx_permission: str) -> Any: - if name not in self._tools: - raise ToolNotFoundError(name) - spec = self._tools[name] - if self._permission_order.get(ctx_permission, 0) < self._permission_order.get(spec.permission, 0): - raise PermissionDeniedError(f"{ctx_permission} cannot call {name}") - return spec.func(**args) diff --git a/demo3_ml_visual/data_loader.py b/demo3_ml_visual/data_loader.py deleted file mode 100644 index 4fb71a0..0000000 --- a/demo3_ml_visual/data_loader.py +++ /dev/null @@ -1,26 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from typing import Tuple - -import pandas as pd -from sklearn.datasets import load_iris, load_wine, load_breast_cancer - - -class DataLoader: - def load_csv(self, path: str) -> Tuple[pd.DataFrame, pd.Series]: - df = pd.read_csv(path) - if df.shape[1] < 2: - raise ValueError("CSV must contain features and a target column") - return df.iloc[:, :-1], df.iloc[:, -1] - - def load_builtin(self, name: str = "iris"): - mapping = { - "iris": load_iris, - "wine": load_wine, - "breast_cancer": load_breast_cancer, - } - if name not in mapping: - raise ValueError(f"Unsupported builtin dataset: {name}") - data = mapping[name](return_X_y=True) - return data diff --git a/demo3_ml_visual/feature_engineer.py b/demo3_ml_visual/feature_engineer.py deleted file mode 100644 index 1df1846..0000000 --- a/demo3_ml_visual/feature_engineer.py +++ /dev/null @@ -1,24 +0,0 @@ -from __future__ import annotations - -import numpy as np -from sklearn.impute import SimpleImputer -from sklearn.pipeline import Pipeline -from sklearn.preprocessing import StandardScaler - - -class FeatureEngineer: - def __init__(self) -> None: - self.pipeline = Pipeline( - steps=[ - ("imputer", SimpleImputer(strategy="median")), - ("scaler", StandardScaler()), - ] - ) - - def fit_transform(self, X): - transformed = self.pipeline.fit_transform(X) - return np.asarray(transformed) - - def transform(self, X): - transformed = self.pipeline.transform(X) - return np.asarray(transformed) diff --git a/demo3_ml_visual/main.py b/demo3_ml_visual/main.py deleted file mode 100644 index eaa504f..0000000 --- a/demo3_ml_visual/main.py +++ /dev/null @@ -1,20 +0,0 @@ -from sklearn.model_selection import train_test_split - -from data_loader import DataLoader -from feature_engineer import FeatureEngineer -from model_trainer import ModelTrainer - - -def main() -> None: - X, y = DataLoader().load_builtin("iris") - fe = FeatureEngineer() - X_t = fe.fit_transform(X) - X_train, X_test, y_train, y_test = train_test_split(X_t, y, test_size=0.2, random_state=42) - trainer = ModelTrainer() - models = trainer.train(X_train, y_train) - metrics = trainer.evaluate(models, X_test, y_test) - print(metrics) - - -if __name__ == "__main__": - main() diff --git a/demo3_ml_visual/model_io.py b/demo3_ml_visual/model_io.py deleted file mode 100644 index f59ec06..0000000 --- a/demo3_ml_visual/model_io.py +++ /dev/null @@ -1,11 +0,0 @@ -from __future__ import annotations - -import joblib - - -def save_model(model, path: str) -> None: - joblib.dump(model, path) - - -def load_model(path: str): - return joblib.load(path) diff --git a/demo3_ml_visual/model_trainer.py b/demo3_ml_visual/model_trainer.py deleted file mode 100644 index 8ec1b8d..0000000 --- a/demo3_ml_visual/model_trainer.py +++ /dev/null @@ -1,42 +0,0 @@ -from __future__ import annotations - -from typing import Dict - -from sklearn.ensemble import RandomForestClassifier -from sklearn.linear_model import LogisticRegression -from sklearn.metrics import accuracy_score, f1_score - -try: - from xgboost import XGBClassifier # type: ignore -except Exception: # pragma: no cover - XGBClassifier = None - - -class ModelTrainer: - def train(self, X_train, y_train) -> Dict[str, object]: - models: Dict[str, object] = { - "logistic_regression": LogisticRegression(max_iter=500), - "random_forest": RandomForestClassifier(n_estimators=100, random_state=42), - } - if XGBClassifier is not None: - models["xgboost"] = XGBClassifier( - n_estimators=50, - max_depth=3, - learning_rate=0.1, - eval_metric="mlogloss", - random_state=42, - ) - - for model in models.values(): - model.fit(X_train, y_train) - return models - - def evaluate(self, models, X_test, y_test): - metrics = {} - for name, model in models.items(): - preds = model.predict(X_test) - metrics[name] = { - "accuracy": float(accuracy_score(y_test, preds)), - "f1": float(f1_score(y_test, preds, average="weighted")), - } - return metrics diff --git "a/demo4_\347\273\274\345\220\210\351\241\271\347\233\256/main.py" "b/demo4_\347\273\274\345\220\210\351\241\271\347\233\256/main.py" deleted file mode 100644 index 8612b7a..0000000 --- "a/demo4_\347\273\274\345\220\210\351\241\271\347\233\256/main.py" +++ /dev/null @@ -1,75 +0,0 @@ -from __future__ import annotations - -import json -from typing import Iterator, List -from uuid import uuid4 - -from fastapi import FastAPI, HTTPException -from pydantic import BaseModel -from fastapi.responses import StreamingResponse - - -app = FastAPI(title="Demo4 Integrated App") -TASKS = {} - - -class ChatRequest(BaseModel): - message: str - - -class TaskCreateRequest(BaseModel): - title: str - priority: str - due_date: str - - -@app.get("/health") -def health(): - return {"status": "ok", "version": "0.4.0"} - - -@app.post("/chat") -def chat(payload: ChatRequest): - message = payload.message.strip() - tools_used: List[str] = [] - if "任务" in message and any(token in message for token in ["列", "list", "所有"]): - tools_used.append("list_tasks") - reply = f"当前共有 {len(TASKS)} 个任务。" - elif any(token in message for token in ["你好", "hi", "hello"]): - reply = "你好,这里是 Demo4 综合项目接口。" - else: - reply = "消息已收到,我可以处理聊天和任务相关请求。" - return {"reply": reply, "tools_used": tools_used} - - -@app.get("/tasks") -def get_tasks(): - return {"tasks": list(TASKS.values())} - - -@app.post("/tasks", status_code=201) -def create_task(payload: TaskCreateRequest): - task = payload.model_dump() - task["id"] = str(uuid4()) - TASKS[task["id"]] = task - return task - - -@app.delete("/tasks/{task_id}") -def delete_task(task_id: str): - if task_id not in TASKS: - raise HTTPException(status_code=404, detail="Task not found") - del TASKS[task_id] - return {"deleted": True} - - -def _sse_event_stream(message: str) -> Iterator[str]: - for chunk in ["demo4", "stream", message]: - payload = json.dumps({"type": "token", "content": chunk}, ensure_ascii=False) - yield f"data: {payload}\n\n" - yield "data: {\"type\": \"done\"}\n\n" - - -@app.get("/stream") -def stream(message: str): - return StreamingResponse(_sse_event_stream(message), media_type="text/event-stream") diff --git a/demo5_full_project/app/__init__.py b/demo5_full_project/app/__init__.py deleted file mode 100644 index 8b13789..0000000 --- a/demo5_full_project/app/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/demo5_full_project/app/core/__init__.py b/demo5_full_project/app/core/__init__.py deleted file mode 100644 index 8b13789..0000000 --- a/demo5_full_project/app/core/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/demo5_full_project/app/core/rag/__init__.py b/demo5_full_project/app/core/rag/__init__.py deleted file mode 100644 index 8b13789..0000000 --- a/demo5_full_project/app/core/rag/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/demo5_full_project/app/core/rag/embedder.py b/demo5_full_project/app/core/rag/embedder.py deleted file mode 100644 index b4ee5dd..0000000 --- a/demo5_full_project/app/core/rag/embedder.py +++ /dev/null @@ -1,21 +0,0 @@ -from __future__ import annotations - -import hashlib - -import numpy as np - - -class Embedder: - def __init__(self, dimension: int = 32): - self.dimension = dimension - - def encode(self, text: str) -> np.ndarray: - text = text or "" - buckets = np.zeros(self.dimension, dtype=float) - for token in text.encode("utf-8"): - buckets[token % self.dimension] += 1.0 - digest = hashlib.sha256(text.encode("utf-8")).digest() - for idx, byte in enumerate(digest[: self.dimension]): - buckets[idx] += byte / 255.0 - norm = np.linalg.norm(buckets) - return buckets if norm == 0 else buckets / norm diff --git a/demo5_full_project/app/core/rag/retriever.py b/demo5_full_project/app/core/rag/retriever.py deleted file mode 100644 index 66146da..0000000 --- a/demo5_full_project/app/core/rag/retriever.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import List - -import numpy as np - -from app.core.rag.embedder import Embedder - - -@dataclass -class DocumentChunk: - text: str - - -class Retriever: - def __init__(self) -> None: - self.embedder = Embedder() - self._documents = [ - DocumentChunk("Agent 是一种能够感知、决策并执行动作的软件实体。"), - DocumentChunk("RAG 会先检索知识,再把检索结果拼到生成提示词中。"), - DocumentChunk("SSE 适合服务端向客户端单向推送流式文本。"), - ] - - def retrieve(self, query: str, top_k: int = 3) -> List[str]: - if not self._documents: - return [] - query_vec = self.embedder.encode(query) - scored = [] - for doc in self._documents: - doc_vec = self.embedder.encode(doc.text) - score = float(np.dot(query_vec, doc_vec)) - scored.append((score, doc.text)) - scored.sort(reverse=True, key=lambda item: item[0]) - return [text for _, text in scored[: max(0, min(top_k, 5))]] diff --git a/demo5_full_project/app/ml/__init__.py b/demo5_full_project/app/ml/__init__.py deleted file mode 100644 index 8b13789..0000000 --- a/demo5_full_project/app/ml/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/demo5_full_project/app/ml/model_io.py b/demo5_full_project/app/ml/model_io.py deleted file mode 100644 index 11b195b..0000000 --- a/demo5_full_project/app/ml/model_io.py +++ /dev/null @@ -1,24 +0,0 @@ -from __future__ import annotations - -import json -from pathlib import Path - -import joblib - - -def save_model(model, path: str, metadata=None) -> None: - path_obj = Path(path) - path_obj.parent.mkdir(parents=True, exist_ok=True) - joblib.dump(model, path_obj) - meta_path = path_obj.with_suffix(path_obj.suffix + ".meta.json") - meta_path.write_text(json.dumps(metadata or {}, ensure_ascii=False, indent=2), encoding="utf-8") - - -def load_model(path: str, return_metadata: bool = False): - path_obj = Path(path) - model = joblib.load(path_obj) - meta_path = path_obj.with_suffix(path_obj.suffix + ".meta.json") - metadata = json.loads(meta_path.read_text(encoding="utf-8")) if meta_path.exists() else {} - if return_metadata: - return model, metadata - return model diff --git a/demo5_full_project/app/ml/tuner.py b/demo5_full_project/app/ml/tuner.py deleted file mode 100644 index 3a71c66..0000000 --- a/demo5_full_project/app/ml/tuner.py +++ /dev/null @@ -1,26 +0,0 @@ -from __future__ import annotations - -from sklearn.ensemble import RandomForestClassifier -from sklearn.model_selection import cross_val_score - - -def tune_model(X_train, y_train, n_trials: int = 5): - candidate_depths = [2, 3, 4, 5, None] - candidate_estimators = [20, 50, 100, 150, 200] - best_model = None - best_score = -1.0 - - for idx in range(max(1, n_trials)): - model = RandomForestClassifier( - n_estimators=candidate_estimators[idx % len(candidate_estimators)], - max_depth=candidate_depths[idx % len(candidate_depths)], - random_state=42 + idx, - ) - score = float(cross_val_score(model, X_train, y_train, cv=3).mean()) - if score > best_score: - best_score = score - best_model = model - - assert best_model is not None - best_model.fit(X_train, y_train) - return best_model, best_score diff --git a/demo5_full_project/main.py b/demo5_full_project/main.py deleted file mode 100644 index 1918571..0000000 --- a/demo5_full_project/main.py +++ /dev/null @@ -1,75 +0,0 @@ -from __future__ import annotations - -import json -from typing import Iterator, List -from uuid import uuid4 - -from fastapi import FastAPI -from fastapi.responses import StreamingResponse -from pydantic import BaseModel - - -app = FastAPI(title="Demo5 Full Project") -TASKS = {} - - -class ChatRequest(BaseModel): - message: str - - -class TaskCreateRequest(BaseModel): - title: str - priority: str - due_date: str - - -@app.get("/health") -def health(): - return {"status": "ok", "version": "1.0.0"} - - -@app.post("/chat") -def chat(payload: ChatRequest): - message = payload.message.strip() - tools_used: List[str] = [] - if "任务" in message and any(token in message for token in ["列", "list", "所有"]): - tools_used.append("list_tasks") - reply = f"当前共有 {len(TASKS)} 个任务。" - else: - reply = "Demo5 已收到请求,支持任务、流式输出和检索模块演示。" - return {"reply": reply, "tools_used": tools_used} - - -@app.get("/tasks") -def get_tasks(): - return {"tasks": list(TASKS.values())} - - -@app.post("/tasks", status_code=201) -def create_task(payload: TaskCreateRequest): - task = payload.model_dump() - task["id"] = str(uuid4()) - TASKS[task["id"]] = task - return task - - -@app.delete("/tasks/{task_id}") -def delete_task(task_id: str): - if task_id not in TASKS: - from fastapi import HTTPException - - raise HTTPException(status_code=404, detail="Task not found") - del TASKS[task_id] - return {"deleted": True} - - -def _sse_event_stream(message: str) -> Iterator[str]: - for chunk in ["收到消息", "正在处理", message]: - payload = json.dumps({"type": "token", "content": chunk}, ensure_ascii=False) - yield f"data: {payload}\n\n" - yield "data: {\"type\": \"done\"}\n\n" - - -@app.get("/stream") -def stream(message: str): - return StreamingResponse(_sse_event_stream(message), media_type="text/event-stream")