# Use MLX (Apple Silicon)
python main.py --backend mlx
# Use PyTorch
python main.py --backend pytorch
# Use the 7B model
python main.py --model Qwen/Qwen2.5-7B-Instruct
# Use an MLX quantized model
python main.py --model mlx-community/Qwen2.5-1.5B-Instruct-4bit
from src .inference .pytorch_inference import PyTorchInference
# Create the inference engine
inference = PyTorchInference(model_id="Qwen/Qwen2.5-1.5B-Instruct")

# Single-turn conversation: pass one prompt to generate() and print the result
response = inference.generate("你好,请介绍一下你自己")
print(response)
from src .inference .pytorch_inference import PyTorchInference
inference = PyTorchInference()

# Build the conversation history
messages = [
    {"role": "system", "content": "你是一个Python编程助手"},
    {"role": "user", "content": "如何读取文件?"},
]

# Get the reply
response = inference.chat(messages)
print(f"AI: { response } ")

# Continue the conversation: record the assistant's reply in the history,
# followed by the next user turn, then call chat() again on the full list.
messages.extend(
    [
        {"role": "assistant", "content": response},
        {"role": "user", "content": "能给个示例吗?"},
    ]
)
response = inference.chat(messages)
print(f"AI: { response } ")
# 示例 3: 使用 MLX (Apple Silicon)
from src .inference .mlx_inference import MLXInference
# Create the MLX inference engine
inference = MLXInference(model_id="mlx-community/Qwen2.5-1.5B-Instruct-4bit")

# Streaming output
messages = [{"role": "user", "content": "写一首关于秋天的诗"}]

# Print the prefix without a trailing newline and flush it right away,
# before chat() is called with stream=True.
print("AI: ", end="", flush=True)
response = inference.chat(messages, stream=True)
from src .inference .pytorch_inference import PyTorchInference
# Construct the engine with explicit model, dtype, device and sampling options
inference = PyTorchInference(
    model_id="Qwen/Qwen2.5-3B-Instruct",
    torch_dtype="float16",
    device="mps",  # or "cuda" / "cpu"
    max_new_tokens=1024,
    temperature=0.8,
    top_p=0.9,
)

# Generate creative content; the keyword arguments are passed per call
response = inference.generate(
    "写一个科幻故事的开头",
    temperature=0.9,  # higher randomness
    max_new_tokens=512,
)
print(response)
from src .utils .config_manager import ConfigManager
from src .inference .pytorch_inference import PyTorchInference
# Load the configuration
config = ConfigManager()

# Fetch the model configuration (indexed by key below)
model_config = config.get_model_config()

# Create the inference engine from the configured values
inference = PyTorchInference(
    model_id=model_config["default_model"],
    torch_dtype=model_config["torch_dtype"],
    max_new_tokens=model_config["max_new_tokens"],
    temperature=model_config["temperature"],
    top_p=model_config["top_p"],
)

# Usage
response = inference.generate("你好")
print(response)
from src .inference .pytorch_inference import PyTorchInference
inference = PyTorchInference()

# Prompts to process; each one is passed to generate() on its own.
prompts = [
    "Python 是什么?",
    "机器学习的应用有哪些?",
    "如何学习编程?",
]

# enumerate(..., 1) numbers the questions starting from 1 for display.
for i, prompt in enumerate(prompts, 1):
    print(f"\n 问题 { i } : { prompt } ")
    response = inference.generate(prompt)
    print(f"回答: { response } ")
from src .inference .pytorch_inference import PyTorchInference
# Create the code-generation assistant
code_assistant = PyTorchInference(
    model_id="Qwen/Qwen2.5-7B-Instruct",
    temperature=0.2,  # low temperature for more deterministic output
    max_new_tokens=2048,
)

# Conversation: a system prompt followed by the user's request
messages = [
    {
        "role": "system",
        "content": "你是一个专业的Python编程助手。请提供清晰、可运行的代码。",
    },
    {
        "role": "user",
        "content": "写一个函数,实现快速排序算法",
    },
]

response = code_assistant.chat(messages)
print(response)
from src .inference .pytorch_inference import PyTorchInference
# Shared engine used by translate() below.
translator = PyTorchInference(temperature=0.3)


def translate(text: str, target_lang: str = "English"):
    """Translate ``text`` into ``target_lang`` via the module-level ``translator``.

    Args:
        text: The text to translate; sent unchanged as the user message.
        target_lang: Name of the target language, interpolated into the
            system prompt. Defaults to ``"English"``.

    Returns:
        Whatever ``translator.chat`` returns for the two-message conversation.
    """
    messages = [
        {
            "role": "system",
            "content": f"你是一个专业的翻译助手,请将用户输入翻译成{ target_lang } ",
        },
        {"role": "user", "content": text},
    ]
    return translator.chat(messages)


# Usage
chinese_text = "人工智能正在改变世界"
english_translation = translate(chinese_text, "English")
print(f"原文: { chinese_text } ")
print(f"译文: { english_translation } ")
from src .inference .pytorch_inference import PyTorchInference
# Engine for summarization, constructed with max_new_tokens=256
summarizer = PyTorchInference(max_new_tokens=256)

# Placeholder for the document to summarize
long_text = """
[长文本内容...]
"""

# Conversation: a system prompt plus a user message that embeds long_text
messages = [
    {
        "role": "system",
        "content": "你是一个文本摘要助手。请提取关键信息,生成简洁的摘要。",
    },
    {
        "role": "user",
        "content": f"请总结以下内容:\n \n { long_text } ",
    },
]

summary = summarizer.chat(messages)
print(f"摘要: { summary } ")
from src .inference .mlx_inference import MLXInference
# MLX engine configured for creative writing
writer = MLXInference(
    temperature=0.9,  # high temperature, more creative
    top_p=0.95,
    max_tokens=1024,
)

# Conversation: a system prompt followed by the story request
messages = [
    {
        "role": "system",
        "content": "你是一个富有创意的作家,擅长写各种类型的故事。",
    },
    {
        "role": "user",
        "content": "写一个关于时间旅行的科幻短故事,500字左右。",
    },
]

# Print a progress notice, then request the story with stream=True
print("正在创作中...\n ")
story = writer.chat(messages, stream=True)
# Run main.py with the --show-config flag
python main.py --show-config
# Pass a custom YAML file through --config
python main.py --config /path/to/custom_config.yaml
# Combine --backend, --model and --config in a single invocation
python main.py \
--backend mlx \
--model mlx-community/Qwen2.5-7B-Instruct-4bit \
--config config/creative_writing.yaml