# SWE-Smith + GEPA gskill gym targets
# Run from swe-gym/ directory or via root Makefile (make swe-gym-*)

.PHONY: help setup build-env build-env-force generate-tasks generate-tasks-from validate-tasks \
	train smoke-test smoke-test-demo resume evaluate evaluate-claude deploy-skills \
	gen-trajectories prepare-sft train-sft train-sft-full train-sft-32b train-dpo \
	train-sft-unsloth train-sft-unsloth-4bit train-sft-unsloth-full train-sft-unsloth-32b \
	train-sft-unsloth-gptoss train-sft-unsloth-gptoss-120b train-sft-nemotron \
	train-sft-qwen3-coder train-sft-nemotron-super train-dpo-unsloth train-cpt-unsloth \
	train-recipe train-sft-multigpu train-sft-multigpu-4bit train-grpo-multigpu \
	train-sft-split train-grpo-split train-grpo train-grpo-test train-grpo-fast \
	train-grpo-nemotron train-grpo-qwen3-coder train-grpo-nemotron-super \
	train-grpo-gptoss-120b hpo hpo-thorough train-sft-fp8 train-grpo-fp8 \
	export-gguf clean clean-containers

# Load repo config (REPO_OWNER, REPO_NAME, REPO_KEY, etc.)
include repo.conf
export REPO_OWNER REPO_NAME REPO_KEY
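# Illustrative shape of repo.conf (hypothetical values; the real ones live in that file):
#   REPO_OWNER = acme
#   REPO_NAME  = widgets
#   REPO_KEY   = acme__widgets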
# Override HF_HOME so the project .env (HF_HOME=/models/hf) doesn't interfere
export HF_HOME := $(HOME)/.cache/huggingface
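# Variables set on the make command line still take precedence over this
# assignment, so a one-off cache location can be passed per invocation, e.g.:
#   make train HF_HOME=/tmp/hf-cache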

help:
	@echo "SWE-Smith RL Gym → targeting $(REPO_OWNER)/$(REPO_NAME)"
	@echo ""
	@echo "Setup:"
	@echo "  make setup              Install swesmith + gepa[gskill] + deps"
	@echo ""
	@echo "Pipeline (run in order):"
	@echo "  make build-env          Step 1: Build Docker execution environment"
	@echo "  make generate-tasks     Step 2: Synthesize bug tasks from commits"
	@echo "  make validate-tasks     Step 3: Filter tasks via test harness"
	@echo "  make train              Step 4: GEPA gskill optimization loop"
	@echo "  make evaluate           Step 5: Evaluate with/without learned skills"
	@echo ""
	@echo "SFT Pipeline (after step 3):"
	@echo "  make gen-trajectories   Step 6: Generate expert solve traces"
	@echo "  make prepare-sft        Step 7: Evaluate & convert to JSONL"
	@echo "  make train-sft          Step 8: torchtune fine-tune (legacy)"
	@echo "  make train-sft-unsloth  Step 8: Unsloth LoRA fine-tune (recommended)"
	@echo "  make train-dpo-unsloth  DPO alignment (after SFT)"
	@echo "  make train-cpt-unsloth  Continued pretraining (domain adaptation)"
	@echo ""
	@echo "GRPO RL (after step 3):"
	@echo "  make train-grpo         Step 9: GRPO RL with gym tasks as reward"
	@echo "  make train-grpo-test    Full Docker test suite reward"
	@echo "  make train-grpo-fast    Format-only reward (fast iteration)"
	@echo ""
	@echo "Multi-GPU:"
	@echo "  make train-sft-multigpu   DDP LoRA (2 GPUs)"
	@echo "  make train-grpo-multigpu  DDP GRPO (2 GPUs)"
	@echo ""
	@echo "Shortcuts:"
	@echo "  make smoke-test         Quick 3-task validation run"
	@echo "  make deploy-skills      Copy best_skills.txt to .claude/skills/"
	@echo "  make train-recipe RECIPE=configs/unsloth/xxx.yaml"
	@echo "  make clean              Remove generated logs and results"

setup:
	uv pip install -e ".[dev]"
	@echo ""
	@echo "Verify Docker: docker ps"
	@echo "Set OPENAI_API_KEY before running pipeline."

build-env:
	./scripts/01_build_env.sh

build-env-force:
	./scripts/01_build_env.sh --force

generate-tasks:
	./scripts/02_generate_tasks.sh

generate-tasks-from:
	@test -n "$(FROM)" || (echo "Usage: make generate-tasks-from FROM=2d"; exit 1)
	./scripts/02_generate_tasks.sh --from $(FROM)

validate-tasks:
	./scripts/03_validate_tasks.sh

train:
	./scripts/04_train_skills.sh

smoke-test:
	./scripts/04_train_skills.sh --smoke-test

# Smoke test using a repo already in SWE-smith (no local task gen needed)
smoke-test-demo:
	uv run python -m gepa.gskill.gskill.train_optimize_anything \
		--repo pygments__pygments --smoke-test --model gpt-4o-mini \
		--reflection-model gpt-4o --workers 2
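
# The resume/evaluate/deploy targets below all locate the most recent run by
# modification time: ls -td gepa_results/logs/run_* | head -1.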
resume:
	@RUN_DIR=$$(ls -td gepa_results/logs/run_* 2>/dev/null | head -1); \
	if [ -z "$$RUN_DIR" ]; then echo "No run found to resume."; exit 1; fi; \
	echo "Resuming from $$RUN_DIR"; \
	./scripts/04_train_skills.sh --resume "$$RUN_DIR"

evaluate:
	@RUN_DIR=$$(ls -td gepa_results/logs/run_* 2>/dev/null | head -1); \
	if [ -z "$$RUN_DIR" ]; then echo "No training run found. Run 'make train' first."; exit 1; fi; \
	./scripts/05_evaluate.sh "$$RUN_DIR"

evaluate-claude:
	@RUN_DIR=$$(ls -td gepa_results/logs/run_* 2>/dev/null | head -1); \
	if [ -z "$$RUN_DIR" ]; then echo "No training run found. Run 'make train' first."; exit 1; fi; \
	./scripts/05_evaluate.sh "$$RUN_DIR" claude haiku 4

deploy-skills:
	@RUN_DIR=$$(ls -td gepa_results/logs/run_* 2>/dev/null | head -1); \
	if [ -z "$$RUN_DIR" ]; then echo "No training run found."; exit 1; fi; \
	if [ ! -f "$$RUN_DIR/prompts/best_skills.txt" ]; then echo "No best_skills.txt found in $$RUN_DIR"; exit 1; fi; \
	mkdir -p ../.claude/skills/$(REPO_NAME); \
	cp "$$RUN_DIR/prompts/best_skills.txt" ../.claude/skills/$(REPO_NAME)/SKILL.md; \
	echo "Deployed skills to .claude/skills/$(REPO_NAME)/SKILL.md"

gen-trajectories:
	./scripts/06_gen_trajectories.sh

prepare-sft:
	./scripts/07_prepare_sft.sh

# ── Legacy torchtune training (configs in configs/torchtune/) ──
train-sft:
	./scripts/08_train_sft.sh

train-sft-full:
	./scripts/08_train_sft.sh --full

train-sft-32b:
	./scripts/08_train_sft.sh --32b --gpus 2

train-dpo:
	./scripts/08_train_sft.sh --dpo

# ── Unsloth training (recommended) ──────────────────────────
# Default: Qwen3-8B LoRA from repo.conf SFT_BASE_MODEL
train-sft-unsloth:
	uv run python scripts/08_train_sft_unsloth.py --recipe configs/unsloth/qwen3_8b_lora.yaml

train-sft-unsloth-4bit:
	uv run python scripts/08_train_sft_unsloth.py --recipe configs/unsloth/qwen3_8b_lora.yaml --four-bit

train-sft-unsloth-full:
	uv run python scripts/08_train_sft_unsloth.py --full

train-sft-unsloth-32b:
	uv run python scripts/08_train_sft_unsloth.py --recipe configs/unsloth/qwen3_32b_lora.yaml

train-sft-unsloth-gptoss:
	uv run python scripts/08_train_sft_unsloth.py --recipe configs/unsloth/gpt_oss_20b_lora.yaml

train-sft-unsloth-gptoss-120b:
	uv run python scripts/08_train_sft_unsloth.py --recipe configs/unsloth/gpt_oss_120b_lora.yaml --device-map balanced

# New model recipes
train-sft-nemotron:
	uv run python scripts/08_train_sft_unsloth.py --recipe configs/unsloth/nemotron3_nano_lora.yaml

train-sft-qwen3-coder:
	uv run python scripts/08_train_sft_unsloth.py --recipe configs/unsloth/qwen3_coder_next_lora.yaml --device-map balanced

train-sft-nemotron-super:
	uv run python scripts/08_train_sft_unsloth.py --recipe configs/unsloth/nemotron3_super_lora.yaml --device-map balanced

# DPO and CPT
train-dpo-unsloth:
	uv run python scripts/08_train_sft_unsloth.py --dpo --sft-checkpoint ./sft_output/

train-cpt-unsloth:
	uv run python scripts/08_train_sft_unsloth.py --cpt --data-dir ./corpus/

# Custom recipe: make train-recipe RECIPE=configs/unsloth/my_recipe.yaml
train-recipe:
	@test -n "$(RECIPE)" || (echo "Usage: make train-recipe RECIPE=configs/unsloth/xxx.yaml"; exit 1)
	uv run python scripts/08_train_sft_unsloth.py --recipe $(RECIPE)

# ── Multi-GPU training (DDP via torchrun) ───────────────────
# Throughput scales roughly linearly with GPU count; each GPU gets distinct samples.
NGPUS ?= 2
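# NGPUS defaults to 2 (?= assigns only if unset) and can be overridden per run:
#   make train-sft-multigpu NGPUS=4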

train-sft-multigpu:
	torchrun --nproc_per_node=$(NGPUS) scripts/08_train_sft_unsloth.py --recipe configs/unsloth/qwen3_8b_lora.yaml

train-sft-multigpu-4bit:
	torchrun --nproc_per_node=$(NGPUS) scripts/08_train_sft_unsloth.py --recipe configs/unsloth/qwen3_8b_lora.yaml --four-bit

train-grpo-multigpu:
	torchrun --nproc_per_node=$(NGPUS) scripts/09_train_grpo.py --recipe configs/unsloth/grpo_qwen3_8b.yaml

# Model splitting (pipeline parallelism, for models that don't fit on one GPU)
train-sft-split:
	uv run python scripts/08_train_sft_unsloth.py --recipe configs/unsloth/qwen3_coder_next_lora.yaml --device-map balanced

train-grpo-split:
	uv run python scripts/09_train_grpo.py --recipe configs/unsloth/grpo_qwen3_coder_next.yaml --device-map balanced
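
# Assumption: --device-map balanced is forwarded to Hugging Face's
# device_map="balanced" placement policy, which splits layers evenly
# across all visible GPUs rather than replicating the model (as DDP does).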
# ── GRPO RL training (step 9) ───────────────────────────────
train-grpo:
	uv run python scripts/09_train_grpo.py --recipe configs/unsloth/grpo_qwen3_8b.yaml

train-grpo-test:
	uv run python scripts/09_train_grpo.py --recipe configs/unsloth/grpo_qwen3_8b.yaml --reward-mode test

train-grpo-fast:
	uv run python scripts/09_train_grpo.py --recipe configs/unsloth/grpo_qwen3_4b.yaml --reward-mode format --steps 50

train-grpo-nemotron:
	uv run python scripts/09_train_grpo.py --recipe configs/unsloth/grpo_nemotron3_nano.yaml

train-grpo-qwen3-coder:
	uv run python scripts/09_train_grpo.py --recipe configs/unsloth/grpo_qwen3_coder_next.yaml --device-map balanced

train-grpo-nemotron-super:
	uv run python scripts/09_train_grpo.py --recipe configs/unsloth/grpo_nemotron3_super.yaml --device-map balanced

train-grpo-gptoss-120b:
	uv run python scripts/09_train_grpo.py --recipe configs/unsloth/grpo_gpt_oss_120b.yaml --device-map balanced

# ── Hyperparameter optimization ──────────────────────────────
hpo:
	uv run python scripts/08b_hpo.py --recipe configs/unsloth/qwen3_8b_lora.yaml --four-bit

hpo-thorough:
	uv run python scripts/08b_hpo.py --recipe configs/unsloth/qwen3_8b_lora.yaml --four-bit --n-trials 30 --steps-per-trial 100

# ── FP8 training (RTX 40/50, H100+) ──────────────────────────
train-sft-fp8:
	uv run python scripts/08_train_sft_unsloth.py --recipe configs/unsloth/qwen3_8b_lora.yaml --fp8

train-grpo-fp8:
	uv run python scripts/09_train_grpo.py --recipe configs/unsloth/grpo_qwen3_8b.yaml --fp8 --vllm-standby

# ── Export ────────────────────────────────────────────────────
# Save to GGUF for llama.cpp / Ollama deployment
export-gguf:
	@test -d "./sft_output/final" || (echo "No model found. Run training first."; exit 1)
	uv run python scripts/08_train_sft_unsloth.py --recipe configs/unsloth/qwen3_8b_lora.yaml --save-gguf q4_k_m
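
# q4_k_m is llama.cpp's 4-bit "k-quant medium" format (good size/quality
# tradeoff); other llama.cpp quant names such as q8_0 or q5_k_m may also
# work here, assuming the script forwards the value unchanged.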
clean:
	rm -rf logs/ gepa_results/ sft_output/ grpo_output/ cpt_output/
	@echo "Cleaned generated logs and results."

# Remove stale swesmith Docker containers (from interrupted validation runs)
clean-containers:
	@echo "Removing stale swesmith containers..."
	@docker rm -f $$(docker ps -aq --filter "name=swesmith") 2>/dev/null || true
	@echo "Done."