-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathMakefile
More file actions
131 lines (108 loc) · 4.24 KB
/
Makefile
File metadata and controls
131 lines (108 loc) · 4.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# ==============================================================================
# Foundry Makefile
# ==============================================================================
# Container registry prefix for all model images (override for a fork/mirror).
REGISTRY ?= ghcr.io/infernet-org/foundry
# Default model (can be overridden: make run MODEL=hermes-4.3-36b)
MODEL ?= qwen3.5-9b
# Full image name; recursive (=?) on purpose so it tracks a MODEL override.
MODEL_TAG ?= $(REGISTRY)/$(MODEL)
# Host port mapped to the container's fixed internal port 8080.
PORT ?= 8080
# Host directory bind-mounted at /models for downloaded GGUF files.
MODELS_DIR ?= $(HOME)/.cache/foundry
.PHONY: help build run run-profile test benchmark monitoring down push push-all clean clean-models download
# Self-documenting help: grep pulls every `target: ## description` line out of
# this Makefile, sort orders them by target name, awk colorizes and aligns.
# NOTE: output order depends on sorting the raw `target:...## desc` lines —
# keep the sort before the awk formatting step.
help: ## Show this help
	@echo "Available models: qwen3.5-9b (default), qwen3-coder-30b-a3b, hermes-4.3-36b, qwen3.5-35b-a3b"
	@echo "Usage: make run MODEL=qwen3-coder-30b-a3b"
	@echo ""
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | \
	awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
# --- Build -------------------------------------------------------------------
# Stage the shared entrypoint into the model's build context, then build the
# image tagged $(MODEL_TAG):latest from that per-model directory.
build: ## Build the model image
	@cp scripts/entrypoint.sh models/$(MODEL)/entrypoint.sh
	docker build -t $(MODEL_TAG):latest models/$(MODEL)/
# --- Run ---------------------------------------------------------------------
# Start the stack in the background; FOUNDRY_MODEL tells compose which model
# service/image to bring up. (Fix: "detatched" typo in help text.)
up: ## Start via docker compose (detached)
	FOUNDRY_MODEL=$(MODEL) docker compose up -d
# Same as `up`, plus the services behind the compose "monitoring" profile.
monitoring: ## Start via docker compose with full monitoring stack
	FOUNDRY_MODEL=$(MODEL) docker compose --profile monitoring up -d
# Tear everything down; --profile monitoring ensures monitoring containers
# are stopped too even when they were started by `make monitoring`.
down: ## Stop all docker compose services
	docker compose --profile monitoring down
# Run the image directly (bypassing compose), removing the container on exit.
# The sysctls/ulimits mirror the compose defaults for high-connection serving.
run: ## Run the model container directly (auto-detect GPU)
	@mkdir -p $(MODELS_DIR)
	docker run --rm \
		--name foundry-$(MODEL) \
		--gpus all \
		--shm-size 2g \
		--ulimit memlock=-1:-1 \
		--ulimit stack=67108864:67108864 \
		--sysctl net.core.somaxconn=4096 \
		--sysctl net.ipv4.tcp_keepalive_time=60 \
		-p $(PORT):8080 \
		-v $(MODELS_DIR):/models \
		$(MODEL_TAG):latest
# Like `run`, but pins a hardware profile via FOUNDRY_PROFILE.
# Fix: previously an unset PROFILE silently passed `-e FOUNDRY_PROFILE=`
# (empty) into the container; now we fail fast with a usage hint.
run-profile: ## Run with explicit profile (PROFILE=rtx5090)
	@test -n "$(strip $(PROFILE))" || { echo "PROFILE is required, e.g.: make run-profile PROFILE=rtx5090"; exit 1; }
	@mkdir -p $(MODELS_DIR)
	docker run --gpus all \
		--shm-size 2g \
		--ulimit memlock=-1:-1 \
		--ulimit stack=67108864:67108864 \
		--sysctl net.core.somaxconn=4096 \
		--sysctl net.ipv4.tcp_keepalive_time=60 \
		-p $(PORT):8080 \
		-v $(MODELS_DIR):/models \
		-e FOUNDRY_PROFILE=$(PROFILE) \
		--name foundry-$(MODEL) \
		--rm \
		$(MODEL_TAG):latest
# --- Test --------------------------------------------------------------------
# Smoke test lifecycle: (1) remove any stale test container from a previous
# failed run, (2) start detached, (3) poll /health up to 60s, (4) send one
# chat completion and pretty-print it, (5) clean up. On any failure after the
# container starts, dump its logs, remove it, and exit non-zero.
# Fixes: stale-container collision on --name, and a leaked container + silent
# pass when the test request itself failed (curl now uses -f).
test: ## Smoke test: start container, wait for health, send one request
	@echo "Starting container..."
	@mkdir -p $(MODELS_DIR)
	@docker rm -f foundry-test-$(MODEL) >/dev/null 2>&1 || true
	@docker run --gpus all -d \
		--shm-size 2g \
		--ulimit memlock=-1:-1 \
		--ulimit stack=67108864:67108864 \
		--sysctl net.core.somaxconn=4096 \
		--sysctl net.ipv4.tcp_keepalive_time=60 \
		-p $(PORT):8080 \
		-v $(MODELS_DIR):/models \
		--name foundry-test-$(MODEL) \
		$(MODEL_TAG):latest
	@echo "Waiting for server to be ready..."
	@for i in $$(seq 1 60); do \
		if curl -sf http://localhost:$(PORT)/health > /dev/null 2>&1; then \
			echo "Server ready after $$i seconds"; \
			break; \
		fi; \
		if [ $$i -eq 60 ]; then \
			echo "Timeout waiting for server"; \
			docker logs foundry-test-$(MODEL); \
			docker rm -f foundry-test-$(MODEL); \
			exit 1; \
		fi; \
		sleep 1; \
	done
	@echo "Sending test request..."
	@if ! curl -sf http://localhost:$(PORT)/v1/chat/completions \
		-H "Content-Type: application/json" \
		-d '{"model":"$(MODEL)","messages":[{"role":"user","content":"Say hello in one sentence."}],"max_tokens":64}' \
		| python3 -m json.tool; then \
		echo "Test request failed"; \
		docker logs foundry-test-$(MODEL); \
		docker rm -f foundry-test-$(MODEL); \
		exit 1; \
	fi
	@echo ""
	@echo "Test passed. Cleaning up..."
	@docker rm -f foundry-test-$(MODEL)
# --- Download ----------------------------------------------------------------
# Delegates to the download script; fetched file lands under $(MODELS_DIR)
# (presumably — the script's output path is not visible here; confirm).
download: ## Download the GGUF model file
	./scripts/download-model.sh --model $(MODEL)
# --- Benchmark ---------------------------------------------------------------
# Requires a server already listening on $(PORT) (e.g. via `make run` or `up`).
benchmark: ## Run benchmark against a running server (PORT=8080)
	python3 scripts/benchmark.py --url http://localhost:$(PORT) --mode all
# --- Push --------------------------------------------------------------------
# Push only the :latest tag for the current MODEL.
push: ## Push model image to GHCR
	docker push $(MODEL_TAG):latest
# Push every local tag of the current MODEL's image.
push-all: ## Push all tags to GHCR
	docker push --all-tags $(MODEL_TAG)
# --- Clean -------------------------------------------------------------------
# Leading `-` tolerates the image not existing (rmi failing is not an error).
clean: ## Remove local images
	-docker rmi $(MODEL_TAG):latest
# Delete downloaded GGUF files from the host cache.
# Fix: guard against an empty/whitespace MODELS_DIR, which would expand the
# recipe to `rm -rf "/"*.gguf` and delete matching files at the filesystem root.
clean-models: ## Remove downloaded models
	@test -n "$(strip $(MODELS_DIR))" || { echo "MODELS_DIR is empty; refusing to delete"; exit 1; }
	rm -rf "$(MODELS_DIR)"/*.gguf