-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: setup.sh
More file actions
135 lines (114 loc) · 4.08 KB
/
setup.sh
File metadata and controls
135 lines (114 loc) · 4.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/bin/bash
# =============================================================================
# setup.sh - Native Setup Script for Llama.cpp on AWS EC2
# Deploys OpenAI GPT-OSS-20B via llama.cpp server
# =============================================================================
# Strict mode: abort on command failure (-e), on use of unset variables (-u),
# and when any stage of a pipeline fails (-o pipefail).
set -euo pipefail

# ANSI color codes used by the logging helpers below (NC = reset/no color).
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'

# Logging helpers: success, warning, error (stderr), info, section header.
print_status()  { echo -e "${GREEN}[✓]${NC} $1"; }
print_warning() { echo -e "${YELLOW}[!]${NC} $1"; }
print_error()   { echo -e "${RED}[✗]${NC} $1" >&2; }
print_info()    { echo -e "${BLUE}[i]${NC} $1"; }
print_header()  { echo -e "\n${BLUE}=== $1 ===${NC}\n"; }
# Privilege guard: the installer needs root (directly or via sudo).
# Returns 0 when we are already root or a sudo session can be established.
has_privileges() {
  [ "$EUID" -eq 0 ] || sudo -v &>/dev/null
}

if ! has_privileges; then
  print_error "This script requires sudo access"
  exit 1
fi
print_header "AWS EC2 Native Llama.cpp Deployment (GPT-OSS-20B)"

# -----------------------------------------------------------------------------
# Step 1: Install build dependencies and detect GPU/CUDA availability.
# A CPU-only build is allowed, but only after explicit user confirmation.
# -----------------------------------------------------------------------------
print_header "Step 1: Installing Dependencies"
sudo apt-get update
sudo apt-get install -y build-essential cmake git curl wget make g++

PROCEED_WITHOUT_CUDA=false
if command -v nvcc &> /dev/null; then
  # Toolkit already present: report its release string.
  print_status "CUDA Toolkit found $(nvcc --version | grep release | awk '{print $5,$6}')"
else
  print_warning "CUDA Toolkit (nvcc) not found."
  if command -v nvidia-smi &> /dev/null; then
    # Driver is installed but the toolkit is missing: install it.
    print_info "NVIDIA Driver detected. Installing CUDA Toolkit..."
    sudo apt-get install -y nvidia-cuda-toolkit
  else
    # No GPU at all: confirm the user really wants a CPU-only build.
    print_warning "No NVIDIA GPU detected (nvidia-smi failed)."
    print_warning "Proceeding will build for CPU only (slow!)."
    read -p "Continue without GPU support? (y/N): " -n 1 -r
    echo
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
    PROCEED_WITHOUT_CUDA=true
  fi
fi
# -----------------------------------------------------------------------------
# Step 2: Clone (or update) and build llama.cpp.
# Upstream removed the legacy Makefile build in favor of CMake, so
# `make GGML_CUDA=1` fails on current checkouts. We build with CMake and copy
# the server binary back to the repo root so the historical ./llama-server
# location (used below and possibly by the systemd unit) keeps working.
# -----------------------------------------------------------------------------
print_header "Step 2: Building llama.cpp"
INSTALL_DIR="/home/ubuntu/llama.cpp"
if [ -d "$INSTALL_DIR" ]; then
  print_info "llama.cpp directory exists. updating..."
  cd "$INSTALL_DIR"
  git pull
else
  git clone https://github.com/ggerganov/llama.cpp "$INSTALL_DIR"
  cd "$INSTALL_DIR"
fi

print_info "Compiling llama.cpp..."
if [ "$PROCEED_WITHOUT_CUDA" = true ]; then
  cmake -B build
else
  cmake -B build -DGGML_CUDA=ON
fi
cmake --build build --config Release -j "$(nproc)"

# Preserve the legacy binary location expected by the rest of this script.
if [ -f "build/bin/llama-server" ]; then
  cp -f build/bin/llama-server ./llama-server
fi

if [ -f "./llama-server" ]; then
  print_status "Build successful: $(./llama-server --version | head -n 1)"
else
  print_error "Build failed. llama-server binary not found."
  exit 1
fi
# -----------------------------------------------------------------------------
# Step 3: Verify the GGUF model file exists; create /models if missing and
# print download instructions when the model is absent.
# -----------------------------------------------------------------------------
print_header "Step 3: Model Verification"
MODEL_DIR="/models"
GPT_OSS_MODEL="$MODEL_DIR/gpt-oss-20b.gguf"

if [ ! -d "$MODEL_DIR" ]; then
  print_warning "$MODEL_DIR does not exist. Creating..."
  sudo mkdir -p "$MODEL_DIR"
  sudo chown -R ubuntu:ubuntu "$MODEL_DIR"
fi

if [ -f "$GPT_OSS_MODEL" ]; then
  print_status "Model found."
else
  # Not fatal: the service simply won't start until the model is in place.
  print_warning "Model missing: $GPT_OSS_MODEL"
  echo "Expected:"
  echo " - $GPT_OSS_MODEL"
  echo
  echo "Download with:"
  echo " wget -O $GPT_OSS_MODEL https://huggingface.co/ggml-org/gpt-oss-20b-GGUF/resolve/main/gpt-oss-20b-mxfp4.gguf"
  echo
  echo "Please ensure the model is downloaded or moved to $MODEL_DIR."
fi
# -----------------------------------------------------------------------------
# Step 4: Install the environment file and the systemd unit.
# -----------------------------------------------------------------------------
print_header "Step 4: Configure Services"

# Directory containing this script (assumed to be the repo root).
REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# The env file carries the API key, so lock permissions down to 0600.
ENV_FILE="/etc/default/llama-cpp"
sudo cp "$REPO_DIR/config/llama.env" "$ENV_FILE"
sudo chmod 600 "$ENV_FILE"
print_status "Config installed to /etc/default/llama-cpp"

# Register the unit with systemd and enable it at boot.
sudo cp "$REPO_DIR/llama-gpt-oss.service" /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable llama-gpt-oss
print_status "Systemd service enabled."
# -----------------------------------------------------------------------------
# Step 5: Optionally start the service, then print connection details.
# -----------------------------------------------------------------------------
print_header "Step 5: Start Service"
read -p "Start service now? (Y/n): " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Nn]$ ]]; then
  sudo systemctl restart llama-gpt-oss
  print_info "Service started. Check status with: sudo systemctl status llama-gpt-oss"
else
  print_info "Skipping start. Run 'sudo systemctl start llama-gpt-oss' manually."
fi

print_header "Setup Complete!"
# Public-IP lookup is best-effort: bound it with a timeout and fall back to a
# placeholder so the summary never hangs or prints an empty host.
PUBLIC_IP="$(curl -s --max-time 5 ifconfig.me || echo "<EC2-PUBLIC-IP>")"
echo "GPT-OSS-20B API: http://${PUBLIC_IP}:8080/v1/chat/completions"
# Anchor to the exact key name and keep everything after the FIRST '=' so
# keys containing '=' survive (unanchored 'grep API_KEY | cut -f2' matched
# unrelated lines and truncated the value). The file is root-owned 0600
# (chmod'd during install), so reading it requires sudo.
echo "API Key: $(sudo grep -m1 '^API_KEY=' /etc/default/llama-cpp | cut -d= -f2-)"