From c026c77bf9aecb44451e1db7b5427112b16b2acb Mon Sep 17 00:00:00 2001 From: jiao Date: Sun, 3 May 2026 13:49:27 +0800 Subject: [PATCH 1/2] ops: PowerShell operator scripts for AWS EC2 dev-loop Adds scripts/ops/ with four parameterized PowerShell helpers for running the gateway on a single EC2 GPU host without paying for idle time: - setup-ssh.ps1 one-time-per-laptop key bootstrap (ed25519 + transient SG :22 inbound /32 + EC2 Instance Connect for first-connect key push) - fix-and-start.ps1 start instance, disable idle alarm/cron, sed-fix systemd unit for compose v2 compat, wait for "Application startup complete.", smoke /health /ready /v1/chat/completions - restore-idle-protection re-enable alarm + cron; -StopNow to lock in savings - teardown-ssh.ps1 revoke the SG :22 rule when done Tag-based discovery (tag:application=vllm-serving + tag:environment=) means no hardcoded instance IDs/EIPs - script params override when needed. checkip.amazonaws.com response is IPv4-validated before authoring SG rules. First .ps1 files in the repo; CI is Python-only so no new lint surface. Used in production by the convilyn dev-loop, externalized here so any operator running llm-gateway on EC2 can pick them up. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 10 ++ scripts/ops/README.md | 84 +++++++++++ scripts/ops/fix-and-start.ps1 | 183 ++++++++++++++++++++++++ scripts/ops/restore-idle-protection.ps1 | 104 ++++++++++++++ scripts/ops/setup-ssh.ps1 | 178 +++++++++++++++++++++++ scripts/ops/teardown-ssh.ps1 | 62 ++++++++ 6 files changed, 621 insertions(+) create mode 100644 scripts/ops/README.md create mode 100644 scripts/ops/fix-and-start.ps1 create mode 100644 scripts/ops/restore-idle-protection.ps1 create mode 100644 scripts/ops/setup-ssh.ps1 create mode 100644 scripts/ops/teardown-ssh.ps1 diff --git a/README.md b/README.md index c1e0570..35f16de 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,16 @@ What the gateway protects against out of the box: - Access logger redacts `authorization` / `bearer` / `api_key` / `token` / `secret` / `password` `extra={…}` fields automatically. +## Ops scripts + +PowerShell helpers for running the gateway on a single AWS EC2 GPU host +with idle-shutdown cost guardrails live in +[`scripts/ops/`](scripts/ops/) — `setup-ssh.ps1`, `fix-and-start.ps1`, +`restore-idle-protection.ps1`, `teardown-ssh.ps1`. Tag-based instance +discovery (`tag:application=vllm-serving + tag:environment=`) means +zero hardcoded IDs. See [`scripts/ops/README.md`](scripts/ops/README.md) +for the operator workflow + IAM permissions list. + ## License MIT — see [`LICENSE`](LICENSE). diff --git a/scripts/ops/README.md b/scripts/ops/README.md new file mode 100644 index 0000000..2ec50e4 --- /dev/null +++ b/scripts/ops/README.md @@ -0,0 +1,84 @@ +# Operator scripts (AWS EC2, PowerShell) + +Day-to-day workflow for an `llm-gateway` instance running on a single EC2 +GPU host with idle-shutdown cost guardrails. Designed to incur near-zero +fixed cost — the box only runs while you're actively developing. + +``` +setup-ssh.ps1 (one-time per laptop) + ↓ +fix-and-start.ps1 → ssh -L 8000:... 
→ develop → restore-idle-protection.ps1 -StopNow +``` + +## Prerequisites + +- AWS CLI v2 on Windows at `C:\Program Files\Amazon\AWSCLIV2\aws.exe` +- An `llm-gateway` EC2 instance deployed via the CDK stack pattern, tagged: + - `application=vllm-serving` + - `environment=<env>` +- A CloudWatch alarm whose name contains `VLLMIdleBackstop` wired as the + idle backstop (the script disables/re-enables its actions) +- `/etc/cron.d/llm-gateway-idle-shutdown` on the instance (idle cron) +- Bearer token in Secrets Manager (the bootstrap helper reads it; not + used directly by these scripts) + +## Discovery + +All four scripts default to `-Environment dev` and discover the instance ++ EIP via tags. To run against a different env or pin explicit values: + +```powershell +.\fix-and-start.ps1 -Environment prod +.\fix-and-start.ps1 -InstanceId i-1234 -Eip 1.2.3.4 -Region us-east-1 +``` + +## Usage + +```powershell +# 1. One-time per laptop (or after teardown): generate key, open SG :22, push key +.\setup-ssh.ps1 + +# 2. Daily: start instance, fix systemd unit, start service, smoke-test +.\fix-and-start.ps1 + +# 3. Open SSH tunnel in a separate PowerShell window +ssh -i $env:USERPROFILE\.ssh\id_ed25519_vllm -L 8000:127.0.0.1:8000 -N ubuntu@<EIP> + +# 4. Use the gateway at http://127.0.0.1:8000/v1 (any OpenAI-compatible client) + +# 5. Done for the day - restore idle protection + stop instance +.\restore-idle-protection.ps1 -StopNow + +# 6. Done for a long while - revoke the SG :22 inbound rule +.\teardown-ssh.ps1 +``` + +## Notes + +- `fix-and-start.ps1` includes a `sed` patch that rewrites + `docker compose --no-color` → `docker compose --ansi never` in the + systemd unit for compatibility with Compose v2.x. Safe no-op if your + unit is already correct. +- `setup-ssh.ps1` opens port 22 only to **your current public IP /32** + (resolved via `checkip.amazonaws.com` and IPv4-validated). The rule + persists until you run `teardown-ssh.ps1`. 
+- The SSH key pair (`~/.ssh/id_ed25519_vllm[.pub]`) is generated once and
+  reused across runs. EC2 Instance Connect pushes the public key into
+  `authorized_keys` on first connect; subsequent connects are direct SSH.
+- All four scripts are idempotent — safe to re-run.
+
+## Required IAM permissions (operator)
+
+```
+ec2:DescribeInstances
+ec2:DescribeSecurityGroups
+ec2:DescribeSecurityGroupRules
+ec2:AuthorizeSecurityGroupIngress
+ec2:RevokeSecurityGroupIngress
+ec2:StartInstances
+ec2:StopInstances
+ec2-instance-connect:SendSSHPublicKey
+cloudwatch:DescribeAlarms
+cloudwatch:DisableAlarmActions
+cloudwatch:EnableAlarmActions
+```
diff --git a/scripts/ops/fix-and-start.ps1 b/scripts/ops/fix-and-start.ps1
new file mode 100644
index 0000000..ba3591d
--- /dev/null
+++ b/scripts/ops/fix-and-start.ps1
@@ -0,0 +1,183 @@
+# One-shot fix-and-start for the llm-gateway + vLLM stack on EC2.
+#
+# What this script does (all idempotent):
+#   1. Make sure the EC2 is running.
+#   2. Disable the CloudWatch idle backstop alarm action (so it doesn't
+#      stop the box mid-startup).
+#   3. SSH in:
+#      a. Disable the idle-shutdown cron (mv to .disabled).
+#      b. sed-fix the systemd unit (--no-color -> --ansi never) for
+#         compatibility with docker compose v2.x. Safe no-op if already
+#         on a fresh systemd unit.
+#      c. systemctl daemon-reload + start llm-gateway.service.
+#      d. Tail journal until "Application startup complete." or fail.
+#   4. From the EC2, smoke /health, /ready, /v1/chat/completions (any failure aborts the run).
+#   5. Print the SSH tunnel command + restore command.
+#
+# Usage:
+#   .\fix-and-start.ps1 # discovers dev instance via tags
+#   .\fix-and-start.ps1 -Environment prod
+#   .\fix-and-start.ps1 -InstanceId i-abc -Eip 1.2.3.4 -Region us-east-1
+
+param(
+    [string]$Environment = 'dev',                    # value matched against tag:environment during discovery
+    [string]$InstanceId,                             # skips tag discovery when supplied
+    [string]$Eip,                                    # SSH target; defaults to the instance's PublicIpAddress
+    [string]$Region = 'ap-northeast-1',
+    [string]$Ec2User = 'ubuntu',
+    [string]$KeyPath = (Join-Path $HOME ".ssh\id_ed25519_vllm"),
+    [string]$ServedModelName = 'selfhost-qwen',      # "model" field sent by the smoke-test chat completion
+    [string]$AlarmNameContains = 'VLLMIdleBackstop'  # substring used to find the CloudWatch alarm
+)
+
+$ErrorActionPreference = 'Stop'
+$AWS = "C:\Program Files\Amazon\AWSCLIV2\aws.exe"
+
+# ---------------------------------------------------------------------
+# 0. Discover instance + EIP via tags
+# ---------------------------------------------------------------------
+if (-not $InstanceId) {
+    $InstanceId = (& $AWS ec2 describe-instances `
+        --filters "Name=tag:application,Values=vllm-serving" "Name=tag:environment,Values=$Environment" `
+        "Name=instance-state-name,Values=running,stopped,stopping,starting" `
+        --region $Region `
+        --query 'Reservations[0].Instances[0].InstanceId' --output text).Trim()
+    if ([string]::IsNullOrWhiteSpace($InstanceId) -or $InstanceId -eq 'None') {
+        throw "No vllm-serving instance found for environment=$Environment in $Region. Pass -InstanceId explicitly."
+    }
+}
+if (-not $Eip) {
+    $Eip = (& $AWS ec2 describe-instances --instance-ids $InstanceId --region $Region `
+        --query 'Reservations[0].Instances[0].PublicIpAddress' --output text).Trim()
+    if ([string]::IsNullOrWhiteSpace($Eip) -or $Eip -eq 'None') {
+        throw "Instance $InstanceId has no public IP. Pass -Eip explicitly."
+    }
+}
+
+# ---------------------------------------------------------------------
+# 1. Start instance if needed
+# ---------------------------------------------------------------------
+$state = (& $AWS ec2 describe-instances `
+    --instance-ids $InstanceId --region $Region `
+    --query 'Reservations[0].Instances[0].State.Name' --output text).Trim()
+Write-Host ">>> Instance state: $state" -ForegroundColor Cyan
+if ($state -ne 'running') {
+    # Only a fully stopped instance gets an explicit start; every other non-running state is just awaited.
+    if ($state -eq 'stopped') { & $AWS ec2 start-instances --instance-ids $InstanceId --region $Region | Out-Null }
+    & $AWS ec2 wait instance-status-ok --instance-ids $InstanceId --region $Region
+    if ($LASTEXITCODE -ne 0) { throw "Instance $InstanceId did not reach status-ok (state was '$state')" }
+    Write-Host " Instance OK." -ForegroundColor Green
+}
+
+# ---------------------------------------------------------------------
+# 2. Disable CloudWatch idle alarm (action only - the alarm itself stays
+#    so we can re-enable in restore-idle-protection.ps1).
+# ---------------------------------------------------------------------
+$alarmName = (& $AWS cloudwatch describe-alarms --region $Region `
+    --query "MetricAlarms[?contains(AlarmName,'$AlarmNameContains')].AlarmName | [0]" `
+    --output text).Trim()
+if ($alarmName -and $alarmName -ne 'None') {
+    Write-Host ">>> Disabling alarm actions: $alarmName" -ForegroundColor Cyan
+    & $AWS cloudwatch disable-alarm-actions --alarm-names $alarmName --region $Region | Out-Null
+    Write-Host " Disabled (will re-enable in restore script)." -ForegroundColor Green
+} else {
+    Write-Host ">>> No alarm matching '$AlarmNameContains' - skipping" -ForegroundColor DarkGray
+}
+
+# ---------------------------------------------------------------------
+# 3. SSH: disable cron + sed fix + start + watch journal
+# ---------------------------------------------------------------------
+$sshArgs = @(
+    '-i', $KeyPath,
+    '-o', 'StrictHostKeyChecking=accept-new',
+    '-o', 'IdentitiesOnly=yes',
+    '-o', 'PasswordAuthentication=no',
+    '-o', 'ConnectTimeout=15',
+    "$Ec2User@$Eip"
+)
+
+$fixScript = @'
+set -e
+echo === 3a. disable idle cron ===
+if [ -f /etc/cron.d/llm-gateway-idle-shutdown ]; then
+  sudo mv /etc/cron.d/llm-gateway-idle-shutdown /etc/cron.d/llm-gateway-idle-shutdown.disabled
+  echo "moved to .disabled"
+else
+  echo "(already disabled or missing)"
+fi
+echo
+echo "=== 3b. sed fix systemd unit (compose v1->v2 compat) ==="
+sudo cp /etc/systemd/system/llm-gateway.service /etc/systemd/system/llm-gateway.service.bak 2>/dev/null || true
+sudo sed -i 's|docker compose --no-color|docker compose --ansi never|g' /etc/systemd/system/llm-gateway.service
+sudo grep -E '^Exec(StartPre|Start|Stop)=' /etc/systemd/system/llm-gateway.service
+echo
+echo === 3c. daemon-reload + start ===
+sudo systemctl daemon-reload
+sudo systemctl reset-failed llm-gateway.service 2>/dev/null || true
+sudo systemctl start llm-gateway.service
+echo "start command issued"
+echo
+echo === 3d. follow journal until ready or fail ===
+sudo timeout 1200 bash -c '
+  while true; do
+    if sudo journalctl -u llm-gateway.service --since "20 min ago" --no-pager | grep -q "Application startup complete"; then
+      echo "GATEWAY_READY"
+      exit 0
+    fi
+    if sudo systemctl is-failed --quiet llm-gateway.service; then
+      echo "SERVICE_FAILED"
+      sudo journalctl -xeu llm-gateway.service --no-pager | tail -40
+      exit 1
+    fi
+    STATE=$(sudo systemctl is-active llm-gateway.service)
+    echo "[$(date +%H:%M:%S)] state=$STATE"
+    sleep 15
+  done
+'
+echo
+echo === final ===
+sudo systemctl is-active llm-gateway.service
+sudo docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
+echo === DONE ===
+'@
+
+Write-Host ">>> SSH: applying fix and starting service (10-20 min for first boot)" -ForegroundColor Cyan
+$fixScript | & ssh @sshArgs 'bash -s'
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "SSH script ended with non-zero exit code $LASTEXITCODE" -ForegroundColor Red
+    Write-Host "Run restore-idle-protection.ps1 to re-arm CW alarm + cron when done." -ForegroundColor Yellow
+    throw "Fix failed"
+}
+
+# ---------------------------------------------------------------------
+# 4. Smoke test from inside the EC2 (no tunnel needed).
+# ---------------------------------------------------------------------
+Write-Host ""
+Write-Host ">>> Smoke test from EC2 host" -ForegroundColor Cyan
+$smokeScript = @"
+TOKEN=`$(grep "^BEARER_TOKEN=" /opt/llm-gateway/deploy/.env | cut -d= -f2-)
+echo === health ===
+curl -fsS http://127.0.0.1:8000/health
+echo
+echo === ready ===
+curl -fsS http://127.0.0.1:8000/ready
+echo
+echo === chat completion ===
+curl -fsS -H "Authorization: Bearer `$TOKEN" -H "Content-Type: application/json" \
+  http://127.0.0.1:8000/v1/chat/completions \
+  -d '{"model":"$ServedModelName","messages":[{"role":"user","content":"reply with the word ok"}],"max_tokens":8}'
+echo
+"@
+$smokeScript | & ssh @sshArgs 'sudo bash -e -s'  # -e: the first failing curl aborts the remote script with a non-zero status
+if ($LASTEXITCODE -ne 0) { throw "Smoke test failed (exit code $LASTEXITCODE) - run restore-idle-protection.ps1 to re-arm CW alarm + cron when done" }
+Write-Host ""
+Write-Host "llm-gateway is up." -ForegroundColor Green
+Write-Host ""
+Write-Host "Open the local tunnel in another PowerShell window:" -ForegroundColor Yellow
+Write-Host " ssh -i `"$KeyPath`" -L 8000:127.0.0.1:8000 -N $Ec2User@$Eip"
+Write-Host ""
+Write-Host "Point your client at http://127.0.0.1:8000/v1 with the bearer token" -ForegroundColor Yellow
+Write-Host "from your Secrets Manager (any OpenAI-compatible client works)."
+Write-Host ""
+Write-Host "When you finish development, restore protection:" -ForegroundColor Yellow
+Write-Host " .\restore-idle-protection.ps1 -Environment $Environment -StopNow"
diff --git a/scripts/ops/restore-idle-protection.ps1 b/scripts/ops/restore-idle-protection.ps1
new file mode 100644
index 0000000..7345636
--- /dev/null
+++ b/scripts/ops/restore-idle-protection.ps1
@@ -0,0 +1,104 @@
+# Restore the cost-control safeguards that fix-and-start.ps1 disabled.
+# Run when you're done developing for the day.
+#
+# What this restores:
+#   1. Re-enable the CloudWatch idle backstop alarm action.
+#   2.
Move /etc/cron.d/llm-gateway-idle-shutdown.disabled back to
+#      /etc/cron.d/llm-gateway-idle-shutdown.
+#   3. Optionally stop the instance now (to lock in the savings).
+#
+# Usage:
+#   .\restore-idle-protection.ps1
+#   .\restore-idle-protection.ps1 -StopNow
+#   .\restore-idle-protection.ps1 -Environment prod -StopNow
+
+param(
+    [switch]$StopNow,                                # also stop the instance after restoring protection
+    [string]$Environment = 'dev',                    # value matched against tag:environment during discovery
+    [string]$InstanceId,                             # skips tag discovery when supplied
+    [string]$Eip,                                    # SSH target; defaults to the instance's PublicIpAddress
+    [string]$Region = 'ap-northeast-1',
+    [string]$Ec2User = 'ubuntu',
+    [string]$KeyPath = (Join-Path $HOME ".ssh\id_ed25519_vllm"),
+    [string]$AlarmNameContains = 'VLLMIdleBackstop'  # substring used to find the CloudWatch alarm
+)
+
+$ErrorActionPreference = 'Stop'
+$AWS = "C:\Program Files\Amazon\AWSCLIV2\aws.exe"
+
+# ---------------------------------------------------------------------
+# 0. Discover instance + EIP via tags
+# ---------------------------------------------------------------------
+if (-not $InstanceId) {
+    $InstanceId = (& $AWS ec2 describe-instances `
+        --filters "Name=tag:application,Values=vllm-serving" "Name=tag:environment,Values=$Environment" `
+        "Name=instance-state-name,Values=running,stopped,stopping,starting" `
+        --region $Region `
+        --query 'Reservations[0].Instances[0].InstanceId' --output text).Trim()
+    if ([string]::IsNullOrWhiteSpace($InstanceId) -or $InstanceId -eq 'None') {
+        throw "No vllm-serving instance found for environment=$Environment in $Region. Pass -InstanceId explicitly."
+    }
+}
+if (-not $Eip) {
+    $Eip = (& $AWS ec2 describe-instances --instance-ids $InstanceId --region $Region `
+        --query 'Reservations[0].Instances[0].PublicIpAddress' --output text).Trim()
+}
+
+# ---------------------------------------------------------------------
+# 1. Re-enable CloudWatch alarm action
+# ---------------------------------------------------------------------
+$alarmName = (& $AWS cloudwatch describe-alarms --region $Region `
+    --query "MetricAlarms[?contains(AlarmName,'$AlarmNameContains')].AlarmName | [0]" `
+    --output text).Trim()
+if ($alarmName -and $alarmName -ne 'None') {
+    Write-Host ">>> Re-enabling alarm actions: $alarmName" -ForegroundColor Cyan
+    & $AWS cloudwatch enable-alarm-actions --alarm-names $alarmName --region $Region | Out-Null
+    Write-Host " OK." -ForegroundColor Green
+} else {
+    Write-Host ">>> No alarm matching '$AlarmNameContains' - skipping" -ForegroundColor DarkGray
+}
+
+# ---------------------------------------------------------------------
+# 2. Re-enable instance-side idle cron (only if instance is running)
+# ---------------------------------------------------------------------
+$state = (& $AWS ec2 describe-instances `
+    --instance-ids $InstanceId --region $Region `
+    --query 'Reservations[0].Instances[0].State.Name' --output text).Trim()
+Write-Host ">>> Instance state: $state" -ForegroundColor Cyan
+if ($state -eq 'running' -and $Eip -and $Eip -ne 'None') {
+    $sshArgs = @(
+        '-i', $KeyPath,
+        '-o', 'StrictHostKeyChecking=accept-new',
+        '-o', 'IdentitiesOnly=yes',
+        '-o', 'PasswordAuthentication=no',
+        '-o', 'ConnectTimeout=15',
+        "$Ec2User@$Eip"
+    )
+    $restoreScript = @'
+if [ -f /etc/cron.d/llm-gateway-idle-shutdown.disabled ]; then
+  sudo mv /etc/cron.d/llm-gateway-idle-shutdown.disabled /etc/cron.d/llm-gateway-idle-shutdown
+  echo "cron re-enabled"
+else
+  echo "(cron file not found - already enabled or never disabled)"
+fi
+ls -la /etc/cron.d/llm-gateway-idle-shutdown* 2>&1
+'@
+    Write-Host ">>> Re-enabling idle cron via SSH" -ForegroundColor Cyan
+    $restoreScript | & ssh @sshArgs 'bash -e -s'  # -e: a failed mv is no longer masked by the trailing ls
+    if ($LASTEXITCODE -ne 0) { Write-Warning "Idle cron was NOT confirmed re-enabled (ssh exit code $LASTEXITCODE) - rerun this script once SSH works." }
+} else {
+    Write-Host " Instance not running - cron file is in EBS, will be restored on next start." -ForegroundColor Yellow
+}
+
+# ---------------------------------------------------------------------
+# 3. Optionally stop the instance
+# ---------------------------------------------------------------------
+if ($StopNow -and $state -eq 'running') {
+    Write-Host ">>> Stopping instance now (-StopNow flag)" -ForegroundColor Cyan
+    & $AWS ec2 stop-instances --instance-ids $InstanceId --region $Region --query 'StoppingInstances[0].CurrentState.Name' --output text
+    if ($LASTEXITCODE -ne 0) { throw "stop-instances failed for $InstanceId - the instance may still be running" }
+    Write-Host " Stop initiated." -ForegroundColor Green
+}
+
+Write-Host ""
+Write-Host "Idle protection restored." -ForegroundColor Green
diff --git a/scripts/ops/setup-ssh.ps1 b/scripts/ops/setup-ssh.ps1
new file mode 100644
index 0000000..3ff93e5
--- /dev/null
+++ b/scripts/ops/setup-ssh.ps1
@@ -0,0 +1,178 @@
+# One-time SSH setup for an llm-gateway EC2 instance.
+#
+# Generates a dedicated ed25519 key, opens an SG :22 inbound rule scoped
+# to your current public IP /32, pushes the public key via EC2 Instance
+# Connect (60s TTL), and persists it into authorized_keys for ongoing use.
+#
+# Instance discovery is tag-based so you don't have to hardcode IDs:
+#   tag:application = vllm-serving
+#   tag:environment = $Environment (default: dev)
+#
+# Usage:
+#   .\setup-ssh.ps1 # discovers dev instance via tags
+#   .\setup-ssh.ps1 -Environment prod
+#   .\setup-ssh.ps1 -InstanceId i-abc -Eip 1.2.3.4 -Region us-east-1 # explicit
+#
+# Pair with teardown-ssh.ps1 when you're done with the dev box for a while.
+
+param(
+    [string]$Environment = 'dev',
+    [string]$InstanceId,
+    [string]$Eip,
+    [string]$Region = 'ap-northeast-1',
+    [string]$Ec2User = 'ubuntu',
+    [string]$KeyPath = (Join-Path $HOME ".ssh\id_ed25519_vllm")
+)
+
+$ErrorActionPreference = 'Stop'
+$AWS = "C:\Program Files\Amazon\AWSCLIV2\aws.exe"
+$KEY_PUB = "$KeyPath.pub"
+$KEY_COMMENT = "vllm-dev-$env:USERNAME"
+
+# ---------------------------------------------------------------------
+# 0.
Discover instance + EIP via tags (unless explicitly passed)
+# ---------------------------------------------------------------------
+if (-not $InstanceId) {
+    Write-Host ">>> Discovering instance via tag:application=vllm-serving + tag:environment=$Environment" -ForegroundColor Cyan
+    $InstanceId = (& $AWS ec2 describe-instances `
+        --filters "Name=tag:application,Values=vllm-serving" "Name=tag:environment,Values=$Environment" `
+        "Name=instance-state-name,Values=running,stopped,stopping,starting" `
+        --region $Region `
+        --query 'Reservations[0].Instances[0].InstanceId' --output text).Trim()
+    if ([string]::IsNullOrWhiteSpace($InstanceId) -or $InstanceId -eq 'None') {
+        throw "No vllm-serving instance found for environment=$Environment in $Region. Pass -InstanceId explicitly."
+    }
+    Write-Host " InstanceId: $InstanceId"
+}
+if (-not $Eip) {
+    $Eip = (& $AWS ec2 describe-instances --instance-ids $InstanceId --region $Region `
+        --query 'Reservations[0].Instances[0].PublicIpAddress' --output text).Trim()
+    if ([string]::IsNullOrWhiteSpace($Eip) -or $Eip -eq 'None') {
+        throw "Instance $InstanceId has no public IP. Pass -Eip or attach an EIP first."
+    }
+    Write-Host " Eip: $Eip"
+}
+
+# Refuse to proceed if instance is in a state where SSH won't work.
+$state = (& $AWS ec2 describe-instances `
+    --instance-ids $InstanceId --region $Region `
+    --query 'Reservations[0].Instances[0].State.Name' --output text).Trim()
+Write-Host ">>> Instance state: $state" -ForegroundColor Cyan
+switch ($state) {
+    'running' { }
+    'stopped' {
+        Write-Host " Starting instance..." -ForegroundColor Yellow
+        & $AWS ec2 start-instances --instance-ids $InstanceId --region $Region | Out-Null
+        & $AWS ec2 wait instance-status-ok --instance-ids $InstanceId --region $Region
+    }
+    default { throw "Refusing to handle instance in state '$state'" }
+}
+
+# ---------------------------------------------------------------------
+# 1. Generate dedicated SSH key (skip if exists)
+# ---------------------------------------------------------------------
+if (-not (Test-Path $KeyPath)) {
+    Write-Host ">>> Generating dedicated key $KeyPath" -ForegroundColor Cyan
+    ssh-keygen -t ed25519 -f $KeyPath -N '""' -C $KEY_COMMENT | Out-Host  # NOTE(review): '""' relies on legacy native-argument quoting to mean "empty passphrase" - confirm on PowerShell 7.3+
+} else {
+    Write-Host ">>> Key already exists at $KeyPath (reusing)" -ForegroundColor DarkGray
+}
+
+# ---------------------------------------------------------------------
+# 2. Get current public IP and current SG id
+# ---------------------------------------------------------------------
+Write-Host ">>> Resolving your current public IP and the vLLM SG id" -ForegroundColor Cyan
+$myIp = (Invoke-RestMethod -Uri https://checkip.amazonaws.com -TimeoutSec 5).Trim()
+if ($myIp -notmatch '^\d{1,3}(\.\d{1,3}){3}$' -or ($myIp -split '\.' | Where-Object { [int]$_ -gt 255 })) {  # shape check, then reject octets above 255
+    throw "checkip.amazonaws.com returned non-IPv4 response: '$myIp' - refusing to author SG rule"
+}
+Write-Host " Your IP: $myIp"
+
+$sgId = (& $AWS ec2 describe-instances `
+    --instance-ids $InstanceId `
+    --region $Region `
+    --query 'Reservations[0].Instances[0].SecurityGroups[0].GroupId' `
+    --output text).Trim()
+if ($LASTEXITCODE -ne 0 -or [string]::IsNullOrWhiteSpace($sgId) -or $sgId -eq 'None') {  # --output text prints the literal None for a null result
+    throw "Cannot resolve SG id for $InstanceId"
+}
+Write-Host " SG id: $sgId"
+
+# Idempotency: skip if a rule already covers this exact IP.
+$existing = & $AWS ec2 describe-security-group-rules `
+    --filters "Name=group-id,Values=$sgId" `
+    --region $Region `
+    --query "SecurityGroupRules[?IpProtocol=='tcp' && FromPort==``22`` && ToPort==``22`` && CidrIpv4=='$myIp/32' && IsEgress==``false``].SecurityGroupRuleId" `
+    --output text
+if (-not [string]::IsNullOrWhiteSpace($existing)) {
+    Write-Host ">>> SG rule already allows 22/tcp from $myIp/32 (rule $existing)" -ForegroundColor DarkGray
+} else {
+    Write-Host ">>> Adding SG inbound 22/tcp from $myIp/32" -ForegroundColor Cyan
+    $ipPerm = "IpProtocol=tcp,FromPort=22,ToPort=22,IpRanges=[{CidrIp=$myIp/32}]"
+    & $AWS ec2 authorize-security-group-ingress `
+        --group-id $sgId `
+        --ip-permissions $ipPerm `
+        --region $Region | Out-Null
+    if ($LASTEXITCODE -ne 0) { throw "authorize-security-group-ingress failed" }
+}
+
+# ---------------------------------------------------------------------
+# 3. Push public key via EC2 Instance Connect (60s TTL)
+# ---------------------------------------------------------------------
+Write-Host ">>> Pushing public key via EC2 Instance Connect (60s TTL)" -ForegroundColor Cyan
+$pubKey = Get-Content $KEY_PUB -Raw
+& $AWS ec2-instance-connect send-ssh-public-key `
+    --instance-id $InstanceId `
+    --instance-os-user $Ec2User `
+    --ssh-public-key $pubKey `
+    --region $Region `
+    --output json | Out-Null
+if ($LASTEXITCODE -ne 0) { throw "send-ssh-public-key failed" }
+Write-Host " OK - have 60 seconds before that key is rotated out"
+
+# ---------------------------------------------------------------------
+# 4. SSH in once and persist the key to authorized_keys
+# ---------------------------------------------------------------------
+Write-Host ">>> Persisting key into ~$Ec2User/.ssh/authorized_keys" -ForegroundColor Cyan
+$sshArgs = @(
+    '-i', $KeyPath,
+    '-o', 'StrictHostKeyChecking=accept-new',
+    '-o', 'IdentitiesOnly=yes',
+    '-o', 'PasswordAuthentication=no',
+    '-o', 'ConnectTimeout=10',
+    "$Ec2User@$Eip"
+)
+
+$keyLine = $pubKey.Trim()
+$persistCmd = 'mkdir -p ~/.ssh && chmod 700 ~/.ssh && touch ~/.ssh/authorized_keys && chmod 600 ~/.ssh/authorized_keys && ' +
+    "grep -qxF '$keyLine' ~/.ssh/authorized_keys || " +
+    "printf '%s\n' '$keyLine' >> ~/.ssh/authorized_keys && " +
+    'echo PERSIST_OK_done'
+
+& ssh @sshArgs $persistCmd
+if ($LASTEXITCODE -ne 0) {
+    throw "Persist SSH failed - if you see Permission denied, the EIC TTL likely expired; rerun this script"
+}
+
+# ---------------------------------------------------------------------
+# 5. Verify - run a few cheap diagnostic commands
+# ---------------------------------------------------------------------
+Write-Host ">>> Verify connectivity + GPU + bootstrap status" -ForegroundColor Cyan
+$verifyCmd = @'
+echo === host ===
+whoami; hostname; uname -r
+echo === gpu ===
+nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2>&1 | head -1
+echo === bootstrap script ===
+ls -la /usr/local/bin/llm-gateway-bootstrap 2>&1
+echo === models mount ===
+df -h /models 2>&1 | tail -2
+'@
+$verifyCmd | & ssh @sshArgs 'bash -s'
+
+Write-Host ""
+Write-Host "SSH ready. Daily driver:" -ForegroundColor Green
+Write-Host " ssh -i `"$KeyPath`" $Ec2User@$Eip"
+Write-Host ""
+Write-Host "When done with the dev box for a while, revoke the SG rule:" -ForegroundColor Yellow
+Write-Host " .\teardown-ssh.ps1 -Environment $Environment"
diff --git a/scripts/ops/teardown-ssh.ps1 b/scripts/ops/teardown-ssh.ps1
new file mode 100644
index 0000000..0e88e02
--- /dev/null
+++ b/scripts/ops/teardown-ssh.ps1
@@ -0,0 +1,62 @@
+# Revoke ALL tcp/22 inbound rules on the instance's first SG - not only the ones added by setup-ssh.ps1.
+#
+# Idempotent - reports what got revoked.
+#
+# Keeps:
+# - The dedicated key file (~/.ssh/id_ed25519_vllm[.pub]) - reuse next time.
+# - The key inside ~ubuntu/.ssh/authorized_keys on the EC2 - harmless
+#   once port 22 is closed; cleared on stack destroy.
+#
+# Usage:
+#   .\teardown-ssh.ps1 # discovers dev instance via tags
+#   .\teardown-ssh.ps1 -Environment prod
+#   .\teardown-ssh.ps1 -InstanceId i-abc -Region us-east-1
+
+param(
+    [string]$Environment = 'dev',
+    [string]$InstanceId,
+    [string]$Region = 'ap-northeast-1'
+)
+
+$ErrorActionPreference = 'Stop'
+$AWS = "C:\Program Files\Amazon\AWSCLIV2\aws.exe"
+
+if (-not $InstanceId) {
+    $InstanceId = (& $AWS ec2 describe-instances `
+        --filters "Name=tag:application,Values=vllm-serving" "Name=tag:environment,Values=$Environment" `
+        "Name=instance-state-name,Values=running,stopped,stopping,starting" `
+        --region $Region `
+        --query 'Reservations[0].Instances[0].InstanceId' --output text).Trim()
+    if ([string]::IsNullOrWhiteSpace($InstanceId) -or $InstanceId -eq 'None') {
+        throw "No vllm-serving instance found for environment=$Environment in $Region. Pass -InstanceId explicitly."
+    }
+}
+
+$sgId = (& $AWS ec2 describe-instances `
+    --instance-ids $InstanceId `
+    --region $Region `
+    --query 'Reservations[0].Instances[0].SecurityGroups[0].GroupId' `
+    --output text).Trim()
+Write-Host ">>> SG: $sgId" -ForegroundColor Cyan
+if ([string]::IsNullOrWhiteSpace($sgId) -or $sgId -eq 'None') { throw "Cannot resolve SG id for $InstanceId" }
+$rules = & $AWS ec2 describe-security-group-rules `
+    --filters "Name=group-id,Values=$sgId" `
+    --region $Region `
+    --query "SecurityGroupRules[?IpProtocol=='tcp' && FromPort==``22`` && ToPort==``22`` && IsEgress==``false``].SecurityGroupRuleId" `
+    --output text
+if ($LASTEXITCODE -ne 0) { throw "describe-security-group-rules failed for $sgId - port 22 state unknown" }  # a failed lookup must not read as "already torn down"
+if ([string]::IsNullOrWhiteSpace($rules)) {
+    Write-Host ">>> No :22 inbound rules found - already torn down." -ForegroundColor DarkGray
+    return
+}
+
+$ruleIds = $rules -split '\s+' | Where-Object { $_ }  # text output is whitespace-separated rule ids
+Write-Host ">>> Revoking $($ruleIds.Count) rule(s): $ruleIds" -ForegroundColor Cyan
+
+& $AWS ec2 revoke-security-group-ingress `
+    --group-id $sgId `
+    --security-group-rule-ids $ruleIds `
+    --region $Region | Out-Null
+if ($LASTEXITCODE -ne 0) { throw "revoke-security-group-ingress failed" }
+
+Write-Host "Done. Port 22 closed." -ForegroundColor Green
From 87918474b02608901d4b0aae8934a387fe4f8097 Mon Sep 17 00:00:00 2001
From: jiao
Date: Sun, 3 May 2026 13:55:56 +0800
Subject: [PATCH 2/2] chore: black format llm_gateway/api/schemas.py

Drive-by fix to unblock CI on this PR. The previous commit on main (2c2c48d "fix(schemas): cap messages, tools, and per-message content length") landed pre-formatted lines that black 26.3.1 wants collapsed into single lines under the configured line-length=100. Pure formatting, no semantic change.

Verified: poetry run black --check llm_gateway/ tests/ now reports "52 files would be left unchanged."
Co-Authored-By: Claude Opus 4.7 (1M context) --- llm_gateway/api/schemas.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/llm_gateway/api/schemas.py b/llm_gateway/api/schemas.py index db6ae34..43a581d 100644 --- a/llm_gateway/api/schemas.py +++ b/llm_gateway/api/schemas.py @@ -60,9 +60,7 @@ def _content_length_cap(cls, value: str | list[dict[str, Any]] | None) -> Any: responsibility to validate. """ if isinstance(value, str) and len(value) > _MAX_CONTENT_CHARS: - raise ValueError( - f"content exceeds {_MAX_CONTENT_CHARS} chars (got {len(value)})" - ) + raise ValueError(f"content exceeds {_MAX_CONTENT_CHARS} chars (got {len(value)})") return value @@ -72,16 +70,12 @@ class ChatCompletionRequest(BaseModel): model_config = ConfigDict(extra="allow") model: str = Field(..., min_length=1) - messages: list[ChatMessage] = Field( - ..., min_length=1, max_length=_MAX_MESSAGES_PER_REQUEST - ) + messages: list[ChatMessage] = Field(..., min_length=1, max_length=_MAX_MESSAGES_PER_REQUEST) max_tokens: int | None = Field(default=None, gt=0) temperature: float | None = Field(default=None, ge=0.0, le=2.0) top_p: float | None = Field(default=None, gt=0.0, le=1.0) stream: bool = False - tools: list[dict[str, Any]] | None = Field( - default=None, max_length=_MAX_TOOLS_PER_REQUEST - ) + tools: list[dict[str, Any]] | None = Field(default=None, max_length=_MAX_TOOLS_PER_REQUEST) tool_choice: str | dict[str, Any] | None = None stop: str | list[str] | None = None n: int | None = Field(default=None, ge=1)