#!/usr/bin/env bash # gpu-up — provision a vast.ai GPU instance, bind its Ollama to OpenCode on Host B # via reverse SSH tunnel. Mirrors DL-0012 (Host A) topology — no cookies, no proxies. # # Usage: gpu-up [--model ] [--gpu ] [--max-price ] [--disk ] [--dry-run] # Defaults: --model qwen3.6:35b-a3b --gpu RTX_4090 --max-price 0.50 --disk 80 # # Requires: # - vastai authenticated: vastai set api-key # - /root/.ssh/vast_provisioning_ed25519 (created by bootstrap; see /opt/gpu-tools/README.md) # - vasttun user on Host B with permitlisten 127.0.0.1:11440 set -euo pipefail # === config === MODEL="${MODEL:-qwen3.6:35b-a3b}" GPU_FILTER="${GPU_FILTER:-RTX_4090}" MAX_PRICE="${MAX_PRICE:-0.50}" DISK_GB="${DISK_GB:-80}" DRY_RUN=0 HOST_B_PUBLIC_IP="${HOST_B_PUBLIC_IP:-YOUR_HOST_B_PUBLIC_IP}" HOST_B_SSH_PORT="${HOST_B_SSH_PORT:-2222}" TUNNEL_PORT="${TUNNEL_PORT:-11440}" PROVISIONING_KEY="${PROVISIONING_KEY:-/root/.ssh/vast_provisioning_ed25519}" STATE_FILE="/var/lib/specker/vast-current.json" OPENCODE_CONFIG="/home/opencode/.config/opencode/opencode.json" OPENCODE_BACKUP="${OPENCODE_CONFIG}.host-a-backup" EXA_KEY="${EXA_KEY:-YOUR_EXA_API_KEY}" # Vast-friendly image (has ssh-client; vast /.launch needs it). # pytorch/pytorch is bigger but known-good; ollama is installed in onstart. IMAGE="${IMAGE:-pytorch/pytorch:2.5.1-cuda12.4-cudnn9-devel}" # === args === while [[ $# -gt 0 ]]; do case $1 in --model) MODEL=$2; shift 2 ;; --gpu) GPU_FILTER=$2; shift 2 ;; --max-price) MAX_PRICE=$2; shift 2 ;; --disk) DISK_GB=$2; shift 2 ;; --image) IMAGE=$2; shift 2 ;; --dry-run) DRY_RUN=1; shift ;; -h|--help) sed -n '2,15p' "$0"; exit 0 ;; *) echo "[!] unknown arg: $1" >&2; exit 1 ;; esac done PATH="/root/.local/bin:$PATH" command -v vastai >/dev/null || { echo "[!] vastai CLI not found in PATH" >&2; exit 1; } command -v jq >/dev/null || { echo "[!] jq not found" >&2; exit 1; } [[ -f $PROVISIONING_KEY ]] || { echo "[!] $PROVISIONING_KEY missing" >&2; exit 1; } [[ -f ${PROVISIONING_KEY}.pub ]] || { echo "[!] ${PROVISIONING_KEY}.pub missing" >&2; exit 1; } # auth probe if ! vastai show user --raw 2>/dev/null | jq -e .id >/dev/null 2>&1; then echo "[!] vastai not authenticated. Run: vastai set api-key " >&2 exit 1 fi # existing instance? if [[ -f $STATE_FILE ]]; then echo "[!] instance already active (state file exists). Run gpu-down first." >&2 echo " state: $(cat "$STATE_FILE")" exit 1 fi # === search offers === QUERY="gpu_name=$GPU_FILTER disk_space>=$DISK_GB rentable=true verified=true dph_total<=$MAX_PRICE" echo "[*] searching: $QUERY" OFFERS=$(vastai search offers "$QUERY" -o "dph_total" --raw 2>/dev/null || echo "[]") OFFER_COUNT=$(echo "$OFFERS" | jq 'length') [[ "$OFFER_COUNT" -ge 1 ]] || { echo "[!] no offers" >&2; exit 1; } OFFER_ID=$(echo "$OFFERS" | jq -r '.[0].id') PRICE=$( echo "$OFFERS" | jq -r '.[0].dph_total') GPU_NAME=$(echo "$OFFERS" | jq -r '.[0].gpu_name') DISK_AV=$( echo "$OFFERS" | jq -r '.[0].disk_space') echo "[+] picked offer $OFFER_ID ($GPU_NAME, ${DISK_AV}GB disk, \$$PRICE/h)" echo "[+] image: $IMAGE" if [[ $DRY_RUN -eq 1 ]]; then echo "[dry-run] stopping here"; exit 0 fi # === build onstart-cmd === PRIVKEY=$(cat "$PROVISIONING_KEY") ONSTART_TMP=$(mktemp) cat > "$ONSTART_TMP" < /var/log/onstart.log 2>&1 echo "[onstart] \$(date -Is) start" export DEBIAN_FRONTEND=noninteractive apt-get update -qq apt-get install -y -qq autossh curl ca-certificates # Install ollama via official installer (binary install; we run serve ourselves since no systemd) curl -fsSL https://ollama.com/install.sh | sh || true echo "[onstart] \$(date -Is) ollama binary installed" # Provisioning key mkdir -p /root/.ssh && chmod 700 /root/.ssh cat > /root/.ssh/id_vasttun <<'KEYEOF' ${PRIVKEY} KEYEOF chmod 600 /root/.ssh/id_vasttun # Start ollama serve pkill -f "ollama serve" || true nohup ollama serve > /var/log/ollama.log 2>&1 & sleep 8 # Pull model (blocking) ollama pull ${MODEL} echo "[onstart] \$(date -Is) model ${MODEL} pulled" # Reverse tunnel → Host B :${TUNNEL_PORT} ← Ollama :11434 on this box nohup autossh -M 0 -N \\ -o StrictHostKeyChecking=no \\ -o ServerAliveInterval=60 -o ServerAliveCountMax=3 \\ -o ExitOnForwardFailure=yes \\ -R 127.0.0.1:${TUNNEL_PORT}:127.0.0.1:11434 \\ -p ${HOST_B_SSH_PORT} -i /root/.ssh/id_vasttun \\ vasttun@${HOST_B_PUBLIC_IP} > /var/log/autossh.log 2>&1 & echo "[onstart] \$(date -Is) tunnel up" EOSCRIPT # === create instance === echo "[*] creating instance from offer $OFFER_ID ..." CREATE_OUT=$(vastai create instance "$OFFER_ID" \ --image "$IMAGE" \ --disk "$DISK_GB" \ --onstart "$ONSTART_TMP" \ --raw 2>&1) || { echo "[!] create failed:"; echo "$CREATE_OUT"; exit 1; } INSTANCE_ID=$(echo "$CREATE_OUT" | jq -r '.new_contract // empty') rm -f "$ONSTART_TMP" [[ -n "$INSTANCE_ID" ]] || { echo "[!] no instance id in response: $CREATE_OUT"; exit 1; } echo "[+] instance $INSTANCE_ID created." echo "[*] cold-start budget: image pull (~5GB pytorch) ~1min + ollama install ~30s + model pull (~24GB) ~2min = ~3-5 min total" # === wait for tunnel listen on Host B side (this very host) === echo "[*] waiting up to 20 min for reverse tunnel on 127.0.0.1:${TUNNEL_PORT} ..." TUNNEL_UP=0 for i in $(seq 1 120); do if ss -tlnp 2>/dev/null | grep -q "127.0.0.1:${TUNNEL_PORT} "; then TUNNEL_UP=1 echo "[+] tunnel up after ~${i}0s" break fi sleep 10 done if [[ $TUNNEL_UP -ne 1 ]]; then echo "[!] tunnel did not come up in 20 min. Check: vastai logs $INSTANCE_ID --tail 100" echo " (instance still running — destroy with: gpu-down)" exit 1 fi # === verify model present in remote Ollama === echo "[*] verifying model $MODEL is loaded via tunnel ..." MODEL_OK=0 for i in $(seq 1 20); do if curl -sS --max-time 10 "http://127.0.0.1:${TUNNEL_PORT}/api/tags" 2>/dev/null \ | jq -e --arg m "$MODEL" '.models[] | select(.name == $m)' >/dev/null 2>&1; then MODEL_OK=1 break fi sleep 5 done [[ $MODEL_OK -eq 1 ]] || echo "[!] model not in /api/tags yet (pull may still be running). Tunnel is up, will keep going." # === switch opencode.json === [[ -f $OPENCODE_BACKUP ]] || cp "$OPENCODE_CONFIG" "$OPENCODE_BACKUP" cat > "$OPENCODE_CONFIG" < "$STATE_FILE" <