#!/bin/bash
set -e

#====================================================================
# Qwen3.5 + Open WebUI (vLLM + NVIDIA GPU) one-click setup
# Environment: Linux / NVIDIA GPU / Docker
#====================================================================

PROJECT_DIR="$(cd "$(dirname "$0")" && pwd)"
MODEL="Qwen/Qwen3.5-35B-A3B"
PORT=8090          # host port mapped to vLLM's OpenAI-compatible API
WEBUI_PORT=3000    # host port for Open WebUI
MAX_MODEL_LEN=8192 # context window vLLM allocates KV cache for

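# Note: the model above is downloaded from Hugging Face on first start and
# cached in the vllm-models Docker volume, so later runs reuse the cache.
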
echo "============================================"
|
|
echo " Qwen3.5 + Open WebUI (vLLM) 셋업"
|
|
echo "============================================"
|
|
echo ""
|
|
|
|
#--------------------------------------------------------------------
# 1. Check prerequisites
#--------------------------------------------------------------------
echo "[1/4] Checking prerequisites..."

# Docker
if ! command -v docker &>/dev/null; then
    echo "❌ Docker is not installed."
    exit 1
fi
if ! docker info &>/dev/null; then
    echo "❌ The Docker daemon is not running."
    exit 1
fi
echo " ✓ Docker"

# NVIDIA GPU
if ! command -v nvidia-smi &>/dev/null; then
    echo "❌ nvidia-smi not found. Make sure the NVIDIA driver is installed."
    exit 1
fi

GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader | head -1)
GPU_VRAM=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits | head -1)
echo " ✓ GPU: $GPU_NAME (${GPU_VRAM}MiB)"

if [ "$GPU_VRAM" -lt 20000 ]; then
|
|
echo " ⚠️ VRAM이 20GB 미만입니다. 4bit 양자화 모델을 사용하세요."
|
|
fi

# nvidia-container-toolkit
if ! docker run --rm --gpus all nvidia/cuda:12.0.0-base-ubuntu22.04 nvidia-smi &>/dev/null; then
    echo "❌ nvidia-container-toolkit is not installed."
    echo "   Install guide: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html"
    exit 1
fi
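
# The guide above walks through the full install; on Ubuntu it is roughly
# (a sketch, assuming NVIDIA's apt repository is already configured):
#   sudo apt-get install -y nvidia-container-toolkit
#   sudo nvidia-ctk runtime configure --runtime=docker
#   sudo systemctl restart docker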
echo " ✓ nvidia-container-toolkit"

echo ""

#--------------------------------------------------------------------
# 2. Docker Compose configuration
#--------------------------------------------------------------------
echo "[2/4] Configuring Docker Compose..."

if [ ! -f "$PROJECT_DIR/docker-compose.vllm.yml" ]; then
    cat > "$PROJECT_DIR/docker-compose.vllm.yml" << EOF
services:
  vllm:
    image: vllm/vllm-openai:latest
    container_name: vllm-server
    ports:
      - "${PORT}:8000"
    volumes:
      - vllm-models:/root/.cache/huggingface
    environment:
      - HUGGING_FACE_HUB_TOKEN=\${HUGGING_FACE_HUB_TOKEN:-}
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    command: >
      --model ${MODEL}
      --max-model-len ${MAX_MODEL_LEN}
      --max-num-seqs 4
      --gpu-memory-utilization 0.9
      --trust-remote-code
    restart: unless-stopped

  open-webui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: open-webui-vllm
    ports:
      - "${WEBUI_PORT}:8080"
    environment:
      - OPENAI_API_BASE_URL=http://vllm:8000/v1
      - OPENAI_API_KEY=none
      - OLLAMA_BASE_URL=
    volumes:
      - open-webui-vllm-data:/app/backend/data
    depends_on:
      - vllm
    restart: unless-stopped

volumes:
  vllm-models:
  open-webui-vllm-data:
EOF
    echo " ✓ Generated docker-compose.vllm.yml"
else
    echo " ✓ docker-compose.vllm.yml already exists"
fi
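
# Optional sanity check (assumes Docker Compose v2, which provides the
# `config` subcommand): parse the generated file so YAML mistakes fail
# here rather than at `up`.
docker compose -f "$PROJECT_DIR/docker-compose.vllm.yml" config > /dev/null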

echo ""

#--------------------------------------------------------------------
# 3. Start services
#--------------------------------------------------------------------
echo "[3/4] Starting vLLM + Open WebUI..."
echo " (The first run takes a while: Docker image pull + model download)"

cd "$PROJECT_DIR"
|
|
docker compose -f docker-compose.vllm.yml up -d 2>&1 | grep -v "^$"
|
|
|
|
echo ""
|
|
echo " 서버 준비 대기 중..."
|
|
for i in $(seq 1 300); do
|
|
if curl -s http://localhost:$PORT/v1/models > /dev/null 2>&1; then
|
|
echo ""
|
|
echo " ✓ vLLM 서버 준비 완료!"
|
|
break
|
|
fi
|
|
# 컨테이너가 죽었는지 확인
|
|
if ! docker ps -q --filter name=vllm-server | grep -q .; then
|
|
echo ""
|
|
echo " ❌ vLLM 서버 시작 실패. 로그를 확인하세요:"
|
|
echo " docker logs vllm-server"
|
|
exit 1
|
|
fi
|
|
printf "."
|
|
sleep 1
|
|
done
|
|
|
|
if ! curl -s http://localhost:$PORT/v1/models > /dev/null 2>&1; then
    echo ""
    echo " ⚠️ The server is not ready yet. (The model may still be downloading.)"
    echo " Follow the logs: docker logs -f vllm-server"
fi
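
# Optional smoke test once the server is up, using vLLM's OpenAI-compatible
# chat endpoint (uncomment to send a one-off prompt; the prompt and token
# limit here are just examples):
# curl -s http://localhost:$PORT/v1/chat/completions \
#   -H "Content-Type: application/json" \
#   -d "{\"model\": \"$MODEL\", \"messages\": [{\"role\": \"user\", \"content\": \"Hello\"}], \"max_tokens\": 16}"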

echo ""

#--------------------------------------------------------------------
# 4. Done
#--------------------------------------------------------------------
echo "[4/4] Setup complete!"
echo ""
echo "============================================"
|
|
echo " 브라우저에서 http://localhost:${WEBUI_PORT} 접속"
|
|
echo " (첫 접속 시 회원가입 → 첫 계정이 admin)"
|
|
echo ""
|
|
echo " 로그 확인: docker logs -f vllm-server"
|
|
echo " 종료: ./stop-vllm.sh"
|
|
echo "============================================"
|