"""
-------------------------------------------------------------------------
File: gpu_config.py

Description: Local GPU detection and configuration utilities
             (nvidia-smi, TensorFlow, PyTorch).

Author: Sojian Pro (소지안 프로)

Created: 2026-02-02
Last Modified: 2026-02-02
-------------------------------------------------------------------------
"""
|
|
import os
|
|
import subprocess
|
|
|
|
|
|
def check_gpu_status():
|
|
"""GPU 상태 확인 및 출력"""
|
|
print("=" * 50)
|
|
print("GPU Status Check")
|
|
print("=" * 50)
|
|
|
|
# NVIDIA GPU 확인 (nvidia-smi)
|
|
try:
|
|
result = subprocess.run(
|
|
['nvidia-smi'],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=10
|
|
)
|
|
if result.returncode == 0:
|
|
print("\n[NVIDIA GPU Detected]")
|
|
print(result.stdout)
|
|
else:
|
|
print("\n[NVIDIA GPU] Not found or driver not installed")
|
|
except FileNotFoundError:
|
|
print("\n[NVIDIA GPU] nvidia-smi not found")
|
|
except Exception as e:
|
|
print(f"\n[NVIDIA GPU] Error: {e}")
|
|
|
|
# TensorFlow GPU 확인
|
|
try:
|
|
import tensorflow as tf
|
|
gpus = tf.config.list_physical_devices('GPU')
|
|
print(f"\n[TensorFlow] GPU devices: {len(gpus)}")
|
|
for gpu in gpus:
|
|
print(f" - {gpu}")
|
|
except ImportError:
|
|
print("\n[TensorFlow] Not installed")
|
|
except Exception as e:
|
|
print(f"\n[TensorFlow] Error: {e}")
|
|
|
|
# PyTorch GPU 확인
|
|
try:
|
|
import torch
|
|
print(f"\n[PyTorch] CUDA available: {torch.cuda.is_available()}")
|
|
if torch.cuda.is_available():
|
|
print(f" - Device count: {torch.cuda.device_count()}")
|
|
print(f" - Current device: {torch.cuda.current_device()}")
|
|
print(f" - Device name: {torch.cuda.get_device_name(0)}")
|
|
except ImportError:
|
|
print("\n[PyTorch] Not installed")
|
|
except Exception as e:
|
|
print(f"\n[PyTorch] Error: {e}")
|
|
|
|
print("\n" + "=" * 50)
|
|
|
|
|
|
def get_gpu_info():
|
|
"""GPU 정보 딕셔너리로 반환"""
|
|
info = {
|
|
'nvidia_available': False,
|
|
'tensorflow_gpus': [],
|
|
'pytorch_cuda': False,
|
|
'gpu_name': None,
|
|
'gpu_memory': None,
|
|
}
|
|
|
|
# NVIDIA 정보
|
|
try:
|
|
result = subprocess.run(
|
|
['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader'],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=10
|
|
)
|
|
if result.returncode == 0:
|
|
info['nvidia_available'] = True
|
|
parts = result.stdout.strip().split(',')
|
|
if len(parts) >= 2:
|
|
info['gpu_name'] = parts[0].strip()
|
|
info['gpu_memory'] = parts[1].strip()
|
|
except:
|
|
pass
|
|
|
|
# TensorFlow
|
|
try:
|
|
import tensorflow as tf
|
|
gpus = tf.config.list_physical_devices('GPU')
|
|
info['tensorflow_gpus'] = [str(gpu) for gpu in gpus]
|
|
except:
|
|
pass
|
|
|
|
# PyTorch
|
|
try:
|
|
import torch
|
|
info['pytorch_cuda'] = torch.cuda.is_available()
|
|
except:
|
|
pass
|
|
|
|
return info
|
|
|
|
|
|
def setup_tensorflow_gpu(memory_limit=None, allow_growth=True):
    """Configure TensorFlow's GPU memory policy.

    Args:
        memory_limit: Per-GPU memory cap in MB; falsy means no hard cap.
        allow_growth: When no cap is given, let TF allocate memory on demand
            instead of grabbing the whole GPU up front.

    Returns:
        The list of physical GPU devices that were configured; empty when
        no GPU is visible or configuration fails.
    """
    import tensorflow as tf

    devices = tf.config.list_physical_devices('GPU')
    if not devices:
        print("[Warning] No GPU found for TensorFlow")
        return []

    try:
        for device in devices:
            if memory_limit:
                # Hard cap: expose a single logical device of fixed size.
                logical = [tf.config.LogicalDeviceConfiguration(memory_limit=memory_limit)]
                tf.config.set_logical_device_configuration(device, logical)
                print(f"[TensorFlow] GPU memory limited to {memory_limit}MB")
            elif allow_growth:
                # No cap: allocate incrementally as the model needs memory.
                tf.config.experimental.set_memory_growth(device, True)
                print("[TensorFlow] GPU memory growth enabled")

        print(f"[TensorFlow] Configured {len(devices)} GPU(s)")
        return devices

    except RuntimeError as err:
        # Raised when GPUs were already initialized before configuration.
        print(f"[TensorFlow] GPU configuration error: {err}")
        return []
|
|
|
def setup_pytorch_gpu(device_id=0):
    """Select the compute device for PyTorch.

    Args:
        device_id: CUDA device index to use when CUDA is available.

    Returns:
        torch.device: ``cuda:<device_id>`` when CUDA is available,
        otherwise the CPU device.
    """
    import torch

    if not torch.cuda.is_available():
        print("[PyTorch] CUDA not available, using CPU")
        return torch.device('cpu')

    device = torch.device(f'cuda:{device_id}')
    # Make the chosen GPU the default for subsequent allocations.
    torch.cuda.set_device(device_id)
    print(f"[PyTorch] Using GPU: {torch.cuda.get_device_name(device_id)}")
    return device
|
|
def limit_gpu_memory(fraction=0.5):
    """Limit TensorFlow GPU memory usage to a fraction of total memory.

    The previous implementation never used ``fraction``: it only enabled
    memory growth while printing that a fraction had been applied. This
    version queries each GPU's total memory via nvidia-smi and applies a
    hard per-GPU ``memory_limit`` of ``total * fraction``; when the total
    cannot be determined it falls back to on-demand memory growth.

    Args:
        fraction: Fraction of each GPU's total memory to use, in (0.0, 1.0].

    Raises:
        ValueError: If ``fraction`` is outside (0.0, 1.0].
    """
    if not 0.0 < fraction <= 1.0:
        raise ValueError(f"fraction must be in (0.0, 1.0], got {fraction}")

    import tensorflow as tf

    gpus = tf.config.list_physical_devices('GPU')
    if not gpus:
        return

    # Query per-GPU total memory in MiB (one integer per line, per GPU).
    totals = []
    try:
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=memory.total', '--format=csv,noheader,nounits'],
            capture_output=True,
            text=True,
            timeout=10,
        )
        if result.returncode == 0:
            totals = [int(v) for v in result.stdout.split()]
    except (OSError, subprocess.SubprocessError, ValueError):
        totals = []

    try:
        for i, gpu in enumerate(gpus):
            if i < len(totals):
                limit_mb = int(totals[i] * fraction)
                tf.config.set_logical_device_configuration(
                    gpu,
                    [tf.config.LogicalDeviceConfiguration(memory_limit=limit_mb)],
                )
            else:
                # Total memory unknown for this GPU — best effort fallback.
                tf.config.experimental.set_memory_growth(gpu, True)

        os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
        print(f"[TensorFlow] GPU memory fraction set to {fraction * 100}%")
    except Exception as e:
        print(f"[Error] {e}")
|
|
|
def set_visible_gpus(gpu_ids):
    """Restrict CUDA to the given devices via ``CUDA_VISIBLE_DEVICES``.

    Must run before CUDA is initialized by any framework to take effect.

    Args:
        gpu_ids: List of GPU device IDs to expose (e.g. ``[0, 1]``).
    """
    visible = ','.join(str(gid) for gid in gpu_ids)
    os.environ['CUDA_VISIBLE_DEVICES'] = visible
    print(f"[GPU] Visible devices set to: {gpu_ids}")