goheung/app/AI_modules/gpu_utils/gpu_config.py
2026-02-02 19:07:53 +09:00

206 lines
5.6 KiB
Python

"""
-------------------------------------------------------------------------
File: gpu_config.py
Description: 로컬 GPU 감지 및 설정 유틸리티
Author: 소지안 프로
Created: 2026-02-02
Last Modified: 2026-02-02
-------------------------------------------------------------------------
"""
import os
import subprocess
def check_gpu_status():
"""GPU 상태 확인 및 출력"""
print("=" * 50)
print("GPU Status Check")
print("=" * 50)
# NVIDIA GPU 확인 (nvidia-smi)
try:
result = subprocess.run(
['nvidia-smi'],
capture_output=True,
text=True,
timeout=10
)
if result.returncode == 0:
print("\n[NVIDIA GPU Detected]")
print(result.stdout)
else:
print("\n[NVIDIA GPU] Not found or driver not installed")
except FileNotFoundError:
print("\n[NVIDIA GPU] nvidia-smi not found")
except Exception as e:
print(f"\n[NVIDIA GPU] Error: {e}")
# TensorFlow GPU 확인
try:
import tensorflow as tf
gpus = tf.config.list_physical_devices('GPU')
print(f"\n[TensorFlow] GPU devices: {len(gpus)}")
for gpu in gpus:
print(f" - {gpu}")
except ImportError:
print("\n[TensorFlow] Not installed")
except Exception as e:
print(f"\n[TensorFlow] Error: {e}")
# PyTorch GPU 확인
try:
import torch
print(f"\n[PyTorch] CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
print(f" - Device count: {torch.cuda.device_count()}")
print(f" - Current device: {torch.cuda.current_device()}")
print(f" - Device name: {torch.cuda.get_device_name(0)}")
except ImportError:
print("\n[PyTorch] Not installed")
except Exception as e:
print(f"\n[PyTorch] Error: {e}")
print("\n" + "=" * 50)
def get_gpu_info():
"""GPU 정보 딕셔너리로 반환"""
info = {
'nvidia_available': False,
'tensorflow_gpus': [],
'pytorch_cuda': False,
'gpu_name': None,
'gpu_memory': None,
}
# NVIDIA 정보
try:
result = subprocess.run(
['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader'],
capture_output=True,
text=True,
timeout=10
)
if result.returncode == 0:
info['nvidia_available'] = True
parts = result.stdout.strip().split(',')
if len(parts) >= 2:
info['gpu_name'] = parts[0].strip()
info['gpu_memory'] = parts[1].strip()
except:
pass
# TensorFlow
try:
import tensorflow as tf
gpus = tf.config.list_physical_devices('GPU')
info['tensorflow_gpus'] = [str(gpu) for gpu in gpus]
except:
pass
# PyTorch
try:
import torch
info['pytorch_cuda'] = torch.cuda.is_available()
except:
pass
return info
def setup_tensorflow_gpu(memory_limit=None, allow_growth=True):
    """Configure TensorFlow's visible GPUs.

    Args:
        memory_limit: per-GPU memory cap in MB; None means no hard cap.
        allow_growth: when no cap is set, enable on-demand memory growth.

    Returns:
        The list of physical GPU devices that were configured, or an empty
        list when no GPU is present or configuration fails.
    """
    import tensorflow as tf

    gpus = tf.config.list_physical_devices('GPU')
    # Guard clause: nothing to configure without a GPU.
    if not gpus:
        print("[Warning] No GPU found for TensorFlow")
        return []

    try:
        for gpu in gpus:
            if memory_limit:
                # Hard cap: carve out a single logical device of fixed size.
                limit_cfg = [tf.config.LogicalDeviceConfiguration(memory_limit=memory_limit)]
                tf.config.set_logical_device_configuration(gpu, limit_cfg)
                print(f"[TensorFlow] GPU memory limited to {memory_limit}MB")
            elif allow_growth:
                # No cap: let TF grab memory incrementally instead of all at once.
                tf.config.experimental.set_memory_growth(gpu, True)
                print("[TensorFlow] GPU memory growth enabled")
        print(f"[TensorFlow] Configured {len(gpus)} GPU(s)")
        return gpus
    except RuntimeError as e:
        # Raised e.g. when GPUs were already initialized before configuration.
        print(f"[TensorFlow] GPU configuration error: {e}")
        return []
def setup_pytorch_gpu(device_id=0):
    """Select the PyTorch compute device.

    Args:
        device_id: index of the CUDA device to use when CUDA is available.

    Returns:
        A ``torch.device`` — ``cuda:<device_id>`` if CUDA is usable,
        otherwise the CPU device.
    """
    import torch

    # Guard clause: fall back to CPU when no CUDA runtime is usable.
    if not torch.cuda.is_available():
        print("[PyTorch] CUDA not available, using CPU")
        return torch.device('cpu')

    selected = torch.device(f'cuda:{device_id}')
    torch.cuda.set_device(device_id)
    print(f"[PyTorch] Using GPU: {torch.cuda.get_device_name(device_id)}")
    return selected
def limit_gpu_memory(fraction=0.5):
    """Enable dynamic GPU memory allocation for TensorFlow.

    NOTE(review): despite its name, this function does NOT enforce a hard
    fractional cap — it enables memory growth (allocate-on-demand) on every
    GPU plus the matching env var. A true fractional cap would require
    ``tf.config.set_logical_device_configuration`` with the device's total
    memory, which is not queried here. The original implementation silently
    ignored ``fraction`` while printing that it had been applied; the value
    is now at least validated.

    Args:
        fraction: intended share of total GPU memory, in (0.0, 1.0].

    Raises:
        ValueError: if ``fraction`` is outside (0.0, 1.0].
    """
    # Validate before importing TF so bad input fails fast and loudly
    # instead of being swallowed by the broad handler below.
    if not 0.0 < fraction <= 1.0:
        raise ValueError(f"fraction must be in (0.0, 1.0], got {fraction}")

    import tensorflow as tf
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            # Env-var equivalent of memory growth for processes spawned later.
            os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
            print(f"[TensorFlow] GPU memory fraction set to {fraction * 100}%")
        except Exception as e:
            print(f"[Error] {e}")
def set_visible_gpus(gpu_ids):
    """Restrict CUDA to a specific set of GPUs via CUDA_VISIBLE_DEVICES.

    Args:
        gpu_ids: iterable of GPU indices to expose (e.g. ``[0, 1]``).
    """
    # CUDA reads this env var at initialization time, so it must be set
    # before any CUDA context is created by TF/PyTorch.
    visible = ','.join(str(gid) for gid in gpu_ids)
    os.environ['CUDA_VISIBLE_DEVICES'] = visible
    print(f"[GPU] Visible devices set to: {gpu_ids}")