""" ------------------------------------------------------------------------- File: gpu_config.py Description: 로컬 GPU 감지 및 설정 유틸리티 Author: 소지안 프로 Created: 2026-02-02 Last Modified: 2026-02-02 ------------------------------------------------------------------------- """ import os import subprocess def check_gpu_status(): """GPU 상태 확인 및 출력""" print("=" * 50) print("GPU Status Check") print("=" * 50) # NVIDIA GPU 확인 (nvidia-smi) try: result = subprocess.run( ['nvidia-smi'], capture_output=True, text=True, timeout=10 ) if result.returncode == 0: print("\n[NVIDIA GPU Detected]") print(result.stdout) else: print("\n[NVIDIA GPU] Not found or driver not installed") except FileNotFoundError: print("\n[NVIDIA GPU] nvidia-smi not found") except Exception as e: print(f"\n[NVIDIA GPU] Error: {e}") # TensorFlow GPU 확인 try: import tensorflow as tf gpus = tf.config.list_physical_devices('GPU') print(f"\n[TensorFlow] GPU devices: {len(gpus)}") for gpu in gpus: print(f" - {gpu}") except ImportError: print("\n[TensorFlow] Not installed") except Exception as e: print(f"\n[TensorFlow] Error: {e}") # PyTorch GPU 확인 try: import torch print(f"\n[PyTorch] CUDA available: {torch.cuda.is_available()}") if torch.cuda.is_available(): print(f" - Device count: {torch.cuda.device_count()}") print(f" - Current device: {torch.cuda.current_device()}") print(f" - Device name: {torch.cuda.get_device_name(0)}") except ImportError: print("\n[PyTorch] Not installed") except Exception as e: print(f"\n[PyTorch] Error: {e}") print("\n" + "=" * 50) def get_gpu_info(): """GPU 정보 딕셔너리로 반환""" info = { 'nvidia_available': False, 'tensorflow_gpus': [], 'pytorch_cuda': False, 'gpu_name': None, 'gpu_memory': None, } # NVIDIA 정보 try: result = subprocess.run( ['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader'], capture_output=True, text=True, timeout=10 ) if result.returncode == 0: info['nvidia_available'] = True parts = result.stdout.strip().split(',') if len(parts) >= 2: info['gpu_name'] = parts[0].strip() info['gpu_memory'] = parts[1].strip() except: pass # TensorFlow try: import tensorflow as tf gpus = tf.config.list_physical_devices('GPU') info['tensorflow_gpus'] = [str(gpu) for gpu in gpus] except: pass # PyTorch try: import torch info['pytorch_cuda'] = torch.cuda.is_available() except: pass return info def setup_tensorflow_gpu(memory_limit=None, allow_growth=True): """ TensorFlow GPU 설정 Args: memory_limit: GPU 메모리 제한 (MB). None이면 제한 없음 allow_growth: 메모리 동적 할당 허용 Returns: 설정된 GPU 리스트 """ import tensorflow as tf gpus = tf.config.list_physical_devices('GPU') if not gpus: print("[Warning] No GPU found for TensorFlow") return [] try: for gpu in gpus: if memory_limit: # 메모리 제한 설정 tf.config.set_logical_device_configuration( gpu, [tf.config.LogicalDeviceConfiguration(memory_limit=memory_limit)] ) print(f"[TensorFlow] GPU memory limited to {memory_limit}MB") elif allow_growth: # 동적 메모리 할당 tf.config.experimental.set_memory_growth(gpu, True) print("[TensorFlow] GPU memory growth enabled") print(f"[TensorFlow] Configured {len(gpus)} GPU(s)") return gpus except RuntimeError as e: print(f"[TensorFlow] GPU configuration error: {e}") return [] def setup_pytorch_gpu(device_id=0): """ PyTorch GPU 설정 Args: device_id: 사용할 GPU 디바이스 ID Returns: torch.device 객체 """ import torch if torch.cuda.is_available(): device = torch.device(f'cuda:{device_id}') torch.cuda.set_device(device_id) print(f"[PyTorch] Using GPU: {torch.cuda.get_device_name(device_id)}") else: device = torch.device('cpu') print("[PyTorch] CUDA not available, using CPU") return device def limit_gpu_memory(fraction=0.5): """ GPU 메모리 사용량 제한 (TensorFlow) Args: fraction: 전체 메모리 중 사용할 비율 (0.0 ~ 1.0) """ import tensorflow as tf gpus = tf.config.list_physical_devices('GPU') if gpus: try: for gpu in gpus: tf.config.experimental.set_memory_growth(gpu, True) # 환경변수로 메모리 제한 os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' print(f"[TensorFlow] GPU memory fraction set to {fraction * 100}%") except Exception as e: print(f"[Error] {e}") def set_visible_gpus(gpu_ids): """ 사용할 GPU 지정 Args: gpu_ids: GPU ID 리스트 (예: [0, 1]) """ os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(map(str, gpu_ids)) print(f"[GPU] Visible devices set to: {gpu_ids}")