"""
🎯 CONFIG - Wspólna konfiguracja dla MiniGPT-60M
"""
import sys
import random
import numpy as np
import torch
from pathlib import Path
from typing import Dict, Any, Optional
import logging
import json
# ==================== LOGGING ====================
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('training.log', encoding='utf-8'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)
# ==================== SYSTEM CONFIGURATION ====================
class SystemConfig:
    """System and device configuration"""

    def __init__(self):
        self.device = self._get_device()
        self.set_seeds(42)
        self._print_info()

    def _get_device(self) -> str:
        """Automatically selects the best available device"""
        if torch.cuda.is_available():
            return "cuda"
        elif torch.backends.mps.is_available():
            return "mps"
        else:
            return "cpu"

    def set_seeds(self, seed: int = 42):
        """Sets seeds for reproducibility"""
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(seed)
            # Deterministic cuDNN kernels trade some speed for reproducibility
            torch.backends.cudnn.deterministic = True
            torch.backends.cudnn.benchmark = False

    def _print_info(self):
        """Logs system information"""
        logger.info("=" * 60)
        logger.info("🎯 MINIGPT-60M SYSTEM")
        logger.info("=" * 60)
        logger.info(f"Python: {sys.version.split()[0]}")
        logger.info(f"PyTorch: {torch.__version__}")
        logger.info(f"Device: {self.device.upper()}")
        if self.device == "cuda":
            gpu_count = torch.cuda.device_count()
            logger.info(f"CUDA available: {torch.cuda.is_available()}")
            logger.info(f"GPU count: {gpu_count}")
            for i in range(gpu_count):
                mem = torch.cuda.get_device_properties(i).total_memory / 1e9
                logger.info(f"GPU {i}: {torch.cuda.get_device_name(i)} ({mem:.1f} GB)")
        logger.info("=" * 60)
# ==================== MODEL CONFIGURATION ====================
class ModelConfig:
    """Configuration for the ~60M-parameter model"""

    def __init__(self):
        # Vocabulary: Polish letters, digits, punctuation/whitespace, plus a
        # few multi-character tokens. Built with an order-preserving dedup so
        # no token appears twice (the raw lists overlap on ' ' and '/').
        chars = list("aąbcćdeęfghijklłmnńoóprsśtuwyzźżAĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ")
        chars += list("0123456789")
        chars += list(" .,?!:;()[]{}+-*/=<>_\"'`~@#$%^&|\\/\n\t")
        chars += [" ", "\n\n", "\t\t", "->", "::", "=>"]
        self.vocab_chars = list(dict.fromkeys(chars))
        self.vocab = self.vocab_chars
        self.vocab_size = len(self.vocab)
        # Architecture (GPT-2-small-scale dimensions; note that 12 layers at
        # embed dim 768 put the transformer blocks alone at roughly 85M weights)
        self.embed_dim = 768
        self.n_layers = 12
        self.n_heads = 12
        self.max_len = 512
        self.ff_dim = self.embed_dim * 4
        self.dropout = 0.1
        self.activation = "gelu"
        self.norm_eps = 1e-5
        # Training
        self.epochs = 3
        self.batch_size = 16 if torch.cuda.is_available() else 4
        self.grad_accum_steps = 4
        self.learning_rate = 3e-4
        self.weight_decay = 0.1
        self.adam_beta1 = 0.9
        self.adam_beta2 = 0.95
        self.adam_eps = 1e-8
        self.clip_grad = 1.0
        self.warmup_steps = 2000
        # Mixed precision
        self.use_amp = torch.cuda.is_available()
        # DataLoader parallelism (pin_memory only helps with CUDA transfers)
        self.num_workers = 4 if torch.cuda.is_available() else 0
        self.pin_memory = torch.cuda.is_available()
        # Generation
        self.generation_temperature = 0.8
        self.top_k = 50
        self.top_p = 0.95
        self.repetition_penalty = 1.1
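        # A minimal sketch (not part of this module) of how these sampling
        # knobs are typically applied to a logits vector during generation:
        #
        #     logits = logits / cfg.generation_temperature
        #     kth = torch.topk(logits, cfg.top_k).values[..., -1, None]
        #     logits[logits < kth] = float("-inf")   # top-k filter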
        # Paths
        self.model_dir = "models"
        self.data_dir = "data"
        self.prepared_dir = "prepared_data"
        self.log_dir = "logs"
        self.tensorboard_dir = "runs"
        self.cache_dir = ".cache"
        self.checkpoints_dir = "checkpoints"
        self.resume_file = "resume_state.json"  # State file used to resume training
        # Create the required directories
        self._create_dirs()
    def _create_dirs(self):
        """Creates the required directories"""
        dirs = [self.model_dir, self.data_dir, self.prepared_dir,
                self.log_dir, self.tensorboard_dir, self.cache_dir,
                "backups", "results", self.checkpoints_dir]
        for d in dirs:
            Path(d).mkdir(parents=True, exist_ok=True)
    def print_config(self):
        """Logs the configuration"""
        logger.info("=" * 60)
        logger.info("⚙️ MODEL CONFIGURATION")
        logger.info("=" * 60)
        logger.info(f"• Vocab size: {self.vocab_size}")
        logger.info(f"• Embed dim: {self.embed_dim}")
        logger.info(f"• Layers: {self.n_layers}")
        logger.info(f"• Heads: {self.n_heads}")
        logger.info(f"• Context length: {self.max_len}")
        logger.info(f"• Batch size: {self.batch_size}")
        logger.info(f"• Learning rate: {self.learning_rate}")
        logger.info(f"• Mixed precision: {self.use_amp}")
        logger.info("=" * 60)
    def save_resume_state(self, state: Dict[str, Any]):
        """Saves the resume state"""
        state_path = Path(self.checkpoints_dir) / self.resume_file
        with open(state_path, 'w', encoding='utf-8') as f:
            json.dump(state, f, indent=2, ensure_ascii=False)
        logger.info(f"💾 State saved to {state_path}")

    def load_resume_state(self) -> Optional[Dict[str, Any]]:
        """Loads the resume state, if one exists"""
        state_path = Path(self.checkpoints_dir) / self.resume_file
        if state_path.exists():
            with open(state_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        return None
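    # The contents of the state dict are defined by the training loop, not by
    # this class; a hypothetical example (assumed keys, for illustration only):
    #
    #     cfg.save_resume_state({"epoch": 2, "global_step": 12000,
    #                            "best_loss": 1.84})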
    def get_latest_checkpoint(self) -> Optional[Path]:
        """Finds the most recent checkpoint"""
        checkpoints = list(Path(self.checkpoints_dir).glob("checkpoint_*.pt"))
        if checkpoints:
            # Sort by modification time, newest first
            checkpoints.sort(key=lambda x: x.stat().st_mtime, reverse=True)
            return checkpoints[0]
        return None

    def get_latest_model(self) -> Optional[Path]:
        """Finds the most recent model"""
        models = list(Path(self.model_dir).glob("model_*.pt"))
        if models:
            # Prefer model_final.pt, then fall back to model_epoch_X.pt
            final_model = Path(self.model_dir) / "model_final.pt"
            if final_model.exists():
                return final_model

            # Sort by epoch number
            def get_epoch_num(path: Path) -> int:
                try:
                    # model_epoch_10.pt -> 10
                    return int(path.stem.split('_')[-1])
                except ValueError:
                    return 0

            models.sort(key=get_epoch_num, reverse=True)
            return models[0]
        return None
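# A minimal usage sketch of the lookup helpers above (assumes checkpoints were
# written with torch.save elsewhere in the training code):
#
#     ckpt_path = cfg.get_latest_checkpoint() or cfg.get_latest_model()
#     if ckpt_path is not None:
#         state = torch.load(ckpt_path, map_location=sys_config.device)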
def get_device(prefer_gpu=True):
    """Standalone convenience helper that picks a device
    (mirrors SystemConfig._get_device)."""
    if prefer_gpu and torch.cuda.is_available():
        return 'cuda'
    elif torch.backends.mps.is_available():  # Apple Silicon
        return 'mps'
    else:
        return 'cpu'
# Module-level configuration instances (created on import)
sys_config = SystemConfig()
cfg = ModelConfig()
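
if __name__ == "__main__":
    # Smoke test: log the model configuration and report the checkpoint that
    # training could resume from (both helpers are defined above).
    cfg.print_config()
    latest = cfg.get_latest_checkpoint()
    logger.info(f"Latest checkpoint: {latest if latest else 'none found'}")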