diff --git a/Pathl.AI b/Pathl.AI
deleted file mode 160000
index 577b442..0000000
--- a/Pathl.AI
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 577b442bafdd8a1f21cfb42816627fb7e0daedf2
diff --git a/ai.py b/ai.py
new file mode 100644
index 0000000..4209b07
--- /dev/null
+++ b/ai.py
@@ -0,0 +1,1247 @@
+"""
+🎯 AI.PY - MiniGPT-60M with the Clay checkpoint system
+"""
+
+import os
+import sys
+import time
+import math
+import json
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import Dataset, DataLoader
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import List, Dict, Any, Optional, Tuple
+import random
+import numpy as np
+
+from config import logger, cfg, sys_config
+
+# ==================== TOKENIZER ====================
+class Tokenizer:
+    def __init__(self):
+        self.vocab = cfg.vocab
+        self.vocab_size = cfg.vocab_size
+        self.char_to_idx = {ch: i for i, ch in enumerate(self.vocab)}
+        self.idx_to_char = {i: ch for i, ch in enumerate(self.vocab)}
+
+    def encode(self, text: str) -> List[int]:
+        """Encode text into a list of indices."""
+        return [self.char_to_idx.get(ch, 0) for ch in text if ch in self.char_to_idx]
+
+    def decode(self, indices: List[int]) -> str:
+        """Decode a list of indices back into text."""
+        return ''.join([self.idx_to_char.get(idx, '') for idx in indices])
+
+    def encode_batch(self, texts: List[str]) -> torch.Tensor:
+        """Encode a batch of texts."""
+        encoded = [self.encode(text) for text in texts]
+        max_len = max(len(e) for e in encoded)
+        padded = [e + [0] * (max_len - len(e)) for e in encoded]
+        return torch.tensor(padded, dtype=torch.long)
+
+    def decode_batch(self, tensors: torch.Tensor) -> List[str]:
+        """Decode a batch of tensors."""
+        texts = []
+        for tensor in tensors:
+            indices = tensor.tolist()
+            # Strip padding (0)
+            indices = [idx for idx in indices if idx != 0]
+            texts.append(self.decode(indices))
+        return texts
+
+# Global tokenizer
+tokenizer = Tokenizer()
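+
+# A quick round-trip sketch of the tokenizer above. encode() silently drops
+# characters outside cfg.vocab, so decode(encode(s)) == s only holds for strings
+# made entirely of in-vocabulary characters:
+def _tokenizer_roundtrip_demo() -> None:
+    """Round-trip a sample through the global tokenizer (illustrative only)."""
+    sample = "Hello world!"
+    ids = tokenizer.encode(sample)
+    # Holds because every character of `sample` is in cfg.vocab
+    assert tokenizer.decode(ids) == sample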
+
+# ==================== DATASET ====================
+class TextDataset(Dataset):
+    def __init__(self, filepath: str, max_len: int = 512):
+        self.filepath = filepath
+        self.max_len = max_len
+
+        # Load the data
+        with open(filepath, 'r', encoding='utf-8') as f:
+            self.lines = [line.strip() for line in f if line.strip()]
+
+        logger.info(f"📊 Dataset: {len(self.lines):,} lines")
+
+    def __len__(self):
+        return len(self.lines)
+
+    def __getitem__(self, idx):
+        text = self.lines[idx]
+
+        # Crop a random window if the line is longer than max_len
+        if len(text) > self.max_len:
+            start = random.randint(0, len(text) - self.max_len)
+            text = text[start:start + self.max_len]
+
+        # Encode
+        encoded = tokenizer.encode(text)
+
+        # Pad if too short
+        if len(encoded) < self.max_len:
+            encoded = encoded + [0] * (self.max_len - len(encoded))
+
+        x = torch.tensor(encoded[:-1], dtype=torch.long)
+        y = torch.tensor(encoded[1:], dtype=torch.long)
+
+        return x, y
+
+# ==================== CLAY CHECKPOINT SYSTEM ====================
+class ClayCheckpoint:
+    """Advanced checkpoint system with a Clay-style progress UI."""
+
+    def __init__(self, model, optimizer, scheduler=None):
+        self.model = model
+        self.optimizer = optimizer
+        self.scheduler = scheduler
+        self.checkpoint_dir = Path(cfg.checkpoints_dir)
+        self.checkpoint_dir.mkdir(exist_ok=True)
+
+        # Training stats
+        self.stats = {
+            'start_time': time.time(),
+            'epoch_times': [],
+            'step_times': [],
+            'loss_history': [],
+            'learning_rates': [],
+            'best_loss': float('inf')
+        }
+
+        # Progress tracking
+        self.progress = {
+            'current_step': 0,
+            'total_steps': 0,
+            'current_epoch': 0,
+            'total_epochs': cfg.epochs,
+            'estimated_completion': None
+        }
+
+        # Progress bar style
+        self.progress_style = {
+            'filled': '█',
+            'empty': '░',
+            'arrow': '▶',
+            'spinner': ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
+        }
+
+    def save(self, step, epoch, loss, is_best=False, force=False):
+        """Save a checkpoint."""
+        # cfg does not define checkpoint_freq, so fall back to every 1000 steps
+        if not force and step % getattr(cfg, 'checkpoint_freq', 1000) != 0:
+            return
+
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        filename = f"clay_checkpoint_ep{epoch}_step{step}_{timestamp}.pt"
+        filepath = self.checkpoint_dir / filename
+
+        # Assemble the state
+        checkpoint = {
+            'step': step,
+            'epoch': epoch,
+            'model_state_dict': self.model.state_dict(),
+            'optimizer_state_dict': self.optimizer.state_dict(),
+            'loss': loss,
+            'stats': self.stats,
+            'progress': self.progress,
+            'timestamp': timestamp,
+            'config': {
+                'vocab_size': cfg.vocab_size,
+                'embed_dim': cfg.embed_dim,
+                'n_layers': cfg.n_layers,
+                'n_heads': cfg.n_heads
+            }
+        }
+
+        if self.scheduler:
+            checkpoint['scheduler_state_dict'] = self.scheduler.state_dict()
+
+        # Save
+        torch.save(checkpoint, filepath)
+
+        # Also write a JSON summary for readability
+        json_path = filepath.with_suffix('.json')
+        json_data = {
+            'checkpoint_info': {
+                'filename': filename,
+                'step': step,
+                'epoch': epoch,
+                'loss': float(loss),
+                'timestamp': timestamp,
+                'file_size': os.path.getsize(filepath)
+            },
+            'training_stats': {
+                'total_time': time.time() - self.stats['start_time'],
+                'avg_loss': float(sum(self.stats['loss_history'][-100:]) / max(1, min(100, len(self.stats['loss_history'])))),
+                'current_lr': float(self.optimizer.param_groups[0]['lr']),
+                'steps_done': self.progress['current_step']
+            }
+        }
+
+        with open(json_path, 'w', encoding='utf-8') as f:
+            json.dump(json_data, f, indent=2, ensure_ascii=False)
+
+        # Keep only the N most recent checkpoints
+        self.cleanup_old_checkpoints()
+
+        logger.info(f"💾 Clay Checkpoint: {filename} (loss: {loss:.4f})")
+
+        if is_best:
+            best_path = self.checkpoint_dir / "clay_best.pt"
+            torch.save(checkpoint, best_path)
+            logger.info(f"🏆 New best model saved!")
+
+    def load_latest(self):
+        """Load the most recent checkpoint."""
+        checkpoints = list(self.checkpoint_dir.glob("clay_checkpoint_*.pt"))
+        if not checkpoints:
+            return None, None, None, None
+
+        # Pick the newest by modification time
+        latest = max(checkpoints, key=os.path.getmtime)
+
+        logger.info(f"🔄 Loading Clay checkpoint: {latest.name}")
+        checkpoint = torch.load(latest, map_location='cpu')
+
+        # Restore state
+        self.model.load_state_dict(checkpoint['model_state_dict'])
+        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
+
+        if 'scheduler_state_dict' in checkpoint and self.scheduler:
+            self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
+
+        # Restore statistics
+        if 'stats' in checkpoint:
+            self.stats.update(checkpoint['stats'])
+
+        if 'progress' in checkpoint:
+            self.progress.update(checkpoint['progress'])
+
+        logger.info(f"✅ Checkpoint loaded: epoch {checkpoint['epoch']}, step {checkpoint['step']}")
+
+        return checkpoint['epoch'], checkpoint['step'], checkpoint['loss'], checkpoint.get('timestamp')
+
+    def cleanup_old_checkpoints(self):
+        """Remove stale checkpoints, keeping the N most recent."""
+        checkpoints = list(self.checkpoint_dir.glob("clay_checkpoint_*.pt"))
+        checkpoints.sort(key=os.path.getmtime)
+
+        # ModelConfig has no .get(); read the attribute with a default instead
+        keep = getattr(cfg, 'keep_checkpoints', 5)
+        while len(checkpoints) > keep:
+            old = checkpoints.pop(0)
+            # Never delete the best checkpoint
+            if "best" not in old.name:
+                old.unlink()
+                # Remove the JSON twin as well
+                json_file = old.with_suffix('.json')
+                if json_file.exists():
+                    json_file.unlink()
+
+    def estimate_time_remaining(self):
+        """Estimate the remaining training time."""
+        if not self.stats['step_times']:
+            return "Calculating..."
+
+        avg_time_per_step = sum(self.stats['step_times'][-100:]) / min(100, len(self.stats['step_times']))
+        steps_done = self.progress['current_step']
+        total_steps = self.progress['total_steps']
+
+        if steps_done == 0 or total_steps == 0:
+            return "Calculating..."
+
+        steps_left = total_steps - steps_done
+        seconds_left = steps_left * avg_time_per_step
+
+        # If epoch timings are available, take the smaller of the two estimates
+        if self.stats['epoch_times']:
+            epochs_done = self.progress['current_epoch']
+            epochs_left = self.progress['total_epochs'] - epochs_done
+            avg_epoch_time = sum(self.stats['epoch_times']) / len(self.stats['epoch_times'])
+            epoch_based = epochs_left * avg_epoch_time
+            seconds_left = min(seconds_left, epoch_based)
+
+        return self._format_time(seconds_left)
+
+    def _format_time(self, seconds):
+        """Format a duration."""
+        if seconds < 60:
+            return f"{seconds:.0f}s"
+        elif seconds < 3600:
+            return f"{seconds/60:.1f}m"
+        elif seconds < 86400:
+            return f"{seconds/3600:.1f}h"
+        else:
+            return f"{seconds/86400:.1f}d"
+
+    def get_progress_bar(self, width=40):
+        """Render the progress bar."""
+        if self.progress['total_steps'] == 0:
+            return "[░░░░░░░░░░░░░░░░░░░░]"
+
+        progress = min(1.0, self.progress['current_step'] / self.progress['total_steps'])
+        filled = int(width * progress)
+        empty = width - filled
+
+        bar = self.progress_style['filled'] * filled
+        if filled < width:
+            bar += self.progress_style['arrow']
+        bar += self.progress_style['empty'] * empty
+
+        return f"[{bar}]"
+
+    def print_progress(self, step_loss, current_lr):
+        """Print the current progress line."""
+        progress_bar = self.get_progress_bar(40)
+
+        # Percentage (guard against a zero total)
+        total = self.progress['total_steps']
+        percent = (self.progress['current_step'] / total) * 100 if total else 0.0
+
+        # Estimated time remaining
+        eta = self.estimate_time_remaining()
+
+        # Animated spinner
+        spinner_idx = int(time.time() * 4) % len(self.progress_style['spinner'])
+        spinner = self.progress_style['spinner'][spinner_idx]
+
+        # Format the output
+        output = (f"\r{spinner} {progress_bar} {percent:5.1f}% | "
+                  f"Step: {self.progress['current_step']:,}/{self.progress['total_steps']:,} | "
+                  f"Loss: {step_loss:.4f} | LR: {current_lr:.6f} | "
+                  f"ETA: {eta}")
+
+        print(output, end='', flush=True)
+
+        # Once every step is done, move to a new line
+        if self.progress['current_step'] == self.progress['total_steps']:
+            print()
+
+    def start_epoch(self, epoch, total_batches):
+        """Begin a new epoch."""
+        self.progress['current_epoch'] = epoch
+        self.progress['total_steps'] = total_batches * self.progress['total_epochs']
+        self.epoch_start_time = time.time()
+
+        logger.info(f"\n🚀 STARTING EPOCH {epoch+1}/{self.progress['total_epochs']}")
+        logger.info(f" • Steps in epoch: {total_batches:,}")
+        logger.info(f" • Total steps: {self.progress['total_steps']:,}")
+
+        # Time estimates for this epoch
+        if self.stats['epoch_times']:
+            avg_epoch_time = sum(self.stats['epoch_times']) / len(self.stats['epoch_times'])
+            logger.info(f" • Estimated epoch time: {self._format_time(avg_epoch_time)}")
+            logger.info(f" • Estimated time remaining: {self.estimate_time_remaining()}")
+
+    def end_epoch(self, epoch_loss):
+        """Finish the current epoch."""
+        epoch_time = time.time() - self.epoch_start_time
+        self.stats['epoch_times'].append(epoch_time)
+
+        # Update the best loss
+        if epoch_loss < self.stats['best_loss']:
+            self.stats['best_loss'] = epoch_loss
+
+        # Compute the remaining time
+        avg_epoch_time = sum(self.stats['epoch_times']) / len(self.stats['epoch_times'])
+        epochs_left = self.progress['total_epochs'] - self.progress['current_epoch'] - 1
+        total_time_left = avg_epoch_time * epochs_left
+
+        # Epoch summary
+        logger.info(f"📊 EPOCH {self.progress['current_epoch']+1} FINISHED:")
+        logger.info(f" • Loss: {epoch_loss:.4f}")
+        logger.info(f" • Best loss: {self.stats['best_loss']:.4f}")
+        logger.info(f" • Epoch time: {self._format_time(epoch_time)}")
+        logger.info(f" • Avg time/epoch: {self._format_time(avg_epoch_time)}")
+        logger.info(f" • Remaining: ~{self._format_time(total_time_left)}")
+
+        # Projected completion
+        if epochs_left > 0:
+            eta_time = datetime.now() + timedelta(seconds=total_time_left)
+            logger.info(f" • Projected completion: {eta_time.strftime('%Y-%m-%d %H:%M:%S')}")
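+
+# A minimal sketch of the intended checkpoint/resume cycle, assuming only the
+# ClayCheckpoint API above (the model/optimizer arguments are placeholders):
+def _clay_resume_demo(model, optimizer):
+    """Save one checkpoint, then restore the newest one (illustrative only)."""
+    clay = ClayCheckpoint(model, optimizer)
+    # force=True bypasses the step-frequency check
+    clay.save(step=1000, epoch=0, loss=2.34, force=True)
+    # load_latest() restores model/optimizer state in place and returns metadata
+    epoch, step, loss, stamp = clay.load_latest()
+    logger.info(f"Resumed at epoch={epoch}, step={step}, loss={loss:.4f}")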
+
+# ==================== MODEL ARCHITECTURE ====================
+class AttentionHead(nn.Module):
+    def __init__(self, embed_dim, head_dim, dropout=0.1):
+        super().__init__()
+        self.head_dim = head_dim
+        self.scale = head_dim ** -0.5
+
+        self.qkv = nn.Linear(embed_dim, 3 * head_dim)
+        self.dropout = nn.Dropout(dropout)
+
+    def forward(self, x, mask=None):
+        B, T, C = x.shape
+        qkv = self.qkv(x)
+        q, k, v = qkv.chunk(3, dim=-1)
+
+        attn = (q @ k.transpose(-2, -1)) * self.scale
+
+        if mask is not None:
+            attn = attn.masked_fill(mask == 0, float('-inf'))
+
+        attn = attn.softmax(dim=-1)
+        attn = self.dropout(attn)
+
+        # Each head returns head_dim features; the output projection lives in
+        # MultiHeadAttention, after the heads are concatenated back to embed_dim
+        out = attn @ v
+        return out
+
+class MultiHeadAttention(nn.Module):
+    def __init__(self, embed_dim, num_heads, dropout=0.1):
+        super().__init__()
+        assert embed_dim % num_heads == 0
+        self.head_dim = embed_dim // num_heads
+        self.num_heads = num_heads
+
+        self.heads = nn.ModuleList([
+            AttentionHead(embed_dim, self.head_dim, dropout)
+            for _ in range(num_heads)
+        ])
+        self.proj = nn.Linear(embed_dim, embed_dim)
+        self.dropout = nn.Dropout(dropout)
+
+    def forward(self, x, mask=None):
+        # Run every head over the input
+        head_outputs = [head(x, mask) for head in self.heads]
+
+        # Concatenate back to embed_dim (num_heads * head_dim)
+        out = torch.cat(head_outputs, dim=-1)
+        out = self.proj(out)
+        out = self.dropout(out)
+        return out
+
+class FeedForward(nn.Module):
+    def __init__(self, embed_dim, ff_dim, dropout=0.1):
+        super().__init__()
+        self.net = nn.Sequential(
+            nn.Linear(embed_dim, ff_dim),
+            nn.GELU(),
+            nn.Dropout(dropout),
+            nn.Linear(ff_dim, embed_dim),
+            nn.Dropout(dropout)
+        )
+
+    def forward(self, x):
+        return self.net(x)
+
+class TransformerBlock(nn.Module):
+    def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1):
+        super().__init__()
+        self.attention = MultiHeadAttention(embed_dim, num_heads, dropout)
+        self.ff = FeedForward(embed_dim, ff_dim, dropout)
+        self.norm1 = nn.LayerNorm(embed_dim)
+        self.norm2 = nn.LayerNorm(embed_dim)
+
+    def forward(self, x, mask=None):
+        # Self-attention with a residual connection (pre-norm)
+        attn_out = self.attention(self.norm1(x), mask)
+        x = x + attn_out
+
+        # Feed-forward with a residual connection
+        ff_out = self.ff(self.norm2(x))
+        x = x + ff_out
+
+        return x
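+
+# Illustrative sketch of the causal mask that MiniGPT60M.forward() builds below:
+# a lower-triangular matrix, so position t can attend to positions <= t only
+# (zero entries are filled with -inf before the softmax):
+def _causal_mask_demo(T: int = 4) -> torch.Tensor:
+    """Return a (1, T, T) causal mask (illustrative only)."""
+    mask = torch.tril(torch.ones(T, T)).view(1, T, T)
+    # For T=4, mask[0] is:
+    # [[1., 0., 0., 0.],
+    #  [1., 1., 0., 0.],
+    #  [1., 1., 1., 0.],
+    #  [1., 1., 1., 1.]]
+    return mask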
+
+class MiniGPT60M(nn.Module):
+    def __init__(self):
+        super().__init__()
+
+        # Embeddings
+        self.token_embedding = nn.Embedding(cfg.vocab_size, cfg.embed_dim)
+        self.position_embedding = nn.Embedding(cfg.max_len, cfg.embed_dim)
+
+        # Transformer blocks
+        self.blocks = nn.ModuleList([
+            TransformerBlock(cfg.embed_dim, cfg.n_heads, cfg.ff_dim, cfg.dropout)
+            for _ in range(cfg.n_layers)
+        ])
+
+        # Final layers
+        self.norm = nn.LayerNorm(cfg.embed_dim)
+        self.head = nn.Linear(cfg.embed_dim, cfg.vocab_size)
+
+        # Initialize weights
+        self.apply(self._init_weights)
+
+        # Count parameters
+        total_params = sum(p.numel() for p in self.parameters())
+        trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
+
+        logger.info(f"🤖 MiniGPT-60M model created")
+        logger.info(f" • Parameters: {total_params:,} (trainable: {trainable_params:,})")
+        logger.info(f" • Embed dim: {cfg.embed_dim}")
+        logger.info(f" • Layers: {cfg.n_layers}")
+        logger.info(f" • Heads: {cfg.n_heads}")
+
+    def _init_weights(self, module):
+        if isinstance(module, nn.Linear):
+            nn.init.normal_(module.weight, mean=0.0, std=0.02)
+            if module.bias is not None:
+                nn.init.zeros_(module.bias)
+        elif isinstance(module, nn.Embedding):
+            nn.init.normal_(module.weight, mean=0.0, std=0.02)
+
+    def forward(self, idx, targets=None):
+        B, T = idx.shape
+
+        # Embeddings
+        tok_emb = self.token_embedding(idx)
+        pos = torch.arange(T, device=idx.device).unsqueeze(0)
+        pos_emb = self.position_embedding(pos)
+        x = tok_emb + pos_emb
+
+        # Causal mask, shaped to broadcast over the (B, T, T) attention scores
+        mask = torch.tril(torch.ones(T, T, device=idx.device)).view(1, T, T)
+
+        # Transformer blocks
+        for block in self.blocks:
+            x = block(x, mask)
+
+        # Final layer
+        x = self.norm(x)
+        logits = self.head(x)
+
+        # Loss, if targets were provided
+        loss = None
+        if targets is not None:
+            loss = nn.functional.cross_entropy(
+                logits.view(-1, logits.size(-1)),
+                targets.view(-1)
+            )
+
+        return logits, loss
+
+    def generate(self, input_ids=None, max_length=100, temperature=1.0, **kwargs):
+        """Universal generate() supporting several calling conventions."""
+        # Tolerate unsupported kwargs (device, max_len, etc.)
+        max_len = kwargs.get('max_len', max_length)
+
+        if isinstance(input_ids, str):
+            return self.generate_text(input_ids, max_len=max_len, temperature=temperature)
+        elif input_ids is not None:
+            # If it is a tensor, convert it back to text
+            if hasattr(input_ids, 'tolist'):
+                if len(input_ids.shape) > 1:
+                    text = tokenizer.decode(input_ids[0].tolist())
+                else:
+                    text = tokenizer.decode(input_ids.tolist())
+            else:
+                text = tokenizer.decode(input_ids)
+            return self.generate_text(text, max_len=max_len, temperature=temperature)
+        else:
+            return ""  # Empty string for None input
+
+    def generate_text(self, prompt, max_len=100, temperature=1.0):
+        """Generate text from a prompt."""
+        self.eval()
+
+        # Encode the prompt
+        input_ids = tokenizer.encode(prompt)
+        if len(input_ids) == 0:
+            return prompt
+
+        # Build the input tensor on the model's device
+        device = next(self.parameters()).device
+        input_tensor = torch.tensor([input_ids], dtype=torch.long, device=device)
+
+        generated = input_ids.copy()
+
+        with torch.no_grad():
+            for _ in range(max_len):
+                # Forward pass
+                logits, _ = self(input_tensor)
+
+                # Logits for the last position
+                next_token_logits = logits[0, -1, :] / temperature
+
+                # Top-k sampling
+                if cfg.top_k > 0:
+                    indices_to_remove = next_token_logits < torch.topk(next_token_logits, cfg.top_k)[0][..., -1, None]
+                    next_token_logits[indices_to_remove] = float('-inf')
+
+                # Top-p (nucleus) sampling
+                if cfg.top_p < 1.0:
+                    sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True)
+                    cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
+
+                    sorted_indices_to_remove = cumulative_probs > cfg.top_p
+                    sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
+                    sorted_indices_to_remove[..., 0] = 0
+
+                    indices_to_remove = sorted_indices[sorted_indices_to_remove]
+                    next_token_logits[indices_to_remove] = float('-inf')
+
+                # Sample the next token
+                probs = torch.softmax(next_token_logits, dim=-1)
+                next_token = torch.multinomial(probs, num_samples=1).item()
+
+                # Append to the sequence
+                generated.append(next_token)
+
+                # Feed back at most the last cfg.max_len tokens
+                input_tensor = torch.tensor([generated[-cfg.max_len:]], dtype=torch.long, device=device)
+
+                # Stop on the newline token
+                if next_token == tokenizer.char_to_idx.get('\n', -1):
+                    break
+
+        # Decode
+        generated_text = tokenizer.decode(generated)
+        return generated_text
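+
+# A self-contained restatement of the top-k / top-p filtering performed inside
+# generate_text() above (illustrative; `logits` is a 1-D tensor over the vocab,
+# and k / p mirror cfg.top_k / cfg.top_p):
+def _filter_logits(logits: torch.Tensor, k: int = 50, p: float = 0.95) -> torch.Tensor:
+    """Mask everything outside the top-k tokens, then outside the top-p nucleus."""
+    out = logits.clone()
+    if k > 0:
+        kth = torch.topk(out, k)[0][..., -1, None]
+        out[out < kth] = float('-inf')
+    if p < 1.0:
+        sorted_logits, sorted_idx = torch.sort(out, descending=True)
+        cum = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
+        remove = cum > p
+        remove[..., 1:] = remove[..., :-1].clone()  # always keep the most likely token
+        remove[..., 0] = 0
+        out[sorted_idx[remove]] = float('-inf')
+    return out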
+
+# ==================== TRAINING FUNCTIONS ====================
+def create_optimizer(model, learning_rate=3e-4, weight_decay=0.1):
+    """Create an AdamW optimizer with decoupled weight decay."""
+    decay_params = []
+    no_decay_params = []
+
+    for name, param in model.named_parameters():
+        if not param.requires_grad:
+            continue
+
+        if 'weight' in name and len(param.shape) > 1:
+            decay_params.append(param)
+        else:
+            no_decay_params.append(param)
+
+    optimizer = optim.AdamW([
+        {'params': decay_params, 'weight_decay': weight_decay},
+        {'params': no_decay_params, 'weight_decay': 0.0}
+    ], lr=learning_rate, betas=(cfg.adam_beta1, cfg.adam_beta2), eps=cfg.adam_eps)
+
+    return optimizer
+
+def create_scheduler(optimizer, warmup_steps=2000, total_steps=None):
+    """Create a OneCycleLR scheduler with warmup."""
+    if total_steps is None:
+        total_steps = cfg.epochs * 1000  # rough fallback estimate
+    scheduler = optim.lr_scheduler.OneCycleLR(
+        optimizer,
+        max_lr=cfg.learning_rate,
+        total_steps=total_steps,
+        pct_start=warmup_steps / total_steps,
+        anneal_strategy='cos'
+    )
+    return scheduler
+
+def benchmark_device():
+    """Benchmark the current device."""
+    logger.info("🏃‍♂️ DEVICE BENCHMARK:")
+
+    if torch.cuda.is_available():
+        device_name = torch.cuda.get_device_name(0)
+        memory_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
+
+        logger.info(f" • GPU: {device_name}")
+        logger.info(f" • GPU memory: {memory_gb:.1f} GB")
+
+        # GPU throughput test
+        start = time.time()
+        x = torch.randn(1024, 768, device='cuda')
+        y = torch.randn(768, 512, device='cuda')
+        for _ in range(100):
+            _ = torch.matmul(x, y)
+        torch.cuda.synchronize()
+        gpu_time = time.time() - start
+
+        logger.info(f" • GPU: {gpu_time:.2f}s per 100 matmuls")
+
+    # CPU throughput test
+    x = torch.randn(1024, 768)
+    y = torch.randn(768, 512)
+    start = time.time()
+    for _ in range(100):
+        _ = torch.matmul(x, y)
+    cpu_time = time.time() - start
+
+    logger.info(f" • CPU: {cpu_time:.2f}s per 100 matmuls")
+
+    if torch.cuda.is_available():
+        speedup = cpu_time / gpu_time
+        logger.info(f" • GPU vs CPU speedup: {speedup:.1f}x")
+
+def estimate_training_time(total_samples, batch_size, epochs, device):
+    """Estimate the total training time up front."""
+    steps_per_epoch = math.ceil(total_samples / batch_size)
+    total_steps = steps_per_epoch * epochs
+
+    logger.info("⏱️ TRAINING TIME ESTIMATE:")
+    logger.info(f" • Samples: {total_samples:,}")
+    logger.info(f" • Epochs: {epochs}")
+    logger.info(f" • Batch size: {batch_size}")
+    logger.info(f" • Steps: {total_steps:,}")
+
+    # Rough per-device benchmarks (seconds per 1000 steps)
+    benchmarks = {
+        'cuda': {'T4': 120, 'P100': 90, 'V100': 60, 'A100': 30, 'default': 100},
+        'mps': {'M1': 150, 'M2': 100, 'default': 120},
+        'cpu': {'default': 600}
+    }
+
+    steps_in_k = total_steps / 1000
+
+    if device == 'cuda':
+        try:
+            gpu_name = torch.cuda.get_device_name(0)
+            if 'T4' in gpu_name:
+                time_per_k = benchmarks['cuda']['T4']
+            elif 'P100' in gpu_name:
+                time_per_k = benchmarks['cuda']['P100']
+            elif 'V100' in gpu_name:
+                time_per_k = benchmarks['cuda']['V100']
+            elif 'A100' in gpu_name:
+                time_per_k = benchmarks['cuda']['A100']
+            else:
+                time_per_k = benchmarks['cuda']['default']
+        except Exception:
+            time_per_k = benchmarks['cuda']['default']
+
+    elif device == 'mps':
+        time_per_k = benchmarks['mps']['default']
+    else:
+        time_per_k = benchmarks['cpu']['default']
+
+    estimated_total = steps_in_k * time_per_k
+
+    # Format the estimate
+    if estimated_total > 3600 * 24:
+        days = estimated_total / (3600 * 24)
+        time_str = f"~{days:.1f} days"
+    elif estimated_total > 3600:
+        hours = estimated_total / 3600
+        time_str = f"~{hours:.1f} hours"
+    elif estimated_total > 60:
+        minutes = estimated_total / 60
+        time_str = f"~{minutes:.1f} minutes"
+    else:
+        time_str = f"~{estimated_total:.0f} seconds"
+
+    logger.info(f" • Estimated time: {time_str}")
+
+    # Projected completion
+    eta = datetime.now() + timedelta(seconds=estimated_total)
+    logger.info(f" • Projected completion: {eta.strftime('%Y-%m-%d %H:%M:%S')}")
+
+    return total_steps, estimated_total
+
+def train_model(resume=False):
+    """Main training entry point."""
+    logger.info("=" * 60)
+    logger.info("🚀 STARTING MINIGPT-60M TRAINING")
+    logger.info("=" * 60)
+
+    # Device setup
+    device = sys_config.device
+    if device == 'cuda' and not torch.cuda.is_available():
+        logger.warning("⚠ CUDA unavailable, falling back to CPU")
+        device = 'cpu'
+
+    logger.info(f"🖥️ Device: {device.upper()}")
+
+    # Benchmark
+    benchmark_device()
+
+    # Load the data
+    data_file = Path(cfg.prepared_dir) / "all_data.txt"
+    if not data_file.exists():
+        logger.error(f"❌ Missing data: {data_file}")
+        logger.info("💡 Run: python main.py --prepare")
+        return
+
+    dataset = TextDataset(str(data_file), max_len=cfg.max_len)
+    train_loader = DataLoader(
+        dataset,
+        batch_size=cfg.batch_size,
+        shuffle=True,
+        num_workers=cfg.num_workers,
+        pin_memory=cfg.pin_memory
+    )
+
+    # Build the model
+    model = MiniGPT60M()
+    model.to(device)
+
+    # Loss function (unused: the model computes its own cross-entropy)
+    criterion = nn.CrossEntropyLoss()
+
+    # Optimizer & scheduler (sized to the real number of steps)
+    optimizer = create_optimizer(model, cfg.learning_rate, cfg.weight_decay)
+    scheduler = create_scheduler(optimizer, cfg.warmup_steps,
+                                 total_steps=len(train_loader) * cfg.epochs)
+
+    # Clay checkpoint system
+    clay = ClayCheckpoint(model, optimizer, scheduler)
+
+    # Resume if requested
+    start_epoch = 0
+    start_step = 0
+
+    if resume:
+        loaded_epoch, loaded_step, loaded_loss, timestamp = clay.load_latest()
+        if loaded_epoch is not None:
+            start_epoch = loaded_epoch
+            start_step = loaded_step
+            logger.info(f"🔄 Resuming from epoch {start_epoch}, step {start_step}")
+            logger.info(f" • Last loss: {loaded_loss:.4f}")
+            logger.info(f" • Timestamp: {timestamp}")
+        else:
+            logger.warning("⚠ No checkpoint found, starting from scratch")
+
+    # Time estimate
+    total_steps, estimated_total = estimate_training_time(
+        len(dataset), cfg.batch_size, cfg.epochs - start_epoch, device
+    )
+
+    logger.info("=" * 60)
+    logger.info("🎬 TRAINING STARTED!")
+    logger.info("=" * 60)
+
+    # Main training loop
+    best_loss = float('inf')
+
+    for epoch in range(start_epoch, cfg.epochs):
+        clay.start_epoch(epoch, len(train_loader))
+        epoch_loss = 0
+
+        model.train()
+
+        for batch_idx, (x, y) in enumerate(train_loader):
+            step_start_time = time.time()
+
+            x, y = x.to(device), y.to(device)
+
+            # Forward pass
+            logits, loss = model(x, y)
+
+            # Backward pass
+            optimizer.zero_grad()
+            loss.backward()
+
+            # Gradient clipping
+            torch.nn.utils.clip_grad_norm_(model.parameters(), cfg.clip_grad)
+
+            optimizer.step()
+
+            if scheduler:
+                scheduler.step()
+
+            # Update statistics
+            step_time = time.time() - step_start_time
+            clay.stats['step_times'].append(step_time)
+            clay.stats['loss_history'].append(loss.item())
+            clay.stats['learning_rates'].append(optimizer.param_groups[0]['lr'])
+
+            # Update progress
+            clay.progress['current_step'] = epoch * len(train_loader) + batch_idx
+            epoch_loss += loss.item()
+
+            # Show progress roughly every 1% of batches (and for the first 10)
+            if batch_idx % max(1, len(train_loader) // 100) == 0 or batch_idx < 10:
+                clay.print_progress(loss.item(), optimizer.param_groups[0]['lr'])
+
+            # Save a checkpoint every 1000 steps
+            if clay.progress['current_step'] % 1000 == 0 and clay.progress['current_step'] > 0:
+                is_best = loss.item() < best_loss
+                if is_best:
+                    best_loss = loss.item()
+
+                clay.save(
+                    clay.progress['current_step'],
+                    epoch,
+                    loss.item(),
+                    is_best=is_best
+                )
+
+        # End of epoch
+        avg_epoch_loss = epoch_loss / len(train_loader)
+        clay.end_epoch(avg_epoch_loss)
+
+        # Save the model every epoch
+        model_path = Path(cfg.model_dir) / f"minigpt_epoch_{epoch + 1}.pt"
+        torch.save({
+            'epoch': epoch,
+            'model_state_dict': model.state_dict(),
+            'optimizer_state_dict': optimizer.state_dict(),
+            'scheduler_state_dict': scheduler.state_dict() if scheduler else None,
+            'loss': avg_epoch_loss,
+            'config': cfg.__dict__
+        }, model_path)
+
+        logger.info(f"💾 Model saved: {model_path.name}")
+
+        # Also store it as the best model if the loss improved
+        if avg_epoch_loss < best_loss:
+            best_loss = avg_epoch_loss
+            best_path = Path(cfg.model_dir) / "minigpt_best.pt"
+            torch.save(model.state_dict(), best_path)
+            logger.info(f"🏆 New best model! Loss: {avg_epoch_loss:.4f}")
+
+    # ==================== END OF TRAINING ====================
+    total_time = time.time() - clay.stats['start_time']
+
+    logger.info("=" * 60)
+    logger.info("🎉 TRAINING FINISHED!")
+    logger.info("=" * 60)
+
+    # Summary statistics
+    logger.info(f"📊 SUMMARY:")
+    logger.info(f" • Total time: {clay._format_time(total_time)}")
+    logger.info(f" • Final loss: {clay.stats['loss_history'][-1]:.4f}")
+    logger.info(f" • Best loss: {clay.stats['best_loss']:.4f}")
+    logger.info(f" • Mean loss: {sum(clay.stats['loss_history']) / len(clay.stats['loss_history']):.4f}")
+    logger.info(f" • Avg time/step: {sum(clay.stats['step_times']) / len(clay.stats['step_times']):.3f}s")
+    logger.info(f" • Avg time/epoch: {sum(clay.stats['epoch_times']) / len(clay.stats['epoch_times']):.1f}s")
+    logger.info(f" • Total steps: {clay.progress['current_step']:,}")
+
+    # Save final statistics
+    stats_path = Path(cfg.checkpoints_dir) / "training_stats.json"
+    final_stats = {
+        'total_time': total_time,
+        'best_loss': float(clay.stats['best_loss']),
+        'final_loss': float(clay.stats['loss_history'][-1]),
+        'avg_loss': float(sum(clay.stats['loss_history']) / len(clay.stats['loss_history'])),
+        'total_steps': int(clay.progress['current_step']),
+        'total_epochs': int(cfg.epochs),
+        'learning_rate_history': [float(lr) for lr in clay.stats['learning_rates']],
+        'loss_history': [float(loss) for loss in clay.stats['loss_history']],
+        'completion_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    }
+
+    with open(stats_path, 'w', encoding='utf-8') as f:
+        json.dump(final_stats, f, indent=2, ensure_ascii=False)
+
+    logger.info(f"📈 Statistics saved: {stats_path}")
+
+    # Save the final model
+    final_path = Path(cfg.model_dir) / "minigpt_final.pt"
+    torch.save({
+        'model_state_dict': model.state_dict(),
+        'config': cfg.__dict__,
+        'stats': final_stats
+    }, final_path)
+
+    logger.info(f"💾 Final model saved: {final_path}")
+
+    # Generation test (the prompts stay in Polish: the vocabulary in config.py is Polish)
+    logger.info(f"\n🧪 GENERATION TEST:")
+    test_prompts = [
+        "Witaj, ",
+        "Python to ",
+        "Dzisiaj jest ",
+        "AI to "
+    ]
+
+    for prompt in test_prompts:
+        generated = model.generate_text(prompt, max_len=50, temperature=0.8)
+        logger.info(f" • '{prompt}' -> '{generated[:50]}...'")
+
+    logger.info("=" * 60)
+    logger.info("✅ TRAINING COMPLETED SUCCESSFULLY!")
+    logger.info("=" * 60)
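+
+# NOTE: cfg.grad_accum_steps is defined in config.py but the loop above steps
+# the optimizer on every batch. A minimal sketch of how gradient accumulation
+# could be wired in (illustrative, not what train_model() currently does):
+def _accumulation_step(model, optimizer, batches, accum_steps: int = 4):
+    """Accumulate gradients over `accum_steps` micro-batches, then step once."""
+    optimizer.zero_grad()
+    for i, (x, y) in enumerate(batches):
+        _, loss = model(x, y)
+        # Scale so the accumulated gradient matches one large batch
+        (loss / accum_steps).backward()
+        if (i + 1) % accum_steps == 0:
+            torch.nn.utils.clip_grad_norm_(model.parameters(), cfg.clip_grad)
+            optimizer.step()
+            optimizer.zero_grad()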
logger.info(f"📏 Długość: {len(generated)} znaków") + + # Zapisz do pliku + output_dir = Path("results") + output_dir.mkdir(exist_ok=True) + + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + output_file = output_dir / f"generated_{timestamp}.txt" + + with open(output_file, 'w', encoding='utf-8') as f: + f.write(f"Prompt: {prompt}\n") + f.write(f"Generated at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n") + f.write(f"Temperature: {temperature}\n") + f.write(f"Max length: {max_length}\n") + f.write("-" * 60 + "\n") + f.write(generated + "\n") + + logger.info(f"💾 Wynik zapisany: {output_file}") + + return generated + + +def start_chat(model_path=None, device=None): + """Uruchamia tryb rozmowy z modelem""" + logger.info("💬 ROZPOCZYNAM TRYB ROZMOWY") + logger.info("=" * 60) + logger.info("Wpisz 'exit' aby wyjść") + logger.info("Wpisz 'reset' aby zresetować kontekst") + logger.info("=" * 60) + + # Wczytaj model + model = load_model(model_path, device) + if model is None: + return + + # Historia rozmowy + conversation_history = [] + max_history = 5 + + while True: + try: + # Pobierz input + user_input = input("\n👤 Ty: ").strip() + + if user_input.lower() == 'exit': + logger.info("👋 Do widzenia!") + break + + if user_input.lower() == 'reset': + conversation_history = [] + logger.info("🔄 Historia zresetowana") + continue + + if not user_input: + continue + + # Przygotuj prompt z historią + if conversation_history: + prompt = "\n".join(conversation_history[-max_history:]) + "\n👤 " + user_input + "\n🤖 " + else: + prompt = "👤 " + user_input + "\n🤖 " + + # Generuj odpowiedź + logger.info("🤖 Myślę...") + start_time = time.time() + + response = model.generate_text( + prompt, + max_len=200, + temperature=0.8 + ) + + # Wyodrębnij tylko odpowiedź modelu + if "🤖 " in response: + response = response.split("🤖 ")[-1].strip() + + gen_time = time.time() - start_time + + # Wyświetl odpowiedź + print(f"🤖 AI: {response}") + print(f" ⏱️ {gen_time:.2f}s") + + # Dodaj do historii + conversation_history.append(f"👤 {user_input}") + conversation_history.append(f"🤖 {response}") + + # Ogranicz historię + if len(conversation_history) > max_history * 2: + conversation_history = conversation_history[-(max_history * 2):] + + except KeyboardInterrupt: + logger.info("\n👋 Przerwano przez użytkownika") + break + except Exception as e: + logger.error(f"❌ Błąd: {e}") + continue + + +def evaluate_model(model_path=None, device=None, test_samples=100): + """Ocenia model na danych testowych""" + logger.info("📊 OCENIAM MODEL") + + # Wczytaj model + model = load_model(model_path, device) + if model is None: + return + + # Wczytaj dane + data_file = Path(cfg.prepared_dir) / "all_data.txt" + if not data_file.exists(): + logger.error("❌ Brak danych do ewaluacji") + return + + dataset = TextDataset(str(data_file), max_len=cfg.max_len) + + # Wybierz próbki testowe + test_indices = random.sample(range(len(dataset)), min(test_samples, len(dataset))) + + model.eval() + total_loss = 0 + total_perplexity = 0 + + logger.info(f"🔍 Testowanie na {len(test_indices)} próbkach...") + + with torch.no_grad(): + for i, idx in enumerate(test_indices): + x, y = dataset[idx] + x = x.unsqueeze(0).to(device) + y = y.unsqueeze(0).to(device) + + _, loss = model(x, y) + total_loss += loss.item() + + # Perplexity + perplexity = torch.exp(loss).item() + total_perplexity += perplexity + + if (i + 1) % 10 == 0: + logger.info(f" Przetworzono {i + 1}/{len(test_indices)}...") + + avg_loss = total_loss / len(test_indices) + avg_perplexity = total_perplexity / 
+
+
+# ==================== MAIN GUARD ====================
+if __name__ == "__main__":
+    # Smoke tests
+    print("🤖 MiniGPT-60M AI Module")
+    print("Usage: python main.py --train / --generate / --chat")
+
+    # Tokenizer test
+    test_text = "Hello world!"
+    encoded = tokenizer.encode(test_text)
+    decoded = tokenizer.decode(encoded)
+
+    print(f"\n🧪 Tokenizer test: '{test_text}' -> {encoded} -> '{decoded}'")
+
+    # Model test
+    model = MiniGPT60M()
+    test_input = torch.randint(0, cfg.vocab_size, (2, 16))
+    logits, _ = model(test_input)
+
+    print(f"🧪 Model test: input {test_input.shape} -> output {logits.shape}")
+    print(f"✅ AI module ready!")
\ No newline at end of file
diff --git a/clay_manager.py b/clay_manager.py
new file mode 100644
index 0000000..b9a6520
--- /dev/null
+++ b/clay_manager.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+"""
+🛠️ Clay Checkpoint Manager - a CLI for managing checkpoints
+"""
+
+import argparse
+import json
+import shutil
+from pathlib import Path
+from datetime import datetime
+from config import cfg
+
+
+def list_checkpoints():
+    """List the available checkpoints."""
+    checkpoints = list(Path(cfg.checkpoints_dir).glob("clay_checkpoint_*.pt"))
+
+    if not checkpoints:
+        print("❌ No checkpoints")
+        return
+
+    print(f"\n📁 CLAY CHECKPOINTS ({len(checkpoints)}):")
+    print("=" * 80)
+
+    for cp in sorted(checkpoints, key=lambda x: x.stat().st_mtime, reverse=True):
+        # Read the details from the JSON twin
+        json_file = cp.with_suffix('.json')
+        if json_file.exists():
+            with open(json_file, 'r') as f:
+                info = json.load(f)['checkpoint_info']
+
+            size_mb = info['file_size'] / (1024 * 1024)
+            print(f"📄 {cp.name}")
+            print(f"   • Epoch: {info['epoch']} | Step: {info['step']:,}")
+            print(f"   • Loss: {info['loss']:.4f} | Size: {size_mb:.1f}MB")
+            print(f"   • Date: {info['timestamp']}")
+            print("-" * 40)
+        else:
+            size_mb = cp.stat().st_size / (1024 * 1024)
+            print(f"📄 {cp.name} ({size_mb:.1f}MB)")
+
+
+def show_training_stats():
+    """Show the training statistics."""
+    stats_file = Path(cfg.checkpoints_dir) / "training_stats.json"
+
+    if stats_file.exists():
+        with open(stats_file, 'r') as f:
+            stats = json.load(f)
+
+        print("\n📊 TRAINING STATISTICS:")
+        print("=" * 60)
+
+        total_time = stats.get('total_time', 0)
+        hours = int(total_time // 3600)
+        minutes = (total_time % 3600) / 60
+
+        print(f" • Total time: {hours:.0f}h {minutes:.0f}m")
+        print(f" • Final loss: {stats.get('final_loss', 0):.4f}")
+        print(f" • Best loss: {stats.get('best_loss', 0):.4f}")
+        print(f" • Mean loss: {stats.get('avg_loss', 0):.4f}")
+        print(f" • Steps completed: {stats.get('total_steps', 0):,}")
+        print(f" • Finished: {stats.get('completion_time', 'N/A')}")
+    else:
print("❌ Brak statystyk treningu") + + +def cleanup_checkpoints(keep=5): + """Czyści stare checkpointy""" + checkpoints = list(Path(cfg.checkpoints_dir).glob("clay_checkpoint_*.pt")) + + if len(checkpoints) <= keep: + print(f"✅ Wszystkie checkpointy zachowane (mniej niż {keep})") + return + + checkpoints.sort(key=lambda x: x.stat().st_mtime) + to_delete = checkpoints[:-keep] + + print(f"\n🗑️ Usuwanie {len(to_delete)} starych checkpointów:") + total_freed = 0 + + for cp in to_delete: + size_mb = cp.stat().st_size / (1024 * 1024) + total_freed += size_mb + print(f" • {cp.name} ({size_mb:.1f}MB)") + cp.unlink() + + # Usuń też JSON + json_file = cp.with_suffix('.json') + if json_file.exists(): + json_file.unlink() + + print(f"\n✅ Zachowano {keep} najnowszych checkpointów") + print(f"💰 Zwolniono {total_freed:.1f}MB") + + +def export_checkpoint(checkpoint_name, export_dir="exports"): + """Eksportuje checkpoint do osobnego folderu""" + cp_path = Path(cfg.checkpoints_dir) / checkpoint_name + + if not cp_path.exists(): + print(f"❌ Checkpoint {checkpoint_name} nie istnieje") + return + + # Stwórz folder eksportu + export_path = Path(export_dir) + export_path.mkdir(exist_ok=True) + + # Skopiuj checkpoint i JSON + dest_path = export_path / checkpoint_name + shutil.copy2(cp_path, dest_path) + + json_file = cp_path.with_suffix('.json') + if json_file.exists(): + shutil.copy2(json_file, export_path / json_file.name) + + print(f"✅ Checkpoint wyeksportowany do: {dest_path}") + + +def show_checkpoint_info(checkpoint_name): + """Pokazuje szczegółowe info o checkpoincie""" + cp_path = Path(cfg.checkpoints_dir) / checkpoint_name + + if not cp_path.exists(): + print(f"❌ Checkpoint {checkpoint_name} nie istnieje") + return + + json_file = cp_path.with_suffix('.json') + + if json_file.exists(): + with open(json_file, 'r') as f: + info = json.load(f) + + print(f"\n📋 INFO O CHECKPOINCIE: {checkpoint_name}") + print("=" * 60) + + cp_info = info['checkpoint_info'] + stats = info['training_stats'] + + print("📁 PODSTAWOWE INFORMACJE:") + print(f" • Epoka: {cp_info['epoch']}") + print(f" • Krok: {cp_info['step']:,}") + print(f" • Loss: {cp_info['loss']:.4f}") + print(f" • Rozmiar: {cp_info['file_size'] / (1024 * 1024):.1f}MB") + print(f" • Data: {cp_info['timestamp']}") + + print("\n📊 STATYSTYKI TRENINGU:") + print(f" • Całkowity czas: {stats['total_time']:.0f}s") + print(f" • Średni loss: {stats['avg_loss']:.4f}") + print(f" • Current LR: {stats['current_lr']:.6f}") + print(f" • Kroki: {stats['steps_done']:,}") + else: + print("❌ Brak informacji JSON dla tego checkpointu") + + +def main(): + parser = argparse.ArgumentParser(description="Clay Checkpoint Manager") + parser.add_argument("--list", action="store_true", help="Lista checkpointów") + parser.add_argument("--stats", action="store_true", help="Pokaż statystyki") + parser.add_argument("--cleanup", type=int, nargs='?', const=5, help="Wyczyść stare checkpointy (domyślnie: 5)") + parser.add_argument("--export", type=str, help="Eksportuj checkpoint") + parser.add_argument("--info", type=str, help="Info o konkretnym checkpoincie") + parser.add_argument("--export-all", action="store_true", help="Eksportuj wszystkie checkpointy") + + args = parser.parse_args() + + if args.list: + list_checkpoints() + elif args.stats: + show_training_stats() + elif args.cleanup is not None: + cleanup_checkpoints(args.cleanup) + elif args.export: + export_checkpoint(args.export) + elif args.info: + show_checkpoint_info(args.info) + elif args.export_all: + checkpoints = 
+        checkpoints = list(Path(cfg.checkpoints_dir).glob("clay_checkpoint_*.pt"))
+        for cp in checkpoints:
+            export_checkpoint(cp.name)
+    else:
+        print("\n🛠️ Clay Checkpoint Manager")
+        print("=" * 40)
+        print("Usage:")
+        print("  --list            # List checkpoints")
+        print("  --stats           # Training statistics")
+        print("  --cleanup [N]     # Keep the N newest (default 5)")
+        print("  --export NAME     # Export a checkpoint")
+        print("  --info NAME       # Checkpoint info")
+        print("  --export-all      # Export everything")
+        print("\nExamples:")
+        print("  python clay_manager.py --list")
+        print("  python clay_manager.py --cleanup 3")
+        print("  python clay_manager.py --info clay_checkpoint_ep2_step5000_20240126_143022.pt")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..a9d84b8
--- /dev/null
+++ b/config.py
@@ -0,0 +1,214 @@
+"""
+🎯 CONFIG - Shared configuration for MiniGPT-60M
+"""
+
+import os
+import sys
+import random
+import numpy as np
+import torch
+from pathlib import Path
+from typing import List, Dict, Any, Optional
+import logging
+import json
+
+# ==================== LOGGING ====================
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler('training.log', encoding='utf-8'),
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger(__name__)
+
+# ==================== SYSTEM CONFIGURATION ====================
+class SystemConfig:
+    """System and device configuration."""
+
+    def __init__(self):
+        self.device = self._get_device()
+        self.set_seeds(42)
+        self._print_info()
+
+    def _get_device(self) -> str:
+        """Pick the best available device automatically."""
+        if torch.cuda.is_available():
+            return "cuda"
+        elif torch.backends.mps.is_available():
+            return "mps"
+        else:
+            return "cpu"
+
+    def set_seeds(self, seed: int = 42):
+        """Set seeds for reproducibility."""
+        random.seed(seed)
+        np.random.seed(seed)
+        torch.manual_seed(seed)
+        if torch.cuda.is_available():
+            torch.cuda.manual_seed_all(seed)
+            torch.backends.cudnn.deterministic = True
+            torch.backends.cudnn.benchmark = False
+
+    def _print_info(self):
+        """Print system information."""
+        logger.info("=" * 60)
+        logger.info("🎯 MINIGPT-60M SYSTEM")
+        logger.info("=" * 60)
+        logger.info(f"Python: {sys.version.split()[0]}")
+        logger.info(f"PyTorch: {torch.__version__}")
+        logger.info(f"Device: {self.device.upper()}")
+
+        if self.device == "cuda":
+            gpu_count = torch.cuda.device_count()
+            logger.info(f"CUDA available: {torch.cuda.is_available()}")
+            logger.info(f"GPU count: {gpu_count}")
+            for i in range(gpu_count):
+                mem = torch.cuda.get_device_properties(i).total_memory / 1e9
+                logger.info(f"GPU {i}: {torch.cuda.get_device_name(i)} ({mem:.1f} GB)")
+
+        logger.info("=" * 60)
+
+# ==================== MODEL CONFIGURATION ====================
+class ModelConfig:
+    """Configuration for the ~60M-parameter model."""
+
+    def __init__(self):
+        # Vocabulary (character level; the Polish alphabet is intentional)
+        self.vocab_chars = list("aąbcćdeęfghijklłmnńoóprsśtuwyzźżAĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ")
+        self.vocab_chars += list("0123456789")
+        self.vocab_chars += list(" .,?!:;()[]{}+-*/=<>_\"'`~@#$%^&|\\/\n\t")
+        # NOTE: the multi-character entries below can never be produced by the
+        # char-level encoder; they only reserve vocabulary slots
+        self.vocab_chars += [" ", "\n\n", "\t\t", "->", "::", "=>"]
+
+        self.vocab = self.vocab_chars
+        self.vocab_size = len(self.vocab)
+
+        # Architecture targeting ~60M parameters
+        self.embed_dim = 768
+        self.n_layers = 12
+        self.n_heads = 12
+        self.max_len = 512
+        self.ff_dim = self.embed_dim * 4
+        self.dropout = 0.1
+        self.activation = "gelu"
+        self.norm_eps = 1e-5
+
+        # Training
+        self.epochs = 3
+        self.batch_size = 16 if torch.cuda.is_available() else 4
+        self.grad_accum_steps = 4
+        self.learning_rate = 3e-4
+        self.weight_decay = 0.1
+        self.adam_beta1 = 0.9
+        self.adam_beta2 = 0.95
+        self.adam_eps = 1e-8
+        self.clip_grad = 1.0
+        self.warmup_steps = 2000
+
+        # Mixed precision
+        self.use_amp = torch.cuda.is_available()
+
+        # Parallel data loading
+        self.num_workers = 4 if torch.cuda.is_available() else 0
+        self.pin_memory = True
+
+        # Generation
+        self.generation_temperature = 0.8
+        self.top_k = 50
+        self.top_p = 0.95
+        self.repetition_penalty = 1.1
+
+        # Paths
+        self.model_dir = "models"
+        self.data_dir = "data"
+        self.prepared_dir = "prepared_data"
+        self.log_dir = "logs"
+        self.tensorboard_dir = "runs"
+        self.cache_dir = ".cache"
+        self.checkpoints_dir = "checkpoints"
+        self.resume_file = "resume_state.json"  # resume-state file
+
+        # Create the directories
+        self._create_dirs()
+
+    def _create_dirs(self):
+        """Create the required directories."""
+        dirs = [self.model_dir, self.data_dir, self.prepared_dir,
+                self.log_dir, self.tensorboard_dir, self.cache_dir,
+                "backups", "results", self.checkpoints_dir]
+
+        for d in dirs:
+            Path(d).mkdir(parents=True, exist_ok=True)
+
+    def print_config(self):
+        """Print the configuration."""
+        logger.info("=" * 60)
+        logger.info("⚙️ MODEL CONFIGURATION")
+        logger.info("=" * 60)
+        logger.info(f"• Vocab size: {self.vocab_size}")
+        logger.info(f"• Embed dim: {self.embed_dim}")
+        logger.info(f"• Layers: {self.n_layers}")
+        logger.info(f"• Heads: {self.n_heads}")
+        logger.info(f"• Context: {self.max_len}")
+        logger.info(f"• Batch size: {self.batch_size}")
+        logger.info(f"• Learning rate: {self.learning_rate}")
+        logger.info(f"• Mixed precision: {self.use_amp}")
+        logger.info("=" * 60)
+
+    def save_resume_state(self, state: Dict[str, Any]):
+        """Persist the resume state."""
+        state_path = Path(self.checkpoints_dir) / self.resume_file
+        with open(state_path, 'w', encoding='utf-8') as f:
+            json.dump(state, f, indent=2, ensure_ascii=False)
+        logger.info(f"💾 State saved to {state_path}")
+
+    def load_resume_state(self) -> Optional[Dict[str, Any]]:
+        """Load the resume state, if any."""
+        state_path = Path(self.checkpoints_dir) / self.resume_file
+        if state_path.exists():
+            with open(state_path, 'r', encoding='utf-8') as f:
+                return json.load(f)
+        return None
+
+    def get_latest_checkpoint(self) -> Optional[Path]:
+        """Find the newest checkpoint."""
+        # Match the names ClayCheckpoint in ai.py actually writes
+        checkpoints = list(Path(self.checkpoints_dir).glob("clay_checkpoint_*.pt"))
+        if checkpoints:
+            # Sort by modification time
+            checkpoints.sort(key=lambda x: x.stat().st_mtime, reverse=True)
+            return checkpoints[0]
+        return None
+
+    def get_latest_model(self) -> Optional[Path]:
+        """Find the newest model."""
+        # Match the names ai.py actually writes (minigpt_*.pt)
+        models = list(Path(self.model_dir).glob("minigpt_*.pt"))
+        if models:
+            # Prefer minigpt_final.pt, then the highest minigpt_epoch_X.pt
+            final_model = Path(self.model_dir) / "minigpt_final.pt"
+            if final_model.exists():
+                return final_model
+
+            # Sort by epoch number
+            def get_epoch_num(path: Path) -> int:
+                try:
+                    # minigpt_epoch_10.pt -> 10
+                    name = path.stem
+                    return int(name.split('_')[-1])
+                except (ValueError, IndexError):
+                    return 0
+
+            models.sort(key=get_epoch_num, reverse=True)
+            return models[0]
+        return None
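+
+# A small usage sketch of the resume-state helpers above; the dictionary keys
+# mirror the ones main.py's show_checkpoints() reads, the values are placeholders:
+def _resume_state_demo(config: "ModelConfig") -> None:
+    """Write and read back a resume state (illustrative only)."""
+    config.save_resume_state({'epoch': 2, 'step': 5000, 'train_loss': 1.87})
+    state = config.load_resume_state()
+    print(state['step'] if state else "no resume state")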
+
+def get_device(prefer_gpu=True):
+    """Pick a device intelligently."""
+    if prefer_gpu and torch.cuda.is_available():
+        return 'cuda'
+    elif torch.backends.mps.is_available():  # Apple Silicon
+        return 'mps'
+    else:
+        return 'cpu'
+
+# Initialize the configuration
+sys_config = SystemConfig()
+cfg = ModelConfig()
\ No newline at end of file
diff --git a/daj_no_Plik.py b/daj_no_Plik.py
new file mode 100644
index 0000000..04a4b08
--- /dev/null
+++ b/daj_no_Plik.py
@@ -0,0 +1,41 @@
+# daj_no_Plik.py (local version)
+import os
+import zipfile
+import requests
+from pathlib import Path
+
+
+def download_from_url(url, save_path):
+    """Download a file from a URL."""
+    print(f"Downloading from {url}...")
+    response = requests.get(url, stream=True)
+    response.raise_for_status()  # fail early on HTTP errors
+
+    with open(save_path, 'wb') as f:
+        for chunk in response.iter_content(chunk_size=8192):
+            f.write(chunk)
+
+    print(f"Saved to {save_path}")
+
+
+def main():
+    # If you already have the .zip on disk
+    zip_path = "/path/to/Pathl_AI.zip"
+
+    if os.path.exists(zip_path):
+        print("Unpacking...")
+        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+            zip_ref.extractall("Pathl.AI")
+        print("✅ Done!")
+    else:
+        print("❌ File does not exist")
+        print("\n📥 Download the file from Google Colab:")
+        print("1. In Colab run:")
+        print("   from google.colab import files")
+        print("   files.download('/content/Pathl_AI.zip')")
+        print("2. Download the file to your computer")
+        print("3. Run this script again")
+
+
+if __name__ == "__main__":
+    print("file")
+    main()
\ No newline at end of file
diff --git a/final_fix.py b/final_fix.py
new file mode 100644
index 0000000..9c8aae3
--- /dev/null
+++ b/final_fix.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+"""Final fix for the generate() method."""
+
+import re
+
+with open('ai.py', 'r') as f:
+    content = f.read()
+
+# The new, universal generate method
+# (uses the module-level `tokenizer` from ai.py; MiniGPT60M has no self.tokenizer)
+new_generate_method = '''
+    def generate(self, input_ids=None, max_length=100, temperature=1.0, **kwargs):
+        """Universal generate() supporting several calling conventions."""
+        # Tolerate unsupported kwargs (device, max_len, etc.)
+        max_len = kwargs.get('max_len', max_length)
+
+        if isinstance(input_ids, str):
+            return self.generate_text(input_ids, max_len=max_len, temperature=temperature)
+        elif input_ids is not None:
+            # If it is a tensor, convert it back to text
+            if hasattr(input_ids, 'tolist'):
+                if len(input_ids.shape) > 1:
+                    text = tokenizer.decode(input_ids[0].tolist())
+                else:
+                    text = tokenizer.decode(input_ids.tolist())
+            else:
+                text = tokenizer.decode(input_ids)
+            return self.generate_text(text, max_len=max_len, temperature=temperature)
+        else:
+            return ""  # Empty string for None input
+'''
+
+# Find and replace the generate method
+if 'def generate(' in content:
+    # Use a regex to match the whole method
+    pattern = r'def generate\(.*?\).*?(?=\n    def \w+\(|\nclass \w+|\Z)'
+
+    # Check whether the regex matches
+    match = re.search(pattern, content, re.DOTALL)
+    if match:
+        content = re.sub(pattern, new_generate_method, content, flags=re.DOTALL)
+        print("✅ Updated generate()")
+    else:
+        # Fallback: scan between def generate and the next def/class
+        lines = content.split('\n')
+        new_lines = []
+        i = 0
+        while i < len(lines):
+            line = lines[i]
+
+            if line.strip().startswith('def generate('):
+                # Skip the old method (including its def line) up to the next method/class
+                i += 1
+                while i < len(lines) and not lines[i].strip().startswith('def ') and not lines[i].strip().startswith('class '):
+                    i += 1
+                # Insert the new method instead
+                new_lines.append(new_generate_method)
+                if i < len(lines):
+                    new_lines.append(lines[i])
+                i += 1
+                continue
+
+            new_lines.append(line)
+            i += 1
+
+        content = '\n'.join(new_lines)
+        print("✅ Replaced generate() (fallback path)")
+else:
+    print("❌ generate() not found, inserting it...")
+    # Insert before the last method of the class
+    if 'class MiniGPT60M' in content:
+        # Insert before the last 'def' in the class
+        lines = content.split('\n')
+        new_lines = []
+        in_class = False
+        methods_found = []
+
+        for i, line in enumerate(lines):
+            new_lines.append(line)
+
+            if 'class MiniGPT60M' in line:
+                in_class = True
+
+            if in_class and line.strip().startswith('def '):
+                methods_found.append(i)
+
+        if methods_found:
+            # Insert before the last method
+            last_method_idx = methods_found[-1]
+            new_lines.insert(last_method_idx, '\n' + new_generate_method)
+            content = '\n'.join(new_lines)
+            print("✅ Added generate()")
+        else:
+            # Append at the end of the class
+            if 'class MiniGPT60M' in content:
+                # Find the end of the class
+                class_pattern = r'(class MiniGPT60M.*?)(?=\nclass|\Z)'
+                match = re.search(class_pattern, content, re.DOTALL)
+                if match:
+                    class_content = match.group(1)
+                    updated_class = class_content.rstrip() + '\n\n' + new_generate_method
+                    content = content.replace(class_content, updated_class)
+                    print("✅ Added generate() at the end of the class")
+
+# Write the changes back
+with open('ai.py', 'w') as f:
+    f.write(content)
+
+print("✅ ai.py updated!")
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..dfb1f49
--- /dev/null
+++ b/main.py
@@ -0,0 +1,314 @@
+"""
+🎯 MAIN - Entry-point script for MiniGPT-60M with checkpoints
+"""
+
+import os
+import sys
+import argparse
+import signal
+from pathlib import Path
+
+# Make local imports resolvable
+sys.path.append('.')
+
+from config import logger, cfg, sys_config
+import prepare_data
+import ai
+
+# ==================== SIGNAL HANDLING ====================
+def signal_handler(sig, frame):
+    """Handle interruption (Ctrl+C)."""
+    logger.info("\n\n⚠️ Training interrupted by the user!")
+    logger.info("💾 Saving resume state...")
+
+    # State saving before exit could be added here;
+    # in the current implementation the state is saved automatically
+
+    logger.info("✅ You can resume training with: python main.py --cont")
+    sys.exit(0)
+
+signal.signal(signal.SIGINT, signal_handler)
+
+# ==================== MAIN FUNCTIONS ====================
+def prepare_all_data():
+    """Prepare all the data."""
+    logger.info("=" * 60)
+    logger.info("📊 PREPARING ALL DATA")
+    logger.info("=" * 60)
+
+    preparer = prepare_data.DataPreparer()
+    preparer.prepare_all_data()
+
+def train_model(resume: bool = False):
+    """Train the model."""
+    logger.info("=" * 60)
+    if resume:
+        logger.info("🔄 RESUMING MODEL TRAINING")
+    else:
+        logger.info("🚀 TRAINING MINIGPT-60M")
+    logger.info("=" * 60)
+
+    # Make sure the data is prepared
+    data_file = Path(cfg.prepared_dir) / "all_data.txt"
+
+    if not data_file.exists():
+        logger.warning("⚠️ No prepared data found. Preparing it now...")
+        prepare_all_data()
+
+    # Start training
+    if resume:
+        ai.continue_training()
+    else:
+        ai.train_model(resume=False)
+
+def continue_training():
+    """Resume training from the latest checkpoint."""
+    train_model(resume=True)
+
+def train_more_epochs(additional_epochs: int = 3):
+    """Train for additional epochs."""
+    logger.info("=" * 60)
+    logger.info(f"📈 ADDITIONAL TRAINING: {additional_epochs} epochs")
+    logger.info("=" * 60)
+
+    # Make sure the data is prepared
+    data_file = Path(cfg.prepared_dir) / "all_data.txt"
+
+    if not data_file.exists():
+        logger.error("❌ No prepared data!")
+        logger.info("💡 Run: python main.py --prepare")
+        return
+
+    # Run the additional training
+    ai.train_more_epochs(additional_epochs)
+
+def generate_text(prompt: str):
+    """Generate text."""
+    logger.info(f"🎨 GENERATING TEXT: '{prompt}'")
+    ai.generate_text(prompt)
+
+def start_chat():
+    """Start the chat mode."""
+    ai.start_chat()
+
+def evaluate_model():
+    """Evaluate the model."""
+    logger.info("🎯 MODEL EVALUATION")
+
+    # Check that a trained model exists
+    model_path = cfg.get_latest_model()
+
+    if not model_path:
+        logger.error("❌ No trained model!")
+        logger.info("💡 Run: python main.py --train")
+        return
+
+    logger.info(f"✅ Model found: {model_path.name}")
+
+    # The evaluation itself lives in ai.py
+    ai.evaluate_model(model_path=model_path)
+
+def show_checkpoints():
+    """Show the available checkpoints."""
+    logger.info("📁 AVAILABLE CHECKPOINTS:")
+
+    # Match the file names ai.py actually writes
+    checkpoints = list(Path(cfg.checkpoints_dir).glob("clay_checkpoint_*.pt"))
+    models = list(Path(cfg.model_dir).glob("minigpt_*.pt"))
+
+    if checkpoints:
+        logger.info("\n🔽 CHECKPOINTS (resumable):")
+        for cp in sorted(checkpoints, key=lambda x: x.stat().st_mtime, reverse=True)[:5]:
+            size_mb = cp.stat().st_size / (1024 * 1024)
+            logger.info(f" • {cp.name} ({size_mb:.1f} MB)")
+    else:
+        logger.info(" ❌ No checkpoints")
+
+    if models:
+        logger.info("\n🤖 MODELS:")
+        for model in sorted(models, key=lambda x: x.stat().st_mtime, reverse=True)[:5]:
+            size_mb = model.stat().st_size / (1024 * 1024)
+            logger.info(f" • {model.name} ({size_mb:.1f} MB)")
+
+    # Check the resume state
+    resume_state = cfg.load_resume_state()
+    if resume_state:
+        logger.info(f"\n🔄 LAST TRAINING STATE:")
+        logger.info(f" • Epoch: {resume_state.get('epoch', 'N/A')}")
+        logger.info(f" • Step: {resume_state.get('step', 'N/A')}")
+        logger.info(f" • Loss: {resume_state.get('train_loss', 'N/A')}")
+
+def clean_checkpoints(keep_last: int = 3):
+    """Remove stale checkpoints."""
+    logger.info(f"🧹 CLEANING OLD CHECKPOINTS (keeping the {keep_last} newest)")
+
+    checkpoints = list(Path(cfg.checkpoints_dir).glob("clay_checkpoint_*.pt"))
+
+    if len(checkpoints) <= keep_last:
+        logger.info("✅ Nothing to clean")
+        return
+
+    # Sort oldest to newest
+    checkpoints.sort(key=lambda x: x.stat().st_mtime)
+
+    # Delete everything but the keep_last newest
+    to_delete = checkpoints[:-keep_last]
+
+    for cp in to_delete:
+        try:
+            cp.unlink()
+            logger.info(f" 🗑️ Deleted: {cp.name}")
+        except Exception as e:
+            logger.error(f" ❌ Failed to delete {cp.name}: {e}")
+
+    logger.info(f"✅ Kept the {keep_last} newest checkpoints")
+
+def show_config():
+    """Show the configuration."""
+    cfg.print_config()
+
+def run_tests():
+    """Run the smoke tests."""
+    logger.info("🧪 UNIT TESTS")
+
+    # Tokenizer test
+    from ai import tokenizer
+    text = "Tokenization test"
+    ids = tokenizer.encode(text)
+    decoded = tokenizer.decode(ids)
+
+    logger.info(f"✅ Tokenizer: '{text}' -> '{decoded}'")
+
+
+    # Model test
+    import torch
+    model = ai.MiniGPT60M()
+    x = torch.randint(0, cfg.vocab_size, (2, 32))
+    logits = model(x)
+
+    logger.info(f"✅ Model: logits shape {logits.shape}")
+    logger.info("✅ All tests passed!")
+
+# ==================== MAIN FUNCTION ====================
+def main():
+    """Program entry point"""
+
+    parser = argparse.ArgumentParser(
+        description="🎯 MiniGPT-60M: an advanced language model with ~60M parameters",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Usage examples:
+  python main.py --prepare        # Prepare data from all data_* folders
+  python main.py --train          # Train the model from scratch
+  python main.py --cont           # Resume training from the latest checkpoint
+  python main.py --more [N]       # Add N training epochs (default 3)
+  python main.py --generate "AI"  # Generate text
+  python main.py --chat           # Chat with the model
+  python main.py --checkpoints    # List available checkpoints
+  python main.py --clean-cp       # Clean up old checkpoints
+  python main.py --test           # Run the tests
+  python main.py --config         # Show the configuration
+  python main.py --evaluate       # Evaluate the model
+  python main.py --all            # Prepare data and train from scratch
+
+Resuming training:
+  Start training:     python main.py --train
+  Interrupt (Ctrl+C): the state is saved automatically
+  Resume:             python main.py --cont
+  Add epochs:         python main.py --more 5
+        """
+    )
+
+    parser.add_argument("--prepare", action="store_true", help="Prepare the data")
+    parser.add_argument("--train", action="store_true", help="Train from scratch")
+    parser.add_argument("--cont", action="store_true", help="Resume training from a checkpoint")
+    parser.add_argument("--more", type=int, nargs='?', const=3, help="Add training epochs (default 3)")
+    parser.add_argument("--generate", type=str, help="Generate text from a prompt")
+    parser.add_argument("--chat", action="store_true", help="Chat mode")
+    parser.add_argument("--checkpoints", action="store_true", help="List available checkpoints")
+    parser.add_argument("--clean-cp", action="store_true", help="Clean up old checkpoints")
+    parser.add_argument("--test", action="store_true", help="Run the unit tests")
+    parser.add_argument("--config", action="store_true", help="Show the configuration")
+    parser.add_argument("--evaluate", action="store_true", help="Evaluate the model")
+    parser.add_argument("--all", action="store_true", help="Prepare data and train")
+    parser.add_argument("--epochs", type=int, default=cfg.epochs, help="Number of epochs")
+    parser.add_argument("--model", type=str, help="Path to a specific model")
+
+    args = parser.parse_args()
+
+    # Print the header
+    logger.info("=" * 60)
+    logger.info("🎯 MINIGPT-60M - WITH CHECKPOINTS")
+    logger.info("=" * 60)
+
+    # Override the epoch count if one was given
+    if args.epochs != cfg.epochs:
+        cfg.epochs = args.epochs
+        logger.info(f"⚙️ Set {cfg.epochs} epochs")
+
+    # Dispatch to the matching function
+    if args.prepare:
+        prepare_all_data()
+
+    elif args.train:
+        train_model(resume=False)
+
+    elif args.cont:
+        continue_training()
+
+    elif args.more is not None:
+        train_more_epochs(additional_epochs=args.more)
+
+    elif args.generate:
+        generate_text(args.generate)
+
+    elif args.chat:
+        start_chat()
+
+    elif args.checkpoints:
+        show_checkpoints()
+
+    elif args.clean_cp:
+        clean_checkpoints()
+
+    elif args.test:
+        run_tests()
+
+    elif args.config:
+        show_config()
+
+    elif args.evaluate:
+        evaluate_model()
+
+    elif args.all:
+        prepare_all_data()
+        train_model(resume=False)
+
+    else:
+        # No flag given: show the help
+        logger.info("\n❓ No flag given. Available options:\n")
+        parser.print_help()
+
+        # Print some extra pointers
+        logger.info("\n💡 USAGE EXAMPLES:")
+        logger.info("   python main.py --train          # Train from scratch")
+        logger.info("   [Ctrl+C]                        # Interrupt training")
+        logger.info("   python main.py --cont           # Resume training")
+        logger.info("   python main.py --more 5         # Add 5 epochs")
+        logger.info("   python main.py --generate 'AI'  # Generate text")
+
+        # Check whether any checkpoints exist
+        if cfg.get_latest_checkpoint():
+            logger.info("\n🔄 Checkpoints are available to resume from!")
+
+        logger.info("=" * 60)
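+
+# Editor's sketch, not part of the original commit: evaluate_model() above
+# stops at loading a path and leaves the actual scoring as a TODO in ai.py.
+# A minimal option is the mean next-character cross-entropy over a few lines
+# (perplexity = exp of this value), reusing ai.MiniGPT60M and ai.tokenizer as
+# defined in ai.py. Without loading a checkpoint it scores an untrained model.
+def sketch_mean_loss(lines):
+    import torch
+    import torch.nn.functional as F
+
+    model = ai.MiniGPT60M()
+    model.eval()
+    losses = []
+    with torch.no_grad():
+        for line in lines:
+            ids = ai.tokenizer.encode(line)
+            if len(ids) < 2:
+                continue  # need at least one (input, target) pair
+            x = torch.tensor([ids[:-1]], dtype=torch.long)
+            y = torch.tensor([ids[1:]], dtype=torch.long)
+            logits = model(x)  # (1, T, vocab_size), as checked in run_tests()
+            losses.append(F.cross_entropy(logits.view(-1, logits.size(-1)), y.view(-1)).item())
+    return sum(losses) / max(len(losses), 1)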
+
+if __name__ == "__main__":
+    try:
+        main()
+    except KeyboardInterrupt:
+        logger.info("\n\n👋 Program interrupted by the user")
+        sys.exit(0)
+    except Exception as e:
+        logger.error(f"\n❌ Critical error: {e}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
\ No newline at end of file
diff --git a/pik b/pik
new file mode 100644
index 0000000..66a7d6e
--- /dev/null
+++ b/pik
@@ -0,0 +1 @@
+plik
diff --git a/prepare_data.py b/prepare_data.py
new file mode 100644
index 0000000..76f2b40
--- /dev/null
+++ b/prepare_data.py
@@ -0,0 +1,315 @@
+"""
+📊 PREPARE_DATA - Data preparation from multiple folders
+"""
+
+import os
+import re
+import json
+import random
+import logging
+from pathlib import Path
+from typing import List, Dict, Any, Generator
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+from config import logger, cfg
+
+
+# ==================== DATA PREPARATION CLASSES ====================
+class DataPreparer:
+    """Main data preparation class"""
+
+    def __init__(self):
+        self.output_file = Path(cfg.prepared_dir) / "all_data.txt"
+        self.metadata_file = Path(cfg.prepared_dir) / "metadata.json"
+        self.stats = {
+            "total_files": 0,
+            "total_samples": 0,
+            "total_chars": 0,
+            "sources": {},
+            "errors": []
+        }
+
+    def find_data_folders(self) -> List[Path]:
+        """Finds every folder whose name starts with 'data_'"""
+        current_dir = Path(".")
+        data_folders = []
+
+        for item in current_dir.iterdir():
+            if item.is_dir() and item.name.startswith("data_"):
+                data_folders.append(item)
+                logger.info(f"📁 Found folder: {item.name}")
+
+        # Also include the plain 'data' folder if it exists
+        if Path("data").exists():
+            data_folders.append(Path("data"))
+
+        return data_folders
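+
+    # Editor's sketch, not in the original commit: a read-only dry run that
+    # shows what prepare_all_data() would pick up, using the same glob
+    # patterns as process_folder() below.
+    def preview_sources(self) -> None:
+        for folder in self.find_data_folders():
+            files = [p for pat in ("*.txt", "*.json", "*.csv") for p in folder.rglob(pat)]
+            logger.info(f"  {folder.name}: {len(files)} candidate files")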
+
+    def process_file(self, file_path: Path) -> List[str]:
+        """Processes a single file and returns its samples"""
+        samples = []
+
+        try:
+            if file_path.suffix == '.txt':
+                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+                    content = f.read()
+
+                # Different strategies for different file types
+                if "news" in file_path.name.lower() or "article" in file_path.name.lower():
+                    # News: split into paragraphs
+                    paragraphs = re.split(r'\n\s*\n', content)
+                    for para in paragraphs:
+                        para = para.strip()
+                        if 100 < len(para) < 5000:
+                            samples.append(para)
+
+                elif "code" in file_path.name.lower() or "python" in file_path.name.lower():
+                    # Code: keep whole functions together
+                    lines = content.split('\n')
+                    current_chunk = []
+
+                    for line in lines:
+                        line = line.strip()
+                        if line:
+                            # Flush the previous chunk when a new function starts,
+                            # so the 'def' line opens the next chunk instead of
+                            # closing the current one
+                            if line.startswith('def ') and len(current_chunk) > 3:
+                                samples.append('\n'.join(current_chunk))
+                                current_chunk = []
+                            current_chunk.append(line)
+
+                    # Append the remaining chunk
+                    if current_chunk and len('\n'.join(current_chunk)) > 50:
+                        samples.append('\n'.join(current_chunk))
+
+                else:
+                    # Default: split into lines/paragraphs
+                    lines = content.split('\n')
+                    for line in lines:
+                        line = line.strip()
+                        if 20 < len(line) < 1000:
+                            samples.append(line)
+
+            elif file_path.suffix == '.json':
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    data = json.load(f)
+
+                if isinstance(data, list):
+                    for item in data:
+                        if isinstance(item, str):
+                            samples.append(item)
+                        elif isinstance(item, dict):
+                            # Flatten the dict into text
+                            text = ' '.join([f"{k}: {v}" for k, v in item.items()])
+                            if len(text) > 20:
+                                samples.append(text)
+
+            elif file_path.suffix == '.csv':
+                import csv
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    reader = csv.DictReader(f)
+                    for row in reader:
+                        text = ' '.join(row.values())
+                        if len(text) > 20:
+                            samples.append(text)
+
+        except Exception as e:
+            self.stats["errors"].append(f"{file_path}: {str(e)}")
+
+        return samples
+
+    def process_folder(self, folder_path: Path) -> Dict[str, Any]:
+        """Processes a whole folder"""
+        folder_stats = {
+            "name": folder_path.name,
+            "files_processed": 0,
+            "samples_found": 0,
+            "samples": []
+        }
+
+        # Find all text-like files
+        file_patterns = ['*.txt', '*.json', '*.csv']
+        files = []
+
+        for pattern in file_patterns:
+            files.extend(list(folder_path.rglob(pattern)))
+
+        logger.info(f"  📂 {folder_path.name}: {len(files)} files")
+
+        # Process the files in parallel
+        with ThreadPoolExecutor(max_workers=4) as executor:
+            futures = {executor.submit(self.process_file, file): file for file in files}
+
+            for future in as_completed(futures):
+                file = futures[future]
+                try:
+                    samples = future.result()
+                    if samples:
+                        folder_stats["samples"].extend(samples)
+                        folder_stats["samples_found"] += len(samples)
+                        folder_stats["files_processed"] += 1
+                except Exception as e:
+                    self.stats["errors"].append(f"{file}: {str(e)}")
+
+        return folder_stats
+
+    def prepare_all_data(self) -> None:
+        """Main data preparation entry point"""
+        logger.info("=" * 60)
+        logger.info("📊 PREPARING DATA FROM ALL FOLDERS")
+        logger.info("=" * 60)
+
+        # Find the folders
+        data_folders = self.find_data_folders()
+
+        if not data_folders:
+            logger.error("❌ No folders starting with 'data_' were found")
+            return
+
+        all_samples = []
+
+        # Process each folder
+        for folder in data_folders:
+            logger.info(f"\n🔍 Processing folder: {folder.name}")
+
+            folder_stats = self.process_folder(folder)
+
+            if folder_stats["samples"]:
+                all_samples.extend(folder_stats["samples"])
+                self.stats["sources"][folder.name] = folder_stats["samples_found"]
+                self.stats["total_files"] += folder_stats["files_processed"]
+
+                logger.info(f"  ✅ Found: {folder_stats['samples_found']} samples")
+                logger.info("  📝 Examples:")
+                for sample in random.sample(folder_stats["samples"], min(3, len(folder_stats["samples"]))):
+                    logger.info(f"    • {sample[:80]}...")
+            else:
+                logger.warning(f"  ⚠️ No data in folder {folder.name}")
+
+        # Shuffle and cap
+        if all_samples:
+            random.shuffle(all_samples)
+
+            # Cap at 1 million samples (to bound memory use)
+            if len(all_samples) > 1000000:
+                all_samples = all_samples[:1000000]
+                logger.warning("⚠️ Capped at 1,000,000 samples")
+
+            # Record the totals (previously these stayed at 0, so the
+            # metadata and the summary always reported empty statistics)
+            self.stats["total_samples"] = len(all_samples)
+            self.stats["total_chars"] = sum(len(s) for s in all_samples)
+
+            # Write everything to a single file
+            self._save_to_file(all_samples)
+
+            # Write the metadata
+            self._save_metadata()
+
+            # Summary
+            self._print_summary()
+        else:
+            logger.error("❌ No data found at all!")
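+
+    # Editor's sketch, not in the original commit: overlapping data_* folders
+    # easily yield exact duplicates, so an optional pass like this could be
+    # applied to all_samples before the shuffle in prepare_all_data().
+    def dedupe_samples(self, samples: List[str]) -> List[str]:
+        seen = set()
+        unique = []
+        for sample in samples:
+            key = sample.strip()
+            if key and key not in seen:
+                seen.add(key)
+                unique.append(sample)
+        return unique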
+
+    def _save_to_file(self, samples: List[str]) -> None:
+        """Writes all samples to a single file"""
+        logger.info(f"\n💾 Writing {len(samples):,} samples to {self.output_file}")
+
+        with open(self.output_file, 'w', encoding='utf-8') as f:
+            for i, sample in enumerate(samples, 1):
+                f.write(sample + "\n\n")
+
+                # Log progress every 10k samples
+                if i % 10000 == 0:
+                    logger.info(f"   Wrote {i:,}/{len(samples):,} samples")
+
+        logger.info(f"✅ Wrote all data to {self.output_file}")
+
+    def _save_metadata(self) -> None:
+        """Writes the metadata"""
+        metadata = {
+            "total_samples": self.stats["total_samples"],
+            "total_chars": self.stats["total_chars"],
+            "sources": self.stats["sources"],
+            "created": os.path.getmtime(str(self.output_file)),
+            "file_size": os.path.getsize(self.output_file),
+            "errors": self.stats["errors"][:10]  # only the first 10 errors
+        }
+
+        with open(self.metadata_file, 'w', encoding='utf-8') as f:
+            json.dump(metadata, f, indent=2, ensure_ascii=False)
+
+        logger.info(f"📊 Metadata written to {self.metadata_file}")
+
+    def _print_summary(self) -> None:
+        """Prints the summary"""
+        logger.info("=" * 60)
+        logger.info("📈 DATA PREPARATION SUMMARY")
+        logger.info("=" * 60)
+
+        total_samples = self.stats["total_samples"]
+        total_chars_mb = self.stats["total_chars"] / (1024 * 1024)
+
+        logger.info("📊 STATISTICS:")
+        logger.info(f"  • Total samples: {total_samples:,}")
+        logger.info(f"  • Data size: {total_chars_mb:.1f} MB")
+        logger.info(f"  • Data sources: {len(self.stats['sources'])}")
+
+        logger.info("\n📁 SOURCES:")
+        for source, count in self.stats["sources"].items():
+            logger.info(f"  • {source}: {count:,} samples")
+
+        if self.stats["errors"]:
+            logger.warning(f"\n⚠️ ERRORS ({len(self.stats['errors'])}):")
+            for error in self.stats["errors"][:5]:
+                logger.warning(f"  • {error}")
+
+        logger.info("\n💾 OUTPUT:")
+        logger.info(f"  • Data: {self.output_file}")
+        logger.info(f"  • Metadata: {self.metadata_file}")
+
+        logger.info("\n🎮 USAGE:")
+        logger.info("  python main.py --train    # Train on the prepared data")
+        logger.info("  python main.py --prepare  # Re-run data preparation")
+        logger.info("=" * 60)
+
+
+# ==================== HELPER FUNCTIONS ====================
+def clean_text(text: str) -> str:
+    """Cleans up a piece of text"""
+    # Collapse runs of whitespace
+    text = re.sub(r'\s+', ' ', text)
+
+    # Strip special characters (optional)
+    # text = re.sub(r'[^\w\s.,!?;:()\-\'"ąćęłńóśźżĄĆĘŁŃÓŚŹŻ]', '', text)
+
+    return text.strip()
+
+
+def split_into_chunks(text: str, max_chunk_size: int = 1000) -> List[str]:
+    """Splits a long text into chunks"""
+    words = text.split()
+    chunks = []
+    current_chunk = []
+    current_size = 0
+
+    for word in words:
+        word_size = len(word) + 1  # +1 for the space
+
+        if current_size + word_size > max_chunk_size and current_chunk:
+            chunks.append(' '.join(current_chunk))
+            current_chunk = [word]
+            current_size = word_size
+        else:
+            current_chunk.append(word)
+            current_size += word_size
+
+    if current_chunk:
+        chunks.append(' '.join(current_chunk))
+
+    return chunks
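+
+
+# Editor's illustrative check, not in the original commit: neither helper
+# above is wired into DataPreparer yet; the sample string is made up.
+def _demo_helpers() -> None:
+    raw = "To  jest   przykładowy\n\ntekst. " * 50
+    cleaned = clean_text(raw)
+    chunks = split_into_chunks(cleaned, max_chunk_size=200)
+    logger.info(f"chunks: {len(chunks)}, first: {chunks[0][:40]}...")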
+
+
+# ==================== MAIN FUNCTION ====================
+def main():
+    """Data preparation entry point"""
+    preparer = DataPreparer()
+    preparer.prepare_all_data()
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/test_questions.json b/test_questions.json
new file mode 100644
index 0000000..13841cb
--- /dev/null
+++ b/test_questions.json
@@ -0,0 +1,108 @@
+{
+  "questions": [
+    {"id": 1, "category": "polski", "question": "Jak się nazywasz?"},
+    {"id": 2, "category": "polski", "question": "Skąd jesteś?"},
+    {"id": 3, "category": "polski", "question": "Ile masz lat?"},
+    {"id": 4, "category": "polski", "question": "Co lubisz robić?"},
+    {"id": 5, "category": "polski", "question": "Jaki jest twój ulubiony film?"},
+    {"id": 6, "category": "polski", "question": "Dlaczego uczysz się programowania?"},
+    {"id": 7, "category": "polski", "question": "Jakie są twoje hobby?"},
+    {"id": 8, "category": "polski", "question": "Jaka jest dziś pogoda?"},
+    {"id": 9, "category": "polski", "question": "Co będzie jutro?"},
+    {"id": 10, "category": "polski", "question": "Jakie jest twoje marzenie?"},
+    {"id": 11, "category": "polski", "question": "Jaka jest twoja ulubiona książka?"},
+    {"id": 12, "category": "polski", "question": "Kto jest twoim ulubionym autorem?"},
+    {"id": 13, "category": "polski", "question": "Co lubisz jeść?"},
+    {"id": 14, "category": "polski", "question": "Jak spędzasz weekend?"},
+    {"id": 15, "category": "polski", "question": "Jakie filmy lubisz oglądać?"},
+    {"id": 16, "category": "polski", "question": "Jaki jest twój ulubiony sport?"},
+    {"id": 17, "category": "polski", "question": "Co robisz po szkole?"},
+    {"id": 18, "category": "polski", "question": "Jakie jest twoje ulubione miejsce?"},
+    {"id": 19, "category": "polski", "question": "Co lubisz w swojej szkole?"},
+    {"id": 20, "category": "polski", "question": "Jakie są twoje plany na przyszłość?"},
+
+    {"id": 21, "category": "python", "question": "Co to jest zmienna w Pythonie?"},
+    {"id": 22, "category": "python", "question": "Jak działa instrukcja if?"},
+    {"id": 23, "category": "python", "question": "Do czego służy lista w Pythonie?"},
+    {"id": 24, "category": "python", "question": "Jak działa słownik w Pythonie?"},
+    {"id": 25, "category": "python", "question": "Co to jest moduł w Pythonie?"},
+    {"id": 26, "category": "python", "question": "Jak działa pętla while?"},
+    {"id": 27, "category": "python", "question": "Jak otworzyć plik w Pythonie?"},
+    {"id": 28, "category": "python", "question": "Jak działa funkcja print()?"},
+    {"id": 29, "category": "python", "question": "Co to jest wyjątek?"},
+    {"id": 30, "category": "python", "question": "Jak działa f-string?"},
+    {"id": 31, "category": "python", "question": "Jak definiuje się funkcję w Pythonie?"},
+    {"id": 32, "category": "python", "question": "Co to jest tuple?"},
+    {"id": 33, "category": "python", "question": "Co to jest set?"},
+    {"id": 34, "category": "python", "question": "Jak działa pętla for?"},
+    {"id": 35, "category": "python", "question": "Jak działa operator in?"},
+    {"id": 36, "category": "python", "question": "Co robi pass w Pythonie?"},
+    {"id": 37, "category": "python", "question": "Jak działa funkcja input()?"},
+    {"id": 38, "category": "python", "question": "Co to jest list comprehension?"},
+    {"id": 39, "category": "python", "question": "Jak działa instrukcja break?"},
+    {"id": 40, "category": "python", "question": "Jak działa instrukcja continue?"},
+
+    {"id": 41, "category": "matematyka", "question": "Co to jest liczba pierwsza?"},
+    {"id": 42, "category": "matematyka", "question": "Jak obliczyć procent?"},
+    {"id": 43, "category": "matematyka", "question": "Co to jest promień koła?"},
+    {"id": 44, "category": "matematyka", "question": "Jak obliczyć pole prostokąta?"},
+    {"id": 45, "category": "matematyka", "question": "Co to jest równanie?"},
+    {"id": 46, "category": "matematyka", "question": "Jak działa pierwiastek kwadratowy?"},
+    {"id": 47, "category": "matematyka", "question": "Co to jest funkcja?"},
+    {"id": 48, "category": "matematyka", "question": "Jak obliczyć średnią?"},
+    {"id": 49, "category": "matematyka", "question": "Co to jest wektor?"},
+    {"id": 50, "category": "matematyka", "question": "Jak działa logarytm?"},
+    {"id": 51, "category": "matematyka", "question": "Co to jest macierz?"},
+    {"id": 52, "category": "matematyka", "question": "Jak obliczyć deltę w równaniu kwadratowym?"},
"category": "matematyka", "question": "Jak obliczyć deltę w równaniu kwadratowym?"}, + {"id": 53, "category": "matematyka", "question": "Co to jest granica funkcji?"}, + {"id": 54, "category": "matematyka", "question": "Jak działa pochodna?"}, + {"id": 55, "category": "matematyka", "question": "Co to jest całka?"}, + {"id": 56, "category": "matematyka", "question": "Jak obliczyć pole trójkąta?"}, + {"id": 57, "category": "matematyka", "question": "Co to jest ciąg arytmetyczny?"}, + {"id": 58, "category": "matematyka", "question": "Co to jest ciąg geometryczny?"}, + {"id": 59, "category": "matematyka", "question": "Jak działa funkcja odwrotna?"}, + {"id": 60, "category": "matematyka", "question": "Co to jest wartość bezwzględna?"}, + + {"id": 61, "category": "historia", "question": "Kiedy była II wojna światowa?"}, + {"id": 62, "category": "historia", "question": "Kto był królem Polski?"}, + {"id": 63, "category": "historia", "question": "Co to była komunizm?"}, + {"id": 64, "category": "historia", "question": "Gdzie działał Napoleon?"}, + {"id": 65, "category": "historia", "question": "Kto wynalazł prasę drukarską?"}, + {"id": 66, "category": "historia", "question": "Czym był średniowiecze?"}, + {"id": 67, "category": "historia", "question": "Co spowodowało rewolucję przemysłową?"}, + {"id": 68, "category": "historia", "question": "Kto był Juliusz Cezar?"}, + {"id": 69, "category": "historia", "question": "Co to był renesans?"}, + {"id": 70, "category": "historia", "question": "Kiedy Polska odzyskała niepodległość?"}, + {"id": 71, "category": "historia", "question": "Kto był pierwszym prezydentem USA?"}, + {"id": 72, "category": "historia", "question": "Co to była zimna wojna?"}, + {"id": 73, "category": "historia", "question": "Kto był Hitlerem?"}, + {"id": 74, "category": "historia", "question": "Co to była rewolucja francuska?"}, + {"id": 75, "category": "historia", "question": "Kiedy powstało państwo polskie?"}, + {"id": 76, "category": "historia", "question": "Kto był Karolem Wielkim?"}, + {"id": 77, "category": "historia", "question": "Co to była starożytna Grecja?"}, + {"id": 78, "category": "historia", "question": "Co to była starożytna Roma?"}, + {"id": 79, "category": "historia", "question": "Kto był Kazimierz Wielki?"}, + {"id": 80, "category": "historia", "question": "Kiedy był upadek Cesarstwa Rzymskiego?"}, + + {"id": 81, "category": "nauka", "question": "Co to jest atom?"}, + {"id": 82, "category": "nauka", "question": "Jak działa grawitacja?"}, + {"id": 83, "category": "nauka", "question": "Co to jest DNA?"}, + {"id": 84, "category": "nauka", "question": "Jak powstaje tęcza?"}, + {"id": 85, "category": "nauka", "question": "Co to jest energia?"}, + {"id": 86, "category": "nauka", "question": "Jak działa magnetyzm?"}, + {"id": 87, "category": "nauka", "question": "Co to jest światło?"}, + {"id": 88, "category": "nauka", "question": "Jak działa silnik?"}, + {"id": 89, "category": "nauka", "question": "Co to jest fotosynteza?"}, + {"id": 90, "category": "nauka", "question": "Jak działa komputer?"}, + {"id": 91, "category": "nauka", "question": "Co to jest elektron?"}, + {"id": 92, "category": "nauka", "question": "Jak działa siła odśrodkowa?"}, + {"id": 93, "category": "nauka", "question": "Co to jest ciśnienie atmosferyczne?"}, + {"id": 94, "category": "nauka", "question": "Co to jest fotosfera?"}, + {"id": 95, "category": "nauka", "question": "Jak powstaje wiatr?"}, + {"id": 96, "category": "nauka", "question": "Co to jest ciśnienie krwi?"}, + {"id": 97, "category": "nauka", 
"question": "Co to jest komórka?"}, + {"id": 98, "category": "nauka", "question": "Co to jest grawitacja Newtona?"}, + {"id": 99, "category": "nauka", "question": "Jak działa fotosynteza w roślinach?"}, + {"id": 100, "category": "nauka", "question": "Co to jest mikroorganizm?"} + ] +} diff --git a/training.log b/training.log new file mode 100644 index 0000000..06910f9 --- /dev/null +++ b/training.log @@ -0,0 +1,190 @@ +2026-01-26 11:36:51,804 - INFO - ============================================================ +2026-01-26 11:36:51,804 - INFO - 🎯 SYSTEM MINIGPT-60M +2026-01-26 11:36:51,804 - INFO - ============================================================ +2026-01-26 11:36:51,804 - INFO - Python: 3.13.5 +2026-01-26 11:36:51,804 - INFO - PyTorch: 2.9.1+cpu +2026-01-26 11:36:51,804 - INFO - Device: CPU +2026-01-26 11:36:51,804 - INFO - ============================================================ +2026-01-26 11:36:51,963 - INFO - ============================================================ +2026-01-26 11:36:51,963 - INFO - 🎯 MINIGPT-60M - Z CHECKPOINTAMI +2026-01-26 11:36:51,963 - INFO - ============================================================ +2026-01-26 11:36:51,963 - INFO - +❓ Nie podano flagi. Dostępne opcje: + +2026-01-26 11:36:51,964 - INFO - +💡 PRZYKŁADY UŻYCIA: +2026-01-26 11:36:51,965 - INFO - python main.py --train # Trenuj od początku +2026-01-26 11:36:51,965 - INFO - [Ctrl+C] # Przerwij trening +2026-01-26 11:36:51,965 - INFO - python main.py --cont # Wznów trening +2026-01-26 11:36:51,965 - INFO - python main.py --more 5 # Dodaj 5 epok +2026-01-26 11:36:51,965 - INFO - python main.py --generate 'AI' # Generuj tekst +2026-01-26 11:36:51,965 - INFO - ============================================================ +2026-01-26 11:37:01,141 - INFO - ============================================================ +2026-01-26 11:37:01,141 - INFO - 🎯 SYSTEM MINIGPT-60M +2026-01-26 11:37:01,141 - INFO - ============================================================ +2026-01-26 11:37:01,141 - INFO - Python: 3.13.5 +2026-01-26 11:37:01,141 - INFO - PyTorch: 2.9.1+cpu +2026-01-26 11:37:01,141 - INFO - Device: CPU +2026-01-26 11:37:01,141 - INFO - ============================================================ +2026-01-26 11:37:01,225 - INFO - ============================================================ +2026-01-26 11:37:01,225 - INFO - 🎯 MINIGPT-60M - Z CHECKPOINTAMI +2026-01-26 11:37:01,225 - INFO - ============================================================ +2026-01-26 11:37:02,243 - INFO - 🤖 Model: 85,501,440 parametrów (85.5M) +2026-01-26 11:37:02,244 - WARNING - ⚠️ Brak wytrenowanego modelu, używam niewytrenowanego +2026-01-26 11:37:02,244 - INFO - 🤖 ChatBot initialized on cpu +2026-01-26 11:37:08,343 - INFO - + +⚠️ Trening przerwany przez użytkownika! +2026-01-26 11:37:08,343 - INFO - 💾 Zapisuję stan do wznowienia... 
+2026-01-26 11:37:08,343 - INFO - ✅ You can resume training with: python main.py --cont
+2026-01-26 11:37:55,253 - INFO - ============================================================
+2026-01-26 11:37:55,254 - INFO - 🎯 MINIGPT-60M SYSTEM
+2026-01-26 11:37:55,254 - INFO - ============================================================
+2026-01-26 11:37:55,254 - INFO - Python: 3.13.5
+2026-01-26 11:37:55,255 - INFO - PyTorch: 2.9.1+cpu
+2026-01-26 11:37:55,255 - INFO - Device: CPU
+2026-01-26 11:37:55,255 - INFO - ============================================================
+2026-01-26 11:37:55,453 - INFO - ============================================================
+2026-01-26 11:37:55,453 - INFO - 🎯 MINIGPT-60M - WITH CHECKPOINTS
+2026-01-26 11:37:55,453 - INFO - ============================================================
+2026-01-26 11:37:56,683 - INFO - 🤖 Model: 85,501,440 parameters (85.5M)
+2026-01-26 11:37:56,684 - WARNING - ⚠️ No trained model found, using an untrained one
+2026-01-26 11:37:56,685 - INFO - 🤖 ChatBot initialized on cpu
+2026-01-26 11:38:10,327 - INFO - 
+
+⚠️ Training interrupted by the user!
+2026-01-26 11:38:10,327 - INFO - 💾 Saving state for resuming...
+2026-01-26 11:38:10,327 - INFO - ✅ You can resume training with: python main.py --cont
+2026-01-26 11:38:31,310 - INFO - ============================================================
+2026-01-26 11:38:31,310 - INFO - 🎯 MINIGPT-60M SYSTEM
+2026-01-26 11:38:31,310 - INFO - ============================================================
+2026-01-26 11:38:31,310 - INFO - Python: 3.13.5
+2026-01-26 11:38:31,310 - INFO - PyTorch: 2.9.1+cpu
+2026-01-26 11:38:31,310 - INFO - Device: CPU
+2026-01-26 11:38:31,310 - INFO - ============================================================
+2026-01-26 11:38:31,473 - INFO - ============================================================
+2026-01-26 11:38:31,473 - INFO - 🎯 MINIGPT-60M - WITH CHECKPOINTS
+2026-01-26 11:38:31,473 - INFO - ============================================================
+2026-01-26 11:38:32,504 - INFO - 🤖 Model: 85,501,440 parameters (85.5M)
+2026-01-26 11:38:32,504 - WARNING - ⚠️ No trained model found, using an untrained one
+2026-01-26 11:38:32,505 - INFO - 🤖 ChatBot initialized on cpu
+2026-01-26 11:38:36,336 - INFO - 
+
+⚠️ Training interrupted by the user!
+2026-01-26 11:38:36,336 - INFO - 💾 Saving state for resuming...
+2026-01-26 11:38:36,336 - INFO - ✅ You can resume training with: python main.py --cont
+2026-01-26 11:39:32,575 - INFO - ============================================================
+2026-01-26 11:39:32,576 - INFO - 🎯 MINIGPT-60M SYSTEM
+2026-01-26 11:39:32,576 - INFO - ============================================================
+2026-01-26 11:39:32,576 - INFO - Python: 3.13.5
+2026-01-26 11:39:32,576 - INFO - PyTorch: 2.9.1+cpu
+2026-01-26 11:39:32,576 - INFO - Device: CPU
+2026-01-26 11:39:32,576 - INFO - ============================================================
+2026-01-26 11:39:32,740 - INFO - ============================================================
+2026-01-26 11:39:32,740 - INFO - 🎯 MINIGPT-60M - WITH CHECKPOINTS
+2026-01-26 11:39:32,740 - INFO - ============================================================
+2026-01-26 11:39:33,882 - INFO - 🤖 Model: 85,501,440 parameters (85.5M)
+2026-01-26 11:39:33,882 - WARNING - ⚠️ No trained model found, using an untrained one
+2026-01-26 11:39:33,883 - INFO - 🤖 ChatBot initialized on cpu
+2026-01-26 11:39:36,591 - INFO - 
+
+⚠️ Training interrupted by the user!
+2026-01-26 11:39:36,591 - INFO - 💾 Saving state for resuming...
+2026-01-26 11:39:36,591 - INFO - ✅ You can resume training with: python main.py --cont
+2026-01-26 11:40:11,736 - INFO - ============================================================
+2026-01-26 11:40:11,737 - INFO - 🎯 MINIGPT-60M SYSTEM
+2026-01-26 11:40:11,737 - INFO - ============================================================
+2026-01-26 11:40:11,737 - INFO - Python: 3.13.5
+2026-01-26 11:40:11,737 - INFO - PyTorch: 2.9.1+cpu
+2026-01-26 11:40:11,737 - INFO - Device: CPU
+2026-01-26 11:40:11,737 - INFO - ============================================================
+2026-01-26 11:40:11,872 - INFO - ============================================================
+2026-01-26 11:40:11,872 - INFO - 🎯 MINIGPT-60M - WITH CHECKPOINTS
+2026-01-26 11:40:11,873 - INFO - ============================================================
+2026-01-26 11:40:13,485 - INFO - 🤖 Model: 85,501,440 parameters (85.5M)
+2026-01-26 11:40:13,486 - WARNING - ⚠️ No trained model found, using an untrained one
+2026-01-26 11:40:13,488 - INFO - 🤖 ChatBot initialized on cpu
+2026-01-26 11:40:15,460 - INFO - 
+
+⚠️ Training interrupted by the user!
+2026-01-26 11:40:15,460 - INFO - 💾 Saving state for resuming...
+2026-01-26 11:40:15,460 - INFO - ✅ You can resume training with: python main.py --cont
+2026-01-26 11:41:14,387 - INFO - ============================================================
+2026-01-26 11:41:14,388 - INFO - 🎯 MINIGPT-60M SYSTEM
+2026-01-26 11:41:14,388 - INFO - ============================================================
+2026-01-26 11:41:14,388 - INFO - Python: 3.13.5
+2026-01-26 11:41:14,388 - INFO - PyTorch: 2.9.1+cpu
+2026-01-26 11:41:14,388 - INFO - Device: CPU
+2026-01-26 11:41:14,388 - INFO - ============================================================
+2026-01-26 11:41:14,475 - INFO - ============================================================
+2026-01-26 11:41:14,475 - INFO - 🎯 MINIGPT-60M - WITH CHECKPOINTS
+2026-01-26 11:41:14,475 - INFO - ============================================================
+2026-01-26 11:41:15,430 - INFO - 🤖 Model: 85,501,440 parameters (85.5M)
+2026-01-26 11:41:15,431 - WARNING - ⚠️ No trained model found, using an untrained one
+2026-01-26 11:41:15,431 - INFO - 🤖 ChatBot initialized on cpu
+2026-01-26 11:41:18,877 - INFO - 
+
+⚠️ Training interrupted by the user!
+2026-01-26 11:41:18,877 - INFO - 💾 Saving state for resuming...
+2026-01-26 11:41:18,877 - INFO - ✅ You can resume training with: python main.py --cont
+2026-01-26 11:43:21,037 - INFO - ============================================================
+2026-01-26 11:43:21,038 - INFO - 🎯 MINIGPT-60M SYSTEM
+2026-01-26 11:43:21,038 - INFO - ============================================================
+2026-01-26 11:43:21,038 - INFO - Python: 3.13.5
+2026-01-26 11:43:21,038 - INFO - PyTorch: 2.9.1+cpu
+2026-01-26 11:43:21,038 - INFO - Device: CPU
+2026-01-26 11:43:21,038 - INFO - ============================================================
+2026-01-26 11:43:21,124 - INFO - ============================================================
+2026-01-26 11:43:21,124 - INFO - 🎯 MINIGPT-60M - WITH CHECKPOINTS
+2026-01-26 11:43:21,124 - INFO - ============================================================
+2026-01-26 11:43:22,084 - INFO - 🤖 Model: 85,501,440 parameters (85.5M)
+2026-01-26 11:43:22,084 - WARNING - ⚠️ No trained model found, using an untrained one
+2026-01-26 11:43:22,085 - INFO - 🤖 ChatBot initialized on cpu
+2026-01-26 11:43:24,269 - INFO - 
+
+⚠️ Training interrupted by the user!
+2026-01-26 11:43:24,269 - INFO - 💾 Saving state for resuming...
+2026-01-26 11:43:24,270 - INFO - ✅ You can resume training with: python main.py --cont
+2026-01-26 11:44:46,990 - INFO - ============================================================
+2026-01-26 11:44:46,990 - INFO - 🎯 MINIGPT-60M SYSTEM
+2026-01-26 11:44:46,990 - INFO - ============================================================
+2026-01-26 11:44:46,990 - INFO - Python: 3.13.5
+2026-01-26 11:44:46,990 - INFO - PyTorch: 2.9.1+cpu
+2026-01-26 11:44:46,990 - INFO - Device: CPU
+2026-01-26 11:44:46,990 - INFO - ============================================================
+2026-01-26 11:44:57,235 - INFO - ============================================================
+2026-01-26 11:44:57,235 - INFO - 🎯 MINIGPT-60M SYSTEM
+2026-01-26 11:44:57,235 - INFO - ============================================================
+2026-01-26 11:44:57,235 - INFO - Python: 3.13.5
+2026-01-26 11:44:57,235 - INFO - PyTorch: 2.9.1+cpu
+2026-01-26 11:44:57,235 - INFO - Device: CPU
+2026-01-26 11:44:57,235 - INFO - ============================================================
+2026-01-26 11:44:57,321 - INFO - ============================================================
+2026-01-26 11:44:57,321 - INFO - 🎯 MINIGPT-60M - WITH CHECKPOINTS
+2026-01-26 11:44:57,321 - INFO - ============================================================
+2026-01-26 11:44:58,280 - INFO - 🤖 Model: 85,501,440 parameters (85.5M)
+2026-01-26 11:44:58,281 - WARNING - ⚠️ No trained model found, using an untrained one
+2026-01-26 11:44:58,281 - INFO - 🤖 ChatBot initialized on cpu
+2026-01-26 11:45:05,073 - INFO - 
+
+⚠️ Training interrupted by the user!
+2026-01-26 11:45:05,073 - INFO - 💾 Saving state for resuming...
+2026-01-26 11:45:05,073 - INFO - ✅ You can resume training with: python main.py --cont
+2026-01-26 11:51:39,327 - INFO - ============================================================
+2026-01-26 11:51:39,327 - INFO - 🎯 MINIGPT-60M SYSTEM
+2026-01-26 11:51:39,327 - INFO - ============================================================
+2026-01-26 11:51:39,327 - INFO - Python: 3.13.5
+2026-01-26 11:51:39,327 - INFO - PyTorch: 2.9.1+cpu
+2026-01-26 11:51:39,328 - INFO - Device: CPU
+2026-01-26 11:51:39,328 - INFO - ============================================================
+2026-01-26 11:51:39,414 - INFO - ============================================================
+2026-01-26 11:51:39,414 - INFO - 🎯 MINIGPT-60M - WITH CHECKPOINTS
+2026-01-26 11:51:39,414 - INFO - ============================================================
+2026-01-26 11:51:39,414 - WARNING - ⚠️ No trained model found, using an untrained one
+2026-01-26 11:51:40,380 - INFO - 🤖 Model: 85,501,440 parameters (85.5M)
+2026-01-26 11:51:40,389 - INFO - 🤖 ChatBot initialized on cpu
+2026-01-26 11:52:15,241 - INFO - 
+
+⚠️ Training interrupted by the user!
+2026-01-26 11:52:15,242 - INFO - 💾 Saving state for resuming...
+2026-01-26 11:52:15,242 - INFO - ✅ You can resume training with: python main.py --cont