#!/usr/bin/env python3
"""
🛠️ Clay Checkpoint Manager - CLI for managing training checkpoints.

Checkpoints are ``clay_checkpoint_*.pt`` files located in
``cfg.checkpoints_dir``. Each checkpoint may have a JSON sidecar with the same
stem (``.json`` suffix) holding ``checkpoint_info`` and ``training_stats``
sections; a ``training_stats.json`` file in the same directory holds the
overall training statistics.
"""

import argparse
import json
import shutil
from pathlib import Path
from datetime import datetime

from config import cfg

# Glob pattern shared by every command that enumerates checkpoints.
CHECKPOINT_GLOB = "clay_checkpoint_*.pt"
BYTES_PER_MB = 1024 * 1024


def _find_checkpoints():
    """Return the checkpoint files found directly in ``cfg.checkpoints_dir``."""
    return list(Path(cfg.checkpoints_dir).glob(CHECKPOINT_GLOB))


def _read_json(path):
    """Return the parsed JSON stored at *path*, or ``None``.

    ``None`` is returned when the file is missing, cannot be read, or does not
    contain valid JSON, so that callers can degrade gracefully instead of
    aborting with a traceback.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError):
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError; a missing file is an OSError.
        return None


def _describe_checkpoint(cp):
    """Return the lines to print for checkpoint *cp* in the listing."""
    sidecar = _read_json(cp.with_suffix('.json'))
    if sidecar is not None:
        try:
            info = sidecar['checkpoint_info']
            size_mb = info['file_size'] / BYTES_PER_MB
            # Build every line before returning so that a bad sidecar never
            # produces a half-printed entry.
            return [
                f"📄 {cp.name}",
                f" • Epoka: {info['epoch']} | Krok: {info['step']:,}",
                f" • Loss: {info['loss']:.4f} | Rozmiar: {size_mb:.1f}MB",
                f" • Data: {info['timestamp']}",
                "-" * 40,
            ]
        except (KeyError, TypeError, ValueError):
            # Incomplete or wrongly typed sidecar: fall through to the plain
            # entry below rather than aborting the whole listing.
            pass

    size_mb = cp.stat().st_size / BYTES_PER_MB
    return [f"📄 {cp.name} ({size_mb:.1f}MB)"]


def list_checkpoints():
    """Print the available checkpoints, newest first.

    Checkpoints with a usable JSON sidecar are shown with epoch, step, loss,
    size and timestamp; all others are shown with their name and on-disk size.
    """
    checkpoints = _find_checkpoints()
    if not checkpoints:
        print("❌ Brak checkpointów")
        return

    print(f"\n📁 CLAY CHECKPOINTS ({len(checkpoints)}):")
    print("=" * 80)

    newest_first = sorted(checkpoints, key=lambda p: p.stat().st_mtime, reverse=True)
    for cp in newest_first:
        for line in _describe_checkpoint(cp):
            print(line)


def show_training_stats():
    """Print the overall training statistics from ``training_stats.json``.

    Missing keys are reported with the defaults used below (``0`` / ``'N/A'``).
    """
    stats = _read_json(Path(cfg.checkpoints_dir) / "training_stats.json")
    if stats is None:
        print("❌ Brak statystyk treningu")
        return

    print("\n📊 STATYSTYKI TRENINGU:")
    print("=" * 60)

    # Whole hours and whole minutes: formatting float hours with ``:.0f``
    # rounds, which turned 5400 s into "2h 30m" and 3599 s into "1h 60m".
    hours, leftover = divmod(int(stats.get('total_time', 0)), 3600)
    minutes = leftover // 60

    print(f" • Całkowity czas: {hours}h {minutes}m")
    print(f" • Ostatni loss: {stats.get('final_loss', 0):.4f}")
    print(f" • Najlepszy loss: {stats.get('best_loss', 0):.4f}")
    print(f" • Średni loss: {stats.get('avg_loss', 0):.4f}")
    print(f" • Sprawdzone kroki: {stats.get('total_steps', 0):,}")
    print(f" • Zakończono: {stats.get('completion_time', 'N/A')}")


def cleanup_checkpoints(keep=5):
    """Delete old checkpoints, keeping the *keep* most recently modified ones.

    The JSON sidecar of every deleted checkpoint is removed as well.

    Args:
        keep: Number of newest checkpoints to keep; ``0`` deletes all of them.

    Raises:
        ValueError: If *keep* is negative.
    """
    if keep < 0:
        raise ValueError(f"keep must be >= 0, got {keep}")

    checkpoints = _find_checkpoints()
    if len(checkpoints) <= keep:
        print(f"✅ Wszystkie checkpointy zachowane (mniej niż {keep})")
        return

    checkpoints.sort(key=lambda p: p.stat().st_mtime)
    # Slice by an explicit count: ``checkpoints[:-keep]`` is empty when
    # keep == 0, so nothing would be deleted in that case.
    to_delete = checkpoints[:len(checkpoints) - keep]

    print(f"\n🗑️ Usuwanie {len(to_delete)} starych checkpointów:")

    total_freed = 0
    for cp in to_delete:
        size_mb = cp.stat().st_size / BYTES_PER_MB
        total_freed += size_mb
        print(f" • {cp.name} ({size_mb:.1f}MB)")
        cp.unlink()

        # Remove the JSON sidecar too.
        json_file = cp.with_suffix('.json')
        if json_file.exists():
            json_file.unlink()

    print(f"\n✅ Zachowano {keep} najnowszych checkpointów")
    print(f"💰 Zwolniono {total_freed:.1f}MB")


def export_checkpoint(checkpoint_name, export_dir="exports"):
    """Copy a checkpoint and its JSON sidecar (if any) into *export_dir*.

    Args:
        checkpoint_name: File name of the checkpoint inside
            ``cfg.checkpoints_dir``.
        export_dir: Destination directory; created (with parents) if missing.
    """
    cp_path = Path(cfg.checkpoints_dir) / checkpoint_name
    # is_file() rather than exists(): a directory cannot be copied by copy2.
    if not cp_path.is_file():
        print(f"❌ Checkpoint {checkpoint_name} nie istnieje")
        return

    # Create the export directory, including missing parent directories.
    export_path = Path(export_dir)
    export_path.mkdir(parents=True, exist_ok=True)

    # Copy the checkpoint and its JSON sidecar.
    dest_path = export_path / checkpoint_name
    shutil.copy2(cp_path, dest_path)

    json_file = cp_path.with_suffix('.json')
    if json_file.exists():
        shutil.copy2(json_file, export_path / json_file.name)

    print(f"✅ Checkpoint wyeksportowany do: {dest_path}")


def show_checkpoint_info(checkpoint_name):
    """Print the detailed information stored in a checkpoint's JSON sidecar.

    Args:
        checkpoint_name: File name of the checkpoint inside
            ``cfg.checkpoints_dir``.
    """
    cp_path = Path(cfg.checkpoints_dir) / checkpoint_name
    if not cp_path.exists():
        print(f"❌ Checkpoint {checkpoint_name} nie istnieje")
        return

    info = _read_json(cp_path.with_suffix('.json'))
    lines = None
    if info is not None:
        try:
            cp_info = info['checkpoint_info']
            stats = info['training_stats']
            # Format everything up front so that an incomplete sidecar yields
            # one clear message instead of partial output and a traceback.
            lines = [
                f"\n📋 INFO O CHECKPOINCIE: {checkpoint_name}",
                "=" * 60,
                "📁 PODSTAWOWE INFORMACJE:",
                f" • Epoka: {cp_info['epoch']}",
                f" • Krok: {cp_info['step']:,}",
                f" • Loss: {cp_info['loss']:.4f}",
                f" • Rozmiar: {cp_info['file_size'] / BYTES_PER_MB:.1f}MB",
                f" • Data: {cp_info['timestamp']}",
                "\n📊 STATYSTYKI TRENINGU:",
                f" • Całkowity czas: {stats['total_time']:.0f}s",
                f" • Średni loss: {stats['avg_loss']:.4f}",
                f" • Current LR: {stats['current_lr']:.6f}",
                f" • Kroki: {stats['steps_done']:,}",
            ]
        except (KeyError, TypeError, ValueError):
            lines = None

    if lines is None:
        print("❌ Brak informacji JSON dla tego checkpointu")
        return

    for line in lines:
        print(line)


def main():
    """Parse the command line and run the selected command.

    Only the first matching option is executed, in the order: ``--list``,
    ``--stats``, ``--cleanup``, ``--export``, ``--info``, ``--export-all``.
    With no option, a short usage summary is printed.
    """
    parser = argparse.ArgumentParser(description="Clay Checkpoint Manager")
    parser.add_argument("--list", action="store_true", help="Lista checkpointów")
    parser.add_argument("--stats", action="store_true", help="Pokaż statystyki")
    parser.add_argument("--cleanup", type=int, nargs='?', const=5,
                        help="Wyczyść stare checkpointy (domyślnie: 5)")
    parser.add_argument("--export", type=str, help="Eksportuj checkpoint")
    parser.add_argument("--info", type=str, help="Info o konkretnym checkpoincie")
    parser.add_argument("--export-all", action="store_true",
                        help="Eksportuj wszystkie checkpointy")

    args = parser.parse_args()

    if args.list:
        list_checkpoints()
    elif args.stats:
        show_training_stats()
    elif args.cleanup is not None:
        if args.cleanup < 0:
            # Exit with a usage error instead of deleting an arbitrary set.
            parser.error("--cleanup: liczba checkpointów nie może być ujemna")
        cleanup_checkpoints(args.cleanup)
    elif args.export:
        export_checkpoint(args.export)
    elif args.info:
        show_checkpoint_info(args.info)
    elif args.export_all:
        checkpoints = _find_checkpoints()
        if not checkpoints:
            print("❌ Brak checkpointów")
        for cp in checkpoints:
            export_checkpoint(cp.name)
    else:
        print("\n🛠️ Clay Checkpoint Manager")
        print("=" * 40)
        print("Użyj:")
        print(" --list # Lista checkpointów")
        print(" --stats # Statystyki treningu")
        print(" --cleanup [N] # Zostaw N najnowszych (domyślnie 5)")
        print(" --export NAME # Eksportuj checkpoint")
        print(" --info NAME # Info o checkpoincie")
        print(" --export-all # Eksportuj wszystkie")
        print("\nPrzykłady:")
        print(" python clay_manager.py --list")
        print(" python clay_manager.py --cleanup 3")
        print(" python clay_manager.py --info clay_checkpoint_ep2_step5000_20240126_143022.pt")


if __name__ == "__main__":
    main()