198 lines
6.8 KiB
Python
198 lines
6.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
🛠️ Clay Checkpoint Manager - CLI do zarządzania checkpointami
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import shutil
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from config import cfg
|
|
|
|
|
|
def list_checkpoints():
    """Print every Clay checkpoint in the checkpoint directory, newest first.

    Checkpoints are the ``clay_checkpoint_*.pt`` files directly inside
    ``cfg.checkpoints_dir``, ordered by modification time (most recent
    first). When a checkpoint has a sidecar ``.json`` file containing a
    ``checkpoint_info`` entry, its epoch, step, loss, recorded size and
    timestamp are shown. When the sidecar is absent, unreadable, malformed
    or missing a field, only the file name and on-disk size are printed, so
    a single damaged metadata file does not abort the whole listing.

    Returns:
        None. All output goes to stdout.
    """
    checkpoints = list(Path(cfg.checkpoints_dir).glob("clay_checkpoint_*.pt"))

    if not checkpoints:
        print("❌ Brak checkpointów")
        return

    print(f"\n📁 CLAY CHECKPOINTS ({len(checkpoints)}):")
    print("=" * 80)

    for cp in sorted(checkpoints, key=lambda x: x.stat().st_mtime, reverse=True):
        json_file = cp.with_suffix('.json')
        lines = None

        if json_file.exists():
            try:
                with open(json_file, 'r') as f:
                    info = json.load(f)['checkpoint_info']

                # Format everything before printing so that a missing or
                # ill-typed field cannot leave a half-printed entry behind.
                size_mb = info['file_size'] / (1024 * 1024)
                lines = [
                    f"📄 {cp.name}",
                    f" • Epoka: {info['epoch']} | Krok: {info['step']:,}",
                    f" • Loss: {info['loss']:.4f} | Rozmiar: {size_mb:.1f}MB",
                    f" • Data: {info['timestamp']}",
                    "-" * 40,
                ]
            except (OSError, ValueError, KeyError, TypeError):
                # ValueError covers json.JSONDecodeError and bad format
                # specs; fall through to the size-only line below.
                lines = None

        if lines is None:
            size_mb = cp.stat().st_size / (1024 * 1024)
            lines = [f"📄 {cp.name} ({size_mb:.1f}MB)"]

        for line in lines:
            print(line)
|
|
|
|
|
|
def show_training_stats():
    """Print the training summary stored in ``training_stats.json``.

    The file is looked up inside ``cfg.checkpoints_dir``. Missing keys fall
    back to ``0`` (or ``'N/A'`` for the completion time). If the file does
    not exist, a short notice is printed instead.

    The total time is split into whole hours and whole minutes with integer
    arithmetic. Formatting the fractional values with ``:.0f`` rounds them,
    which turned 5400 s into "2h 30m" and 3599 s into "1h 60m".

    Returns:
        None. All output goes to stdout.
    """
    stats_file = Path(cfg.checkpoints_dir) / "training_stats.json"

    if not stats_file.exists():
        print("❌ Brak statystyk treningu")
        return

    with open(stats_file, 'r') as f:
        stats = json.load(f)

    print("\n📊 STATYSTYKI TRENINGU:")
    print("=" * 60)

    total_time = stats.get('total_time', 0)
    # Truncate to whole seconds first so hours/minutes are exact integers.
    hours, remainder = divmod(int(total_time), 3600)
    minutes = remainder // 60

    print(f" • Całkowity czas: {hours}h {minutes}m")
    print(f" • Ostatni loss: {stats.get('final_loss', 0):.4f}")
    print(f" • Najlepszy loss: {stats.get('best_loss', 0):.4f}")
    print(f" • Średni loss: {stats.get('avg_loss', 0):.4f}")
    print(f" • Sprawdzone kroki: {stats.get('total_steps', 0):,}")
    print(f" • Zakończono: {stats.get('completion_time', 'N/A')}")
|
|
|
|
|
|
def cleanup_checkpoints(keep=5):
    """Delete old checkpoints, keeping only the ``keep`` most recent ones.

    Checkpoints are the ``clay_checkpoint_*.pt`` files in
    ``cfg.checkpoints_dir``; "most recent" is decided by file modification
    time. The sidecar ``.json`` file of each deleted checkpoint is removed
    as well.

    Args:
        keep: Number of newest checkpoints to preserve. ``0`` removes all of
            them. A negative value is rejected with a message and nothing is
            deleted.

    Returns:
        None. Progress and the amount of freed space go to stdout.
    """
    if keep < 0:
        # The old slice checkpoints[:-keep] silently deleted the |keep|
        # oldest files for a negative keep; refuse instead.
        print(f"❌ Liczba zachowywanych checkpointów nie może być ujemna: {keep}")
        return

    checkpoints = list(Path(cfg.checkpoints_dir).glob("clay_checkpoint_*.pt"))

    if len(checkpoints) <= keep:
        print(f"✅ Wszystkie checkpointy zachowane (mniej niż {keep})")
        return

    # Oldest first, so the files to delete form the head of the list.
    checkpoints.sort(key=lambda x: x.stat().st_mtime)
    # Slice by an explicit count: checkpoints[:-keep] is empty when keep == 0,
    # which deleted nothing while still reporting a cleanup.
    to_delete = checkpoints[:len(checkpoints) - keep]

    print(f"\n🗑️ Usuwanie {len(to_delete)} starych checkpointów:")
    total_freed = 0

    for cp in to_delete:
        # Read the size before unlinking; it is unavailable afterwards.
        size_mb = cp.stat().st_size / (1024 * 1024)
        total_freed += size_mb
        print(f" • {cp.name} ({size_mb:.1f}MB)")
        cp.unlink()

        # Remove the matching metadata file too, if there is one.
        json_file = cp.with_suffix('.json')
        if json_file.exists():
            json_file.unlink()

    print(f"\n✅ Zachowano {keep} najnowszych checkpointów")
    print(f"💰 Zwolniono {total_freed:.1f}MB")
|
|
|
|
|
|
def export_checkpoint(checkpoint_name, export_dir="exports"):
    """Copy a checkpoint and its sidecar JSON into an export directory.

    Args:
        checkpoint_name: File name of the checkpoint, resolved relative to
            ``cfg.checkpoints_dir``.
        export_dir: Destination directory. It is created if needed,
            including missing parent directories, so nested targets such as
            ``exports/run1`` work.

    Returns:
        None. Prints a notice and returns early when the checkpoint does not
        exist; otherwise prints the destination path after copying.
    """
    cp_path = Path(cfg.checkpoints_dir) / checkpoint_name

    if not cp_path.exists():
        print(f"❌ Checkpoint {checkpoint_name} nie istnieje")
        return

    # Create the export directory. parents=True avoids FileNotFoundError
    # when export_dir has more than one missing path component.
    export_path = Path(export_dir)
    export_path.mkdir(parents=True, exist_ok=True)

    # Copy the checkpoint itself.
    dest_path = export_path / checkpoint_name
    shutil.copy2(cp_path, dest_path)

    # Copy the metadata file alongside it when one exists.
    json_file = cp_path.with_suffix('.json')
    if json_file.exists():
        shutil.copy2(json_file, export_path / json_file.name)

    print(f"✅ Checkpoint wyeksportowany do: {dest_path}")
|
|
|
|
|
|
def show_checkpoint_info(checkpoint_name):
    """Print the detailed metadata recorded for a single checkpoint.

    The checkpoint is resolved relative to ``cfg.checkpoints_dir`` and its
    metadata is read from the sidecar file with the same name and a
    ``.json`` suffix. That file is expected to hold a ``checkpoint_info``
    and a ``training_stats`` mapping.

    Args:
        checkpoint_name: File name of the checkpoint to describe.

    Returns:
        None. Prints a notice when the checkpoint or its JSON is missing.
    """
    target = Path(cfg.checkpoints_dir) / checkpoint_name
    if not target.exists():
        print(f"❌ Checkpoint {checkpoint_name} nie istnieje")
        return

    sidecar = target.with_suffix('.json')
    if not sidecar.exists():
        print("❌ Brak informacji JSON dla tego checkpointu")
        return

    with open(sidecar, 'r') as handle:
        payload = json.load(handle)

    print(f"\n📋 INFO O CHECKPOINCIE: {checkpoint_name}")
    print("=" * 60)

    meta = payload['checkpoint_info']
    training = payload['training_stats']

    # Basic checkpoint facts.
    print("📁 PODSTAWOWE INFORMACJE:")
    print(f" • Epoka: {meta['epoch']}")
    print(f" • Krok: {meta['step']:,}")
    print(f" • Loss: {meta['loss']:.4f}")
    size_mb = meta['file_size'] / (1024 * 1024)
    print(f" • Rozmiar: {size_mb:.1f}MB")
    print(f" • Data: {meta['timestamp']}")

    # Training statistics captured together with the checkpoint.
    print("\n📊 STATYSTYKI TRENINGU:")
    print(f" • Całkowity czas: {training['total_time']:.0f}s")
    print(f" • Średni loss: {training['avg_loss']:.4f}")
    print(f" • Current LR: {training['current_lr']:.6f}")
    print(f" • Kroki: {training['steps_done']:,}")
|
|
|
|
|
|
def main():
    """Parse the command line and run the first requested action.

    Options are checked in a fixed order (``--list``, ``--stats``,
    ``--cleanup``, ``--export``, ``--info``, ``--export-all``) and only the
    first one that is set is executed. With no option, a usage summary is
    printed.
    """
    cli = argparse.ArgumentParser(description="Clay Checkpoint Manager")
    cli.add_argument("--list", action="store_true", help="Lista checkpointów")
    cli.add_argument("--stats", action="store_true", help="Pokaż statystyki")
    cli.add_argument(
        "--cleanup",
        type=int,
        nargs='?',
        const=5,
        help="Wyczyść stare checkpointy (domyślnie: 5)",
    )
    cli.add_argument("--export", type=str, help="Eksportuj checkpoint")
    cli.add_argument("--info", type=str, help="Info o konkretnym checkpoincie")
    cli.add_argument(
        "--export-all",
        action="store_true",
        help="Eksportuj wszystkie checkpointy",
    )

    options = cli.parse_args()

    if options.list:
        list_checkpoints()
        return

    if options.stats:
        show_training_stats()
        return

    # --cleanup without a value yields const=5; None means it was not given.
    if options.cleanup is not None:
        cleanup_checkpoints(options.cleanup)
        return

    if options.export:
        export_checkpoint(options.export)
        return

    if options.info:
        show_checkpoint_info(options.info)
        return

    if options.export_all:
        # Collect the matches up front, then export them one by one.
        found = list(Path(cfg.checkpoints_dir).glob("clay_checkpoint_*.pt"))
        for checkpoint in found:
            export_checkpoint(checkpoint.name)
        return

    # Nothing requested: show the usage summary.
    usage_lines = (
        "\n🛠️ Clay Checkpoint Manager",
        "=" * 40,
        "Użyj:",
        " --list # Lista checkpointów",
        " --stats # Statystyki treningu",
        " --cleanup [N] # Zostaw N najnowszych (domyślnie 5)",
        " --export NAME # Eksportuj checkpoint",
        " --info NAME # Info o checkpoincie",
        " --export-all # Eksportuj wszystkie",
        "\nPrzykłady:",
        " python clay_manager.py --list",
        " python clay_manager.py --cleanup 3",
        " python clay_manager.py --info clay_checkpoint_ep2_step5000_20240126_143022.pt",
    )
    for usage_line in usage_lines:
        print(usage_line)
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()