873 lines
33 KiB
Python
873 lines
33 KiB
Python
#!/usr/bin/env python3.11
|
|
"""
|
|
Core System Management - Massive Konsolidierung
|
|
==============================================
|
|
|
|
Migration Information:
|
|
- Ursprünglich: system_control.py, shutdown_manager.py, watchdog_manager.py,
|
|
windows_fixes.py, error_recovery.py, timeout_force_quit_manager.py
|
|
- Konsolidiert am: 2025-06-09
|
|
- Funktionalitäten: System Control, Process Management, Windows Fixes,
|
|
Watchdog Monitoring, Error Recovery, Timeout Management
|
|
- Breaking Changes: Keine - Alle Original-APIs bleiben verfügbar
|
|
- Legacy Imports: Verfügbar über Wrapper-Funktionen
|
|
|
|
Changelog:
|
|
- v1.0 (2025-06-09): Initial massive consolidation for IHK project
|
|
|
|
MASSIVE KONSOLIDIERUNG für Projektarbeit MYP - IHK-Dokumentation
|
|
Author: MYP Team - Till Tomczak
|
|
Ziel: 88% Datei-Reduktion bei vollständiger Funktionalitäts-Erhaltung
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
import json
|
|
import signal
|
|
import threading
|
|
import subprocess
|
|
import platform
|
|
import traceback
|
|
import shutil
|
|
import psutil
|
|
import requests
|
|
import atexit
|
|
from pathlib import Path
|
|
from datetime import datetime, timedelta
|
|
from typing import Dict, List, Any, Optional, Tuple, Union, Callable
|
|
from dataclasses import dataclass
|
|
from urllib3.exceptions import InsecureRequestWarning
|
|
|
|
# MYP Utils
|
|
from utils.logging_config import get_logger
|
|
|
|
# Logger
|
|
core_logger = get_logger("core_system")
|
|
windows_logger = get_logger("windows_fixes")
|
|
watchdog_logger = get_logger("watchdog")
|
|
shutdown_logger = get_logger("shutdown")
|
|
recovery_logger = get_logger("error_recovery")
|
|
|
|
# SSL-Warnungen unterdrücken für localhost
|
|
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
|
|
|
|
# Globale Konfiguration
|
|
# Public API of the consolidated module (controls ``from <module> import *``).
# Every legacy wrapper defined at the bottom of the file is listed so that the
# original star-import behaviour of the six merged modules keeps working.
__all__ = [
    # Configuration
    'SystemConfig',
    # System Control
    'SystemController', 'get_system_controller',
    # Windows Fixes
    'WindowsThreadManager', 'get_windows_thread_manager', 'apply_all_windows_fixes',
    # Watchdog
    'WatchdogManager', 'WatchdogConfig',
    # Shutdown Management
    'ShutdownManager', 'get_shutdown_manager',
    # Error Recovery
    'ErrorRecoverySystem', 'get_error_recovery',
    # Timeout Management
    'TimeoutManager', 'get_timeout_manager',
    # Legacy Compatibility
    'fix_windows_socket_issues', 'safe_subprocess_run', 'restart_system_service',
    'get_system_status', 'register_shutdown_hook', 'graceful_shutdown',
    'recover_service', 'force_quit_process',
]
|
|
|
|
# ===== CONFIGURATION MANAGEMENT =====
|
|
|
|
@dataclass
class SystemConfig:
    """Central configuration values shared by the system-management components.

    The field order is part of the generated ``__init__`` signature and must
    therefore stay stable.
    """

    # --- Installation / service identity -------------------------------
    # (same keys and default values as ``WatchdogConfig.defaults``)
    app_dir: str = "/opt/myp"
    https_service: str = "myp-https"
    kiosk_service: str = "myp-kiosk"
    kiosk_user: str = "kiosk"
    https_url: str = "https://localhost:443"

    # --- Monitoring intervals and thresholds ---------------------------
    check_interval: int = 30
    https_timeout: int = 10
    restart_delay: int = 15
    max_memory_percent: int = 85
    cert_expire_days: int = 7
    log_rotation_size_mb: int = 10

    # --- Restart policy ------------------------------------------------
    max_restart_attempts: int = 3
    restart_cooldown: int = 300

    # --- Feature switches ----------------------------------------------
    enable_auto_cleanup: bool = True
    enable_performance_monitoring: bool = True

    # --- Shutdown ------------------------------------------------------
    shutdown_timeout: int = 30
    force_shutdown_timeout: int = 60

    # --- Error recovery (read by ErrorRecoverySystem) ------------------
    recovery_max_attempts: int = 5
    recovery_backoff_base: float = 2.0
|
|
|
|
# ===== WINDOWS COMPATIBILITY LAYER =====
|
|
|
|
class WindowsThreadManager:
    """Track worker threads and cleanup callbacks and shut them down in order.

    Threads and cleanup functions are registered explicitly. On Windows
    (``os.name == 'nt'``) the constructor also installs handlers for SIGINT,
    SIGTERM and, when available, SIGBREAK that call :meth:`shutdown_all`.
    """

    # Seconds to wait for each still-running managed thread during shutdown.
    THREAD_JOIN_TIMEOUT = 5

    def __init__(self):
        self.managed_threads: List[threading.Thread] = []
        self.cleanup_functions: List[Callable] = []
        self.shutdown_event = threading.Event()
        # Re-entrant on purpose: the signal handler may interrupt a
        # registration call on the same thread while the lock is held; a plain
        # Lock would then deadlock inside shutdown_all().
        self._lock = threading.RLock()
        self._is_shutting_down = False

        if os.name == 'nt':
            self._register_signal_handlers()

    def _register_signal_handlers(self):
        """Install the shutdown signal handlers; failures are logged, not raised."""
        try:
            signal.signal(signal.SIGINT, self._signal_handler)
            signal.signal(signal.SIGTERM, self._signal_handler)
            if hasattr(signal, 'SIGBREAK'):
                signal.signal(signal.SIGBREAK, self._signal_handler)
            windows_logger.debug("✅ Windows Signal-Handler registriert")
        except Exception as e:
            windows_logger.warning(f"⚠️ Signal-Handler konnten nicht registriert werden: {e}")

    def _signal_handler(self, sig, frame):
        """Signal callback: start the shutdown unless one is already running."""
        if not self._is_shutting_down:
            windows_logger.warning(f"🛑 Windows Signal {sig} empfangen - initiiere Shutdown")
            self.shutdown_all()

    def register_thread(self, thread: threading.Thread):
        """Register *thread* so that shutdown_all() waits for it (duplicates are ignored)."""
        with self._lock:
            if thread not in self.managed_threads:
                self.managed_threads.append(thread)
                windows_logger.debug(f"📝 Thread {thread.name} registriert")

    def register_cleanup_function(self, func: Callable):
        """Register *func* to be called once during shutdown_all() (duplicates are ignored)."""
        with self._lock:
            if func not in self.cleanup_functions:
                self.cleanup_functions.append(func)
                windows_logger.debug(f"📝 Cleanup-Funktion registriert")

    @staticmethod
    def _callable_name(func) -> str:
        """Return a printable name; partials and callable objects have no ``__name__``."""
        return getattr(func, '__name__', repr(func))

    def shutdown_all(self):
        """Run all cleanup functions, then wait for the managed threads.

        Only the first call does any work; later calls return immediately.
        Exceptions raised by cleanup functions or while joining are logged and
        do not abort the shutdown sequence.
        """
        with self._lock:
            # Check and set atomically so two concurrent callers cannot both proceed.
            if self._is_shutting_down:
                return
            self._is_shutting_down = True
            # Snapshot: registrations made during shutdown must not change
            # the sequences being iterated.
            cleanup_functions = list(self.cleanup_functions)
            managed_threads = list(self.managed_threads)

        windows_logger.info("🔄 Starte Windows Thread-Shutdown...")
        self.shutdown_event.set()

        # Run cleanup functions
        for func in cleanup_functions:
            func_name = self._callable_name(func)
            try:
                windows_logger.debug(f"🧹 Führe Cleanup-Funktion aus: {func_name}")
                func()
            except Exception as e:
                windows_logger.error(f"❌ Fehler bei Cleanup-Funktion {func_name}: {e}")

        # Wait for threads; the calling thread cannot join itself.
        current = threading.current_thread()
        active_threads = [
            t for t in managed_threads if t.is_alive() and t is not current
        ]
        if active_threads:
            windows_logger.info(f"⏳ Warte auf {len(active_threads)} aktive Threads...")

            for thread in active_threads:
                try:
                    thread.join(timeout=self.THREAD_JOIN_TIMEOUT)
                    if thread.is_alive():
                        windows_logger.warning(f"⚠️ Thread {thread.name} konnte nicht beendet werden")
                    else:
                        windows_logger.debug(f"✅ Thread {thread.name} erfolgreich beendet")
                except Exception as e:
                    windows_logger.error(f"❌ Fehler beim Beenden von Thread {thread.name}: {e}")

        windows_logger.info("✅ Windows Thread-Shutdown abgeschlossen")
|
|
|
|
def fix_windows_socket_issues():
    """Install Windows-only socket tweaks; does nothing on other platforms.

    On Windows this attaches a ``windows_bind_with_reuse`` method to
    ``socket.socket`` (unless one is already present) and sets the global
    default socket timeout to 30 seconds. Any failure is logged as a warning
    and swallowed.
    """
    running_on_windows = os.name == 'nt'
    if not running_on_windows:
        return

    try:
        import socket

        already_patched = hasattr(socket.socket, 'windows_bind_with_reuse')
        if not already_patched:

            def windows_bind_with_reuse(self, address):
                """Try to enable SO_REUSEADDR (best effort), then bind to *address*."""
                try:
                    self.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                    windows_logger.debug(f"SO_REUSEADDR aktiviert für Socket {address}")
                except Exception as reuse_error:
                    windows_logger.debug(f"SO_REUSEADDR konnte nicht gesetzt werden: {reuse_error}")
                return self.bind(address)

            setattr(socket.socket, 'windows_bind_with_reuse', windows_bind_with_reuse)

        socket.setdefaulttimeout(30)
        windows_logger.debug("✅ Windows Socket-Optimierungen angewendet")

    except Exception as error:
        windows_logger.warning(f"⚠️ Socket-Optimierungen konnten nicht angewendet werden: {error}")
|
|
|
|
def safe_subprocess_run(*args, **kwargs):
    """Call ``subprocess.run`` with UTF-8 text decoding and a default timeout.

    Behaviour added on top of ``subprocess.run``:

    * In text mode (``text=True`` or its alias ``universal_newlines=True``)
      without an explicit ``encoding``, UTF-8 is used; ``errors`` defaults to
      ``'replace'`` but a caller-supplied ``errors`` value is respected.
    * ``timeout`` defaults to 30 seconds when the caller passes none.
    * On ``UnicodeDecodeError`` the command is run a second time in binary
      mode and that result is returned.

    Returns:
        The ``subprocess.CompletedProcess`` produced by ``subprocess.run``.

    Raises:
        subprocess.TimeoutExpired: re-raised after logging a warning.
    """
    import subprocess

    text_mode = bool(kwargs.get('text', False) or kwargs.get('universal_newlines', False))
    if text_mode and 'encoding' not in kwargs:
        kwargs['encoding'] = 'utf-8'
        # Do not clobber an error policy the caller chose deliberately.
        kwargs.setdefault('errors', 'replace')

    kwargs.setdefault('timeout', 30)

    try:
        return subprocess.run(*args, **kwargs)
    except subprocess.TimeoutExpired:
        windows_logger.warning(f"Subprocess-Timeout: {args}")
        raise  # bare raise keeps the original traceback
    except UnicodeDecodeError as e:
        windows_logger.error(f"Unicode-Decode-Fehler: {e}")
        # NOTE(review): this executes the command a second time; commands with
        # side effects will apply them twice.
        text_options = ('text', 'universal_newlines', 'encoding', 'errors')
        kwargs_fallback = {
            key: value for key, value in kwargs.items() if key not in text_options
        }
        return subprocess.run(*args, **kwargs_fallback)
|
|
|
|
def apply_all_windows_fixes():
    """Apply every Windows-specific fix in one go; does nothing on other platforms.

    Steps on Windows: set the ``PYTHONIOENCODING`` / ``PYTHONUTF8`` environment
    variables, apply the socket fixes and make sure the global
    WindowsThreadManager exists. Errors are logged and swallowed.
    """
    if os.name != 'nt':
        return

    try:
        windows_logger.info("🔧 Wende Windows-spezifische Fixes an...")

        # Encoding fixes
        os.environ.update({
            'PYTHONIOENCODING': 'utf-8',
            'PYTHONUTF8': '1',
        })

        # Socket fixes
        fix_windows_socket_issues()

        # Create the thread-manager singleton (return value intentionally unused)
        get_windows_thread_manager()

        windows_logger.info("✅ Alle Windows-Fixes erfolgreich angewendet")

    except Exception as error:
        windows_logger.error(f"❌ Fehler beim Anwenden der Windows-Fixes: {error}")
|
|
|
|
# ===== ERROR RECOVERY SYSTEM =====
|
|
|
|
class ErrorRecoverySystem:
    """Rate-limited service recovery with exponential backoff.

    Recovery attempts are tracked per ``"<error_type>:<component>"`` key over
    a sliding one-hour window. Once ``config.recovery_max_attempts`` attempts
    exist inside that window, further attempts are refused.
    """

    # Upper bound for the backoff delay in seconds (5 minutes).
    MAX_BACKOFF_SECONDS = 300

    def __init__(self, config: "SystemConfig"):
        self.config = config
        # key -> list of datetime stamps of attempts within the last hour
        self.recovery_attempts = {}
        # one dict per successful service recovery
        self.recovery_history = []
        self.recovery_lock = threading.Lock()

    def add_recovery_attempt(self, error_type: str, component: str) -> bool:
        """Record a recovery attempt if the hourly limit has not been reached.

        Returns:
            True when the attempt was recorded, False when the limit for this
            error type and component is already exhausted.
        """
        key = f"{error_type}:{component}"
        now = datetime.now()
        cutoff = now - timedelta(hours=1)

        with self.recovery_lock:
            # Drop attempts older than one hour
            recent = [
                stamp for stamp in self.recovery_attempts.get(key, [])
                if stamp > cutoff
            ]
            self.recovery_attempts[key] = recent

            # Enforce the maximum number of attempts
            if len(recent) >= self.config.recovery_max_attempts:
                recovery_logger.error(f"Max Recovery-Versuche erreicht für {key}")
                return False

            recent.append(now)
            return True

    def calculate_backoff_delay(self, attempt_count: int) -> float:
        """Return ``recovery_backoff_base ** attempt_count`` capped at 300 seconds.

        A float power that is too large raises ``OverflowError`` before any
        cap could apply; that case is mapped to the cap as well.
        """
        try:
            delay = self.config.recovery_backoff_base ** attempt_count
        except OverflowError:
            return float(self.MAX_BACKOFF_SECONDS)
        return float(min(delay, self.MAX_BACKOFF_SECONDS))

    def recover_service(self, service_name: str, error_details: str = "") -> bool:
        """Try to bring the systemd service *service_name* back up.

        Blocks for the backoff delay, then checks ``systemctl is-active`` and
        issues ``systemctl restart`` when the service is not active.

        Returns:
            True if the service is active or was restarted successfully,
            False if the attempt limit is reached, the restart failed, or any
            exception occurred.
        """
        if not self.add_recovery_attempt("service_failure", service_name):
            return False

        with self.recovery_lock:
            attempt_count = len(
                self.recovery_attempts.get(f"service_failure:{service_name}", [])
            )
        delay = self.calculate_backoff_delay(attempt_count)

        recovery_logger.info(f"🔄 Service Recovery für {service_name} (Versuch {attempt_count})")
        recovery_logger.info(f"⏳ Warte {delay:.1f}s vor Recovery-Versuch...")

        time.sleep(delay)

        try:
            # Check service state
            result = subprocess.run(
                ["systemctl", "is-active", service_name],
                capture_output=True, text=True, timeout=10
            )

            if result.returncode == 0:
                recovery_logger.info(f"✅ Service {service_name} ist bereits aktiv")
                return True

            # Restart the service
            recovery_logger.info(f"🔄 Starte Service {service_name} neu...")
            result = subprocess.run(
                ["systemctl", "restart", service_name],
                capture_output=True, text=True, timeout=30
            )

            if result.returncode != 0:
                recovery_logger.error(f"❌ Service {service_name} Neustart fehlgeschlagen: {result.stderr}")
                return False

            recovery_logger.info(f"✅ Service {service_name} erfolgreich neugestartet")

            # Update history under the lock; get_recovery_status() reads it
            # under the same lock.
            with self.recovery_lock:
                self.recovery_history.append({
                    'timestamp': datetime.now(),
                    'type': 'service_recovery',
                    'component': service_name,
                    'success': True,
                    'attempt': attempt_count,
                    'error_details': error_details
                })

            return True

        except Exception as e:
            recovery_logger.error(f"❌ Recovery-Fehler für Service {service_name}: {e}")
            return False

    def get_recovery_status(self) -> Dict[str, Any]:
        """Return a snapshot: number of keys, history size, last 10 history entries, attempts per key."""
        with self.recovery_lock:
            return {
                'active_recoveries': len(self.recovery_attempts),
                'total_history_entries': len(self.recovery_history),
                'recent_recoveries': list(self.recovery_history[-10:]),
                'current_attempts': {
                    key: len(attempts)
                    for key, attempts in self.recovery_attempts.items()
                }
            }
|
|
|
|
# ===== TIMEOUT MANAGEMENT =====
|
|
|
|
class TimeoutManager:
    """Manage named timeouts and terminate processes with a grace period."""

    def __init__(self, config: "SystemConfig"):
        self.config = config
        # operation_id -> running threading.Timer
        self.active_timeouts = {}
        self.timeout_lock = threading.Lock()

    def set_timeout(self, operation_id: str, timeout_seconds: int,
                    callback: Optional[Callable] = None) -> threading.Timer:
        """Start a timer for *operation_id* and return it.

        When the timer fires, the entry is removed from ``active_timeouts``, a
        warning is logged and *callback* (if given) is invoked; exceptions from
        the callback are logged. A timer already registered under the same
        *operation_id* is cancelled and replaced.
        """
        def timeout_handler():
            with self.timeout_lock:
                # Only remove the entry if it still belongs to this timer; a
                # superseded timer must not evict its replacement.
                if self.active_timeouts.get(operation_id) is timer:
                    del self.active_timeouts[operation_id]

            core_logger.warning(f"⏰ Timeout erreicht für Operation: {operation_id}")

            if callback:
                try:
                    callback()
                except Exception as e:
                    core_logger.error(f"❌ Timeout-Callback Fehler: {e}")

        timer = threading.Timer(timeout_seconds, timeout_handler)

        with self.timeout_lock:
            previous = self.active_timeouts.get(operation_id)
            self.active_timeouts[operation_id] = timer

        # Cancel the replaced timer so it neither leaks nor fires later.
        if previous is not None:
            previous.cancel()

        timer.start()
        return timer

    def cancel_timeout(self, operation_id: str) -> bool:
        """Cancel the timer for *operation_id*; return False if none is registered."""
        with self.timeout_lock:
            timer = self.active_timeouts.pop(operation_id, None)
            if timer is None:
                return False
            timer.cancel()
            return True

    def force_quit_process(self, pid: int, grace_period: int = 10) -> bool:
        """Terminate process *pid*, escalating to kill after *grace_period* seconds.

        Returns:
            True if the process ended (or did not exist), False on any other error.
        """
        try:
            process = psutil.Process(pid)
            process_name = process.name()

            core_logger.info(f"🔄 Beende Prozess {process_name} (PID: {pid}) mit Grace Period {grace_period}s")

            # Graceful termination first
            process.terminate()

            try:
                process.wait(timeout=grace_period)
                core_logger.info(f"✅ Prozess {process_name} sanft beendet")
                return True
            except psutil.TimeoutExpired:
                # Force kill; a second timeout here falls through to the
                # generic handler below and yields False.
                core_logger.warning(f"⚠️ Grace Period abgelaufen - Force Kill für {process_name}")
                process.kill()
                process.wait(timeout=5)
                core_logger.info(f"✅ Prozess {process_name} force-beendet")
                return True

        except psutil.NoSuchProcess:
            core_logger.info(f"✅ Prozess {pid} bereits beendet")
            return True
        except Exception as e:
            core_logger.error(f"❌ Fehler beim Beenden von Prozess {pid}: {e}")
            return False
|
|
|
|
# ===== SHUTDOWN MANAGEMENT =====
|
|
|
|
class ShutdownManager:
    """Run registered shutdown hooks once, in priority order.

    The constructor installs SIGTERM/SIGINT handlers and an ``atexit`` hook
    that all call :meth:`graceful_shutdown`.

    NOTE(review): the signal handler only runs the hooks; it does not
    terminate the process afterwards - confirm that this is intended.
    """

    def __init__(self, config: "SystemConfig"):
        self.config = config
        # list of (priority, callable), kept sorted ascending by priority
        self.shutdown_hooks = []
        self.is_shutting_down = False
        # Re-entrant: the signal handler may interrupt register_shutdown_hook()
        # on the same thread while the lock is held.
        self.shutdown_lock = threading.RLock()

        # Register signal handlers
        self._register_signal_handlers()

        # Register atexit handler
        atexit.register(self.graceful_shutdown)

    def _register_signal_handlers(self):
        """Install SIGTERM/SIGINT handlers; ``signal.signal`` raises ValueError off the main thread."""
        try:
            signal.signal(signal.SIGTERM, self._signal_handler)
            signal.signal(signal.SIGINT, self._signal_handler)
        except ValueError as e:
            shutdown_logger.warning(f"⚠️ Signal-Handler konnten nicht registriert werden: {e}")

    def _signal_handler(self, signum, frame):
        """Signal callback: log the signal and run the graceful shutdown."""
        shutdown_logger.info(f"🛑 Shutdown-Signal {signum} empfangen")
        self.graceful_shutdown()

    @staticmethod
    def _hook_name(func) -> str:
        """Return a printable name; partials and callable objects have no ``__name__``."""
        return getattr(func, '__name__', repr(func))

    def register_shutdown_hook(self, func: Callable, priority: int = 100):
        """Register *func* to run at shutdown; lower *priority* values run first.

        The sort is stable, so hooks with equal priority run in registration order.
        """
        with self.shutdown_lock:
            self.shutdown_hooks.append((priority, func))
            self.shutdown_hooks.sort(key=lambda x: x[0])  # sort by priority
        shutdown_logger.debug(f"📝 Shutdown-Hook registriert: {self._hook_name(func)} (Priorität: {priority})")

    def graceful_shutdown(self):
        """Execute all shutdown hooks exactly once.

        Later calls (for example signal handler followed by atexit) return
        immediately. Exceptions raised by a hook are logged and do not stop
        the remaining hooks.
        """
        with self.shutdown_lock:
            # Check and set atomically so concurrent callers cannot both proceed.
            if self.is_shutting_down:
                return
            self.is_shutting_down = True
            hooks = list(self.shutdown_hooks)

        shutdown_logger.info("🔄 Starte ordnungsgemäßes System-Shutdown...")

        # Run hooks (already ordered by priority)
        for priority, hook in hooks:
            hook_name = self._hook_name(hook)
            try:
                shutdown_logger.debug(f"🔄 Führe Shutdown-Hook aus: {hook_name}")
                hook()
            except Exception as e:
                shutdown_logger.error(f"❌ Fehler in Shutdown-Hook {hook_name}: {e}")

        shutdown_logger.info("✅ Ordnungsgemäßes Shutdown abgeschlossen")
|
|
|
|
# ===== WATCHDOG SYSTEM =====
|
|
|
|
class WatchdogConfig:
    """Watchdog settings backed by ``<app_dir>/config/watchdog.json``.

    Values from the file override the built-in defaults. If the file does not
    exist it is created with the defaults.
    """

    def __init__(self, app_dir: str = "/opt/myp"):
        self.app_dir = Path(app_dir)
        self.config_file = self.app_dir / "config" / "watchdog.json"

        self.defaults = dict(
            https_service="myp-https",
            kiosk_service="myp-kiosk",
            kiosk_user="kiosk",
            https_url="https://localhost:443",
            check_interval=30,
            https_timeout=10,
            restart_delay=15,
            max_memory_percent=85,
            cert_expire_days=7,
            log_rotation_size_mb=10,
            max_restart_attempts=3,
            restart_cooldown=300,
            enable_auto_cleanup=True,
            enable_performance_monitoring=True,
        )

        self.config = self.load_config()

    def load_config(self) -> Dict:
        """Return the defaults merged with the file contents.

        A missing file is written with the defaults first. Any error is logged
        and a copy of the defaults is returned.
        """
        try:
            if not self.config_file.exists():
                self.save_config(self.defaults)
                return self.defaults.copy()

            with open(self.config_file, 'r', encoding='utf-8') as handle:
                file_values = json.load(handle)

            effective = self.defaults.copy()
            effective.update(file_values)
            return effective
        except Exception as error:
            watchdog_logger.error(f"Fehler beim Laden der Konfiguration: {error}")
            return self.defaults.copy()

    def save_config(self, config: Dict) -> None:
        """Write *config* as indented JSON, creating the directory if needed; errors are logged."""
        try:
            target_dir = self.config_file.parent
            target_dir.mkdir(parents=True, exist_ok=True)
            with open(self.config_file, 'w', encoding='utf-8') as handle:
                json.dump(config, handle, indent=2, ensure_ascii=False)
        except Exception as error:
            watchdog_logger.error(f"Fehler beim Speichern der Konfiguration: {error}")

    def get(self, key: str, default=None):
        """Return the value stored for *key*, or *default* if the key is absent."""
        return self.config.get(key, default)
|
|
|
|
class WatchdogManager:
    """Watchdog loop that monitors the HTTPS service and system memory.

    Each cycle checks the configured systemd service and its HTTPS endpoint,
    restarts the service when needed (with cooldown and a restart limit) and
    cleans caches when memory usage exceeds the configured threshold.
    """

    def __init__(self, app_dir: str = "/opt/myp"):
        self.config = WatchdogConfig(app_dir)
        self.running = False
        # service name -> number of successful restarts since last healthy cycle
        self.restart_counts = {}
        # service name -> datetime of the last successful restart
        self.last_restart_times = {}

        self.setup_logging()
        try:
            signal.signal(signal.SIGTERM, self._signal_handler)
            signal.signal(signal.SIGINT, self._signal_handler)
        except ValueError as e:
            # signal.signal() raises ValueError outside the main thread; the
            # watchdog can still be stopped by setting ``running = False``.
            watchdog_logger.warning(f"Signal-Handler konnten nicht registriert werden: {e}")

    def setup_logging(self):
        """Configure root logging to stderr and, if writable, to the watchdog log file."""
        import logging

        log_file = Path("/var/log/kiosk-watchdog-python.log")
        handlers = [logging.StreamHandler()]
        try:
            log_file.parent.mkdir(parents=True, exist_ok=True)
            handlers.insert(0, logging.FileHandler(log_file))
        except OSError as e:
            # Without permission to write the log file the watchdog should
            # still start with console logging only.
            watchdog_logger.warning(f"Log-Datei {log_file} nicht beschreibbar: {e}")

        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s [%(levelname)s] %(message)s',
            handlers=handlers
        )

    def _signal_handler(self, signum, frame):
        """Signal callback: ask the main loop to stop."""
        watchdog_logger.info(f"Signal {signum} empfangen - beende Watchdog...")
        self.running = False

    def is_service_active(self, service_name: str) -> bool:
        """Return True if ``systemctl is-active`` reports *service_name* as active.

        Any error, including a timeout, is treated as "not active".
        """
        try:
            result = subprocess.run(
                ["systemctl", "is-active", "--quiet", service_name],
                capture_output=True,
                timeout=10  # never let a hung systemctl block the monitoring loop
            )
            return result.returncode == 0
        except Exception:
            return False

    def restart_service(self, service_name: str) -> bool:
        """Restart *service_name*, honouring the cooldown and the restart limit.

        Returns:
            True if ``systemctl restart`` succeeded, False if the service is in
            cooldown, the limit is reached, or the restart failed.

        NOTE(review): only successful restarts are counted and time-stamped, so
        a restart command that keeps failing is retried every cycle without
        limit - confirm whether failed attempts should count as well.
        """
        now = datetime.now()

        last_restart = self.last_restart_times.get(service_name)
        if last_restart is not None:
            time_since_last = (now - last_restart).total_seconds()
            if time_since_last < self.config.get("restart_cooldown", 300):
                watchdog_logger.warning(f"Service {service_name} im Cooldown ({time_since_last:.0f}s)")
                return False

        restart_count = self.restart_counts.get(service_name, 0)
        max_attempts = self.config.get("max_restart_attempts", 3)

        if restart_count >= max_attempts:
            watchdog_logger.error(f"Service {service_name} erreichte max Restart-Versuche ({max_attempts})")
            return False

        try:
            watchdog_logger.info(f"Starte Service neu: {service_name} (Versuch {restart_count + 1}/{max_attempts})")

            result = subprocess.run(
                ["systemctl", "restart", service_name],
                capture_output=True, text=True, timeout=30
            )

            if result.returncode != 0:
                watchdog_logger.error(f"Service-Neustart fehlgeschlagen: {result.stderr}")
                return False

            self.restart_counts[service_name] = restart_count + 1
            self.last_restart_times[service_name] = now
            # Give the service time to come up before the next check
            time.sleep(self.config.get("restart_delay", 15))
            watchdog_logger.info(f"Service {service_name} erfolgreich neugestartet")
            return True

        except Exception as e:
            watchdog_logger.error(f"Service-Neustart Fehler: {e}")
            return False

    def check_https_connectivity(self) -> bool:
        """Return True if the configured HTTPS URL answers with a status below 500.

        Certificate verification is disabled. Any exception counts as unreachable.
        """
        try:
            url = self.config.get("https_url", "https://localhost:443")
            timeout = self.config.get("https_timeout", 10)

            # Context manager closes the session's connections after every
            # check instead of leaking one session per monitoring cycle.
            with requests.Session() as session:
                session.verify = False
                response = session.get(url, timeout=timeout, allow_redirects=True)
                return response.status_code < 500

        except Exception as e:
            watchdog_logger.debug(f"HTTPS-Konnektivitätsprüfung fehlgeschlagen: {e}")
            return False

    def run_monitoring_cycle(self):
        """Run one monitoring pass: service health first, then memory usage."""
        try:
            # Check HTTPS backend
            service_name = self.config.get("https_service", "myp-https")

            if not self.is_service_active(service_name):
                watchdog_logger.error("HTTPS-Service nicht aktiv")
                self.restart_service(service_name)
            elif not self.check_https_connectivity():
                watchdog_logger.error("HTTPS Backend nicht erreichbar")
                self.restart_service(service_name)
            else:
                # Service is healthy - reset restart bookkeeping
                self.restart_counts.pop(service_name, None)
                self.last_restart_times.pop(service_name, None)

            # Check system resources
            try:
                memory_usage = psutil.virtual_memory().percent
                max_memory = self.config.get("max_memory_percent", 85)

                if memory_usage > max_memory:
                    watchdog_logger.warning(f"Hohe Speichernutzung: {memory_usage:.1f}%")
                    # Automatic cleanup enabled?
                    if self.config.get("enable_auto_cleanup", True):
                        self.cleanup_system_resources()

            except Exception as e:
                watchdog_logger.error(f"Ressourcen-Check fehlgeschlagen: {e}")

        except Exception as e:
            watchdog_logger.error(f"Monitoring-Zyklus Fehler: {e}")

    @staticmethod
    def _clear_directory_contents(directory: str) -> None:
        """Delete the non-hidden entries of *directory*, keeping the directory itself.

        Mirrors ``rm -rf <directory>/*``: names starting with a dot are not
        matched by the shell glob and are therefore left untouched.
        """
        with os.scandir(directory) as iterator:
            entries = [entry for entry in iterator if not entry.name.startswith('.')]

        for entry in entries:
            try:
                if entry.is_dir(follow_symlinks=False):
                    shutil.rmtree(entry.path)
                else:
                    os.unlink(entry.path)
            except OSError as e:
                watchdog_logger.debug(f"Konnte {entry.path} nicht löschen: {e}")

    def cleanup_system_resources(self):
        """Empty the kiosk user's browser caches and drop the kernel page cache.

        Errors (for example missing permissions) are logged and swallowed.
        """
        try:
            watchdog_logger.info("Bereinige Systemressourcen...")

            # Clean browser cache
            kiosk_user = self.config.get("kiosk_user", "kiosk")
            cache_dirs = [
                f"/home/{kiosk_user}/.chromium-kiosk/Default/Cache",
                f"/home/{kiosk_user}/.cache"
            ]

            for cache_dir in cache_dirs:
                if os.path.isdir(cache_dir):
                    # The previous ``subprocess.run([...], shell=True)`` call
                    # passed only "rm" to the shell, so nothing was deleted.
                    self._clear_directory_contents(cache_dir)

            # Flush and drop system caches
            subprocess.run(["sync"])
            with open("/proc/sys/vm/drop_caches", "w") as f:
                f.write("3")

            watchdog_logger.info("Systemressourcen bereinigt")

        except Exception as e:
            watchdog_logger.error(f"Systemressourcen-Bereinigung fehlgeschlagen: {e}")

    def run(self):
        """Main loop: run monitoring cycles until ``running`` becomes False."""
        self.running = True
        watchdog_logger.info("🚀 Kiosk-Watchdog (Python) gestartet")

        while self.running:
            try:
                self.run_monitoring_cycle()

                # Sleep in one-second steps so a stop request is noticed
                # quickly. The interval is coerced to at least one whole
                # second: a float from the JSON file would break range(), and
                # zero would turn the loop into a busy loop.
                check_interval = max(1, int(self.config.get("check_interval", 30)))
                for _ in range(check_interval):
                    if not self.running:
                        break
                    time.sleep(1)

            except KeyboardInterrupt:
                watchdog_logger.info("Keyboard Interrupt - beende Watchdog...")
                break
            except Exception as e:
                watchdog_logger.error(f"Unerwarteter Fehler: {e}")
                time.sleep(5)

        watchdog_logger.info("✅ Kiosk-Watchdog beendet")
|
|
|
|
# ===== SYSTEM CONTROLLER =====
|
|
|
|
class SystemController:
    """Facade that wires together shutdown, recovery, timeout and watchdog handling."""

    def __init__(self, config: "Optional[SystemConfig]" = None):
        self.config = config or SystemConfig()
        # Use the module-level singleton: apply_all_windows_fixes() creates it
        # anyway, and a second manager would install competing signal handlers
        # and split the thread registrations.
        self.windows_manager = get_windows_thread_manager() if os.name == 'nt' else None
        self.shutdown_manager = ShutdownManager(self.config)
        self.error_recovery = ErrorRecoverySystem(self.config)
        self.timeout_manager = TimeoutManager(self.config)
        self.watchdog = None
        self._watchdog_thread = None

        # Apply Windows fixes if needed
        if os.name == 'nt':
            apply_all_windows_fixes()

    def start_watchdog(self, app_dir: Optional[str] = None) -> bool:
        """Start the watchdog loop in a daemon thread.

        Args:
            app_dir: application directory for the watchdog configuration;
                defaults to ``config.app_dir``.

        Returns:
            True if the watchdog was started or is already running, False if
            starting failed (the error is logged).
        """
        try:
            existing = getattr(self, '_watchdog_thread', None)
            if existing is not None and existing.is_alive():
                # A second loop would issue duplicate restarts and cleanups.
                core_logger.warning("⚠️ Watchdog läuft bereits")
                return True

            app_dir = app_dir or self.config.app_dir
            self.watchdog = WatchdogManager(app_dir)

            # Run the watchdog in its own thread
            watchdog_thread = threading.Thread(
                target=self.watchdog.run,
                name="WatchdogThread",
                daemon=True
            )

            if self.windows_manager:
                self.windows_manager.register_thread(watchdog_thread)

            watchdog_thread.start()
            self._watchdog_thread = watchdog_thread
            core_logger.info("✅ Watchdog erfolgreich gestartet")
            return True

        except Exception as e:
            core_logger.error(f"❌ Watchdog-Start fehlgeschlagen: {e}")
            return False

    def restart_system_service(self, service_name: str) -> bool:
        """Restart *service_name* through the error-recovery system and return its result."""
        return self.error_recovery.recover_service(service_name)

    def get_system_status(self) -> Dict[str, Any]:
        """Return a status snapshot, or ``{'error': <message>}`` if collecting it fails.

        ``psutil.cpu_percent(interval=1)`` blocks for one second.
        """
        try:
            return {
                'timestamp': datetime.now().isoformat(),
                'platform': platform.system(),
                'python_version': sys.version,
                'cpu_usage': psutil.cpu_percent(interval=1),
                'memory_usage': psutil.virtual_memory().percent,
                'disk_usage': psutil.disk_usage('/').percent,
                'recovery_status': self.error_recovery.get_recovery_status(),
                'active_timeouts': len(self.timeout_manager.active_timeouts),
                'is_shutting_down': self.shutdown_manager.is_shutting_down,
                'watchdog_running': self.watchdog.running if self.watchdog else False,
                'windows_fixes_active': os.name == 'nt'
            }
        except Exception as e:
            core_logger.error(f"❌ System-Status Fehler: {e}")
            return {'error': str(e)}
|
|
|
|
# ===== SINGLETON INSTANCES =====
|
|
|
|
_system_controller = None
|
|
_windows_thread_manager = None
|
|
_shutdown_manager = None
|
|
_error_recovery = None
|
|
_timeout_manager = None
|
|
|
|
def get_system_controller() -> SystemController:
    """Return the module-wide SystemController, creating it on first use."""
    global _system_controller
    controller = _system_controller
    if controller is None:
        controller = _system_controller = SystemController()
    return controller
|
|
|
|
def get_windows_thread_manager() -> WindowsThreadManager:
    """Return the module-wide WindowsThreadManager, creating it on first use."""
    global _windows_thread_manager
    manager = _windows_thread_manager
    if manager is None:
        manager = _windows_thread_manager = WindowsThreadManager()
    return manager
|
|
|
|
def get_shutdown_manager() -> ShutdownManager:
    """Return the module-wide ShutdownManager (default SystemConfig), creating it on first use."""
    global _shutdown_manager
    manager = _shutdown_manager
    if manager is None:
        manager = _shutdown_manager = ShutdownManager(SystemConfig())
    return manager
|
|
|
|
def get_error_recovery() -> ErrorRecoverySystem:
    """Return the module-wide ErrorRecoverySystem (default SystemConfig), creating it on first use."""
    global _error_recovery
    recovery = _error_recovery
    if recovery is None:
        recovery = _error_recovery = ErrorRecoverySystem(SystemConfig())
    return recovery
|
|
|
|
def get_timeout_manager() -> TimeoutManager:
    """Return the module-wide TimeoutManager (default SystemConfig), creating it on first use."""
    global _timeout_manager
    manager = _timeout_manager
    if manager is None:
        manager = _timeout_manager = TimeoutManager(SystemConfig())
    return manager
|
|
|
|
# ===== LEGACY COMPATIBILITY LAYER =====
|
|
|
|
# Original API-Kompatibilität für system_control.py
|
|
def restart_system_service(service_name: str) -> bool:
    """Legacy wrapper: restart *service_name* via the global SystemController."""
    controller = get_system_controller()
    return controller.restart_system_service(service_name)
|
|
|
|
def get_system_status() -> Dict[str, Any]:
    """Legacy wrapper: return the status dict of the global SystemController."""
    controller = get_system_controller()
    return controller.get_system_status()
|
|
|
|
# Original API-Kompatibilität für shutdown_manager.py
|
|
def register_shutdown_hook(func: Callable, priority: int = 100):
    """Legacy wrapper: register *func* with the global ShutdownManager."""
    manager = get_shutdown_manager()
    return manager.register_shutdown_hook(func, priority)
|
|
|
|
def graceful_shutdown():
    """Legacy wrapper: run the graceful shutdown of the global ShutdownManager."""
    manager = get_shutdown_manager()
    return manager.graceful_shutdown()
|
|
|
|
# Original API-Kompatibilität für error_recovery.py
|
|
def recover_service(service_name: str, error_details: str = "") -> bool:
    """Legacy wrapper: recover *service_name* via the global ErrorRecoverySystem."""
    recovery = get_error_recovery()
    return recovery.recover_service(service_name, error_details)
|
|
|
|
# Original API-Kompatibilität für timeout_force_quit_manager.py
|
|
def force_quit_process(pid: int, grace_period: int = 10) -> bool:
    """Legacy wrapper: terminate process *pid* via the global TimeoutManager."""
    manager = get_timeout_manager()
    return manager.force_quit_process(pid, grace_period)
|
|
|
|
# ===== AUTO-INITIALIZATION =====
|
|
|
|
# Automatisch Windows-Fixes beim Import anwenden
|
|
# Import-time side effect: on Windows the platform fixes are applied as soon as
# the module is loaded; a failure is only logged so the import itself succeeds.
if os.name == 'nt':
    try:
        apply_all_windows_fixes()
    except Exception as init_error:
        core_logger.warning(f"⚠️ Windows-Fixes konnten nicht automatisch angewendet werden: {init_error}")

# Always announce that the module finished loading.
core_logger.info("✅ Core System Management Module erfolgreich initialisiert")
core_logger.info(f"📊 Massive Konsolidierung: 6 Dateien → 1 Datei (88% Reduktion)")