"""
Monitoring and health check module for the MYP Flask application.

Provides endpoints for system monitoring and performance metrics.
"""

import datetime
import os
import sqlite3
import sys
import threading
import time
from collections import defaultdict

import psutil
from flask import Blueprint, current_app, g, jsonify, request

# Blueprint for the monitoring endpoints
monitoring_bp = Blueprint('monitoring', __name__, url_prefix='/monitoring')

# Upper bound on the number of duration samples kept per request key, so the
# in-process metrics store cannot grow without limit in a long-running server.
MAX_DURATION_SAMPLES = 1000

# In-process metrics store (module-level, not persisted)
metrics = {
    'requests_total': defaultdict(int),
    'request_duration': defaultdict(list),
    'database_queries': 0,
    'active_jobs': 0,
    'error_count': defaultdict(int),
    'startup_time': datetime.datetime.now()
}


class HealthCheck:
    """Collection of static system health checks.

    Every check returns a dict with at least a ``'status'`` key and never
    raises: any exception is converted into an ``'unhealthy'`` result.
    """

    @staticmethod
    def check_database():
        """
        Check the database connection.

        Returns:
            dict: Status and details of the database connection.
        """
        # Broad except is deliberate: a health check must report, not crash.
        try:
            db_path = current_app.config.get('DATABASE', 'instance/myp.db')

            # In-memory database (used for tests)
            if db_path == ':memory:':
                return {'status': 'healthy', 'message': 'In-Memory-Datenbank aktiv'}

            # File-based database: sqlite3.connect() would silently create a
            # missing file, so check for existence first.
            if not os.path.exists(db_path):
                return {'status': 'unhealthy', 'message': 'Datenbankdatei nicht gefunden'}

            # Test the connection; always close it, even if the query fails.
            conn = sqlite3.connect(db_path, timeout=5)
            try:
                conn.execute('SELECT 1')
            finally:
                conn.close()

            # Determine the file size
            db_size = os.path.getsize(db_path)

            return {
                'status': 'healthy',
                'message': 'Datenbankverbindung erfolgreich',
                'database_path': db_path,
                'database_size_bytes': db_size
            }
        except Exception as e:
            return {
                'status': 'unhealthy',
                'message': f'Datenbankfehler: {e}'
            }

    @staticmethod
    def check_disk_space():
        """
        Check the available disk space of the current working directory.

        Returns:
            dict: Status ('healthy', 'warning' above 80 % used, 'critical'
            above 90 % used) and details of the disk usage.
        """
        try:
            disk_usage = psutil.disk_usage('.')
            free_gb = disk_usage.free / (1024**3)
            total_gb = disk_usage.total / (1024**3)
            used_percent = (disk_usage.used / disk_usage.total) * 100

            status = 'healthy'
            if used_percent > 90:
                status = 'critical'
            elif used_percent > 80:
                status = 'warning'

            return {
                'status': status,
                'free_gb': round(free_gb, 2),
                'total_gb': round(total_gb, 2),
                'used_percent': round(used_percent, 2)
            }
        except Exception as e:
            return {
                'status': 'unhealthy',
                'message': f'Festplattenfehler: {e}'
            }

    @staticmethod
    def check_memory():
        """
        Check the memory usage.

        Returns:
            dict: Status ('healthy', 'warning' above 80 % used, 'critical'
            above 90 % used) and details of the memory usage.
        """
        try:
            memory = psutil.virtual_memory()

            status = 'healthy'
            if memory.percent > 90:
                status = 'critical'
            elif memory.percent > 80:
                status = 'warning'

            return {
                'status': status,
                'total_gb': round(memory.total / (1024**3), 2),
                'available_gb': round(memory.available / (1024**3), 2),
                'used_percent': round(memory.percent, 2)
            }
        except Exception as e:
            return {
                'status': 'unhealthy',
                'message': f'Speicherfehler: {e}'
            }

    @staticmethod
    def check_background_threads():
        """
        Check the background threads.

        Reports 'warning' when no live thread has 'job_checker' in its name.

        Returns:
            dict: Status of the background threads.
        """
        try:
            active_threads = [t.name for t in threading.enumerate() if t.is_alive()]
            job_checker_running = any('job_checker' in name for name in active_threads)

            return {
                'status': 'healthy' if job_checker_running else 'warning',
                'job_checker_running': job_checker_running,
                'active_threads': active_threads,
                'thread_count': len(active_threads)
            }
        except Exception as e:
            return {
                'status': 'unhealthy',
                'message': f'Thread-Fehler: {e}'
            }


@monitoring_bp.route('/health')
def health_check():
    """
    Comprehensive health check of all system components.

    Returns:
        JSON: Status of all system components. The HTTP status is 200 only
        when the overall status is 'healthy', otherwise 503.
    """
    checks = {
        'database': HealthCheck.check_database(),
        'disk_space': HealthCheck.check_disk_space(),
        'memory': HealthCheck.check_memory(),
        'background_threads': HealthCheck.check_background_threads()
    }

    # Determine the overall status: any 'unhealthy' wins; otherwise any
    # 'warning'/'critical' degrades the result.
    overall_status = 'healthy'
    for check in checks.values():
        if check['status'] == 'unhealthy':
            overall_status = 'unhealthy'
            break
        elif check['status'] in ['warning', 'critical']:
            overall_status = 'degraded'

    response = {
        'status': overall_status,
        'timestamp': datetime.datetime.now().isoformat(),
        'checks': checks
    }

    status_code = 200 if overall_status == 'healthy' else 503
    return jsonify(response), status_code


@monitoring_bp.route('/health/simple')
def simple_health_check():
    """
    Simple health check for load balancers.

    Returns:
        JSON: Simple status.
    """
    return jsonify({'status': 'ok', 'timestamp': datetime.datetime.now().isoformat()})


@monitoring_bp.route('/metrics')
def get_metrics():
    """
    Collect and return performance metrics.

    Returns:
        JSON: System and application metrics, or an error payload with HTTP
        500 if collecting them fails.
    """
    try:
        # System metrics. NOTE: interval=1 makes psutil sample CPU usage over
        # one second, so this call blocks the request for that long.
        cpu_percent = psutil.cpu_percent(interval=1)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage('.')

        # Compute uptime
        uptime = datetime.datetime.now() - metrics['startup_time']

        # Application metrics
        app_metrics = {
            'system': {
                'cpu_percent': cpu_percent,
                'memory_percent': memory.percent,
                'disk_percent': (disk.used / disk.total) * 100,
                'uptime_seconds': uptime.total_seconds()
            },
            'application': {
                'requests_total': dict(metrics['requests_total']),
                'database_queries_total': metrics['database_queries'],
                'active_jobs': metrics['active_jobs'],
                'error_count': dict(metrics['error_count']),
                'startup_time': metrics['startup_time'].isoformat()
            }
        }

        return jsonify(app_metrics)

    except Exception as e:
        current_app.logger.error(f"Fehler beim Sammeln der Metriken: {e}")
        return jsonify({'error': 'Metriken nicht verfügbar'}), 500


@monitoring_bp.route('/info')
def get_info():
    """
    Return general information about the application.

    Returns:
        JSON: Application information.
    """
    return jsonify({
        'application': 'MYP Backend',
        'version': '2.0.0',
        'flask_env': current_app.config.get('FLASK_ENV', 'unknown'),
        'debug': current_app.debug,
        'startup_time': metrics['startup_time'].isoformat(),
        'python_version': sys.version,
        'config': {
            'database': current_app.config.get('DATABASE'),
            'job_check_interval': current_app.config.get('JOB_CHECK_INTERVAL'),
            'security_enabled': current_app.config.get('SECURITY_ENABLED', False),
            'rate_limit_enabled': current_app.config.get('RATE_LIMIT_ENABLED', False)
        }
    })


def record_request_metric(endpoint, method, status_code, duration):
    """
    Record request metrics.

    Only the most recent ``MAX_DURATION_SAMPLES`` durations are kept per
    method/endpoint key so the store does not grow without bound.

    Args:
        endpoint: API endpoint
        method: HTTP method
        status_code: HTTP status code
        duration: Request duration in seconds
    """
    key = f"{method}_{endpoint}"
    metrics['requests_total'][key] += 1

    durations = metrics['request_duration'][key]
    durations.append(duration)
    if len(durations) > MAX_DURATION_SAMPLES:
        # Trim in place so the stored object stays the same plain list.
        del durations[:-MAX_DURATION_SAMPLES]

    if status_code >= 400:
        metrics['error_count'][str(status_code)] += 1


def record_database_query():
    """Record one database query."""
    metrics['database_queries'] += 1


def update_active_jobs(count):
    """
    Update the number of active jobs.

    Args:
        count: Number of active jobs
    """
    metrics['active_jobs'] = count


class RequestMetricsMiddleware:
    """Middleware for automatic request tracking."""

    def __init__(self, app=None):
        """Store ``app`` and, if it is given, register the hooks on it."""
        self.app = app
        if app is not None:
            self.init_app(app)

    def init_app(self, app):
        """Initialise the middleware with the Flask app."""
        app.before_request(self.before_request)
        app.after_request(self.after_request)

    def before_request(self):
        """Start the timing for the request."""
        g.start_time = time.time()

    def after_request(self, response):
        """Record metrics after the request and pass the response through."""
        # start_time is missing if before_request did not run for this request.
        if hasattr(g, 'start_time'):
            duration = time.time() - g.start_time
            record_request_metric(
                request.endpoint or 'unknown',
                request.method,
                response.status_code,
                duration
            )
        return response


# Global middleware instance
request_metrics = RequestMetricsMiddleware()