330 lines
10 KiB
Python
330 lines
10 KiB
Python
"""
Monitoring and health check module for the MYP Flask application.

Provides endpoints for system monitoring and performance metrics.
"""
|
|
|
|
from flask import Blueprint, jsonify, current_app
|
|
import psutil
|
|
import os
|
|
import sqlite3
|
|
import datetime
|
|
import threading
|
|
import time
|
|
from collections import defaultdict
|
|
|
|
# Blueprint that groups every monitoring endpoint under the /monitoring prefix
monitoring_bp = Blueprint(
    'monitoring',
    __name__,
    url_prefix='/monitoring',
)
|
|
|
|
# In-process metrics store shared by the recording helpers and the endpoints.
# The defaultdict entries create their counter / sample list on first access.
metrics = dict(
    requests_total=defaultdict(int),      # request count per "<METHOD>_<endpoint>" key
    request_duration=defaultdict(list),   # duration samples per key
    database_queries=0,                   # running total of recorded queries
    active_jobs=0,                        # last reported number of active jobs
    error_count=defaultdict(int),         # count per HTTP status code (as str)
    startup_time=datetime.datetime.now(),  # moment this module was imported
)
|
|
|
|
class HealthCheck:
    """Collection of static system health probes.

    Every probe returns a dict that always contains a ``status`` key.  On an
    unexpected exception the probe returns ``status == 'unhealthy'`` together
    with a ``message`` instead of raising.
    """

    @staticmethod
    def check_database():
        """Check that the configured SQLite database can be opened and queried.

        Reads the path from ``current_app.config['DATABASE']`` (default
        ``'instance/myp.db'``), so it must run inside an application context.

        Returns:
            dict: ``status`` (``'healthy'`` or ``'unhealthy'``) and
            ``message``; for a file-based database that answers the probe
            query also ``database_path`` and ``database_size_bytes``.
        """
        try:
            db_path = current_app.config.get('DATABASE', 'instance/myp.db')

            # In-memory database (used for tests): there is no file to inspect
            if db_path == ':memory:':
                return {'status': 'healthy', 'message': 'In-Memory-Datenbank aktiv'}

            # File-based database: the file has to exist before connecting,
            # otherwise sqlite3.connect would silently create an empty one
            if not os.path.exists(db_path):
                return {'status': 'unhealthy', 'message': 'Datenbankdatei nicht gefunden'}

            # Probe the connection; try/finally guarantees the handle is
            # released even when the probe query raises
            conn = sqlite3.connect(db_path, timeout=5)
            try:
                cursor = conn.cursor()
                cursor.execute('SELECT 1')
            finally:
                conn.close()

            db_size = os.path.getsize(db_path)

            return {
                'status': 'healthy',
                'message': 'Datenbankverbindung erfolgreich',
                'database_path': db_path,
                'database_size_bytes': db_size
            }

        except Exception as e:
            return {
                'status': 'unhealthy',
                'message': f'Datenbankfehler: {str(e)}'
            }

    @staticmethod
    def check_disk_space():
        """Check disk usage of the filesystem holding the working directory.

        Returns:
            dict: ``status`` (``'critical'`` above 90 % used, ``'warning'``
            above 80 %, otherwise ``'healthy'``) plus ``free_gb``,
            ``total_gb`` and ``used_percent``, each rounded to two decimals.
        """
        try:
            disk_usage = psutil.disk_usage('.')
            free_gb = disk_usage.free / (1024**3)
            total_gb = disk_usage.total / (1024**3)
            used_percent = (disk_usage.used / disk_usage.total) * 100

            status = 'healthy'
            if used_percent > 90:
                status = 'critical'
            elif used_percent > 80:
                status = 'warning'

            return {
                'status': status,
                'free_gb': round(free_gb, 2),
                'total_gb': round(total_gb, 2),
                'used_percent': round(used_percent, 2)
            }

        except Exception as e:
            return {
                'status': 'unhealthy',
                'message': f'Festplattenfehler: {str(e)}'
            }

    @staticmethod
    def check_memory():
        """Check system memory usage.

        Returns:
            dict: ``status`` (``'critical'`` above 90 % used, ``'warning'``
            above 80 %, otherwise ``'healthy'``) plus ``total_gb``,
            ``available_gb`` and ``used_percent``, each rounded to two
            decimals.
        """
        try:
            memory = psutil.virtual_memory()

            status = 'healthy'
            if memory.percent > 90:
                status = 'critical'
            elif memory.percent > 80:
                status = 'warning'

            return {
                'status': status,
                'total_gb': round(memory.total / (1024**3), 2),
                'available_gb': round(memory.available / (1024**3), 2),
                'used_percent': round(memory.percent, 2)
            }

        except Exception as e:
            return {
                'status': 'unhealthy',
                'message': f'Speicherfehler: {str(e)}'
            }

    @staticmethod
    def check_background_threads():
        """Check whether a job-checker background thread is alive.

        A thread counts as the job checker when its name contains the
        substring ``'job_checker'``.

        Returns:
            dict: ``status`` (``'healthy'`` when such a thread is alive,
            otherwise ``'warning'``), ``job_checker_running``,
            ``active_threads`` (names of all live threads) and
            ``thread_count``.
        """
        try:
            active_threads = [t.name for t in threading.enumerate() if t.is_alive()]
            job_checker_running = any('job_checker' in name for name in active_threads)

            return {
                'status': 'healthy' if job_checker_running else 'warning',
                'job_checker_running': job_checker_running,
                'active_threads': active_threads,
                'thread_count': len(active_threads)
            }

        except Exception as e:
            return {
                'status': 'unhealthy',
                'message': f'Thread-Fehler: {str(e)}'
            }
|
|
|
|
@monitoring_bp.route('/health')
def health_check():
    """Run every health probe and report the combined result as JSON.

    Returns:
        tuple: JSON body with the overall ``status``, an ISO ``timestamp``
        and the per-component ``checks``, plus the HTTP status code
        (200 only when the overall status is ``'healthy'``, otherwise 503).
    """
    probes = (
        ('database', HealthCheck.check_database),
        ('disk_space', HealthCheck.check_disk_space),
        ('memory', HealthCheck.check_memory),
        ('background_threads', HealthCheck.check_background_threads),
    )
    checks = {name: probe() for name, probe in probes}

    # Derive the overall status: any unhealthy component wins, otherwise a
    # warning/critical component degrades the service
    seen = {outcome['status'] for outcome in checks.values()}
    if 'unhealthy' in seen:
        overall_status = 'unhealthy'
    elif seen & {'warning', 'critical'}:
        overall_status = 'degraded'
    else:
        overall_status = 'healthy'

    payload = {
        'status': overall_status,
        'timestamp': datetime.datetime.now().isoformat(),
        'checks': checks,
    }

    if overall_status == 'healthy':
        return jsonify(payload), 200
    return jsonify(payload), 503
|
|
|
|
@monitoring_bp.route('/health/simple')
def simple_health_check():
    """Minimal liveness endpoint intended for load balancers.

    Returns:
        JSON: a constant ``status`` of ``'ok'`` and the current ISO timestamp.
    """
    now_iso = datetime.datetime.now().isoformat()
    payload = {'status': 'ok', 'timestamp': now_iso}
    return jsonify(payload)
|
|
|
|
@monitoring_bp.route('/metrics')
def get_metrics():
    """Collect system and application metrics and return them as JSON.

    Returns:
        JSON: a ``system`` section (CPU, memory, disk, uptime) and an
        ``application`` section (request counters, query total, active jobs,
        error counters, startup time).  On any failure the error is logged
        and a JSON error body is returned with HTTP status 500.
    """
    try:
        # System-level readings; interval=1 asks psutil to sample the CPU
        # over one second
        cpu_load = psutil.cpu_percent(interval=1)
        mem = psutil.virtual_memory()
        disk_stats = psutil.disk_usage('.')

        # Time elapsed since the metrics store was created
        started_at = metrics['startup_time']
        elapsed = datetime.datetime.now() - started_at

        system_section = {
            'cpu_percent': cpu_load,
            'memory_percent': mem.percent,
            'disk_percent': (disk_stats.used / disk_stats.total) * 100,
            'uptime_seconds': elapsed.total_seconds(),
        }

        # defaultdicts are copied into plain dicts for serialisation
        application_section = {
            'requests_total': dict(metrics['requests_total']),
            'database_queries_total': metrics['database_queries'],
            'active_jobs': metrics['active_jobs'],
            'error_count': dict(metrics['error_count']),
            'startup_time': metrics['startup_time'].isoformat(),
        }

        return jsonify({
            'system': system_section,
            'application': application_section,
        })

    except Exception as exc:
        current_app.logger.error(f"Fehler beim Sammeln der Metriken: {exc}")
        return jsonify({'error': 'Metriken nicht verfügbar'}), 500
|
|
|
|
@monitoring_bp.route('/info')
def get_info():
    """Return general information about the running application.

    Returns:
        JSON: application name and version, Flask environment, debug flag,
        startup time, Python version string and a small ``config`` excerpt
        (database, job check interval, security and rate-limit switches).
    """
    # Imported locally so the interpreter version comes from the documented
    # ``sys.version`` rather than ``os.sys``, which only works because the
    # os module happens to import sys internally.
    import sys

    config = current_app.config
    return jsonify({
        'application': 'MYP Backend',
        'version': '2.0.0',
        'flask_env': config.get('FLASK_ENV', 'unknown'),
        'debug': current_app.debug,
        'startup_time': metrics['startup_time'].isoformat(),
        'python_version': sys.version,
        'config': {
            'database': config.get('DATABASE'),
            'job_check_interval': config.get('JOB_CHECK_INTERVAL'),
            'security_enabled': config.get('SECURITY_ENABLED', False),
            'rate_limit_enabled': config.get('RATE_LIMIT_ENABLED', False)
        }
    })
|
|
|
|
def record_request_metric(endpoint, method, status_code, duration):
|
|
"""
|
|
Zeichnet Request-Metriken auf.
|
|
|
|
Args:
|
|
endpoint: API-Endpunkt
|
|
method: HTTP-Methode
|
|
status_code: HTTP-Status-Code
|
|
duration: Request-Dauer in Sekunden
|
|
"""
|
|
key = f"{method}_{endpoint}"
|
|
metrics['requests_total'][key] += 1
|
|
metrics['request_duration'][key].append(duration)
|
|
|
|
if status_code >= 400:
|
|
metrics['error_count'][str(status_code)] += 1
|
|
|
|
def record_database_query():
    """Count one more database query in the metrics store."""
    previous_total = metrics['database_queries']
    metrics['database_queries'] = previous_total + 1
|
|
|
|
def update_active_jobs(count):
    """Store the current number of active jobs in the metrics store.

    Args:
        count: number of active jobs; replaces the previously stored value
    """
    metrics.update({'active_jobs': count})
|
|
|
|
class RequestMetricsMiddleware:
    """Flask extension that records metrics for every request automatically.

    Can be bound at construction time (``RequestMetricsMiddleware(app)``) or
    later through :meth:`init_app`.
    """

    def __init__(self, app=None):
        """Store the app and, when one is given, register the hooks on it.

        Args:
            app: Flask application, or ``None`` to defer registration
        """
        self.app = app
        if app is not None:
            self.init_app(app)

    def init_app(self, app):
        """Register the before/after request hooks on the Flask app."""
        app.before_request(self.before_request)
        app.after_request(self.after_request)

    def before_request(self):
        """Start timing the current request."""
        from flask import g

        # Wall-clock start, kept under its original name in case other code
        # reads it
        g.start_time = time.time()
        # Separate reading used only for the duration: perf_counter is not
        # affected by system clock adjustments, unlike time.time()
        g._metrics_started = time.perf_counter()

    def after_request(self, response):
        """Record the request's metrics and pass the response through.

        Args:
            response: the response object produced for the request

        Returns:
            The same response object, unmodified.
        """
        from flask import g, request

        started = getattr(g, '_metrics_started', None)
        # Skip recording when before_request did not run for this request
        if started is not None:
            duration = time.perf_counter() - started
            record_request_metric(
                request.endpoint or 'unknown',
                request.method,
                response.status_code,
                duration
            )

        return response


# Global middleware instance; bind it to an app with request_metrics.init_app(app)
request_metrics = RequestMetricsMiddleware()