"Feature: Add environment variables example and monitoring script"
This commit is contained in:
330
backend/monitoring.py
Normal file
330
backend/monitoring.py
Normal file
@@ -0,0 +1,330 @@
|
||||
"""
|
||||
Monitoring und Health Check Module für die MYP Flask-Anwendung.
|
||||
Bietet Endpunkte für Systemüberwachung und Performance-Metriken.
|
||||
"""
|
||||
|
||||
from flask import Blueprint, jsonify, current_app
|
||||
import psutil
|
||||
import os
|
||||
import sqlite3
|
||||
import datetime
|
||||
import threading
|
||||
import time
|
||||
from collections import defaultdict
|
||||
|
||||
# Blueprint für Monitoring-Endpunkte
|
||||
# Blueprint that groups the monitoring endpoints; its routes are mounted
# under the /monitoring URL prefix.
monitoring_bp = Blueprint('monitoring', __name__, url_prefix='/monitoring')
|
||||
|
||||
# In-process metrics store shared by the recording helpers and the
# monitoring endpoints of this module. 'startup_time' is captured once,
# when the module is imported.
metrics = dict(
    requests_total=defaultdict(int),
    request_duration=defaultdict(list),
    database_queries=0,
    active_jobs=0,
    error_count=defaultdict(int),
    startup_time=datetime.datetime.now(),
)
|
||||
|
||||
class HealthCheck:
    """Static health probes for the individual system components.

    Every probe returns a plain dict with at least a ``status`` key and never
    raises: any exception is caught and reported as ``'unhealthy'`` together
    with a ``message`` describing the error.
    """

    @staticmethod
    def _usage_status(used_percent):
        """Map a usage percentage to 'critical' (>90), 'warning' (>80) or 'healthy'."""
        if used_percent > 90:
            return 'critical'
        if used_percent > 80:
            return 'warning'
        return 'healthy'

    @staticmethod
    def check_database():
        """Check that the configured SQLite database can be opened and queried.

        The path is read from the Flask config key ``DATABASE`` (default
        ``instance/myp.db``).

        Returns:
            dict: ``status`` and ``message``; on success additionally
            ``database_path`` and ``database_size_bytes``.
        """
        try:
            db_path = current_app.config.get('DATABASE', 'instance/myp.db')

            # In-memory database (test setups): there is no file to inspect.
            if db_path == ':memory:':
                return {'status': 'healthy', 'message': 'In-Memory-Datenbank aktiv'}

            # File-based database: the file has to exist before connecting,
            # otherwise sqlite3.connect() would silently create an empty one.
            if not os.path.exists(db_path):
                return {'status': 'unhealthy', 'message': 'Datenbankdatei nicht gefunden'}

            conn = sqlite3.connect(db_path, timeout=5)
            try:
                conn.cursor().execute('SELECT 1')
            finally:
                # Close the handle even when the probe query fails, so a
                # broken database does not leak one connection per check.
                conn.close()

            return {
                'status': 'healthy',
                'message': 'Datenbankverbindung erfolgreich',
                'database_path': db_path,
                'database_size_bytes': os.path.getsize(db_path),
            }

        except Exception as e:
            return {
                'status': 'unhealthy',
                'message': f'Datenbankfehler: {e}',
            }

    @staticmethod
    def check_disk_space():
        """Check disk usage of the filesystem containing the working directory.

        Returns:
            dict: ``status`` ('healthy', 'warning' above 80 % used,
            'critical' above 90 % used) plus ``free_gb``, ``total_gb`` and
            ``used_percent``, each rounded to two decimals.
        """
        try:
            disk_usage = psutil.disk_usage('.')
            used_percent = (disk_usage.used / disk_usage.total) * 100

            return {
                'status': HealthCheck._usage_status(used_percent),
                'free_gb': round(disk_usage.free / (1024**3), 2),
                'total_gb': round(disk_usage.total / (1024**3), 2),
                'used_percent': round(used_percent, 2),
            }

        except Exception as e:
            return {
                'status': 'unhealthy',
                'message': f'Festplattenfehler: {e}',
            }

    @staticmethod
    def check_memory():
        """Check system memory usage as reported by psutil.

        Returns:
            dict: ``status`` ('healthy', 'warning' above 80 % used,
            'critical' above 90 % used) plus ``total_gb``, ``available_gb``
            and ``used_percent``, each rounded to two decimals.
        """
        try:
            memory = psutil.virtual_memory()

            return {
                'status': HealthCheck._usage_status(memory.percent),
                'total_gb': round(memory.total / (1024**3), 2),
                'available_gb': round(memory.available / (1024**3), 2),
                'used_percent': round(memory.percent, 2),
            }

        except Exception as e:
            return {
                'status': 'unhealthy',
                'message': f'Speicherfehler: {e}',
            }

    @staticmethod
    def check_background_threads():
        """Report the live threads and whether a job-checker thread is among them.

        A thread counts as the job checker when its name contains
        ``'job_checker'``.

        Returns:
            dict: ``status`` ('healthy' if such a thread is alive, otherwise
            'warning'), ``job_checker_running``, ``active_threads`` (names)
            and ``thread_count``.
        """
        try:
            active_threads = [t.name for t in threading.enumerate() if t.is_alive()]
            job_checker_running = any('job_checker' in name for name in active_threads)

            return {
                'status': 'healthy' if job_checker_running else 'warning',
                'job_checker_running': job_checker_running,
                'active_threads': active_threads,
                'thread_count': len(active_threads),
            }

        except Exception as e:
            return {
                'status': 'unhealthy',
                'message': f'Thread-Fehler: {e}',
            }
|
||||
|
||||
@monitoring_bp.route('/health')
def health_check():
    """Run every component probe and report a combined health status.

    The overall status is 'unhealthy' if any probe is unhealthy, otherwise
    'degraded' if any probe reports 'warning' or 'critical', otherwise
    'healthy'.

    Returns:
        JSON: overall ``status``, ISO ``timestamp`` and the individual
        ``checks``; HTTP 200 when healthy, HTTP 503 otherwise.
    """
    checks = {
        'database': HealthCheck.check_database(),
        'disk_space': HealthCheck.check_disk_space(),
        'memory': HealthCheck.check_memory(),
        'background_threads': HealthCheck.check_background_threads(),
    }

    # Aggregate: the worst individual status determines the overall one.
    reported = {result['status'] for result in checks.values()}
    if 'unhealthy' in reported:
        overall_status = 'unhealthy'
    elif reported & {'warning', 'critical'}:
        overall_status = 'degraded'
    else:
        overall_status = 'healthy'

    body = {
        'status': overall_status,
        'timestamp': datetime.datetime.now().isoformat(),
        'checks': checks,
    }

    if overall_status == 'healthy':
        return jsonify(body), 200
    return jsonify(body), 503
|
||||
|
||||
@monitoring_bp.route('/health/simple')
def simple_health_check():
    """Lightweight liveness endpoint intended for load balancers.

    Runs no component probes; it only confirms that the application answers.

    Returns:
        JSON: ``status`` fixed to 'ok' and the current ISO ``timestamp``.
    """
    now_iso = datetime.datetime.now().isoformat()
    payload = {'status': 'ok', 'timestamp': now_iso}
    return jsonify(payload)
|
||||
|
||||
@monitoring_bp.route('/metrics')
def get_metrics():
    """Collect system and application metrics and return them as JSON.

    Returns:
        JSON: a ``system`` section (CPU, memory and disk usage in percent,
        uptime in seconds) and an ``application`` section (request, query,
        job and error counters plus the startup time). If collecting fails,
        the error is logged and a JSON error body with HTTP 500 is returned.
    """
    try:
        # System figures; cpu_percent(interval=1) samples over one second,
        # so this endpoint takes at least that long to answer.
        cpu_load = psutil.cpu_percent(interval=1)
        ram = psutil.virtual_memory()
        volume = psutil.disk_usage('.')

        # Uptime is measured from the moment the metrics store was created.
        started_at = metrics['startup_time']
        running_for = datetime.datetime.now() - started_at

        system_section = {
            'cpu_percent': cpu_load,
            'memory_percent': ram.percent,
            'disk_percent': (volume.used / volume.total) * 100,
            'uptime_seconds': running_for.total_seconds(),
        }

        # Copy the defaultdicts into plain dicts for serialization.
        application_section = {
            'requests_total': dict(metrics['requests_total']),
            'database_queries_total': metrics['database_queries'],
            'active_jobs': metrics['active_jobs'],
            'error_count': dict(metrics['error_count']),
            'startup_time': started_at.isoformat(),
        }

        return jsonify({
            'system': system_section,
            'application': application_section,
        })

    except Exception as exc:
        current_app.logger.error(f"Fehler beim Sammeln der Metriken: {exc}")
        return jsonify({'error': 'Metriken nicht verfügbar'}), 500
|
||||
|
||||
@monitoring_bp.route('/info')
def get_info():
    """Return general information about the running application.

    Returns:
        JSON: application name and version, Flask environment and debug
        flag, startup time, Python version and a ``config`` section with
        selected configuration values.
    """
    # Imported locally because the module header does not import sys;
    # reaching it through os.sys depends on an undocumented attribute of
    # the os module.
    import sys

    config = current_app.config

    # NOTE(review): this response exposes the database path and the debug
    # flag; confirm the endpoint is not reachable by untrusted clients.
    return jsonify({
        'application': 'MYP Backend',
        'version': '2.0.0',
        'flask_env': config.get('FLASK_ENV', 'unknown'),
        'debug': current_app.debug,
        'startup_time': metrics['startup_time'].isoformat(),
        'python_version': sys.version,
        'config': {
            'database': config.get('DATABASE'),
            'job_check_interval': config.get('JOB_CHECK_INTERVAL'),
            'security_enabled': config.get('SECURITY_ENABLED', False),
            'rate_limit_enabled': config.get('RATE_LIMIT_ENABLED', False),
        },
    })
|
||||
|
||||
def record_request_metric(endpoint, method, status_code, duration, *, max_samples=1000):
    """Record one handled request in the global metrics store.

    Increments the per-``METHOD_endpoint`` request counter, stores the
    duration sample and, for status codes >= 400, increments the error
    counter keyed by the status code as a string.

    Args:
        endpoint: API endpoint name.
        method: HTTP method.
        status_code: HTTP status code of the response.
        duration: Request duration in seconds.
        max_samples: Maximum number of duration samples kept per key; the
            oldest samples are dropped first. ``None`` disables the limit.
    """
    key = f"{method}_{endpoint}"
    metrics['requests_total'][key] += 1

    durations = metrics['request_duration'][key]
    durations.append(duration)

    # Without a cap the sample list grows by one entry per request for the
    # whole lifetime of the process; keep only the most recent samples.
    if max_samples is not None:
        overflow = len(durations) - max_samples
        if overflow > 0:
            del durations[:overflow]

    if status_code >= 400:
        metrics['error_count'][str(status_code)] += 1
|
||||
|
||||
def record_database_query():
    """Count one executed database query in the global metrics store."""
    executed_so_far = metrics['database_queries']
    metrics['database_queries'] = executed_so_far + 1
|
||||
|
||||
def update_active_jobs(count):
    """Store the current number of active jobs in the global metrics store.

    Args:
        count: Number of active jobs; replaces the previously stored value.
    """
    metrics.update(active_jobs=count)
|
||||
|
||||
class RequestMetricsMiddleware:
    """Flask extension that records a metric for every handled request.

    It registers a ``before_request`` hook that starts a timer and an
    ``after_request`` hook that passes endpoint, method, status code and
    elapsed time to ``record_request_metric``.
    """

    def __init__(self, app=None):
        """Store ``app`` and, if one is given, register the hooks on it."""
        self.app = app
        if app is not None:
            self.init_app(app)

    def init_app(self, app):
        """Register the request hooks on the given Flask app."""
        app.before_request(self.before_request)
        app.after_request(self.after_request)

    def before_request(self):
        """Start timing the current request."""
        from flask import g

        # Wall-clock start, kept under its existing name for any code that
        # reads g.start_time.
        g.start_time = time.time()
        # Durations are measured with perf_counter(): unlike time.time() it
        # is not affected by system clock adjustments during the request.
        g._metrics_timer_start = time.perf_counter()

    def after_request(self, response):
        """Record the request metric and pass the response through unchanged."""
        from flask import g, request

        # The timer is missing when before_request did not run for this
        # request; in that case nothing is recorded.
        timer_start = getattr(g, '_metrics_timer_start', None)
        if timer_start is not None:
            record_request_metric(
                request.endpoint or 'unknown',
                request.method,
                response.status_code,
                time.perf_counter() - timer_start,
            )

        return response
|
||||
|
||||
# Globale Middleware-Instanz
|
||||
# Created without an app, so no hooks are registered at import time;
# call request_metrics.init_app(app) to attach it to a Flask application.
request_metrics = RequestMetricsMiddleware()
|
Reference in New Issue
Block a user