This commit introduces a suite of tools for analyzing and optimizing imports and functions within the backend codebase. The following files have been updated:

- backend/FRONTEND_ASSETS_ANALYSE.md
- backend/REDUNDANZ_ANALYSE_FINAL.md
- backend/SOFORT_LÖSCHBARE_FUN
#!/usr/bin/env python3
"""
Import analyzer for the MYP backend.

Analyzes all Python files for:
1. Unused import statements
2. Circular imports
3. Redundant imports
4. Missing imports (reserved in the findings dict; no check is implemented yet)
"""
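
# Usage (a minimal sketch): run the script directly; main() analyzes the
# directory this file lives in and writes import_analysis_report.json next
# to it:
#
#   python import_analyzer.py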

import ast
import json
from pathlib import Path
from collections import defaultdict, Counter
from typing import Dict, Any

class ImportAnalyzer:
    def __init__(self, backend_path: str):
        self.backend_path = Path(backend_path)
        self.files_data = {}
        self.all_imports = defaultdict(set)
        self.all_usages = defaultdict(set)
        self.module_dependencies = defaultdict(set)
        self.findings = {
            'unused_imports': {},
            'circular_imports': [],
            'redundant_imports': {},
            'missing_imports': {},
            'statistics': {}
        }

    def analyze_file(self, file_path: Path) -> Dict[str, Any]:
        """Analyzes a single Python file."""
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            tree = ast.parse(content)

            # Collect imports
            imports = set()
            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    for alias in node.names:
                        imports.add(alias.name)
                elif isinstance(node, ast.ImportFrom):
                    module = node.module or ''
                    for alias in node.names:
                        if module:
                            imports.add(f"{module}.{alias.name}")
                        else:
                            imports.add(alias.name)

            # Collect names used in the code
            used_names = set()
            for node in ast.walk(tree):
                if isinstance(node, ast.Name):
                    used_names.add(node.id)
                elif isinstance(node, ast.Attribute):
                    # Collect attribute accesses
                    if isinstance(node.value, ast.Name):
                        used_names.add(f"{node.value.id}.{node.attr}")
                elif isinstance(node, ast.Call):
                    # Collect function calls
                    if isinstance(node.func, ast.Name):
                        used_names.add(node.func.id)
                    elif isinstance(node.func, ast.Attribute):
                        if isinstance(node.func.value, ast.Name):
                            used_names.add(f"{node.func.value.id}.{node.func.attr}")

            # Also search string literals for references to imports.
            # ast.Constant replaces ast.Str, which is deprecated since
            # Python 3.8 and removed in 3.12.
            string_references = set()
            for node in ast.walk(tree):
                if isinstance(node, ast.Constant) and isinstance(node.value, str):
                    # Look for import-like strings
                    for imp in imports:
                        if imp in node.value:
                            string_references.add(imp)

            return {
                'imports': imports,
                'used_names': used_names,
                'string_references': string_references,
                'content': content,
                'lines': len(content.splitlines())
            }

        except Exception as e:
            print(f"Error analyzing {file_path}: {e}")
            return {
                'imports': set(),
                'used_names': set(),
                'string_references': set(),
                'content': '',
                'lines': 0,
                'error': str(e)
            }
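
    # Illustrative example (hypothetical input): for a file containing only
    #   import os
    #   print("hello")
    # analyze_file() returns imports={'os'} and used_names={'print'}, so
    # 'os' is later flagged as unused by find_unused_imports().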

    def find_unused_imports(self):
        """Finds unused imports."""
        for file_path, data in self.files_data.items():
            if 'error' in data:
                continue

            unused = []
            imports = data['imports']
            used_names = data['used_names']
            string_refs = data['string_references']

            for imp in imports:
                # Check the various forms a usage can take
                is_used = False

                # Bare base name
                base_name = imp.split('.')[0]
                if base_name in used_names:
                    is_used = True

                # Fully qualified import name
                if imp in used_names:
                    is_used = True

                # Referenced inside a string literal
                if imp in string_refs:
                    is_used = True

                # Special cases for common patterns
                # (Flask decorators, etc.)
                if not is_used and any(pattern in data['content'] for pattern in [
                    f"@{base_name}",
                    f"'{imp}'",
                    f'"{imp}"',
                    f"{base_name}(",
                    f".{base_name}",
                ]):
                    is_used = True

                if not is_used:
                    unused.append(imp)

            if unused:
                self.findings['unused_imports'][str(file_path)] = unused

    def find_circular_imports(self):
        """Finds circular imports."""
        # Build the dependency graph
        for file_path, data in self.files_data.items():
            if 'error' in data:
                continue

            file_module = self.get_module_name(file_path)
            for imp in data['imports']:
                if self.is_local_import(imp):
                    self.module_dependencies[file_module].add(imp)

        # Search for cycles via depth-first traversal
        def has_cycle(node, path, visited):
            if node in path:
                cycle_start = path.index(node)
                cycle = path[cycle_start:] + [node]
                return cycle

            if node in visited:
                return None

            visited.add(node)
            path.append(node)

            for neighbor in self.module_dependencies.get(node, []):
                cycle = has_cycle(neighbor, path, visited)
                if cycle:
                    return cycle

            path.pop()
            return None

        # Share one visited set across all start nodes so fully explored
        # modules are not traversed again.
        visited = set()
        for module in self.module_dependencies:
            if module not in visited:
                cycle = has_cycle(module, [], visited)
                if cycle and cycle not in self.findings['circular_imports']:
                    self.findings['circular_imports'].append(cycle)
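
    # Illustrative shape of a detected cycle (hypothetical modules): a chain
    # such as utils.a -> utils.b -> utils.a is recorded as
    # ['utils.a', 'utils.b', 'utils.a'], with the start node repeated at
    # the end.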

    def find_redundant_imports(self):
        """Finds redundant imports (imported in multiple files)."""
        import_count = Counter()
        import_locations = defaultdict(list)

        for file_path, data in self.files_data.items():
            if 'error' in data:
                continue

            for imp in data['imports']:
                import_count[imp] += 1
                import_locations[imp].append(str(file_path))

        # Find imports that occur in more than one file
        for imp, count in import_count.items():
            if count > 1:
                self.findings['redundant_imports'][imp] = {
                    'count': count,
                    'files': import_locations[imp]
                }

    def get_module_name(self, file_path: Path) -> str:
        """Converts a file path to a module name."""
        rel_path = file_path.relative_to(self.backend_path)
        # Join the path parts so this also works with Windows separators
        if rel_path.name == '__init__.py':
            return '.'.join(rel_path.parent.parts)
        else:
            return '.'.join(rel_path.with_suffix('').parts)
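
    # Examples (assuming these files exist under backend_path):
    #   blueprints/auth.py  -> 'blueprints.auth'
    #   utils/__init__.py   -> 'utils'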

    def is_local_import(self, imp: str) -> bool:
        """Checks whether an import is local to the project."""
        local_prefixes = ['blueprints', 'utils', 'config', 'models']
        return any(imp.startswith(prefix) for prefix in local_prefixes)

    def run_analysis(self):
        """Runs the complete analysis."""
        print("Collecting Python files...")

        # Collect all Python files
        for file_path in self.backend_path.rglob("*.py"):
            # Skip certain directories
            if any(part in str(file_path) for part in ['__pycache__', '.git', 'node_modules', 'instance/sessions']):
                continue

            print(f"Analyzing: {file_path.relative_to(self.backend_path)}")
            self.files_data[file_path] = self.analyze_file(file_path)

        print(f"\nFiles found: {len(self.files_data)}")

        # Run the analyses
        print("Searching for unused imports...")
        self.find_unused_imports()

        print("Searching for circular imports...")
        self.find_circular_imports()

        print("Searching for redundant imports...")
        self.find_redundant_imports()

        # Statistics
        total_imports = sum(len(data.get('imports', [])) for data in self.files_data.values())
        total_lines = sum(data.get('lines', 0) for data in self.files_data.values())

        self.findings['statistics'] = {
            'total_files': len(self.files_data),
            'total_imports': total_imports,
            'total_lines': total_lines,
            'files_with_unused_imports': len(self.findings['unused_imports']),
            'total_unused_imports': sum(len(imports) for imports in self.findings['unused_imports'].values()),
            'circular_import_chains': len(self.findings['circular_imports']),
            'redundant_import_types': len(self.findings['redundant_imports'])
        }

    def print_report(self):
        """Prints a detailed report."""
        print("\n" + "=" * 80)
        print("IMPORT ANALYSIS REPORT")
        print("=" * 80)

        stats = self.findings['statistics']
        print("\nSTATISTICS:")
        print(f"  Files analyzed: {stats['total_files']}")
        print(f"  Total imports: {stats['total_imports']}")
        print(f"  Total lines: {stats['total_lines']}")
        print(f"  Files with unused imports: {stats['files_with_unused_imports']}")
        print(f"  Total unused imports: {stats['total_unused_imports']}")
        print(f"  Circular import chains: {stats['circular_import_chains']}")
        print(f"  Redundant import types: {stats['redundant_import_types']}")

        # Unused imports
        if self.findings['unused_imports']:
            print(f"\n🚨 UNUSED IMPORTS ({stats['total_unused_imports']} found):")
            print("-" * 60)
            for file_path, unused in self.findings['unused_imports'].items():
                rel_path = Path(file_path).relative_to(self.backend_path)
                print(f"\n📁 {rel_path}:")
                for i, imp in enumerate(unused, 1):
                    print(f"  {i:2d}. {imp}")

        # Circular imports
        if self.findings['circular_imports']:
            print(f"\n🔄 CIRCULAR IMPORTS ({len(self.findings['circular_imports'])} chains):")
            print("-" * 60)
            for i, cycle in enumerate(self.findings['circular_imports'], 1):
                print(f"\n{i}. Import chain:")
                for j, module in enumerate(cycle):
                    arrow = " → " if j < len(cycle) - 1 else ""
                    print(f"  {module}{arrow}")

        # Redundant imports
        if self.findings['redundant_imports']:
            print("\n📦 REDUNDANT IMPORTS (top 20):")
            print("-" * 60)
            sorted_redundant = sorted(
                self.findings['redundant_imports'].items(),
                key=lambda x: x[1]['count'],
                reverse=True
            )[:20]

            for imp, data in sorted_redundant:
                print(f"\n🔁 {imp} (used in {data['count']} files):")
                for file_path in data['files'][:5]:  # Show only the first 5
                    rel_path = Path(file_path).relative_to(self.backend_path)
                    print(f"  - {rel_path}")
                if len(data['files']) > 5:
                    print(f"  ... and {len(data['files']) - 5} more")

        # Recommendations
        print("\n💡 RECOMMENDATIONS:")
        print("-" * 60)
        if stats['total_unused_imports'] > 0:
            print(f"✂️ Remove {stats['total_unused_imports']} unused imports")
        if stats['circular_import_chains'] > 0:
            print(f"🔄 Break up {stats['circular_import_chains']} circular import chains")
        if stats['redundant_import_types'] > 10:
            print("📦 Move frequently used imports into shared modules")

        if (stats['total_unused_imports'] == 0 and
                stats['circular_import_chains'] == 0):
            print("✅ No critical import problems found!")

    def save_report(self, output_file: str = "import_analysis_report.json"):
        """Saves the report as JSON."""
        # Convert Path objects to strings for JSON
        json_findings = {}
        for key, value in self.findings.items():
            if key == 'unused_imports':
                json_findings[key] = {
                    str(Path(k).relative_to(self.backend_path)): v
                    for k, v in value.items()
                }
            else:
                json_findings[key] = value

        output_path = self.backend_path / output_file
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(json_findings, f, indent=2, ensure_ascii=False)

        print(f"\n💾 Detailed report saved: {output_path}")
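
    # Illustrative report shape (hypothetical paths and counts):
    # {
    #   "unused_imports": {"utils/foo.py": ["os"]},
    #   "circular_imports": [["utils.a", "utils.b", "utils.a"]],
    #   "redundant_imports": {"json": {"count": 12, "files": ["..."]}},
    #   "missing_imports": {},
    #   "statistics": {"total_files": 42, "...": "..."}
    # }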

def main():
    backend_path = Path(__file__).parent

    analyzer = ImportAnalyzer(str(backend_path))
    analyzer.run_analysis()
    analyzer.print_report()
    analyzer.save_report()


if __name__ == "__main__":
    main()