#!/usr/bin/env python3
"""AI-LSC Framework Guardrail Validator.

Runs after any agent edit to catch:
  1. Bloat - files larger than MAX_GROWTH_FACTOR x their baseline size
  2. Size - files exceeding MAX_MODULE_LINES (default 300)
  3. Subprocess leakage - UI files touching subprocess/psutil directly
  4. Parent coupling - UI files reaching into self.parent instead of protocol
  5. os.path contamination - should use pathlib
  6. Lint - ruff check (if available)

Usage:
    python3 guardrails.py              # validate the directory holding this script
    python3 guardrails.py --baseline   # snapshot current sizes
    python3 guardrails.py --fix        # auto-fix ruff issues
"""

from __future__ import annotations

import ast
import importlib.util
import json
import os
import shutil
import subprocess
import sys
from collections.abc import Iterator
from pathlib import Path

BASE_DIR = Path(__file__).resolve().parent
BASELINE_FILE = BASE_DIR / ".guardrail_baseline.json"
MAX_MODULE_LINES = 300
MAX_GROWTH_FACTOR = 2.0

# This script shells out to ruff and mentions os.path in its own messages, so
# the content-pattern checks must not scan it (it would always flag itself).
_SELF_PATH = Path(__file__).resolve()

# Directories where subprocess/psutil calls are ALLOWED
RUNTIME_ALLOWED_DIRS = {"utils", "runtime", "core", "scripts", "agents"}

# Directories where os.path usage is ALLOWED (legacy tolerance)
OSPATH_ALLOWED_DIRS = {"utils"}

# Directories where self.parent access is ALLOWED
PARENT_ALLOWED_DIRS: set[str] = set()

# Dotted ``module.attr`` names that only runtime-layer code may reference.
_DANGEROUS_CALLS = frozenset(
    {
        "subprocess.run",
        "subprocess.Popen",
        "subprocess.call",
        "threading.Thread",
        "os.system",
        "os.popen",
        "psutil.process_iter",
        "psutil.cpu_percent",
    }
)


def get_file_sizes() -> dict[str, int]:
    """Return {relative_path: line_count} for every .py file under BASE_DIR.

    Files that cannot be read or decoded as UTF-8 are recorded as -1.
    """
    sizes: dict[str, int] = {}
    for py_file in sorted(BASE_DIR.rglob("*.py")):
        rel = str(py_file.relative_to(BASE_DIR))
        try:
            # Context manager so the handle is closed even if decoding fails.
            with py_file.open(encoding="utf-8") as handle:
                sizes[rel] = sum(1 for _ in handle)
        except (OSError, UnicodeDecodeError):
            sizes[rel] = -1
    return sizes


def save_baseline(sizes: dict[str, int]) -> None:
    """Write *sizes* to BASELINE_FILE as sorted, indented JSON."""
    BASELINE_FILE.write_text(
        json.dumps(sizes, indent=2, sort_keys=True), encoding="utf-8"
    )
    print(f"Baseline saved: {len(sizes)} files tracked in {BASELINE_FILE}")


def load_baseline() -> dict[str, int]:
    """Return the saved baseline, or an empty dict when none exists."""
    if not BASELINE_FILE.exists():
        return {}
    return json.loads(BASELINE_FILE.read_text(encoding="utf-8"))


def check_bloat(sizes: dict[str, int], baseline: dict[str, int]) -> list[str]:
    """Report files larger than MAX_GROWTH_FACTOR times their baseline size.

    Files with a non-positive size on either side (unreadable, empty, or not
    present in the baseline) are skipped.
    """
    errors: list[str] = []
    for path, new_size in sizes.items():
        old_size = baseline.get(path, 0)
        if new_size <= 0 or old_size <= 0:
            continue
        if new_size > old_size * MAX_GROWTH_FACTOR:
            growth_pct = (new_size / old_size - 1) * 100
            errors.append(
                f"BLOAT: {path} grew {old_size} -> {new_size} lines "
                f"(+{growth_pct:.0f}%, limit {MAX_GROWTH_FACTOR}x)"
            )
    return errors


def check_size_limits(sizes: dict[str, int]) -> list[str]:
    """Flag files over MAX_MODULE_LINES; ``__init__.py`` files are exempt."""
    return [
        f"OVERSIZED: {path} is {size} lines "
        f"(limit {MAX_MODULE_LINES})"
        for path, size in sizes.items()
        if size > MAX_MODULE_LINES and not path.endswith("__init__.py")
    ]


def _iter_sources(allowed_dirs: set[str]) -> Iterator[tuple[str, str]]:
    """Yield (relative_path, source_text) for each .py file to be checked.

    Skips this script, any file with a path component in *allowed_dirs*,
    and files that cannot be read or decoded as UTF-8.
    """
    for py_file in sorted(BASE_DIR.rglob("*.py")):
        if py_file.resolve() == _SELF_PATH:
            continue
        rel = str(py_file.relative_to(BASE_DIR))
        if any(part in allowed_dirs for part in rel.split(os.sep)):
            continue
        try:
            source = py_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            continue
        yield rel, source


def _iter_trees(allowed_dirs: set[str]) -> Iterator[tuple[str, ast.AST]]:
    """Yield (relative_path, parsed_ast) for each checkable .py file."""
    for rel, source in _iter_sources(allowed_dirs):
        try:
            tree = ast.parse(source, filename=rel)
        except (SyntaxError, ValueError) as exc:
            # One broken file must not abort the whole validation run.
            print(f"WARNING: skipping unparsable {rel}: {exc}", file=sys.stderr)
            continue
        yield rel, tree


def check_subprocess_leakage() -> list[str]:
    """Flag files outside RUNTIME_ALLOWED_DIRS referencing a dangerous call.

    Only plain ``name.attr`` references are matched (e.g. ``subprocess.run``);
    aliased imports and ``from x import y`` usages are not detected.
    """
    errors: list[str] = []
    for rel, tree in _iter_trees(RUNTIME_ALLOWED_DIRS):
        for node in ast.walk(tree):
            if not isinstance(node, ast.Attribute):
                continue
            if not isinstance(node.value, ast.Name):
                continue
            # Use the identifier text; formatting the ast.Name node itself
            # would yield its repr and never match a pattern.
            full = f"{node.value.id}.{node.attr}"
            if full in _DANGEROUS_CALLS:
                errors.append(
                    f"SUBPROCESS_LEAK: {rel}:{node.lineno} "
                    f"calls {full} (should delegate to runtime/)"
                )
    return errors


def check_parent_coupling() -> list[str]:
    """Flag ``self.parent.<attr>`` accesses outside PARENT_ALLOWED_DIRS."""
    errors: list[str] = []
    for rel, tree in _iter_trees(PARENT_ALLOWED_DIRS):
        for node in ast.walk(tree):
            if not isinstance(node, ast.Attribute):
                continue
            owner = node.value
            if (
                isinstance(owner, ast.Attribute)
                and owner.attr == "parent"
                and isinstance(owner.value, ast.Name)
                and owner.value.id == "self"
            ):
                errors.append(
                    f"PARENT_COUPLING: {rel}:{node.lineno} "
                    f"accesses self.parent.{node.attr} "
                    f"(use MainWindowProtocol instead)"
                )
    return errors


def check_ospath_contamination() -> list[str]:
    """Flag files outside OSPATH_ALLOWED_DIRS whose text contains ``os.path.``.

    This is a plain substring count, so mentions in comments and string
    literals are counted as well.
    """
    errors: list[str] = []
    for rel, source in _iter_sources(OSPATH_ALLOWED_DIRS):
        count = source.count("os.path.")
        if count > 0:
            errors.append(
                f"OSPATH: {rel} has {count} os.path.* calls "
                f"(use pathlib.Path)"
            )
    return errors


def run_ruff(fix: bool = False) -> list[str]:
    """Run ``ruff check`` on BASE_DIR and return its findings.

    Prefers a ``ruff`` executable on PATH and falls back to ``python -m ruff``
    when only the module is installed. Returns an empty list when ruff exits
    with status 0, otherwise a single-item list carrying ruff's output (or a
    message explaining why ruff could not be run). Pass ``fix=True`` to add
    ``--fix``.
    """
    ruff_bin = shutil.which("ruff")
    if ruff_bin:
        cmd = [ruff_bin]
    elif importlib.util.find_spec("ruff") is not None:
        cmd = [sys.executable, "-m", "ruff"]
    else:
        return ["RUFF: not installed (pip install ruff)"]
    cmd += ["check", str(BASE_DIR)]
    if fix:
        cmd.append("--fix")
    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=30, check=False
        )
    except (OSError, subprocess.SubprocessError) as exc:
        return [f"RUFF: failed to run: {exc}"]
    # Judge by exit status: ruff prints a success banner on clean runs, so
    # "any output" is not a reliable failure signal.
    if result.returncode == 0:
        return []
    streams = (result.stdout.strip(), result.stderr.strip())
    output = "\n".join(text for text in streams if text)
    return [f"RUFF:\n{output or f'exit code {result.returncode}'}"]


def _tally(
    all_errors: list[str], errors: list[str], fail_msg: str, pass_msg: str
) -> None:
    """Print one check's status line and fold its errors into *all_errors*.

    *fail_msg* may contain ``{n}``, replaced by the number of errors.
    """
    if errors:
        all_errors.extend(errors)
        print(fail_msg.format(n=len(errors)))
    else:
        print(pass_msg)


def main() -> int:
    """Run every guardrail; return 0 when all pass, 1 otherwise.

    With ``--baseline`` the current file sizes are saved and nothing is
    validated. ``--fix`` is forwarded to ruff.
    """
    args = set(sys.argv[1:])
    sizes = get_file_sizes()
    if "--baseline" in args:
        save_baseline(sizes)
        return 0

    baseline = load_baseline()
    all_errors: list[str] = []
    rule = "=" * 60
    print(rule)
    print("AI-LSC Framework Guardrail Validation")
    print(rule)

    # 1. Bloat check (a missing baseline is reported as SKIP, not PASS)
    _tally(
        all_errors,
        check_bloat(sizes, baseline),
        "\n[FAIL] Bloat detection: {n} violations",
        "\n[PASS] Bloat detection: no abnormal growth"
        if baseline
        else "\n[SKIP] Bloat detection: no baseline (run with --baseline)",
    )
    # 2. Size limits
    _tally(
        all_errors,
        check_size_limits(sizes),
        "[FAIL] Size limits: {n} oversized modules",
        f"[PASS] Size limits: all modules under {MAX_MODULE_LINES} lines",
    )
    # 3. Subprocess leakage
    _tally(
        all_errors,
        check_subprocess_leakage(),
        "[FAIL] Subprocess leakage: {n} violations",
        "[PASS] Subprocess leakage: clean",
    )
    # 4. Parent coupling
    _tally(
        all_errors,
        check_parent_coupling(),
        "[FAIL] Parent coupling: {n} violations",
        "[PASS] Parent coupling: clean",
    )
    # 5. os.path contamination
    _tally(
        all_errors,
        check_ospath_contamination(),
        "[FAIL] os.path: {n} files with os.path.* calls",
        "[PASS] os.path: clean",
    )
    # 6. Ruff lint (anything still reported in --fix mode was NOT fixed)
    _tally(
        all_errors,
        run_ruff(fix="--fix" in args),
        "[FAIL] Ruff lint: see summary below",
        "[PASS] Ruff lint: clean",
    )

    # Summary
    print("\n" + rule)
    if not all_errors:
        print("RESULT: ALL GUARDRAILS PASSED")
        return 0
    print(f"RESULT: {len(all_errors)} guardrail violations")
    for entry in all_errors:
        # Truncate long (typically ruff) output to its first five lines.
        lines = entry.split("\n")
        for line in lines[:5]:
            print(f" {line}")
        if len(lines) > 5:
            print(f" ... ({len(lines) - 5} more lines)")
    return 1


if __name__ == "__main__":
    sys.exit(main())