# fester/analysis/failure_autopsy.py
#
# 99 lines
# 2.4 KiB
# Python

# analysis/failure_autopsy.py
class FailureAutopsy:
    """Build post-mortem reports for failed actions from a journal.

    The journal object must provide an ``events`` iterable and a
    ``trace_action(name)`` method.  Events are indexed as
    ``event["type"]`` and ``event["data"]``, and ``event["data"]`` must
    support ``.get``.

    ``critical_path`` is optional; when given, it is read as a mapping
    whose ``"score_map"`` entry is a container of action names.
    """

    def __init__(self, journal, critical_path=None):
        """Store the journal and the optional critical-path mapping."""
        self.journal = journal
        self.critical_path = critical_path

    # -----------------------------
    # FIND FAILURE EVENTS
    # -----------------------------
    def find_failures(self):
        """Return every journal event recording a failed execution.

        An event qualifies when its type is ``"execution_result"`` and
        its data carries ``state == "failed"``.  Journal order is kept.
        """
        return [
            event
            for event in self.journal.events
            if event["type"] == "execution_result"
            and event["data"].get("state") == "failed"
        ]

    # -----------------------------
    # TRACE BACKWARD DEPENDENCY CHAIN
    # -----------------------------
    def trace_dependencies(self, action_name):
        """Collect the events of an action and of everything it depends on.

        Starting at ``action_name``, follows the ``"deps"`` entries found
        in each traced event's data, depth-first, visiting every action
        at most once (so dependency cycles terminate).

        Returns a list of ``{"action": name, "events": events}`` dicts in
        depth-first pre-order, where ``events`` is whatever
        ``journal.trace_action(name)`` returned.
        """
        trace = []
        visited = set()

        # Explicit stack instead of recursion: a long dependency chain
        # must not be able to hit the interpreter's recursion limit.
        pending = [action_name]
        while pending:
            name = pending.pop()
            if name in visited:
                continue
            visited.add(name)

            events = self.journal.trace_action(name)
            trace.append({
                "action": name,
                "events": events,
            })

            # A missing or explicitly-None "deps" means no dependencies.
            deps = [
                dep
                for event in events
                for dep in (event["data"].get("deps") or ())
            ]
            # Push in reverse so the first dependency is popped first,
            # giving the same visiting order as a recursive walk.
            pending.extend(reversed(deps))

        return trace

    # -----------------------------
    # GET LAST SCHEDULER DECISION
    # -----------------------------
    def last_decision(self, action_name):
        """Return the data of the latest ``"schedule_decision"`` event.

        Scans ``journal.trace_action(action_name)`` from the end and
        returns the ``"data"`` of the first match, or ``None`` when the
        action has no such event.
        """
        events = self.journal.trace_action(action_name)
        for event in reversed(events):
            if event["type"] == "schedule_decision":
                return event["data"]
        return None

    # -----------------------------
    # FULL AUTOPSY REPORT
    # -----------------------------
    def report(self, action_name):
        """Assemble the autopsy report for ``action_name``.

        Uses the first failure event (in journal order) whose data names
        this action.  When there is none, returns
        ``{"status": "no_failure_found", "action": action_name}``.

        Otherwise returns a dict with ``status`` set to
        ``"failure_detected"``, the failure event, the last scheduler
        decision, the dependency trace, and ``on_critical_path`` (whether
        the action appears in the critical path's ``"score_map"``).
        """
        target_failure = next(
            (
                failure
                for failure in self.find_failures()
                if failure["data"].get("action") == action_name
            ),
            None,
        )
        # Explicit None check: "no match" must not be confused with a
        # matched event object that happens to be falsy.
        if target_failure is None:
            return {
                "status": "no_failure_found",
                "action": action_name,
            }

        deps_trace = self.trace_dependencies(action_name)
        last_sched = self.last_decision(action_name)

        # Tolerate both a missing critical path and a None score map.
        score_map = (self.critical_path or {}).get("score_map") or {}

        return {
            "status": "failure_detected",
            "action": action_name,
            "failure_event": target_failure,
            "last_scheduler_decision": last_sched,
            "dependency_trace": deps_trace,
            "on_critical_path": action_name in score_map,
        }