99 lines
2.4 KiB
Python
99 lines
2.4 KiB
Python
# analysis/failure_autopsy.py
|
|
|
|
class FailureAutopsy:
    """Build a post-mortem report for a failed action out of a journal.

    The journal object is only used through two members:

    * ``journal.events`` -- an iterable of event dicts, each carrying a
      ``"type"`` string and a ``"data"`` dict.
    * ``journal.trace_action(name)`` -- returns the events recorded for the
      action ``name``; the result is iterated and passed to ``reversed``,
      so it is expected to be a sequence such as a list.

    ``critical_path`` is optional.  When given, it is read as a mapping whose
    ``"score_map"`` entry is a container of action names.
    NOTE(review): presumably this is the output of a critical-path analysis
    defined elsewhere -- confirm against the caller.
    """

    def __init__(self, journal, critical_path=None):
        """Store the journal and the optional critical-path data."""
        self.journal = journal
        self.critical_path = critical_path

    # -----------------------------
    # FIND FAILURE EVENTS
    # -----------------------------
    def find_failures(self):
        """Return every ``execution_result`` event whose state is ``"failed"``.

        Events are returned in journal order.  An event without a ``"state"``
        key in its data is not considered a failure.
        """
        return [
            event
            for event in self.journal.events
            if event["type"] == "execution_result"
            and event["data"].get("state") == "failed"
        ]

    # -----------------------------
    # TRACE BACKWARD DEPENDENCY CHAIN
    # -----------------------------
    def trace_dependencies(self, action_name):
        """Collect the events of ``action_name`` and of everything it depends on.

        Dependencies are discovered from the ``"deps"`` list found in the data
        of each event of an action, and are followed depth-first.  Each action
        appears at most once, so dependency cycles terminate.

        Returns:
            A list of ``{"action": name, "events": events}`` dicts in
            depth-first pre-order, starting with ``action_name`` itself.
        """
        trace = []
        visited = set()

        # Explicit stack of dependency iterators instead of recursion, so a
        # very long dependency chain cannot exhaust the interpreter's
        # recursion limit.  The top iterator is resumed after its child
        # subtree has been fully explored, which reproduces the visiting
        # order of a recursive pre-order walk.
        pending = [iter((action_name,))]
        while pending:
            for name in pending[-1]:
                if name in visited:
                    continue
                visited.add(name)

                events = self.journal.trace_action(name)
                trace.append({"action": name, "events": events})

                # Descend into this action's dependencies before continuing
                # with its remaining siblings.
                pending.append(self._iter_deps(events))
                break
            else:
                # Top iterator is exhausted: this subtree is done.
                pending.pop()

        return trace

    @staticmethod
    def _iter_deps(events):
        """Lazily yield every dependency name listed by ``events``, in order."""
        for event in events:
            # A missing "deps" key and an explicit None both mean "no deps".
            yield from event["data"].get("deps") or ()

    # -----------------------------
    # GET LAST SCHEDULER DECISION
    # -----------------------------
    def last_decision(self, action_name):
        """Return the data of the latest ``schedule_decision`` for an action.

        "Latest" means last in the order returned by
        ``journal.trace_action``.  Returns ``None`` when the action has no
        such event.
        """
        events = self.journal.trace_action(action_name)
        return next(
            (
                event["data"]
                for event in reversed(events)
                if event["type"] == "schedule_decision"
            ),
            None,
        )

    # -----------------------------
    # FULL AUTOPSY REPORT
    # -----------------------------
    def report(self, action_name):
        """Build the autopsy report for ``action_name``.

        Returns:
            ``{"status": "no_failure_found", "action": ...}`` when the journal
            holds no failed execution result for the action.  Otherwise a dict
            with status ``"failure_detected"`` that carries the first matching
            failure event (in journal order), the last scheduler decision, the
            dependency trace, and whether the action is listed in the
            critical path's ``"score_map"``.
        """
        target_failure = next(
            (
                failure
                for failure in self.find_failures()
                if failure["data"].get("action") == action_name
            ),
            None,
        )
        if target_failure is None:
            return {
                "status": "no_failure_found",
                "action": action_name,
            }

        deps_trace = self.trace_dependencies(action_name)
        last_sched = self.last_decision(action_name)

        # Tolerate a missing critical path as well as a missing or None
        # score map: in all of those cases nothing is on the critical path.
        score_map = (self.critical_path or {}).get("score_map") or {}

        return {
            "status": "failure_detected",
            "action": action_name,
            "failure_event": target_failure,
            "last_scheduler_decision": last_sched,
            "dependency_trace": deps_trace,
            "on_critical_path": action_name in score_map,
        }