class PolicyEngine:
    """Compute a score modifier for a candidate node.

    The modifier returned by :meth:`evaluate` is the sum of three layers:
    registered static rules, a history-based heuristic, and live overrides.
    """

    def __init__(self, db):
        """Create an engine with no rules and no overrides.

        Args:
            db: Object exposing an ``events`` attribute. ``events`` is
                iterated for keys and indexed by each key to obtain an
                iterable of event dicts (presumably session id -> events;
                verify against the caller).
        """
        self.db = db
        self.rules = []
        self.overrides = {}

    # -----------------------------
    # RULE / OVERRIDE MANAGEMENT
    # -----------------------------
    def add_rule(self, rule):
        """Register a static rule.

        ``rule`` must provide ``apply(action, target, node)`` returning a
        value that can be added to the running score.
        """
        self.rules.append(rule)

    def set_override(self, key, value):
        """Set (or replace) the live override stored under ``key``."""
        self.overrides[key] = value

    def clear_override(self, key):
        """Remove the override stored under ``key``; no-op if it is absent."""
        self.overrides.pop(key, None)

    # -----------------------------
    # APPLY POLICY TO NODE SELECTION
    # -----------------------------
    def evaluate(self, action, target, node):
        """Return the total score modifier for running ``action`` on ``node``.

        Args:
            action: Mapping with a ``"name"`` entry.
            target: Passed through unchanged to every rule and layer.
            node: Mapping with a ``"name"`` entry.

        Returns:
            The sum of every rule's ``apply`` result, the heuristic score
            and the override score.
        """
        score_modifier = 0
        for rule in self.rules:
            score_modifier += rule.apply(action, target, node)
        score_modifier += self._heuristics(action, target, node)
        score_modifier += self._overrides(action, target, node)
        return score_modifier

    # -----------------------------
    # HEURISTICS (LEARNING LAYER)
    # -----------------------------
    def _heuristics(self, action, target, node):
        """Score ``node`` by how often ``action`` previously finished on it.

        Counts every recorded event whose ``"type"`` is ``"action_end"`` and
        whose ``"data"`` payload has ``"action"`` equal to the action name,
        ``"state"`` equal to ``"done"`` and ``"node"`` equal to the node
        name. Each such event is worth 2 points.

        ``target`` is accepted for signature parity with the other layers
        and is not used.
        """
        history = self.db.events
        success_count = 0
        for session_id in history:
            for event in history[session_id]:
                if event["type"] != "action_end":
                    continue
                data = event["data"]
                # Read every payload field with .get so an event that lacks
                # "action" or "state" is simply not counted instead of
                # raising KeyError and aborting the whole evaluation.
                if (
                    data.get("action") == action["name"]
                    and data.get("state") == "done"
                    and data.get("node") == node["name"]
                ):
                    success_count += 1
        return success_count * 2

    # -----------------------------
    # LIVE OVERRIDES
    # -----------------------------
    def _overrides(self, action, target, node):
        """Return the score contribution of the live overrides for ``node``.

        Adds 1000 when the ``"force_node"`` override equals the node name and
        subtracts 1000 when the ``"avoid_node"`` override equals it. A falsy
        or missing override is ignored. ``action`` and ``target`` are not
        used.
        """
        score = 0

        forced = self.overrides.get("force_node")
        if forced and node["name"] == forced:
            score += 1000

        avoid = self.overrides.get("avoid_node")
        if avoid and node["name"] == avoid:
            score -= 1000

        return score