adding naive jax and libraries, and making adjustments

2026-06-01 00:53:36 +00:00 · 2026-02-17 14:48:18 +01:00
parent 66c4a0cd1d
commit 802f31b4a1
17 changed files with 2331 additions and 6 deletions
--- a/engine/lib/coi.py
+++ b/engine/lib/coi.py
@@ -0,0 +1,76 @@
+import numpy as np
+from typing import Dict
+
+
+def compute_agent_probability(
+    trajectory: list, human_transitions: Dict, agent_transitions: Dict
+) -> float:
+    """estimate agent probability via KL divergence between trajectory transitions and reference models
+
+    compares empirical trajectory transition distribution to human/agent prototypes
+
+    args:
+        trajectory: list of state/event strings from session
+        human_transitions: reference transition dict from human MDP (event->event->prob)
+        agent_transitions: reference transition dict from agent MDP (event->event->prob)
+
+    returns:
+        agent probability in [0, 1] via softmax over KL divergences;
+        0.0 when the trajectory has fewer than two events
+    """
+    if len(trajectory) < 2:
+        return 0.0  # insufficient data, assume human
+
+    # build empirical transition counts from consecutive event pairs
+    trans_counts = {}
+    for s, s_next in zip(trajectory[:-1], trajectory[1:]):
+        row = trans_counts.setdefault(s, {})
+        row[s_next] = row.get(s_next, 0) + 1
+
+    # normalize each source state's counts to probabilities
+    empirical = {}
+    for s, nxt in trans_counts.items():
+        total = sum(nxt.values())
+        empirical[s] = {s_n: cnt / total for s_n, cnt in nxt.items()}
+
+    # compute KL divergence to each prototype
+    def kl_div(p_dist: Dict, q_dist: Dict) -> float:
+        eps = 1e-10  # keeps log() finite when the reference lacks a transition
+        # aggregate over all source states in empirical dist
+        kl = 0.0
+        for s in p_dist:
+            if s not in q_dist:
+                # NOTE(review): a source state missing from the reference adds
+                # zero divergence, so a sparse reference is never penalized
+                # for it -- confirm this is intended
+                continue  # skip states not in reference
+            p_trans, q_trans = p_dist[s], q_dist[s]
+            for k in p_trans:
+                p_val = p_trans[k] + eps
+                q_val = q_trans.get(k, 0.0) + eps
+                kl += p_val * np.log(p_val / q_val)
+        return kl
+
+    kl_human = kl_div(empirical, human_transitions)
+    kl_agent = kl_div(empirical, agent_transitions)
+
+    # convert to probability via softmax (lower KL = higher prob)
+    # agent_prob = exp(-kl_agent) / (exp(-kl_human) + exp(-kl_agent))
+    #            = 1 / (1 + exp(kl_agent - kl_human))
+    # evaluated on the gap so that two large divergences cannot both underflow
+    # to 0 and collapse the result to 0.0 regardless of which model is closer
+    gap = float(kl_agent - kl_human)
+    if gap >= 0.0:
+        # exponent is <= 0 here, so exp() cannot overflow
+        z = np.exp(-gap)
+        return float(z / (1.0 + z))
+    return float(1.0 / (1.0 + np.exp(gap)))
+
+
+def extract_purchases(trajectories: list) -> Dict[int, int]:
+    """count purchases per product id from trajectories that end in a checkout
+
+    a trajectory counts as a purchase when its last event contains both
+    "checkout" and "_product"; the product id is the integer text following
+    the final "_product" in that event
+
+    args:
+        trajectories: list of event sequences; empty sequences are ignored
+
+    returns:
+        dict mapping product id -> number of trajectories ending in its checkout
+    """
+    purchases: Dict[int, int] = {}
+    for traj in trajectories:
+        if not traj:
+            continue
+        last_event = traj[-1]
+        if "checkout" not in last_event or "_product" not in last_event:
+            continue
+        id_text = last_event.rsplit("_product", 1)[1]
+        try:
+            prod_id = int(id_text)
+        except ValueError:
+            # missing or malformed id (e.g. "checkout_product"): skip this
+            # session rather than abort the count for every other session
+            continue
+        purchases[prod_id] = purchases.get(prod_id, 0) + 1
+    return purchases
+
+
+def compute_uplift_coi(
+    prices: np.ndarray, purchases: Dict[int, int], baseline_prices: np.ndarray
+) -> float:
+    """sum the positive price uplift over baseline across all purchases
+
+    args:
+        prices: prices, indexed by the keys of purchases
+        purchases: mapping of index -> purchase count
+        baseline_prices: baseline prices, indexed the same way as prices
+
+    returns:
+        sum over purchases of max(0, price - baseline) * count
+    """
+    # TODO: consider view-weighted fractional purchase for denser signal
+    uplift_total = 0
+    for prod_id, count in purchases.items():
+        markup = prices[prod_id] - baseline_prices[prod_id]
+        # prices at or below baseline contribute nothing
+        uplift_total += max(0.0, markup) * count
+    return float(uplift_total)