Make demand estimation responsive and representative for COI erosion

2026-07-16 01:53:37 +00:00 · 2026-03-11 12:46:22 +01:00
parent 0f708aab15
commit fa2dde8307
7 changed files with 66 additions and 145 deletions
--- a/engine/lib/demand.py
+++ b/engine/lib/demand.py
@@ -17,18 +17,32 @@ def generate_demand_for_actor(
    params: tuple,
    noise_std: float = 1.0,
    distribution_method=np.random.normal,
+    normalize: bool = False,
 ) -> np.ndarray:
    """d(p;0) = max(0, valuation - price) + epsi for single actor type
    params: (mean, std) for valuation distribution D_H or D_A"""
    val = distribution_method(*params, size=len(prices))
    noise = distribution_method(0, noise_std, len(prices))
    demand = np.maximum(0, val - prices + noise)
+    if not normalize:
+        return demand
    total = np.sum(demand)
    return demand / total * 100 if total > 0 else demand


-def estimate_demand(trajectories, action_weights=None):
-    return estimate_weighted_demand(trajectories, action_weights)
+def estimate_demand(
+    trajectories,
+    action_weights=None,
+    *,
+    normalize: bool = False,
+    per_session: bool = True,
+):
+    return estimate_weighted_demand(
+        trajectories,
+        action_weights,
+        normalize=normalize,
+        per_session=per_session,
+    )


 def _parse_event_state(state: str):
@@ -50,7 +64,13 @@ def _weight_for_action(action: str, action_weights: dict) -> float:
    return CATEGORY_WEIGHTS["nav"]


-def estimate_weighted_demand(trajectories, action_weights=None):
+def estimate_weighted_demand(
+    trajectories,
+    action_weights=None,
+    *,
+    normalize: bool = False,
+    per_session: bool = True,
+):
    action_weights = (
        DEFAULT_ACTION_WEIGHTS if action_weights is None else action_weights
    )
@@ -64,12 +84,20 @@ def estimate_weighted_demand(trajectories, action_weights=None):
            if w <= 0:
                continue
            scores[product_id] = scores.get(product_id, 0.0) + w
-    total = sum(scores.values())
-    return (
-        {pid: (score / total) * 100 for pid, score in scores.items()}
-        if total > 0
-        else {}
-    )
+    if not scores:
+        return {}
+
+    if per_session and len(trajectories) > 0:
+        inv_n = 1.0 / float(len(trajectories))
+        scores = {pid: score * inv_n for pid, score in scores.items()}
+
+    if not normalize:
+        return scores
+
+    total = float(sum(scores.values()))
+    if total <= 0:
+        return {}
+    return {pid: (score / total) * 100.0 for pid, score in scores.items()}


 # Example usage
--- a/engine/lib/wrappers.py
+++ b/engine/lib/wrappers.py
@@ -32,17 +32,23 @@ class EconomicMetricsWrapper(gym.Wrapper):
        obs, reward, terminated, truncated, info = self.env.step(action)

        # extract from unwrapped env
-        prices = self.env.unwrapped._prices
+        quoted_prices = np.asarray(self.env.unwrapped._prices, dtype=float)
+        effective_prices = np.asarray(
+            info.get("effective_prices", quoted_prices), dtype=float
+        )
+        if effective_prices.shape != quoted_prices.shape:
+            effective_prices = quoted_prices
        demand_dict = self.env.unwrapped._demand
-        demand = np.array([demand_dict.get(i, 0.0) for i in range(len(prices))])
+        demand = np.array([demand_dict.get(i, 0.0) for i in range(len(quoted_prices))])

        # core calculations
-        revenue = float(np.sum(prices * demand))
-        avg_price = float(np.mean(prices))
+        revenue = float(info.get("revenue", np.sum(effective_prices * demand)))
+        quoted_revenue = float(np.sum(quoted_prices * demand))
+        avg_price = float(np.mean(effective_prices))
        margin = (avg_price - self.p_min) / max(avg_price, 1e-6)
        coi_level = avg_price - self.p_min  # E[P] - p_min per thesis Def 1

-        self._price_history.append(prices.copy())
+        self._price_history.append(effective_prices.copy())
        self._revenue_history.append(revenue)

        # regret vs baseline (golden path)
@@ -53,6 +59,7 @@ class EconomicMetricsWrapper(gym.Wrapper):
        # inject structured metrics into info
        info["economics"] = {
            "revenue": revenue,
+            "quoted_revenue": quoted_revenue,
            "margin": margin,
            "coi_level": coi_level,
            "regret": regret,
@@ -71,10 +78,13 @@ class EconomicMetricsWrapper(gym.Wrapper):
            "agent_prob",
            "alpha_adv",
            "alpha_nominal",
+            "erosion_share",
+            "effective_price_mean",
        ):
            if key in info:
                info["economics"][key] = info[key]
-        info["prices"] = prices.copy()
+        info["prices"] = quoted_prices.copy()
+        info["effective_prices"] = effective_prices.copy()
        info["demand"] = demand.copy()

        return obs, reward, terminated, truncated, info