simple baselines and training setup to be refactored

2026-05-31 16:43:36 +00:00 · 2026-01-24 13:20:42 +01:00
parent 28669ea4c3
commit c5eae17924
2 changed files with 492 additions and 3 deletions
--- a/lab/case/thesis/simplified_env.py
+++ b/lab/case/thesis/simplified_env.py
@@ -39,7 +39,7 @@ class EnvConfig:
    seed: int | None = 42


-class PricingEnv:
+class PricingEnv(gym.Env if HAS_GYM else object):
    """RL environment for dynamic pricing under agent contamination.

    Implements the thesis formulation where:
@@ -204,13 +204,28 @@ class PricingEnv:
        terminated = self._t >= self.cfg.max_steps
        truncated = False

+        # compute metrics for tracking
+        revenue = float(np.dot(prices, self._demand_agg))
+        cost = float(np.dot(self._sys.costs, np.clip(self._demand_agg, 0, 1)))
+        profit = revenue - cost
+        n_agents = int(self._alpha * self.cfg.sessions_per_step)
+        price_std = float(np.std(prices))
+
        info = {
            "alpha_true": self._alpha,
            "alpha_est": self._sys.alpha,
-            "revenue": float(np.dot(prices, self._demand_agg)),
+            "alpha_error": abs(self._alpha - self._sys.alpha),
+            "revenue": revenue,
+            "profit": profit,
+            "cost": cost,
            "avg_margin": float(np.mean((prices - self._sys.costs) / self._sys.costs)),
            "n_sessions": len(demand),
-            "coi_erosion": coi_erosion(int(self._alpha * self.cfg.sessions_per_step), float(np.std(prices))),
+            "n_agents": n_agents,
+            "price_std": price_std,
+            "coi_erosion": coi_erosion(max(1, n_agents), price_std),
+            "coi_leakage": self._sys.alpha * float(np.mean(prices - self._sys.costs)),
+            "cumulative_reward": sum(self._episode_rewards),
+            "step": self._t,
        }

        return self._build_obs(), reward, terminated, truncated, info