From e8ef850089b2fc8addd081fafe968f43853d56da Mon Sep 17 00:00:00 2001 From: Daniel Rosel Date: Sat, 31 Jan 2026 12:06:48 +0100 Subject: [PATCH] feat: introduced simple COI proxy --- engine/wrapper.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/engine/wrapper.py b/engine/wrapper.py index 0301082..7221a8a 100644 --- a/engine/wrapper.py +++ b/engine/wrapper.py @@ -3,6 +3,7 @@ from gymnasium import spaces import numpy as np from .engine import Limbo, MarketEngine, PricingEngine from .lib.render import DashboardRenderer +from .lib.coi import compute_coi_proxy class PHANTOM(gym.Env): @@ -15,11 +16,13 @@ class PHANTOM(gym.Env): N: int = 100, price_bounds: tuple = (10.0, 150.0), lambda_coi: float = 0.1, + coi_window: int = 10, render_mode: str = None): super().__init__() self.n_products = n_products self.price_bounds = price_bounds self.lambda_coi = lambda_coi + self.coi_window = coi_window # K steps for rolling COI calculation self.render_mode = render_mode self.alpha = alpha self.N = N @@ -44,15 +47,22 @@ class PHANTOM(gym.Env): self._price_history = [] self._revenue_history = [] self._renderer = None + self._initial_episode_prices = None # prices at episode start for COI calc def _get_obs(self) -> dict: demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)], dtype=np.float32) return {"demand": demand_arr, "prices": self._prices.astype(np.float32)} + def _compute_coi_proxy(self): + return compute_coi_proxy( + self._price_history, self._demand_history, self._initial_episode_prices, + self._prices, self.price_bounds, self.alpha, self.coi_window + ) + def _compute_reward(self, prices: np.ndarray, demand: dict) -> float: revenue = np.sum(prices * np.array([demand.get(i, 0.0) for i in range(self.n_products)])) - # TODO: implement supra-competitive price punishment - return float(revenue) + coi_penalty = self.lambda_coi * self._compute_coi_proxy() + return float(revenue - coi_penalty) def _record_history(self): demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)]) @@ -63,6 +73,7 @@ class PHANTOM(gym.Env): def reset(self, seed=None, options=None): super().reset(seed=seed) self._prices = np.random.uniform(*self.price_bounds, size=self.n_products) + self._initial_episode_prices = self._prices.copy() # snapshot for COI calculation self._demand = self.market.act(self._prices) self._step_count = 0 self._demand_history, self._price_history, self._revenue_history = [], [], [] @@ -75,10 +86,17 @@ class PHANTOM(gym.Env): self._step_count += 1 self._record_history() + coi_proxy = self._compute_coi_proxy() reward = self._compute_reward(self._prices, self._demand) terminated = self._step_count >= 100 - return self._get_obs(), reward, terminated, False, {"step": self._step_count} + info = { + "step": self._step_count, + "coi_proxy": coi_proxy, + "coi_penalty": self.lambda_coi * coi_proxy, + "raw_revenue": np.sum(self._prices * np.array([self._demand.get(i, 0.0) for i in range(self.n_products)])), + } + return self._get_obs(), reward, terminated, False, info def _compute_elasticity(self) -> np.ndarray: """point elasticity: e = (dQ/dP) * (P/Q) via finite differences, clipped to [-5, 5]"""