feat: introduced simple COI proxy

This commit is contained in:
2026-01-31 12:06:48 +01:00
parent e7cb48e9cd
commit e8ef850089

View File

@@ -3,6 +3,7 @@ from gymnasium import spaces
import numpy as np import numpy as np
from .engine import Limbo, MarketEngine, PricingEngine from .engine import Limbo, MarketEngine, PricingEngine
from .lib.render import DashboardRenderer from .lib.render import DashboardRenderer
from .lib.coi import compute_coi_proxy
class PHANTOM(gym.Env): class PHANTOM(gym.Env):
@@ -15,11 +16,13 @@ class PHANTOM(gym.Env):
N: int = 100, N: int = 100,
price_bounds: tuple = (10.0, 150.0), price_bounds: tuple = (10.0, 150.0),
lambda_coi: float = 0.1, lambda_coi: float = 0.1,
coi_window: int = 10,
render_mode: str = None): render_mode: str = None):
super().__init__() super().__init__()
self.n_products = n_products self.n_products = n_products
self.price_bounds = price_bounds self.price_bounds = price_bounds
self.lambda_coi = lambda_coi self.lambda_coi = lambda_coi
self.coi_window = coi_window # K steps for rolling COI calculation
self.render_mode = render_mode self.render_mode = render_mode
self.alpha = alpha self.alpha = alpha
self.N = N self.N = N
@@ -44,15 +47,22 @@ class PHANTOM(gym.Env):
self._price_history = [] self._price_history = []
self._revenue_history = [] self._revenue_history = []
self._renderer = None self._renderer = None
self._initial_episode_prices = None # prices at episode start for COI calc
# Build the observation dict that reset()/step() hand back: a "demand" array
# with one entry per product and a "prices" array, both as float32.
# NOTE(review): every line of this method holds the left (before) and right
# (after) column of a side-by-side diff; the two columns are identical here,
# i.e. the commit does not change this method.
def _get_obs(self) -> dict: def _get_obs(self) -> dict:
# Products missing from self._demand are reported with a demand of 0.0.
demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)], dtype=np.float32) demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)], dtype=np.float32)
# astype() returns a converted copy, so self._prices keeps its original dtype.
return {"demand": demand_arr, "prices": self._prices.astype(np.float32)} return {"demand": demand_arr, "prices": self._prices.astype(np.float32)}
# Thin wrapper added by this commit: forwards the environment's state to
# compute_coi_proxy (imported from .lib.coi) and returns whatever it returns.
# Arguments passed, in order: recorded price history, recorded demand history,
# the price snapshot taken in reset(), the current prices, the configured
# price bounds, alpha, and coi_window (the rolling-window length K).
# NOTE(review): the return type/range is defined by compute_coi_proxy, which is
# not visible here; callers multiply the result by lambda_coi, so it is
# presumably a scalar — confirm against .lib.coi.
def _compute_coi_proxy(self):
return compute_coi_proxy(
self._price_history, self._demand_history, self._initial_episode_prices,
self._prices, self.price_bounds, self.alpha, self.coi_window
)
# Step reward. Revenue is the sum over products of price * demand (products
# missing from `demand` count as 0.0). After this commit (right column) the
# reward is revenue minus lambda_coi * COI proxy; before it (left column) the
# reward was the revenue alone. The result is returned as a Python float.
# NOTE(review): the penalty is derived from the environment's own state via
# self._compute_coi_proxy(), not from the `prices`/`demand` arguments.
# NOTE(review): step() calls self._compute_coi_proxy() itself right before
# calling this method, so the proxy is evaluated twice per step — consider
# computing it once and reusing the value.
def _compute_reward(self, prices: np.ndarray, demand: dict) -> float: def _compute_reward(self, prices: np.ndarray, demand: dict) -> float:
revenue = np.sum(prices * np.array([demand.get(i, 0.0) for i in range(self.n_products)])) revenue = np.sum(prices * np.array([demand.get(i, 0.0) for i in range(self.n_products)]))
# Left column: the removed TODO placeholder. Right column: the new penalty term.
# TODO: implement supra-competitive price punishment coi_penalty = self.lambda_coi * self._compute_coi_proxy()
# Left column: old revenue-only return. Right column: new penalised return.
return float(revenue) return float(revenue - coi_penalty)
def _record_history(self): def _record_history(self):
demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)]) demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)])
@@ -63,6 +73,7 @@ class PHANTOM(gym.Env):
def reset(self, seed=None, options=None): def reset(self, seed=None, options=None):
super().reset(seed=seed) super().reset(seed=seed)
self._prices = np.random.uniform(*self.price_bounds, size=self.n_products) self._prices = np.random.uniform(*self.price_bounds, size=self.n_products)
self._initial_episode_prices = self._prices.copy() # snapshot for COI calculation
self._demand = self.market.act(self._prices) self._demand = self.market.act(self._prices)
self._step_count = 0 self._step_count = 0
self._demand_history, self._price_history, self._revenue_history = [], [], [] self._demand_history, self._price_history, self._revenue_history = [], [], []
@@ -75,10 +86,17 @@ class PHANTOM(gym.Env):
self._step_count += 1 self._step_count += 1
self._record_history() self._record_history()
coi_proxy = self._compute_coi_proxy()
reward = self._compute_reward(self._prices, self._demand) reward = self._compute_reward(self._prices, self._demand)
terminated = self._step_count >= 100 terminated = self._step_count >= 100
return self._get_obs(), reward, terminated, False, {"step": self._step_count} info = {
"step": self._step_count,
"coi_proxy": coi_proxy,
"coi_penalty": self.lambda_coi * coi_proxy,
"raw_revenue": np.sum(self._prices * np.array([self._demand.get(i, 0.0) for i in range(self.n_products)])),
}
return self._get_obs(), reward, terminated, False, info
def _compute_elasticity(self) -> np.ndarray: def _compute_elasticity(self) -> np.ndarray:
"""point elasticity: e = (dQ/dP) * (P/Q) via finite differences, clipped to [-5, 5]""" """point elasticity: e = (dQ/dP) * (P/Q) via finite differences, clipped to [-5, 5]"""