mirror of
https://github.com/velocitatem/PHANTOM.git
synced 2026-05-31 08:33:36 +00:00
feat: introduced simple COI proxy
This commit is contained in:
@@ -3,6 +3,7 @@ from gymnasium import spaces
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from .engine import Limbo, MarketEngine, PricingEngine
|
from .engine import Limbo, MarketEngine, PricingEngine
|
||||||
from .lib.render import DashboardRenderer
|
from .lib.render import DashboardRenderer
|
||||||
|
from .lib.coi import compute_coi_proxy
|
||||||
|
|
||||||
|
|
||||||
class PHANTOM(gym.Env):
|
class PHANTOM(gym.Env):
|
||||||
@@ -15,11 +16,13 @@ class PHANTOM(gym.Env):
|
|||||||
N: int = 100,
|
N: int = 100,
|
||||||
price_bounds: tuple = (10.0, 150.0),
|
price_bounds: tuple = (10.0, 150.0),
|
||||||
lambda_coi: float = 0.1,
|
lambda_coi: float = 0.1,
|
||||||
|
coi_window: int = 10,
|
||||||
render_mode: str = None):
|
render_mode: str = None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.n_products = n_products
|
self.n_products = n_products
|
||||||
self.price_bounds = price_bounds
|
self.price_bounds = price_bounds
|
||||||
self.lambda_coi = lambda_coi
|
self.lambda_coi = lambda_coi
|
||||||
|
self.coi_window = coi_window # K steps for rolling COI calculation
|
||||||
self.render_mode = render_mode
|
self.render_mode = render_mode
|
||||||
self.alpha = alpha
|
self.alpha = alpha
|
||||||
self.N = N
|
self.N = N
|
||||||
@@ -44,15 +47,22 @@ class PHANTOM(gym.Env):
|
|||||||
self._price_history = []
|
self._price_history = []
|
||||||
self._revenue_history = []
|
self._revenue_history = []
|
||||||
self._renderer = None
|
self._renderer = None
|
||||||
|
self._initial_episode_prices = None # prices at episode start for COI calc
|
||||||
|
|
||||||
def _get_obs(self) -> dict:
|
def _get_obs(self) -> dict:
|
||||||
demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)], dtype=np.float32)
|
demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)], dtype=np.float32)
|
||||||
return {"demand": demand_arr, "prices": self._prices.astype(np.float32)}
|
return {"demand": demand_arr, "prices": self._prices.astype(np.float32)}
|
||||||
|
|
||||||
|
def _compute_coi_proxy(self):
|
||||||
|
return compute_coi_proxy(
|
||||||
|
self._price_history, self._demand_history, self._initial_episode_prices,
|
||||||
|
self._prices, self.price_bounds, self.alpha, self.coi_window
|
||||||
|
)
|
||||||
|
|
||||||
def _compute_reward(self, prices: np.ndarray, demand: dict) -> float:
|
def _compute_reward(self, prices: np.ndarray, demand: dict) -> float:
|
||||||
revenue = np.sum(prices * np.array([demand.get(i, 0.0) for i in range(self.n_products)]))
|
revenue = np.sum(prices * np.array([demand.get(i, 0.0) for i in range(self.n_products)]))
|
||||||
# TODO: implement supra-competitive price punishment
|
coi_penalty = self.lambda_coi * self._compute_coi_proxy()
|
||||||
return float(revenue)
|
return float(revenue - coi_penalty)
|
||||||
|
|
||||||
def _record_history(self):
|
def _record_history(self):
|
||||||
demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)])
|
demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)])
|
||||||
@@ -63,6 +73,7 @@ class PHANTOM(gym.Env):
|
|||||||
def reset(self, seed=None, options=None):
|
def reset(self, seed=None, options=None):
|
||||||
super().reset(seed=seed)
|
super().reset(seed=seed)
|
||||||
self._prices = np.random.uniform(*self.price_bounds, size=self.n_products)
|
self._prices = np.random.uniform(*self.price_bounds, size=self.n_products)
|
||||||
|
self._initial_episode_prices = self._prices.copy() # snapshot for COI calculation
|
||||||
self._demand = self.market.act(self._prices)
|
self._demand = self.market.act(self._prices)
|
||||||
self._step_count = 0
|
self._step_count = 0
|
||||||
self._demand_history, self._price_history, self._revenue_history = [], [], []
|
self._demand_history, self._price_history, self._revenue_history = [], [], []
|
||||||
@@ -75,10 +86,17 @@ class PHANTOM(gym.Env):
|
|||||||
self._step_count += 1
|
self._step_count += 1
|
||||||
self._record_history()
|
self._record_history()
|
||||||
|
|
||||||
|
coi_proxy = self._compute_coi_proxy()
|
||||||
reward = self._compute_reward(self._prices, self._demand)
|
reward = self._compute_reward(self._prices, self._demand)
|
||||||
terminated = self._step_count >= 100
|
terminated = self._step_count >= 100
|
||||||
|
|
||||||
return self._get_obs(), reward, terminated, False, {"step": self._step_count}
|
info = {
|
||||||
|
"step": self._step_count,
|
||||||
|
"coi_proxy": coi_proxy,
|
||||||
|
"coi_penalty": self.lambda_coi * coi_proxy,
|
||||||
|
"raw_revenue": np.sum(self._prices * np.array([self._demand.get(i, 0.0) for i in range(self.n_products)])),
|
||||||
|
}
|
||||||
|
return self._get_obs(), reward, terminated, False, info
|
||||||
|
|
||||||
def _compute_elasticity(self) -> np.ndarray:
|
def _compute_elasticity(self) -> np.ndarray:
|
||||||
"""point elasticity: e = (dQ/dP) * (P/Q) via finite differences, clipped to [-5, 5]"""
|
"""point elasticity: e = (dQ/dP) * (P/Q) via finite differences, clipped to [-5, 5]"""
|
||||||
|
|||||||
Reference in New Issue
Block a user