import gymnasium as gym from gymnasium import spaces import numpy as np import matplotlib.pyplot as plt from matplotlib.gridspec import GridSpec import matplotlib.colors as mcolors from .engine import Limbo, MarketEngine, PricingEngine class PHANTOM(gym.Env): """Gymnasium wrapper for the Limbo pricing-market simulation. Platform sets prices, market responds with demand.""" metadata = {"render_modes": ["human", "ansi"]} def __init__(self, n_products: int = 10, alpha: float = 0.3, N: int = 100, price_bounds: tuple = (10.0, 150.0), lambda_coi: float = 0.1, # coi leakage penalty weight render_mode: str = None): super().__init__() self.n_products = n_products self.price_bounds = price_bounds self.lambda_coi = lambda_coi self.render_mode = render_mode self.alpha = alpha self.N = N self.market = MarketEngine(alpha=alpha, N=N) self._platform_stub = PricingEngine() self._limbo = Limbo(self._platform_stub, self.market) # action: continuous prices for each product self.action_space = spaces.Box( low=price_bounds[0], high=price_bounds[1], shape=(n_products,), dtype=np.float32 ) # observation: demand estimate + previous prices self.observation_space = spaces.Dict({ "demand": spaces.Box(low=0.0, high=100.0, shape=(n_products,), dtype=np.float32), "prices": spaces.Box(low=price_bounds[0], high=price_bounds[1], shape=(n_products,), dtype=np.float32), }) self._prices = None self._demand = None self._step_count = 0 self._demand_history = [] self._price_history = [] self._revenue_history = [] self._fig = None self._gs = None self._dashboard_colors = { 'bg': '#f5f0e8', 'panel': '#ebe3d5', 'accent': '#c9b99a', 'text': '#3d3229', 'green': '#5c7a5c', 'red': '#8b4049', 'blue': '#5a7384', 'orange': '#b87333', 'purple': '#7d6b7d' } def _get_obs(self) -> dict: demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)], dtype=np.float32) return {"demand": demand_arr, "prices": self._prices.astype(np.float32)} def _compute_reward(self, prices: np.ndarray, demand: dict) -> float: demand_arr = np.array([demand.get(i, 0.0) for i in range(self.n_products)]) revenue = np.sum(prices * demand_arr) # revenue = price * quantity proxy base_price = self.price_bounds[0] return float(revenue)# - self.lambda_coi * coi_leak) def _record_history(self): demand_arr = np.array([self._demand.get(i, 0.0) for i in range(self.n_products)]) self._demand_history.append(demand_arr) self._price_history.append(self._prices.copy()) revenue = np.sum(self._prices * demand_arr) self._revenue_history.append(revenue) def reset(self, seed=None, options=None): super().reset(seed=seed) self._prices = np.random.uniform(*self.price_bounds, size=self.n_products) self._demand = self.market.act(self._prices) self._step_count = 0 self._demand_history, self._price_history, self._revenue_history = [], [], [] self._record_history() return self._get_obs(), {} def step(self, action: np.ndarray): self._prices = np.clip(action, *self.price_bounds) self._demand = self.market.act(self._prices) self._step_count += 1 self._record_history() reward = self._compute_reward(self._prices, self._demand) terminated = self._step_count >= 100 truncated = False return self._get_obs(), reward, terminated, truncated, {"step": self._step_count} def _compute_elasticity(self) -> np.ndarray: """point elasticity: e = (dQ/dP) * (P/Q) estimated via finite differences, clipped to [-5, 5]""" if len(self._price_history) < 2: return np.zeros(self.n_products) p = np.array(self._price_history) q = np.array(self._demand_history) dp = np.diff(p, axis=0) dq = np.diff(q, axis=0) min_dp = 0.5 # ignore tiny price changes to avoid explosions valid = np.abs(dp) > min_dp with np.errstate(divide='ignore', invalid='ignore'): elasticity = np.where(valid, (dq / dp) * (p[:-1] / np.maximum(q[:-1], 1.0)), 0.0) elasticity = np.clip(elasticity, -5.0, 5.0) elasticity = np.nan_to_num(elasticity, nan=0.0) return np.mean(elasticity, axis=0) if len(elasticity) > 0 else np.zeros(self.n_products) def _style_axis(self, ax, title: str = None, xlabel: str = None, ylabel: str = None): c = self._dashboard_colors ax.set_facecolor(c['panel']) ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False) ax.spines['bottom'].set_color(c['accent']); ax.spines['left'].set_color(c['accent']) ax.tick_params(colors=c['text'], labelsize=8) if title: ax.set_title(title, color=c['text'], fontsize=11, fontweight='bold', pad=8) if xlabel: ax.set_xlabel(xlabel, color=c['text'], fontsize=9) if ylabel: ax.set_ylabel(ylabel, color=c['text'], fontsize=9) def render(self): if self.render_mode == "human": c = self._dashboard_colors if self._fig is None: plt.ion() self._fig = plt.figure(figsize=(14, 10), facecolor=c['bg']) self._gs = GridSpec(3, 3, figure=self._fig, hspace=0.35, wspace=0.3, left=0.07, right=0.95, top=0.92, bottom=0.08) plt.show(block=False) self._fig.clear() self._fig.suptitle(f'PHANTOM Market Dynamics [t={self._step_count}, α={self.alpha:.2f}]', color=c['text'], fontsize=14, fontweight='bold') demand_mat = np.array(self._demand_history).T price_mat = np.array(self._price_history).T elasticity = self._compute_elasticity() cmap = mcolors.LinearSegmentedColormap.from_list('phantom', [c['bg'], c['blue'], c['green']]) cmap_div = mcolors.LinearSegmentedColormap.from_list('elast', [c['red'], c['bg'], c['blue']]) # price-demand elasticity scatter (all historical data points) ax_elast = self._fig.add_subplot(self._gs[0, 0]) prices_flat = np.array(self._price_history).flatten() demands_flat = np.array(self._demand_history).flatten() product_ids = np.tile(np.arange(self.n_products), len(self._price_history)) scatter = ax_elast.scatter(prices_flat, demands_flat, c=product_ids, cmap='plasma', alpha=0.6, s=15, edgecolors='none') if len(prices_flat) > 1: # fit regression line z = np.polyfit(prices_flat, demands_flat, 1) p_line = np.linspace(prices_flat.min(), prices_flat.max(), 50) ax_elast.plot(p_line, np.polyval(z, p_line), '--', color=c['red'], lw=1.5, alpha=0.8) self._style_axis(ax_elast, "Price-Demand Relationship", "Price ($)", "Demand") # elasticity coefficients bar ax_ebar = self._fig.add_subplot(self._gs[0, 1]) colors_e = [c['red'] if e < -0.5 else c['blue'] if e > 0.5 else c['accent'] for e in elasticity] ax_ebar.barh(range(self.n_products), elasticity, color=colors_e, alpha=0.8, edgecolor=c['bg']) ax_ebar.axvline(0, color=c['text'], lw=0.8, alpha=0.5) ax_ebar.axvline(-1, color=c['red'], lw=1, ls='--', alpha=0.5) # unit elastic reference ax_ebar.set_yticks(range(self.n_products)) ax_ebar.set_yticklabels([f'P{i}' for i in range(self.n_products)], fontsize=7) self._style_axis(ax_ebar, "Price Elasticity ε", "ε = (ΔQ/ΔP)·(P/Q)", None) # session composition pie ax_pie = self._fig.add_subplot(self._gs[0, 2]) n_humans, n_agents = self.market.Nhumans, self.market.Nagents ax_pie.set_facecolor(c['panel']) wedges, _ = ax_pie.pie([n_humans, n_agents], colors=[c['blue'], c['red']], startangle=90, wedgeprops={'linewidth': 2, 'edgecolor': c['bg']}) ax_pie.legend(wedges, [f'H ({n_humans})', f'A ({n_agents})'], loc='lower center', fontsize=8, frameon=False, labelcolor=c['text'], bbox_to_anchor=(0.5, -0.05)) ax_pie.set_title("Session Mix", color=c['text'], fontsize=11, fontweight='bold') # price heatmap over time ax_pheat = self._fig.add_subplot(self._gs[1, :2]) im_p = ax_pheat.imshow(price_mat, aspect='auto', cmap='viridis', origin='lower') self._style_axis(ax_pheat, "Price Heatmap P(product, t)", "Step", "Product") cbar_p = self._fig.colorbar(im_p, ax=ax_pheat, fraction=0.03, pad=0.02) cbar_p.ax.tick_params(colors=c['text'], labelsize=7) cbar_p.set_label('$', color=c['text'], fontsize=8) # demand heatmap over time ax_dheat = self._fig.add_subplot(self._gs[1, 2]) im_d = ax_dheat.imshow(demand_mat, aspect='auto', cmap=cmap, origin='lower') self._style_axis(ax_dheat, "Demand Q(product, t)", "Step", None) cbar_d = self._fig.colorbar(im_d, ax=ax_dheat, fraction=0.046, pad=0.02) cbar_d.ax.tick_params(colors=c['text'], labelsize=7) # cross-correlation matrix (price-demand covariance per product) ax_corr = self._fig.add_subplot(self._gs[2, 0]) if len(self._price_history) > 2: corr_mat = np.corrcoef(price_mat, demand_mat)[:self.n_products, self.n_products:] im_corr = ax_corr.imshow(corr_mat, cmap=cmap_div, vmin=-1, vmax=1, aspect='auto') ax_corr.set_xticks(range(self.n_products)) ax_corr.set_yticks(range(self.n_products)) ax_corr.set_xticklabels([f'Q{i}' for i in range(self.n_products)], fontsize=6) ax_corr.set_yticklabels([f'P{i}' for i in range(self.n_products)], fontsize=6) cbar_c = self._fig.colorbar(im_corr, ax=ax_corr, fraction=0.046, pad=0.02) cbar_c.ax.tick_params(colors=c['text'], labelsize=7) self._style_axis(ax_corr, "Price-Demand Correlation", None, None) # revenue curve with demand dispersion (std dev shows concentration) ax_rev = self._fig.add_subplot(self._gs[2, 1:]) n_steps = len(self._revenue_history) demand_std = [np.std(d) for d in self._demand_history] ax_rev.fill_between(range(n_steps), self._revenue_history, alpha=0.3, color=c['green']) ax_rev.plot(self._revenue_history, color=c['green'], linewidth=2, label='Revenue') ax_rev.set_xlim(0, max(n_steps, 1)) ax_rev.set_ylim(0, max(self._revenue_history) * 1.1 if self._revenue_history else 1) ax2 = ax_rev.twinx() ax2.plot(range(n_steps), demand_std, color=c['blue'], linewidth=2, ls='-', alpha=0.9, label='σ(Demand)') d_min, d_max = min(demand_std), max(demand_std) margin = (d_max - d_min) * 0.2 if d_max > d_min else 0.5 ax2.set_ylim(max(0, d_min - margin), d_max + margin) ax2.tick_params(axis='y', colors=c['blue'], labelsize=8) ax2.spines['right'].set_color(c['blue']) ax2.set_ylabel('Demand σ', color=c['blue'], fontsize=9) self._style_axis(ax_rev, "Revenue & Demand Dispersion", "Step", "Revenue ($)") ax_rev.legend(loc='upper left', fontsize=7, frameon=False, labelcolor=c['text']) ax2.legend(loc='upper right', fontsize=7, frameon=False, labelcolor=c['text']) self._fig.canvas.draw_idle() self._fig.canvas.flush_events() plt.pause(0.05) elif self.render_mode == "ansi": return f"step={self._step_count}, prices={self._prices}, demand={self._demand}" return None def close(self): if self._fig: plt.close(self._fig) self._fig = None if __name__ == "__main__": env = PHANTOM(n_products=15, alpha=0.3, N=100, render_mode="human") obs, _ = env.reset() for step in range(100): action = env.action_space.sample() obs, reward, term, trunc, info = env.step(action) env.render() if term: break env.close()